From d6bb72499e434fc62f85084c9d58dcad9e9f9156 Mon Sep 17 00:00:00 2001 From: himmel Date: Mon, 25 May 2026 01:14:41 +0000 Subject: [PATCH 1/6] Dockerize ivy_mooncake for IvorySQL and add CI regression workflow - Dockerfile + .dockerignore + docker-compose.yml: build pg_mooncake on the IvorySQL 5.3 UBI8 base, bundling pg_duckdb + libduckdb, with shared_preload_libraries + duckdb GUCs wired into postgresql.conf.sample. - scripts/docker-build.sh: end-to-end helper to install docker, build the image, and smoke-test CREATE EXTENSION + mooncake.create_table. - pg_mooncake.control: rebrand comment for IvorySQL Oracle-compat distro. - .github/workflows/docker.yml: build/publish image on push/tag, PR smoke. - .github/workflows/regression.yml: PR/manual cargo pgrx regress inside the Dockerfile build stage; passes shared_preload_libraries via --postgresql-conf so pg_duckdb loads. - .gitignore: exclude developer-local docs/, alt-base Dockerfile, and the data-relocate helper that is machine-specific. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .dockerignore | 45 +++++++ .github/workflows/docker.yml | 107 ++++++++++++++++ .github/workflows/regression.yml | 143 +++++++++++++++++++++ .gitignore | 5 + Dockerfile | 213 +++++++++++++++++++++++++++---- docker-compose.yml | 45 +++++++ pg_mooncake.control | 2 +- scripts/docker-build.sh | 118 +++++++++++++++++ 8 files changed, 652 insertions(+), 26 deletions(-) create mode 100644 .dockerignore create mode 100644 .github/workflows/docker.yml create mode 100644 .github/workflows/regression.yml create mode 100644 docker-compose.yml create mode 100755 scripts/docker-build.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..fa8a3b9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,45 @@ +# Build artifacts +target/ +**/target/ +*.o +*.so +*.rlib +*.d + +# Submodule build artifacts (we rebuild inside the image) +ivy_duckdb/build/ +ivy_duckdb/third_party/duckdb/build/ +ivy_duckdb/*.so +ivy_moonlink/target/ +ivy_duckdb_mooncake/build/ + +# Cargo +.cargo/ +**/.cargo/ + +# Editor / IDE +.vscode/ +.idea/ +.devcontainer/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Spec/plan/memory files (untracked, dev-local) +docs/ + +# Other +.git/ +.github/ +.gitignore +.gitmodules + +# But keep .gitmodules — needed to know submodule layout? No, COPY explicitly +# names submodules; .gitmodules not needed at build time. 
+ +# Local test/log +*.log +/tmp/ diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..8106003 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,107 @@ +name: docker + +on: + push: + branches: [main] + tags: ['v*'] + pull_request: + branches: [main] + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository_owner }}/ivy_mooncake + IVORYSQL_BASE: registry.highgo.com/ivorysql/ivorysql:5.3-ubi8 + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout (with submodules) + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch,suffix=-ubi8 + type=ref,event=pr,suffix=-ubi8 + type=semver,pattern={{version}},suffix=-ubi8 + type=semver,pattern={{major}}.{{minor}},suffix=-ubi8 + type=sha,prefix=sha-,suffix=-ubi8 + type=raw,value=5.3-ubi8,enable={{is_default_branch}} + type=raw,value=latest,enable={{is_default_branch}} + + - name: Build and push (amd64) + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile + platforms: linux/amd64 + build-args: | + IVORYSQL_BASE=${{ env.IVORYSQL_BASE }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + smoke-test: + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + needs: build + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build image + run: | + docker build \ + --build-arg IVORYSQL_BASE=${{ env.IVORYSQL_BASE }} \ + -t ivy_mooncake:test . + + - name: Run container + run: | + docker run -d --name test \ + -e IVORYSQL_PASSWORD=password \ + ivy_mooncake:test + for i in $(seq 1 60); do + docker exec test pg_isready -U ivorysql -d postgres 2>/dev/null && { echo "ready in ${i}s"; break; } + sleep 1 + done + + - name: Verify CREATE EXTENSION pg_mooncake CASCADE + run: | + docker exec test psql -U ivorysql -d postgres -c " + CREATE EXTENSION pg_mooncake CASCADE; + SELECT extname, extversion FROM pg_extension WHERE extname IN ('pg_duckdb','pg_mooncake'); + " + + - name: Verify mooncake.create_table E2E + run: | + docker exec -i test psql -v ON_ERROR_STOP=1 -U ivorysql -d postgres <<'SQL' + CREATE TABLE t (id int PRIMARY KEY, v text); + ALTER TABLE t REPLICA IDENTITY FULL; + INSERT INTO t VALUES (1,'a'),(2,'b'); + CALL mooncake.create_table('t_mirror', 't'); + SELECT count(*) FROM t_mirror; + SQL diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml new file mode 100644 index 0000000..2d5dedd --- /dev/null +++ b/.github/workflows/regression.yml @@ -0,0 +1,143 @@ +name: regression + +# Run pgrx regression tests on every PR + manual dispatch. +# Mirrors `make test` (which calls `cargo pgrx regress --resetdb`) inside +# the project's Docker build stage so we exercise the real IvorySQL pg_config, +# pg_duckdb install, and the bgworker codepath end-to-end.
+ +on: + pull_request: + branches: [main] + workflow_dispatch: + +env: + IVORYSQL_BASE: registry.highgo.com/ivorysql/ivorysql:5.3-ubi8 + TEST_IMAGE: ivy_mooncake:regress-${{ github.run_id }} + +jobs: + regress: + runs-on: ubuntu-latest + timeout-minutes: 90 + permissions: + contents: read + steps: + - name: Checkout (with submodules) + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Build the `build` stage of the project Dockerfile. That stage ends + # with: + # - /ivy_mooncake = source tree (incl. all 3 submodules) + # - /etc/pgconfig = absolute path to IvorySQL pg_config + # - pg_duckdb + libduckdb.so installed into IvorySQL libdir/sharedir + # - rust 1.91.1 + cargo-pgrx 0.16.1 + `cargo pgrx init --pg18=$PG_CONFIG` done + # Stopping at this stage means we can run `cargo pgrx regress` from the + # same container without re-paying any build cost. + - name: Build (Dockerfile target=build) + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + target: build + load: true + tags: ${{ env.TEST_IMAGE }} + build-args: | + IVORYSQL_BASE=${{ env.IVORYSQL_BASE }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Run cargo pgrx regress (pg18) + id: regress + run: | + mkdir -p _artifacts + set -o pipefail + docker run --rm \ + -v "$PWD/_artifacts:/out" \ + -w /ivy_mooncake \ + -e RUST_BACKTRACE=1 \ + -e CARGO_TERM_COLOR=always \ + "${{ env.TEST_IMAGE }}" \ + bash -c ' + set -eux + + # PG refuses to run as root, so spin up an unprivileged user + # (tester) and have it drive cargo pgrx regress. + useradd -m -u 2000 -s /bin/bash tester + + # Share /root/.cargo and /root/.rustup with tester via chmod + # rather than cp -a / chown -R: on overlayfs chown forces a + # copy_up of every file (~1GB doubling), and a full chown of + # the build tree wastes minutes + GBs. chmod only flips + # metadata bits. 
+ chmod o+rx /root + chmod -R o+rX /root/.cargo /root/.rustup + + # target/ from the image build is root-owned + stale once + # cargo recompiles with different feature flags during regress, + # so wipe it instead of chown-recursing. + rm -rf /ivy_mooncake/target + chown -R tester:tester /ivy_mooncake + + # /out is a host-mounted volume owned by the host user; tester + # would otherwise lack write perms. + chown tester:tester /out + + PG_CONFIG="$(cat /etc/pgconfig)" + LIBDIR="$($PG_CONFIG --pkglibdir)" + SHAREDIR="$($PG_CONFIG --sharedir)" + chmod -R a+rwX "$LIBDIR" "$SHAREDIR/extension" + + su - tester -c " + set -euxo pipefail # pipefail: keep the cargo exit status from being masked by tee + # Share the root-owned cargo/rustup caches in place; per-user + # pgrx state still goes under tester home. + export CARGO_HOME=/root/.cargo + export RUSTUP_HOME=/root/.rustup + export PGRX_HOME=/home/tester/.pgrx + export PATH=/root/.cargo/bin:\$PATH + + cargo pgrx init --pg18=\"$PG_CONFIG\" + cd /ivy_mooncake + # pg_duckdb refuses to load unless it is in + # shared_preload_libraries; the IvorySQL base also expects + # liboracle_parser/ivorysql_ora at preload time. + cargo pgrx regress --resetdb \ + --postgresql-conf \"shared_preload_libraries=liboracle_parser,ivorysql_ora,pg_duckdb,pg_mooncake\" \ + --postgresql-conf \"wal_level=logical\" \ + --postgresql-conf \"duckdb.allow_community_extensions=true\" \ + 2>&1 | tee /out/regress.log + " + ' + + - name: Collect regression diffs on failure + if: failure() && steps.regress.outcome == 'failure' + run: | + docker run --rm \ + -v "$PWD/_artifacts:/out" \ + -w /ivy_mooncake \ + "${{ env.TEST_IMAGE }}" \ + bash -c ' + set -eu + # Best-effort: regression diffs land somewhere under the source + # tree depending on pgrx version, so just grab everything that + # could be useful for triage. + find . -name regression.diffs -exec cp -v {} /out/ \; 2>/dev/null || true + find . -name regression.out -exec cp -v {} /out/ \; 2>/dev/null || true + find .
-path "*/pg_regress/results/*" -exec cp -v {} /out/ \; 2>/dev/null || true + # cargo target log (if any) + find . -name "postmaster.log" -exec cp -v {} /out/ \; 2>/dev/null || true + ls -la /out/ || true + ' || true + + - name: Upload regression artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: regression-artifacts-${{ github.run_id }} + path: _artifacts/ + if-no-files-found: ignore + retention-days: 14 diff --git a/.gitignore b/.gitignore index 1613d67..c0d5a20 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,8 @@ /tests/pg_regress/regression.diffs /tests/pg_regress/regression.out /tests/pg_regress/results/ + +# Local-only: developer notes, machine-specific helpers, alt-base image +docs/ +Dockerfile.ivorysql-base +scripts/docker-relocate-data.sh diff --git a/Dockerfile b/Dockerfile index 1e76008..f16f2f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,39 +1,202 @@ -FROM postgres:18 AS build +# syntax=docker/dockerfile:1.6 +# +# ivy_mooncake — IvorySQL distribution of pg_mooncake (UBI8 base) +# +# Build: +# docker build -t ivorysql/ivy_mooncake:5.3-ubi8 . +# +# Run: +# docker run --name ivy_mooncake \ +# -e IVORYSQL_PASSWORD=password \ +# -p 5432:5432 -p 1521:1521 \ +# -v ivy_mooncake_data:/var/lib/ivorysql/data \ +# -v ivy_mooncake_warehouse:/tmp/moonlink_iceberg \ +# ivorysql/ivy_mooncake:5.3-ubi8 -RUN apt update \ - && apt install -y \ - curl \ - gcc \ - make \ - pkg-config \ - postgresql-server-dev-18 \ - && rm -rf /var/lib/apt/lists/* +ARG IVORYSQL_BASE=registry.highgo.com/ivorysql/ivorysql:5.3-ubi8 -RUN curl https://sh.rustup.rs | sh -s -- -y +# ============================================================================ +# Stage 1: build +# ============================================================================ +FROM ${IVORYSQL_BASE} AS build -ENV PATH="/root/.cargo/bin:$PATH" +USER 0 -RUN cargo install --locked cargo-pgrx@0.16.1 \ - && cargo pgrx init --pg18=$(which pg_config) +# Install build toolchain. 
UBI8 = RHEL-based, use dnf/microdnf, not apt. +RUN set -eux; \ + PKG=$(command -v dnf || command -v microdnf || command -v yum); \ + $PKG install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs \ + ca-certificates curl git which findutils \ + gcc gcc-c++ make cmake pkgconfig \ + openssl-devel readline-devel zlib-devel \ + lz4-devel libxml2-devel libpq-devel \ + libcurl-devel \ + clang clang-devel llvm-libs \ + ; \ + # ninja-build optional (in CRB/EPEL). Don't fail if absent. + $PKG install -y --enablerepo='*' ninja-build 2>/dev/null || true; \ + $PKG clean all || true; \ + rm -rf /var/cache/{dnf,yum,microdnf} 2>/dev/null || true -WORKDIR pg_mooncake +# Rust 1.91.1 + cargo-pgrx 0.16.1 (versions locked to project requirements). +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain 1.91.1 --profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" +RUN cargo install --locked cargo-pgrx@0.16.1 -COPY Cargo.toml Makefile pg_mooncake.control . -COPY ivy_moonlink ivy_moonlink -COPY src src +# Locate IvorySQL pg_config. Override at build time if auto-detect fails: +# --build-arg IVORYSQL_PG_CONFIG=/path/to/pg_config +ARG IVORYSQL_PG_CONFIG= +RUN set -eux; \ + P="${IVORYSQL_PG_CONFIG}"; \ + if [ -z "${P}" ] || [ ! -x "${P}" ]; then \ + P="$(find / -name pg_config -executable -type f 2>/dev/null | grep -i ivorysql | head -1)"; \ + fi; \ + if [ -z "${P}" ] || [ ! -x "${P}" ]; then \ + P="$(command -v pg_config || true)"; \ + fi; \ + if [ -z "${P}" ] || [ ! -x "${P}" ]; then \ + echo "ERROR: cannot locate IvorySQL pg_config" >&2; \ + find / -name pg_config -executable 2>/dev/null | head -10 >&2; \ + exit 1; \ + fi; \ + echo "PG_CONFIG=${P}"; \ + "${P}" --version; \ + echo "${P}" > /etc/pgconfig -RUN make package +RUN cargo pgrx init --pg18="$(cat /etc/pgconfig)" -FROM pgduckdb/pgduckdb:18-main +WORKDIR /ivy_mooncake -COPY --from=build /pg_mooncake/target/release/pg_mooncake-pg18/ / +# Copy manifest + 3 submodules + sources. 
+COPY Cargo.toml Cargo.lock Makefile pg_mooncake.control rust-toolchain.toml ./ +COPY ivy_moonlink ./ivy_moonlink +COPY ivy_duckdb ./ivy_duckdb +COPY ivy_duckdb_mooncake ./ivy_duckdb_mooncake +COPY src ./src -USER root +# Bypass git-dependent make rules in submodules. .dockerignore excludes the +# host's .git/, so submodules' .git pointer files break. We replace each +# submodule's .git pointer with a self-contained minimal directory that +# carries the marker files the Makefile checks. Submodule SOURCES were +# copied in by the earlier COPY steps; we only need to fake the markers. +RUN set -eux; \ + # Replace gitlink files with fake .git dirs containing expected markers. + for sm in ivy_duckdb ivy_moonlink ivy_duckdb_mooncake; do \ + rm -f "${sm}/.git"; \ + mkdir -p "${sm}/.git/modules/third_party/duckdb"; \ + touch "${sm}/.git/modules/third_party/duckdb/HEAD"; \ + done; \ + # ivy_duckdb's nested duckdb sub-submodule also needs the same. + if [ -d ivy_duckdb/third_party/duckdb ]; then \ + rm -f ivy_duckdb/third_party/duckdb/.git; \ + mkdir -p ivy_duckdb/third_party/duckdb/.git; \ + touch ivy_duckdb/third_party/duckdb/.git/HEAD; \ + fi; \ + # Sanity: duckdb sources must be present (host pre-ran `git submodule + # update --init --recursive` before docker build). + test -f ivy_duckdb/third_party/duckdb/CMakeLists.txt \ + || (echo "ERROR: ivy_duckdb/third_party/duckdb/CMakeLists.txt missing." >&2; \ + echo "Run on host first: git submodule update --init --recursive" >&2; \ + exit 1) -RUN cat >> /usr/share/postgresql/postgresql.conf.sample <&2; exit 1); \ + test -d "${SHAREDIR}/extension" || (echo "ERROR: ${SHAREDIR}/extension missing" >&2; exit 1); \ + cp -av /build_output/lib/*.so "${LIBDIR}/"; \ + cp -av /build_output/share/extension/* "${SHAREDIR}/extension/"; \ + rm -rf /build_output; \ + # Append required PG configuration so initdb / restart picks it up. 
+ SAMPLE="${SHAREDIR}/postgresql.conf.sample"; \ + test -f "${SAMPLE}" || (echo "ERROR: ${SAMPLE} missing" >&2; exit 1); \ + cat >> "${SAMPLE}" <<'EOF' + +# ---- ivy_mooncake configuration (added by Dockerfile) ---- +# liboracle_parser + ivorysql_ora come from the IvorySQL base. +# pg_duckdb + pg_mooncake added by this image. Order matters. +shared_preload_libraries = 'liboracle_parser,ivorysql_ora,pg_duckdb,pg_mooncake' wal_level = logical +duckdb.allow_community_extensions = true EOF -USER postgres +# Force UTF8 locale for initdb. pg_duckdb refuses to install on SQL_ASCII +# databases (its install SQL checks current_setting('server_encoding')). +# Use C.UTF-8 — universal, no langpack package needed (vs en_US.UTF-8 which +# requires glibc-langpack-en on UBI8). +ENV LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + LC_COLLATE=C.UTF-8 \ + LC_CTYPE=C.UTF-8 + +# Mooncake iceberg warehouse + temp dir (volume-mount in production). +ENV MOONCAKE_WAREHOUSE=/var/lib/ivorysql/mooncake +RUN mkdir -p "${MOONCAKE_WAREHOUSE}" /tmp/moonlink_temp_file \ + && chmod 0777 "${MOONCAKE_WAREHOUSE}" /tmp/moonlink_temp_file + +# Switch back to the highgo image's runtime user. +# Adjust if your base uses a different uid/name. +USER ivorysql diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6908f82 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,45 @@ +# docker-compose.yml — quick start for ivy_mooncake on UBI8 +# +# docker compose up -d --build +# docker compose exec ivy_mooncake psql -U ivorysql -d postgres +# +# Base: registry.highgo.com/ivorysql/ivorysql:5.3-ubi8 + +services: + ivy_mooncake: + build: + context: . + dockerfile: Dockerfile + args: + IVORYSQL_BASE: registry.highgo.com/ivorysql/ivorysql:5.3-ubi8 + image: ivorysql/ivy_mooncake:5.3-ubi8 + container_name: ivy_mooncake + restart: unless-stopped + environment: + IVORYSQL_PASSWORD: password + # Force UTF8 (pg_duckdb refuses SQL_ASCII). 
+ LANG: C.UTF-8 + LC_ALL: C.UTF-8 + # Set Oracle compatible_mode default at initdb (adjust per highgo base conventions). + IVORY_COMPATIBLE_MODE: oracle + ports: + - "5432:5432" # PG protocol + - "1521:1521" # IvorySQL Oracle listen + volumes: + # PG data dir — contains the heap + PGDATA/pg_mooncake/{moonlink.sock, _wal/...} + - ivy_mooncake_data:/var/lib/ivorysql/data + # Iceberg warehouse — mooncake's default mirror table storage. Path is + # hardcoded in ivy_moonlink as /tmp/moonlink_iceberg unless overridden + # per-table via the table_config JSON 4th arg of mooncake.create_table. + # Mounting it persists mirror data across container rebuilds. + - ivy_mooncake_warehouse:/tmp/moonlink_iceberg + shm_size: 256mb + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ivorysql -d postgres"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + ivy_mooncake_data: + ivy_mooncake_warehouse: diff --git a/pg_mooncake.control b/pg_mooncake.control index 81ec641..e096d5a 100644 --- a/pg_mooncake.control +++ b/pg_mooncake.control @@ -1,4 +1,4 @@ -comment = 'Real-time analytics on Postgres tables' +comment = 'Real-time analytics on Postgres tables with IvorySQL Oracle-compat support' default_version = '@CARGO_VERSION@' module_pathname = 'pg_mooncake' relocatable = false diff --git a/scripts/docker-build.sh b/scripts/docker-build.sh new file mode 100755 index 0000000..2762f62 --- /dev/null +++ b/scripts/docker-build.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# scripts/docker-build.sh — install docker (if missing) + build ivy_mooncake image +set -euo pipefail + +IMAGE="ivorysql/ivy_mooncake:5.3-ubi8" +BASE="registry.highgo.com/ivorysql/ivorysql:5.3-ubi8" + +# ---------- 1. install docker if missing ---------- +if ! 
command -v docker >/dev/null 2>&1; then + echo "==> docker not found, installing" + if command -v apt-get >/dev/null 2>&1; then + # Debian/Ubuntu + sudo apt-get update + sudo apt-get install -y ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/$(. /etc/os-release && echo "$ID")/gpg \ + | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + OS_ID=$(. /etc/os-release && echo "$ID") + OS_CODENAME=$(. /etc/os-release && echo "${VERSION_CODENAME:-$UBUNTU_CODENAME}") + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \ +https://download.docker.com/linux/${OS_ID} ${OS_CODENAME} stable" \ + | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + elif command -v dnf >/dev/null 2>&1; then + # RHEL/Rocky/Fedora + sudo dnf install -y dnf-plugins-core + sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo + sudo dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + elif command -v yum >/dev/null 2>&1; then + sudo yum install -y yum-utils + sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo + sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin + else + echo "ERROR: unsupported package manager. Install docker manually." >&2 + exit 1 + fi + sudo systemctl enable --now docker + sudo usermod -aG docker "$USER" || true + echo "==> docker installed. You may need to logout/login for group changes; using sudo for this script." +fi + +# Prefix all docker commands with sudo if current user not in docker group. 
+if id -nG "$USER" | grep -qw docker; then + DOCKER="docker" +else + DOCKER="sudo docker" + echo "==> using sudo for docker (re-login as $USER to use docker without sudo)" +fi + +$DOCKER --version + +# ---------- 2. login to highgo registry ---------- +echo "==> logging in to registry.highgo.com" +if ! $DOCKER pull "${BASE}" >/dev/null 2>&1; then + echo "Cannot pull ${BASE} without auth." + echo "Please run: $DOCKER login registry.highgo.com" + echo "Enter highgo registry credentials when prompted, then re-run this script." + $DOCKER login registry.highgo.com +fi + +# ---------- 3. ensure submodules ---------- +cd "$(dirname "$0")/.." +if [ ! -f ivy_duckdb/Makefile ] || [ ! -f ivy_moonlink/Cargo.toml ] || [ ! -d ivy_duckdb_mooncake ]; then + echo "==> initializing submodules" + git submodule update --init --recursive +fi + +# ---------- 4. build ---------- +echo "==> building ${IMAGE}" +$DOCKER build \ + --build-arg "IVORYSQL_BASE=${BASE}" \ + -t "${IMAGE}" \ + -f Dockerfile \ + . + +# ---------- 5. 
smoke test ---------- +echo "==> smoke test" +$DOCKER rm -f ivy_mooncake_smoke 2>/dev/null || true +CT=$($DOCKER run -d --name ivy_mooncake_smoke \ + -e IVORYSQL_PASSWORD=password \ + "${IMAGE}") +trap 'echo "==> container logs:"; $DOCKER logs ivy_mooncake_smoke 2>&1 | tail -50; $DOCKER stop ivy_mooncake_smoke >/dev/null 2>&1 || true' EXIT + +echo "==> waiting for postgres" +for i in $(seq 1 60); do + if $DOCKER exec "$CT" pg_isready -U ivorysql -d postgres >/dev/null 2>&1; then + echo " ready in ${i}s" + break + fi + sleep 1 +done + +echo "==> CREATE EXTENSION test" +$DOCKER exec "$CT" psql -U ivorysql -d postgres -c " +CREATE EXTENSION pg_mooncake CASCADE; +SELECT extname, extversion FROM pg_extension + WHERE extname IN ('pg_duckdb','pg_mooncake'); +" + +echo "==> mirror E2E test" +$DOCKER exec -i "$CT" psql -U ivorysql -d postgres <<'SQL' +CREATE TABLE t (id int PRIMARY KEY, v text); +ALTER TABLE t REPLICA IDENTITY FULL; +INSERT INTO t VALUES (1,'a'),(2,'b'); +CALL mooncake.create_table('t_mirror', 't'); +SELECT count(*) AS mirror_rows FROM t_mirror; +SQL + +echo "==> DONE: image '${IMAGE}' built and verified." +echo " Run: docker run --rm \\" +echo " -e IVORYSQL_PASSWORD=password \\" +echo " -p 5432:5432 -p 1521:1521 \\" +echo " -v ivy_mooncake_data:/var/lib/ivorysql/data \\" +echo " -v ivy_mooncake_warehouse:/tmp/moonlink_iceberg \\" +echo " ${IMAGE}" +echo " Or: docker compose up -d --build" From f3d56b225101735c61ff40091ca12a0391846973 Mon Sep 17 00:00:00 2001 From: himmel Date: Tue, 26 May 2026 06:57:01 +0000 Subject: [PATCH 2/6] Add pg_mooncake.enable_bgworker GUC to control bgworker registration Postmaster-context bool GUC gating whether the moonlink background worker is registered at PG startup. When 'off', pg_mooncake.so still loads (DDL functions remain) but no bgworker spawns. Default 'on' preserves existing behaviour. 
Use cases: - Run pg_mooncake.so for the schema/function bridge only, without paying the cost of moonlink_service when mirror tables aren't used. - Disable bgworker quickly when troubleshooting stuck mirror state, without rebuilding with the 'bgworker' Cargo feature disabled or removing pg_mooncake from shared_preload_libraries. - Single switch downstream packagers can toggle. The GUC is Postmaster context because RegisterBackgroundWorker is only legal during postmaster startup (shared_preload_libraries load). The check runs immediately before BackgroundWorkerBuilder::load() and skips registration entirely when the GUC is off; no register_failed warning on subsequent LOAD attempts. Verified end-to-end on IvorySQL 5.3 (PG 18 base): - default (on): bgworker process appears, moonlink.sock created - pg_mooncake.enable_bgworker=off: bgworker absent, pg_duckdb-only queries still work, SHOW reports off - toggling back to on across PG restarts respects the new value Co-Authored-By: Claude Opus 4.7 (1M context) --- src/bgworker.rs | 7 +++++++ src/guc.rs | 30 ++++++++++++++++++++++++++++++ src/lib.rs | 8 +++++++- 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 src/guc.rs diff --git a/src/bgworker.rs b/src/bgworker.rs index fa1882b..94eb346 100644 --- a/src/bgworker.rs +++ b/src/bgworker.rs @@ -6,6 +6,13 @@ use pgrx::prelude::*; use std::time::Duration; pub(crate) fn init() { + // pg_mooncake.enable_bgworker GUC toggle. When off, skip registration + // so the moonlink bgworker never starts. Mirror tables won't work, but + // pg_duckdb-only queries still do. Takes effect on PG restart (the GUC + // is Postmaster context — RegisterBackgroundWorker is only legal here).
+ if !crate::guc::ENABLE_BGWORKER.get() { + return; + } BackgroundWorkerBuilder::new("moonlink") .set_library("pg_mooncake") .set_function("moonlink_main") diff --git a/src/guc.rs b/src/guc.rs new file mode 100644 index 0000000..e7fbfb4 --- /dev/null +++ b/src/guc.rs @@ -0,0 +1,30 @@ +// Custom GUCs registered by pg_mooncake. +// +// Postmaster context: changes only take effect on PG restart. This is +// required for `enable_bgworker` because RegisterBackgroundWorker can only +// be called from `_PG_init` running during shared_preload_libraries load. + +use pgrx::{GucContext, GucFlags, GucRegistry, GucSetting}; + +/// `pg_mooncake.enable_bgworker` — controls whether the moonlink background +/// worker is registered at postmaster startup. +/// +/// `on` (default): register and start moonlink, enabling mirror tables. +/// `off`: skip registration; pg_mooncake.so still loads but no bgworker +/// runs. Mirror functions error out, but pg_duckdb-only queries (external +/// data sources, postgres_scan, etc.) keep working. +pub(crate) static ENABLE_BGWORKER: GucSetting<bool> = GucSetting::<bool>::new(true); + +pub(crate) fn init() { + GucRegistry::define_bool_guc( + c"pg_mooncake.enable_bgworker", + c"Whether to start the moonlink background worker", + c"If off, pg_mooncake loads without registering the moonlink \ + background worker. Mirror tables will not function; \ + pg_duckdb-only queries still work.
Takes effect on PG restart \ + (Postmaster context).", + &ENABLE_BGWORKER, + GucContext::Postmaster, + GucFlags::default(), + ); +} diff --git a/src/lib.rs b/src/lib.rs index f3b1a4d..97a0059 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ mod bgworker; mod duckdb_mooncake; mod functions; +#[cfg(feature = "bgworker")] +mod guc; mod table; mod utils; @@ -13,6 +15,10 @@ extension_sql_file!("./sql/bootstrap.sql", bootstrap); #[pg_guard] extern "C-unwind" fn _PG_init() { #[cfg(feature = "bgworker")] - bgworker::init(); + { + // Register GUCs first so bgworker::init can read pg_mooncake.enable_bgworker. + guc::init(); + bgworker::init(); + } table::init(); } From 5ea58a0790c4de105ca557bfaa45dd0349ce062d Mon Sep 17 00:00:00 2001 From: himmel Date: Tue, 26 May 2026 06:57:01 +0000 Subject: [PATCH 3/6] docker: translate IVY_* env vars to postgresql.conf via entrypoint shim Adds /usr/local/bin/ivy-apply-env.sh + ivy-entrypoint-shim.sh and a /docker-entrypoint-initdb.d/00-ivy-apply-env.sh hook. Together they idempotently apply IVY_* environment variables as PG GUC settings on every container start; first-time initdb is handled by the init.d hook so subsequent restarts and first launches behave the same. First env -> GUC mapping: IVY_MOONCAKE_ENABLE_BGWORKER -> pg_mooncake.enable_bgworker To add a new tunable, edit ivy-apply-env.sh and add another '[ -n "${IVY_FOO:-}" ] && apply foo.bar "${IVY_FOO}"' line; no Dockerfile or rebuild logic change needed. 
Usage: docker run -e IVY_MOONCAKE_ENABLE_BGWORKER=off ivy_mooncake:5.3-ubi8 Verified end-to-end: - default (no env): pg_mooncake.enable_bgworker=on (compiled default) - IVY_MOONCAKE_ENABLE_BGWORKER=off: conf populated, bgworker absent - toggle on/off across container restarts on shared PGDATA volume: postgresql.conf is overwritten (sed -i removes prior key) and the new value takes effect on the next PG startup Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/Dockerfile b/Dockerfile index f16f2f8..ca84200 100644 --- a/Dockerfile +++ b/Dockerfile @@ -197,6 +197,66 @@ ENV MOONCAKE_WAREHOUSE=/var/lib/ivorysql/mooncake RUN mkdir -p "${MOONCAKE_WAREHOUSE}" /tmp/moonlink_temp_file \ && chmod 0777 "${MOONCAKE_WAREHOUSE}" /tmp/moonlink_temp_file +# ---------- env -> postgresql.conf shim ---------- +# Translate IVY_* environment variables into PG GUC settings at container +# start time. Lets users toggle pg_mooncake.* GUCs (and any future setting +# we expose) via plain `-e KEY=VAL` without rebuilding the image or editing +# postgresql.conf on a bind mount. +# +# Two trigger points: +# 1. ivy-entrypoint-shim.sh runs on every container start before exec'ing +# the base entrypoint. It applies env -> conf if postgresql.conf exists +# (i.e. PGDATA already initialized). +# 2. /docker-entrypoint-initdb.d/00-ivy-apply-env.sh runs after the base +# entrypoint's initdb on first start (when postgresql.conf is freshly +# created). It calls the same logic so the first start also honors +# env vars. +RUN set -eux; \ + cat > /usr/local/bin/ivy-apply-env.sh <<'APPLY' +#!/usr/bin/env bash +# Idempotently apply IVY_* env vars to $PGDATA/postgresql.conf. +# Safe to call repeatedly; replaces existing key with new value. 
+set -euo pipefail + +CONF="${PGDATA:-/var/local/ivorysql/ivorysql-5/data}/postgresql.conf" +[ -f "$CONF" ] || exit 0 # PGDATA not initialized yet, nothing to do + +apply() { + local key="$1" val="$2" + # Drop any prior active (uncommented) assignment of this key, then append the fresh value + sed -i "\\|^[[:space:]]*${key}[[:space:]]*=|d" "$CONF" + echo "${key} = ${val}" >> "$CONF" + echo "ivy-apply-env: ${key} = ${val}" >&2 +} + +[ -n "${IVY_MOONCAKE_ENABLE_BGWORKER:-}" ] && \ + apply pg_mooncake.enable_bgworker "${IVY_MOONCAKE_ENABLE_BGWORKER}" +# Add further IVY_* -> GUC mappings here as new tunables surface. + +exit 0 +APPLY +RUN set -eux; \ + cat > /usr/local/bin/ivy-entrypoint-shim.sh <<'SHIM' +#!/usr/bin/env bash +set -e +/usr/local/bin/ivy-apply-env.sh +exec /usr/local/bin/docker-entrypoint.sh "$@" +SHIM +RUN set -eux; \ + mkdir -p /docker-entrypoint-initdb.d; \ + cat > /docker-entrypoint-initdb.d/00-ivy-apply-env.sh <<'INITDB' +#!/usr/bin/env bash +# Runs after the base entrypoint's initdb on first container start. +/usr/local/bin/ivy-apply-env.sh +INITDB +RUN set -eux; \ + chmod 0755 /usr/local/bin/ivy-apply-env.sh \ + /usr/local/bin/ivy-entrypoint-shim.sh \ + /docker-entrypoint-initdb.d/00-ivy-apply-env.sh + +ENTRYPOINT ["/usr/local/bin/ivy-entrypoint-shim.sh"] +CMD ["postgres"] + # Switch back to the highgo image's runtime user. # Adjust if your base uses a different uid/name.
USER ivorysql From ed94a85c8ef5f52692d9d4685b67dd854f176b75 Mon Sep 17 00:00:00 2001 From: himmel Date: Wed, 27 May 2026 08:29:21 +0000 Subject: [PATCH 4/6] Add mooncake.bgworker_status() diagnostic function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returns a JSON object with three liveness signals: - guc_enabled: pg_mooncake.enable_bgworker setting value - socket_exists: whether $PGDATA/pg_mooncake/moonlink.sock exists - socket_listening: probe result of UnixStream::connect() to the socket Disambiguates "SHOW pg_mooncake.enable_bgworker = on" but mirror operations still fail: socket may exist as a stale leftover from a prior bgworker run that has since panicked, leaving 'socket_exists=true socket_listening=false' — the exact state behind the "mooncake_duckdb_cpp_init ... Connection refused (os error 111)" error. The probe runs in a worker thread with a 250ms hard ceiling so a hung socket cannot block the calling PG backend. Adds serde_json = "1" dependency. 
Verified end-to-end:
- default (GUC on, bgworker healthy): all three true
- GUC=off + restart: guc_enabled=false, socket_listening=false
- toggle back to on: all true

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Cargo.toml       |  1 +
 src/functions.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index 6896c90..c2230d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,6 +28,7 @@ pgrx = "0.16.1"
 postgres.git = "https://github.com/Mooncake-Labs/rust-postgres.git"
 postgres-native-tls.git = "https://github.com/Mooncake-Labs/rust-postgres.git"
 regex = "1"
+serde_json = "1"
 tokio = "1.48"
 
 [profile.release]
diff --git a/src/functions.rs b/src/functions.rs
index 6fbd0e2..05d4f51 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -230,3 +230,60 @@ fn create_mooncake_table(dst: &str, dst_uri: &str, src: &str, src_uri: &str) {
         .simple_query(&create_table_query)
         .unwrap_or_else(|_| panic!("error creating table: {dst}"));
 }
+
+/// Diagnostic helper for moonlink bgworker liveness.
+///
+/// Returns a JSON object with `guc_enabled`, `socket_path`, `socket_exists`
+/// and `socket_listening`, combining the GUC value with filesystem-level
+/// evidence. Useful when `SHOW pg_mooncake.enable_bgworker` reports `on` but
+/// mirror operations fail because the bgworker died or never bound the socket.
+#[cfg(feature = "bgworker")]
+#[pg_extern(sql = "
+CREATE FUNCTION mooncake.bgworker_status() RETURNS json LANGUAGE c AS 'MODULE_PATHNAME', '@FUNCTION_NAME@';
+")]
+fn bgworker_status() -> pgrx::Json {
+    use std::os::unix::net::UnixStream as StdUnixStream;
+    use std::path::Path;
+    use std::time::Duration;
+
+    let guc_enabled = crate::guc::ENABLE_BGWORKER.get();
+
+    // Same relative path as utils::get_stream() — resolved against the PG
+    // process working directory, which is $PGDATA at runtime.
+    let socket_path = "pg_mooncake/moonlink.sock";
+
+    let socket_exists = Path::new(socket_path).exists();
+
+    // Probe with a real connect(): a stale socket file left behind by a dead
+    // bgworker still exists on disk but refuses connections. std's
+    // `UnixStream` offers no `connect_timeout` (unlike `TcpStream`), so the
+    // attempt runs on a helper thread below and is abandoned after a fixed
+    // ceiling instead of being allowed to block this backend.
+    let socket_listening = if socket_exists {
+        // Run the connect on its own thread so a stalled attempt cannot hold
+        // up the caller. If the ceiling is hit, the thread is left detached
+        // and the probe is reported as not listening.
+        let path = socket_path.to_string();
+        let handle = std::thread::spawn(move || StdUnixStream::connect(&path).is_ok());
+        // Poll every 10ms for up to 250ms, then give up on the probe.
+        let start = std::time::Instant::now();
+        loop {
+            if handle.is_finished() {
+                break handle.join().unwrap_or(false);
+            }
+            if start.elapsed() > Duration::from_millis(250) {
+                break false;
+            }
+            std::thread::sleep(Duration::from_millis(10));
+        }
+    } else {
+        false
+    };
+
+    pgrx::Json(serde_json::json!({
+        "guc_enabled": guc_enabled,
+        "socket_path": socket_path,
+        "socket_exists": socket_exists,
+        "socket_listening": socket_listening,
+    }))
+}
From 1a90727a0f6ceda735bbe8dd373e278c7ada049b Mon Sep 17 00:00:00 2001
From: himmel
Date: Fri, 29 May 2026 07:58:03 +0000
Subject: [PATCH 5/6] Add mooncake.list_orphan_slots() and drop_orphan_slots() helpers

Diagnose and force-clean orphan moonlink_slot_* replication slots and
the moonlink_pub publication left behind when a mooncake table or
extension is dropped without the bgworker cleaning up (bgworker dead,
crash, or prior install). Complements the automatic DROP EXTENSION
cleanup for the paths it cannot reach: retroactive orphans, DROP TABLE
orphans, and bgworker-recreate race residue.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
 src/functions.rs | 252 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 252 insertions(+)

diff --git a/src/functions.rs b/src/functions.rs
index 05d4f51..a6fdf30 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -287,3 +287,255 @@ fn bgworker_status() -> pgrx::Json {
         "socket_listening": socket_listening,
     }))
 }
+
+/// Enumerate moonlink_slot_* replication slots and flag orphans.
+///
+/// A slot is considered orphan when:
+///   - it lives in the current database, AND
+///   - that database has zero tables using the mooncake table access method
+///     (so no mirror table could possibly reference it).
+///
+/// Slots belonging to other databases are reported with NULL counts and
+/// is_orphan=false because their mooncake AM table count is not visible
+/// from this connection.
+///
+/// Usage:
+///   SELECT * FROM mooncake.list_orphan_slots();
+///   SELECT * FROM mooncake.list_orphan_slots() WHERE is_orphan;
+///
+/// Pair with `SELECT pg_drop_replication_slot(slot_name) ...` to clean up.
+#[pg_extern(sql = "
+CREATE FUNCTION mooncake.list_orphan_slots() RETURNS TABLE(
+    slot_name text,
+    slot_database text,
+    mirror_tables_count integer,
+    is_orphan boolean
+) LANGUAGE c AS 'MODULE_PATHNAME', '@FUNCTION_NAME@';
+")]
+fn list_orphan_slots() -> TableIterator<
+    'static,
+    (
+        name!(slot_name, String),
+        name!(slot_database, Option<String>),
+        name!(mirror_tables_count, Option<i32>),
+        name!(is_orphan, bool),
+    ),
+> {
+    let rows: Vec<(String, Option<String>, Option<i32>, bool)> = Spi::connect(|client| {
+        let q = "
+            WITH local_mooncake_count AS (
+                SELECT count(*)::int AS cnt
+                FROM pg_class c
+                JOIN pg_am a ON c.relam = a.oid
+                WHERE a.amname = 'mooncake'
+            )
+            SELECT
+                s.slot_name::text AS slot_name,
+                s.database::text AS slot_database,
+                CASE WHEN s.database = current_database()
+                     THEN lm.cnt
+                     ELSE NULL
+                END AS mirror_tables_count,
+                CASE WHEN s.database = current_database() AND lm.cnt = 0
+                     THEN true
+                     ELSE false
+                END AS is_orphan
+            FROM pg_replication_slots s
+            CROSS JOIN local_mooncake_count lm
+            WHERE s.slot_name LIKE 'moonlink_slot_%'
+            ORDER BY s.slot_name
+        ";
+
+        client
+            .select(q, None, &[])
+            .expect("error querying moonlink slots")
+            .map(|row| {
+                let slot_name: String = row
+                    .get(1)
+                    .expect("error reading slot_name")
+                    .expect("slot_name is null");
+                let slot_database: Option<String> =
+                    row.get(2).expect("error reading slot_database");
+                let mirror_tables_count: Option<i32> =
+                    row.get(3).expect("error reading mirror_tables_count");
+                let is_orphan: bool = row
+                    .get(4)
+                    .expect("error reading is_orphan")
+                    .unwrap_or(false);
+                (slot_name, slot_database, mirror_tables_count, is_orphan)
+            })
+            .collect()
+    });
+
+    TableIterator::new(rows.into_iter())
+}
+
+/// Force-drop orphan moonlink replication slots in the current database.
+///
+/// For each `moonlink_slot_*` whose database is the current database AND
+/// the database has zero mooncake-AM tables:
+///   1. Terminate the active client (if any) via pg_terminate_backend,
+///      since pg_drop_replication_slot refuses to drop an active slot.
+///   2. Drop the slot via pg_drop_replication_slot.
+///   3. After processing slots, drop `moonlink_pub` if it still exists
+///      and there are no mooncake tables remaining (best-effort, ignored
+///      on failure).
+///
+/// Intended use: right after `CREATE EXTENSION pg_mooncake CASCADE` on a
+/// database where a prior install left behind orphan slots. Calling this
+/// before the moonlink bgworker has a chance to attach to the old slot
+/// avoids the recovery panic loop.
+///
+/// Returns one row per slot acted on. The `publication_dropped` column
+/// is true only on the LAST row, reflecting whether moonlink_pub was
+/// also cleaned. If no orphans existed but the publication did, a single
+/// synthetic row with empty `slot_name` is emitted.
+#[pg_extern(sql = "
+CREATE FUNCTION mooncake.drop_orphan_slots() RETURNS TABLE(
+    slot_name text,
+    terminated_active_pid integer,
+    publication_dropped boolean
+) LANGUAGE c AS 'MODULE_PATHNAME', '@FUNCTION_NAME@';
+")]
+fn drop_orphan_slots() -> TableIterator<
+    'static,
+    (
+        name!(slot_name, String),
+        name!(terminated_active_pid, Option<i32>),
+        name!(publication_dropped, bool),
+    ),
+> {
+    let rows: Vec<(String, Option<i32>, bool)> = Spi::connect_mut(|client| {
+        // 1) Decide whether this database has any mooncake-AM tables.
+        let no_mooncake_tables: bool = client
+            .select(
+                "SELECT count(*)::int = 0
+                 FROM pg_class c JOIN pg_am a ON c.relam = a.oid
+                 WHERE a.amname = 'mooncake'",
+                None,
+                &[],
+            )
+            .expect("checking mooncake tables")
+            .first()
+            .get::<bool>(1)
+            .expect("reading no_mooncake_tables")
+            .unwrap_or(false);
+
+        // 2) Pull the orphan slot list with active_pid for termination.
+        let orphans: Vec<(String, Option<i32>)> = if no_mooncake_tables {
+            client
+                .select(
+                    "SELECT slot_name::text, active_pid
+                     FROM pg_replication_slots
+                     WHERE slot_name LIKE 'moonlink_slot_%'
+                       AND database = current_database()",
+                    None,
+                    &[],
+                )
+                .expect("listing orphan slots")
+                .map(|row| {
+                    let name: String = row
+                        .get(1)
+                        .expect("reading slot_name")
+                        .expect("slot_name is null");
+                    let active_pid: Option<i32> = row.get(2).expect("reading active_pid");
+                    (name, active_pid)
+                })
+                .collect()
+        } else {
+            // Database still has mooncake tables — refuse to nuke slots.
+            Vec::new()
+        };
+
+        let mut out: Vec<(String, Option<i32>, bool)> = Vec::new();
+        let total = orphans.len();
+
+        for (i, (slot, active_pid)) in orphans.into_iter().enumerate() {
+            // Terminate the active replication client first if needed.
+            if let Some(pid) = active_pid {
+                let _ = client.update(
+                    &format!("SELECT pg_terminate_backend({pid})"),
+                    None,
+                    &[],
+                );
+            }
+
+            // Drop the slot. The name is embedded as an escaped SQL literal
+            // rather than bound as a parameter. NOTE(review): a server-side
+            // ERROR here may abort the call rather than return Err — confirm.
+            let drop_sql = format!(
+                "SELECT pg_drop_replication_slot({})",
+                quote_string_literal(&slot),
+            );
+            if let Err(e) = client.update(&drop_sql, None, &[]) {
+                pgrx::warning!("failed to drop slot {}: {}", slot, e);
+            }
+
+            // On the last orphan, also drop the publication if it exists
+            // (only safe when no mooncake tables remain, which is already
+            // implied by no_mooncake_tables=true).
+            let is_last = i == total - 1;
+            let publication_dropped = if is_last {
+                attempt_drop_publication(client)
+            } else {
+                false
+            };
+
+            out.push((slot, active_pid, publication_dropped));
+        }
+
+        // Edge case: no orphan slots but publication still lingers.
+        if out.is_empty() && no_mooncake_tables {
+            if attempt_drop_publication(client) {
+                out.push((String::new(), None, true));
+            }
+        }
+
+        out
+    });
+
+    TableIterator::new(rows.into_iter())
+}
+
+/// Helper: drop `moonlink_pub` if present; return whether it was dropped.
+fn attempt_drop_publication(client: &mut pgrx::spi::SpiClient<'_>) -> bool {
+    let exists: bool = client
+        .select(
+            "SELECT count(*)::int > 0 FROM pg_publication WHERE pubname = 'moonlink_pub'",
+            None,
+            &[],
+        )
+        .map(|t| {
+            t.first()
+                .get::<bool>(1)
+                .ok()
+                .flatten()
+                .unwrap_or(false)
+        })
+        .unwrap_or(false);
+
+    if exists {
+        if let Err(e) = client.update("DROP PUBLICATION IF EXISTS moonlink_pub", None, &[]) {
+            pgrx::warning!("failed to drop publication moonlink_pub: {}", e);
+            false
+        } else {
+            true
+        }
+    } else {
+        false
+    }
+}
+
+/// Wrap `s` in single quotes, doubling embedded ones, for `pg_drop_replication_slot('<name>')`.
+fn quote_string_literal(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 2);
+    out.push('\'');
+    for ch in s.chars() {
+        if ch == '\'' {
+            out.push('\'');
+        }
+        out.push(ch);
+    }
+    out.push('\'');
+    out
+}
From 5cc171333127625df4a0db7541b8a314e07cec36 Mon Sep 17 00:00:00 2001
From: himmel
Date: Fri, 29 May 2026 08:59:47 +0000
Subject: [PATCH 6/6] Clean up moonlink slots + publication on DROP EXTENSION

Add a disowned (non-member) sql_drop event trigger that fires on
DROP EXTENSION pg_mooncake, drops leftover moonlink_slot_* replication
slots and the moonlink_pub publication in the current database
(best-effort, WARNING on failure), then self-removes.

A member sql_drop trigger does not fire for the command that drops its
own extension, so the trigger is created and immediately disowned via
ALTER EXTENSION ... DROP. Cleanup is pure SQL and does not depend on the
moonlink bgworker.

Add tests/pg_regress/zz_drop_cleanup (sorts last, no recreate) verifying
the trigger installs, is a non-member, fires on drop, and self-removes.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
 src/sql/bootstrap.sql                         | 70 +++++++++++++++++++
 tests/pg_regress/expected/zz_drop_cleanup.out | 43 ++++++++++++
 tests/pg_regress/sql/zz_drop_cleanup.sql      | 28 ++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 tests/pg_regress/expected/zz_drop_cleanup.out
 create mode 100644 tests/pg_regress/sql/zz_drop_cleanup.sql

diff --git a/src/sql/bootstrap.sql b/src/sql/bootstrap.sql
index eb04c76..54fa024 100644
--- a/src/sql/bootstrap.sql
+++ b/src/sql/bootstrap.sql
@@ -1,3 +1,73 @@
 CREATE SCHEMA mooncake;
 
 SELECT duckdb.install_extension('mooncake', 'community');
+
+-- DROP EXTENSION cleanup watcher.
+--
+-- A member sql_drop trigger does NOT fire for the command that drops its own
+-- extension. So this trigger is created and then DISOWNED via
+-- ALTER EXTENSION ... DROP, making it a non-member that survives — and fires
+-- on — DROP EXTENSION pg_mooncake. It lives in `public` because the `mooncake`
+-- schema is removed during the same drop.
+CREATE FUNCTION public.mooncake_extension_drop_cleanup() RETURNS event_trigger
+LANGUAGE plpgsql AS $mooncake_drop_cleanup$
+DECLARE
+    is_mooncake_drop boolean := false;
+    slot record;
+BEGIN
+    -- Act only when pg_mooncake itself is among the dropped objects. EXISTS
+    -- always yields true/false; a plain SELECT ... INTO would set the flag to
+    -- NULL on zero rows, and IF NOT NULL would fall through to the cleanup.
+    is_mooncake_drop := EXISTS (SELECT 1 FROM pg_event_trigger_dropped_objects()
+        WHERE object_type = 'extension' AND object_name = 'pg_mooncake');
+
+    IF NOT is_mooncake_drop THEN
+        RETURN;
+    END IF;
+
+    -- Drop moonlink replication slots left behind in the current database.
+    -- The active holder (if any) is the moonlink walsender; terminate it
+    -- first since pg_drop_replication_slot refuses an active slot.
+    FOR slot IN
+        SELECT slot_name, active_pid
+        FROM pg_replication_slots
+        WHERE slot_name LIKE 'moonlink_slot_%'
+          AND database = current_database()
+    LOOP
+        BEGIN
+            IF slot.active_pid IS NOT NULL THEN
+                PERFORM pg_terminate_backend(slot.active_pid);
+            END IF;
+            PERFORM pg_drop_replication_slot(slot.slot_name);
+        EXCEPTION WHEN OTHERS THEN
+            RAISE WARNING 'pg_mooncake cleanup: failed to drop replication slot %: %',
+                slot.slot_name, SQLERRM;
+        END;
+    END LOOP;
+
+    -- Drop the moonlink publication if it still exists.
+    BEGIN
+        DROP PUBLICATION IF EXISTS moonlink_pub;
+    EXCEPTION WHEN OTHERS THEN
+        RAISE WARNING 'pg_mooncake cleanup: failed to drop publication moonlink_pub: %',
+            SQLERRM;
+    END;
+
+    -- Self-remove: not extension members, so they must clean themselves up.
+    BEGIN
+        DROP EVENT TRIGGER IF EXISTS mooncake_extension_drop_trigger;
+        DROP FUNCTION IF EXISTS public.mooncake_extension_drop_cleanup();
+    EXCEPTION WHEN OTHERS THEN
+        RAISE WARNING 'pg_mooncake cleanup: failed to self-remove drop trigger: %',
+            SQLERRM;
+    END;
+END;
+$mooncake_drop_cleanup$;
+
+CREATE EVENT TRIGGER mooncake_extension_drop_trigger ON sql_drop
+    EXECUTE FUNCTION public.mooncake_extension_drop_cleanup();
+
+-- Disown so the trigger/function survive and fire on DROP EXTENSION.
+-- Legal mid-CREATE-EXTENSION (the extension catalog row already exists).
+ALTER EXTENSION pg_mooncake DROP EVENT TRIGGER mooncake_extension_drop_trigger;
+ALTER EXTENSION pg_mooncake DROP FUNCTION public.mooncake_extension_drop_cleanup();
diff --git a/tests/pg_regress/expected/zz_drop_cleanup.out b/tests/pg_regress/expected/zz_drop_cleanup.out
new file mode 100644
index 0000000..26d3c2c
--- /dev/null
+++ b/tests/pg_regress/expected/zz_drop_cleanup.out
@@ -0,0 +1,43 @@
+-- pg_mooncake DROP EXTENSION cleanup watcher.
+-- Verifies the independent (non-member) sql_drop trigger is installed,
+-- is NOT an extension member, and self-removes when pg_mooncake is dropped.
+-- (Without the moonlink bgworker no replication slots exist, so the slot/pub +-- cleanup is a no-op here; this test covers the trigger mechanism itself.) +-- 1. Watcher event trigger is installed. +SELECT count(*) AS watcher_triggers +FROM pg_event_trigger +WHERE evtname = 'mooncake_extension_drop_trigger'; + watcher_triggers +------------------ + 1 +(1 row) + +-- 2. Watcher function is NOT an extension member (deptype 'e'). +SELECT count(*) AS watcher_is_member +FROM pg_depend d +JOIN pg_extension e ON e.oid = d.refobjid AND e.extname = 'pg_mooncake' +JOIN pg_proc p ON p.oid = d.objid +WHERE d.deptype = 'e' AND p.proname = 'mooncake_extension_drop_cleanup'; + watcher_is_member +------------------- + 0 +(1 row) + +-- 3. Dropping the extension fires the watcher, which self-removes. +DROP EXTENSION pg_mooncake CASCADE; +SELECT count(*) AS watcher_after_drop +FROM pg_event_trigger +WHERE evtname = 'mooncake_extension_drop_trigger'; + watcher_after_drop +-------------------- + 0 +(1 row) + +SELECT count(*) AS watcher_fn_after_drop +FROM pg_proc +WHERE proname = 'mooncake_extension_drop_cleanup'; + watcher_fn_after_drop +----------------------- + 0 +(1 row) + diff --git a/tests/pg_regress/sql/zz_drop_cleanup.sql b/tests/pg_regress/sql/zz_drop_cleanup.sql new file mode 100644 index 0000000..a66d8ef --- /dev/null +++ b/tests/pg_regress/sql/zz_drop_cleanup.sql @@ -0,0 +1,28 @@ +-- pg_mooncake DROP EXTENSION cleanup watcher. +-- Verifies the independent (non-member) sql_drop trigger is installed, +-- is NOT an extension member, and self-removes when pg_mooncake is dropped. +-- (Without the moonlink bgworker no replication slots exist, so the slot/pub +-- cleanup is a no-op here; this test covers the trigger mechanism itself.) + +-- 1. Watcher event trigger is installed. +SELECT count(*) AS watcher_triggers +FROM pg_event_trigger +WHERE evtname = 'mooncake_extension_drop_trigger'; + +-- 2. Watcher function is NOT an extension member (deptype 'e'). 
+SELECT count(*) AS watcher_is_member +FROM pg_depend d +JOIN pg_extension e ON e.oid = d.refobjid AND e.extname = 'pg_mooncake' +JOIN pg_proc p ON p.oid = d.objid +WHERE d.deptype = 'e' AND p.proname = 'mooncake_extension_drop_cleanup'; + +-- 3. Dropping the extension fires the watcher, which self-removes. +DROP EXTENSION pg_mooncake CASCADE; + +SELECT count(*) AS watcher_after_drop +FROM pg_event_trigger +WHERE evtname = 'mooncake_extension_drop_trigger'; + +SELECT count(*) AS watcher_fn_after_drop +FROM pg_proc +WHERE proname = 'mooncake_extension_drop_cleanup';