From 375cded7b65797579d2432a19189a7106aa593bb Mon Sep 17 00:00:00 2001
From: Gregory Pereira
Date: Thu, 14 May 2026 18:06:41 -0400
Subject: [PATCH 1/2] build and publish a cpu container for the master store

Signed-off-by: Gregory Pereira
---
Review notes (free-form area, ignored by git-am, not part of the commit):

  * docker/Dockerfile.master: both stages set
    SHELL ["/bin/bash", "-o", "pipefail", "-c"] and download get-pip.py with
    `curl -fsSL`, so a failed download aborts the RUN step instead of being
    hidden behind the exit status of the python end of the pipe.
  * docker/Dockerfile.master: DEBIAN_FRONTEND is a stage-local ARG in the
    runtime stage so it is not persisted into the image environment.
  * .github/workflows/publish-image.yaml: "$GITHUB_OUTPUT" is quoted.
  * The blob ids on the `index` lines were generated before these edits;
    regenerate the series with git format-patch after applying.
  * NOTE(review): the runtime stage has no USER instruction, so the image
    runs as root - confirm whether mooncake_master can run unprivileged.
  * NOTE(review): a workflow_dispatch run also pushes the :latest tag -
    confirm that this is intended.

 .github/workflows/publish-image.yaml |  57 ++++++++++++
 deploy/k8s/mooncake-master.yaml      | 129 +++++++++++++++++++++++++++
 docker/Dockerfile.master             | 111 +++++++++++++++++++++++
 3 files changed, 297 insertions(+)
 create mode 100644 .github/workflows/publish-image.yaml
 create mode 100644 deploy/k8s/mooncake-master.yaml
 create mode 100644 docker/Dockerfile.master

diff --git a/.github/workflows/publish-image.yaml b/.github/workflows/publish-image.yaml
new file mode 100644
index 0000000000..b885916608
--- /dev/null
+++ b/.github/workflows/publish-image.yaml
@@ -0,0 +1,57 @@
+name: Publish Container Image
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch: {}
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  publish-master:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Free up disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/lib/android
+          df -h
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version from tag
+        id: version
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            echo "image_tag=${{ github.sha }}" >> "$GITHUB_OUTPUT"
+          else
+            VERSION=${GITHUB_REF_NAME#v}
+            echo "image_tag=${VERSION}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Build and push mooncake-master image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/Dockerfile.master
+          push: true
+          tags: |
+            ghcr.io/${{ github.repository_owner }}/mooncake-master:${{ steps.version.outputs.image_tag }}
+            ghcr.io/${{ github.repository_owner }}/mooncake-master:latest
diff --git a/deploy/k8s/mooncake-master.yaml b/deploy/k8s/mooncake-master.yaml
new file mode 100644
index 0000000000..43348e25f9
--- /dev/null
+++ b/deploy/k8s/mooncake-master.yaml
@@ -0,0 +1,129 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: mooncake
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mooncake-master-config
+  namespace: mooncake
+data:
+  master.yaml: |
+    rpc_port: 50051
+    rpc_thread_num: 4
+    rpc_address: "0.0.0.0"
+    rpc_conn_timeout_seconds: 0
+    rpc_enable_tcp_no_delay: true
+
+    enable_metric_reporting: true
+    metrics_port: 9003
+
+    enable_http_metadata_server: true
+    http_metadata_server_host: "0.0.0.0"
+    http_metadata_server_port: 8080
+
+    default_kv_lease_ttl: 5000
+    default_kv_soft_pin_ttl: 1800000
+    allow_evict_soft_pinned_objects: true
+    eviction_ratio: 0.05
+    eviction_high_watermark_ratio: 0.95
+
+    memory_allocator: "offset"
+    allocation_strategy: "random"
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mooncake-master
+  namespace: mooncake
+  labels:
+    app.kubernetes.io/name: mooncake-master
+    app.kubernetes.io/component: master
+    app.kubernetes.io/part-of: mooncake
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: mooncake-master
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: mooncake-master
+        app.kubernetes.io/component: master
+        app.kubernetes.io/part-of: mooncake
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9003"
+        prometheus.io/path: "/metrics"
+    spec:
+      containers:
+        - name: mooncake-master
+          image: ghcr.io/kvcache-ai/mooncake-master:latest
+          command: ["mooncake_master"]
+          args:
+            - "--config_path=/etc/mooncake/master.yaml"
+          ports:
+            - name: rpc
+              containerPort: 50051
+              protocol: TCP
+            - name: metrics
+              containerPort: 9003
+              protocol: TCP
+            - name: metadata
+              containerPort: 8080
+              protocol: TCP
+          readinessProbe:
+            httpGet:
+              path: /metrics
+              port: metrics
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /metrics
+              port: metrics
+            initialDelaySeconds: 10
+            periodSeconds: 15
+          resources:
+            requests:
+              cpu: "1"
+              memory: "1Gi"
+            limits:
+              cpu: "4"
+              memory: "4Gi"
+          volumeMounts:
+            - name: config
+              mountPath: /etc/mooncake
+              readOnly: true
+      volumes:
+        - name: config
+          configMap:
+            name: mooncake-master-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: mooncake-master
+  namespace: mooncake
+  labels:
+    app.kubernetes.io/name: mooncake-master
+    app.kubernetes.io/component: master
+    app.kubernetes.io/part-of: mooncake
+spec:
+  type: ClusterIP
+  selector:
+    app.kubernetes.io/name: mooncake-master
+  ports:
+    - name: rpc
+      port: 50051
+      targetPort: rpc
+      protocol: TCP
+    - name: metadata
+      port: 8080
+      targetPort: metadata
+      protocol: TCP
+    - name: metrics
+      port: 9003
+      targetPort: metrics
+      protocol: TCP
diff --git a/docker/Dockerfile.master b/docker/Dockerfile.master
new file mode 100644
index 0000000000..ab51294652
--- /dev/null
+++ b/docker/Dockerfile.master
@@ -0,0 +1,111 @@
+# syntax=docker/dockerfile:1.7
+
+###############################################################################
+# Stage 1: build Mooncake from source (non-CUDA) and produce a Python wheel
+###############################################################################
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:${UBUNTU_VERSION} AS builder
+
+# Make piped RUN commands (curl | python) fail when the download fails.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    NON_CUDA_BUILD=1
+
+ARG PYTHON_VERSION=3.10
+ARG PYPA_INDEX_URL=https://bootstrap.pypa.io
+ARG CMAKE_BUILD_TYPE=Release
+
+ENV PYTHON_VERSION=${PYTHON_VERSION} \
+    PATH="/usr/local/go/bin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        git \
+        ninja-build \
+        software-properties-common \
+        pkg-config && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        python${PYTHON_VERSION} \
+        python${PYTHON_VERSION}-dev \
+        python${PYTHON_VERSION}-venv && \
+    curl -fsSL ${PYPA_INDEX_URL}/get-pip.py | python${PYTHON_VERSION} && \
+    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
+    apt-get purge -y --auto-remove software-properties-common && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+COPY . /workspace
+
+RUN bash dependencies.sh -y
+
+RUN mkdir -p build && \
+    cd build && \
+    cmake -G Ninja .. \
+        -DBUILD_UNIT_TESTS=OFF \
+        -DUSE_HTTP=ON \
+        -DUSE_ETCD=ON \
+        -DUSE_CUDA=OFF \
+        -DWITH_EP=OFF \
+        -DSTORE_USE_ETCD=ON \
+        -DPython3_EXECUTABLE=/usr/bin/python${PYTHON_VERSION} \
+        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} && \
+    cmake --build .
+
+RUN OUTPUT_DIR=dist ./scripts/build_wheel.sh
+
+###############################################################################
+# Stage 2: install the freshly built wheel into a minimal runtime image
+###############################################################################
+FROM ubuntu:${UBUNTU_VERSION} AS runtime
+
+# Make piped RUN commands (curl | python) fail when the download fails.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Build-time only: keeps apt non-interactive without persisting into the image env.
+ARG DEBIAN_FRONTEND=noninteractive
+
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+ARG PYTHON_VERSION=3.10
+ARG PYPA_INDEX_URL=https://bootstrap.pypa.io
+ENV PYTHON_VERSION=${PYTHON_VERSION}
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        software-properties-common \
+        ibverbs-providers \
+        rdma-core \
+        libibverbs1 \
+        librdmacm1 \
+        libnuma1 \
+        liburing2 \
+        libyaml-0-2 \
+        libcurl4 && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        python${PYTHON_VERSION} && \
+    curl -fsSL ${PYPA_INDEX_URL}/get-pip.py | python${PYTHON_VERSION} && \
+    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
+    apt-get purge -y --auto-remove software-properties-common curl && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /workspace/mooncake-wheel/dist /tmp/mooncake-wheel
+RUN python${PYTHON_VERSION} -m pip install --no-cache-dir /tmp/mooncake-wheel/*.whl && \
+    rm -rf /tmp/mooncake-wheel /root/.cache/pip
+
+EXPOSE 50051 9003 8080
+
+ENTRYPOINT ["mooncake_master"]
From 1165f9afcd1b93569ead268f85a78ac0e4077c1b Mon Sep 17 00:00:00 2001
From: Gregory Pereira
Date: Thu, 14 May 2026 21:35:46 -0400
Subject: [PATCH 2/2] k8s deployment manifests for the master store

Signed-off-by: Gregory Pereira
Signed-off-by: greg pereira
---
Review notes (free-form area, ignored by git-am, not part of the commit):

  * deploy/k8s/mooncake-master-deployment/kustomization.yaml: the image
    override now resolves to ghcr.io/kvcache-ai/mooncake-master:latest, the
    image the removed deploy/k8s/mooncake-master.yaml referenced and the
    registry path the publish workflow pushes to, instead of a personal
    quay.io repository with a `test` tag.
  * deploy/k8s/monitoring/kustomization.yaml: file now ends with a newline.
  * The blob ids on the `index` lines were generated before these edits;
    regenerate the series with git format-patch after applying.

 deploy/k8s/monitoring/kustomization.yaml      |  14 ++
 deploy/k8s/monitoring/servicemonitor.yaml     |  15 ++
 .../mooncake-master-deployment/configmap.yaml |  33 +++++
 .../deployment.yaml                           |  64 +++++++++
 .../kustomization.yaml                        |  22 +++
 .../mooncake-master-deployment/namespace.yaml |   4 +
 .../k8s/mooncake-master-deployment/pvc.yaml   |  10 ++
 .../mooncake-master-deployment/service.yaml   |  25 ++++
 deploy/k8s/mooncake-master.yaml               | 129 ------------------
 9 files changed, 187 insertions(+), 129 deletions(-)
 create mode 100644 deploy/k8s/monitoring/kustomization.yaml
 create mode 100644 deploy/k8s/monitoring/servicemonitor.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/configmap.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/deployment.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/kustomization.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/namespace.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/pvc.yaml
 create mode 100644 deploy/k8s/mooncake-master-deployment/service.yaml
 delete mode 100644 deploy/k8s/mooncake-master.yaml

diff --git a/deploy/k8s/monitoring/kustomization.yaml b/deploy/k8s/monitoring/kustomization.yaml
new file mode 100644
index 0000000000..91e32ed4d2
--- /dev/null
+++ b/deploy/k8s/monitoring/kustomization.yaml
@@ -0,0 +1,14 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: mooncake
+resources:
+  - ../mooncake-master-deployment
+  - servicemonitor.yaml
+
+labels:
+  - pairs:
+      app.kubernetes.io/name: mooncake-master
+      app.kubernetes.io/component: master
+      app.kubernetes.io/part-of: mooncake
+    includeTemplates: true
+    includeSelectors: true
diff --git a/deploy/k8s/monitoring/servicemonitor.yaml b/deploy/k8s/monitoring/servicemonitor.yaml
new file mode 100644
index 0000000000..e4b6ec6568
--- /dev/null
+++ b/deploy/k8s/monitoring/servicemonitor.yaml
@@ -0,0 +1,15 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: mooncake-master
+  labels:
+    app.kubernetes.io/name: mooncake-master
+    app.kubernetes.io/component: master
+    app.kubernetes.io/part-of: mooncake
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: mooncake-master
+  endpoints:
+    - port: metrics
+      interval: 15s
diff --git a/deploy/k8s/mooncake-master-deployment/configmap.yaml b/deploy/k8s/mooncake-master-deployment/configmap.yaml
new file mode 100644
index 0000000000..2f4b40c0e2
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/configmap.yaml
@@ -0,0 +1,33 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mooncake-master-config
+data:
+  master.yaml: |
+    rpc_port: 50051
+    rpc_thread_num: 4
+    rpc_address: "0.0.0.0"
+    rpc_conn_timeout_seconds: 0
+    rpc_enable_tcp_no_delay: true
+
+    enable_metric_reporting: true
+    metrics_port: 9003
+
+    enable_http_metadata_server: true
+    http_metadata_server_host: "0.0.0.0"
+    http_metadata_server_port: 8080
+
+    default_kv_lease_ttl: 5000
+    default_kv_soft_pin_ttl: 1800000
+    allow_evict_soft_pinned_objects: true
+    eviction_ratio: 0.05
+    eviction_high_watermark_ratio: 0.95
+
+    memory_allocator: "offset"
+    allocation_strategy: "random"
+
+    enable_snapshot: true
+    enable_snapshot_restore: true
+    snapshot_interval_seconds: 60
+    snapshot_retention_count: 3
+    snapshot_object_store_type: "local"
diff --git a/deploy/k8s/mooncake-master-deployment/deployment.yaml b/deploy/k8s/mooncake-master-deployment/deployment.yaml
new file mode 100644
index 0000000000..9d17bee7f6
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/deployment.yaml
@@ -0,0 +1,64 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mooncake-master
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9003"
+        prometheus.io/path: "/metrics"
+    spec:
+      containers:
+        - name: mooncake-master
+          image: mooncake-master
+          command: ["mooncake_master"]
+          args:
+            - "--config_path=/etc/mooncake/master.yaml"
+          env:
+            - name: MOONCAKE_SNAPSHOT_LOCAL_PATH
+              value: "/data/snapshots"
+          ports:
+            - name: rpc
+              containerPort: 50051
+              protocol: TCP
+            - name: metrics
+              containerPort: 9003
+              protocol: TCP
+            - name: metadata
+              containerPort: 8080
+              protocol: TCP
+          readinessProbe:
+            tcpSocket:
+              port: rpc
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            tcpSocket:
+              port: rpc
+            initialDelaySeconds: 10
+            periodSeconds: 15
+          resources:
+            requests:
+              cpu: "1"
+              memory: "1Gi"
+            limits:
+              cpu: "4"
+              memory: "4Gi"
+          volumeMounts:
+            - name: config
+              mountPath: /etc/mooncake
+              readOnly: true
+            - name: snapshots
+              mountPath: /data/snapshots
+      volumes:
+        - name: config
+          configMap:
+            name: mooncake-master-config
+        - name: snapshots
+          persistentVolumeClaim:
+            claimName: mooncake-master-snapshots
diff --git a/deploy/k8s/mooncake-master-deployment/kustomization.yaml b/deploy/k8s/mooncake-master-deployment/kustomization.yaml
new file mode 100644
index 0000000000..2a293e934e
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/kustomization.yaml
@@ -0,0 +1,22 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: mooncake
+resources:
+  - namespace.yaml
+  - configmap.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
+
+labels:
+  - pairs:
+      app.kubernetes.io/name: mooncake-master
+      app.kubernetes.io/component: master
+      app.kubernetes.io/part-of: mooncake
+    includeTemplates: true
+    includeSelectors: true
+
+images:
+  - name: mooncake-master
+    newName: ghcr.io/kvcache-ai/mooncake-master
+    newTag: latest
diff --git a/deploy/k8s/mooncake-master-deployment/namespace.yaml b/deploy/k8s/mooncake-master-deployment/namespace.yaml
new file mode 100644
index 0000000000..17e3e66858
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: mooncake
diff --git a/deploy/k8s/mooncake-master-deployment/pvc.yaml b/deploy/k8s/mooncake-master-deployment/pvc.yaml
new file mode 100644
index 0000000000..1cef240e18
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/pvc.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: mooncake-master-snapshots
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
diff --git a/deploy/k8s/mooncake-master-deployment/service.yaml b/deploy/k8s/mooncake-master-deployment/service.yaml
new file mode 100644
index 0000000000..faef59d1d6
--- /dev/null
+++ b/deploy/k8s/mooncake-master-deployment/service.yaml
@@ -0,0 +1,25 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: mooncake-master
+  labels:
+    app.kubernetes.io/name: mooncake-master
+    app.kubernetes.io/component: master
+    app.kubernetes.io/part-of: mooncake
+spec:
+  type: ClusterIP
+  selector:
+    app.kubernetes.io/name: mooncake-master
+  ports:
+    - name: rpc
+      port: 50051
+      targetPort: rpc
+      protocol: TCP
+    - name: metadata
+      port: 8080
+      targetPort: metadata
+      protocol: TCP
+    - name: metrics
+      port: 9003
+      targetPort: metrics
+      protocol: TCP
diff --git a/deploy/k8s/mooncake-master.yaml b/deploy/k8s/mooncake-master.yaml
deleted file mode 100644
index 43348e25f9..0000000000
--- a/deploy/k8s/mooncake-master.yaml
+++ /dev/null
@@ -1,129 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: mooncake
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: mooncake-master-config
-  namespace: mooncake
-data:
-  master.yaml: |
-    rpc_port: 50051
-    rpc_thread_num: 4
-    rpc_address: "0.0.0.0"
-    rpc_conn_timeout_seconds: 0
-    rpc_enable_tcp_no_delay: true
-
-    enable_metric_reporting: true
-    metrics_port: 9003
-
-    enable_http_metadata_server: true
-    http_metadata_server_host: "0.0.0.0"
-    http_metadata_server_port: 8080
-
-    default_kv_lease_ttl: 5000
-    default_kv_soft_pin_ttl: 1800000
-    allow_evict_soft_pinned_objects: true
-    eviction_ratio: 0.05
-    eviction_high_watermark_ratio: 0.95
-
-    memory_allocator: "offset"
-    allocation_strategy: "random"
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: mooncake-master
-  namespace: mooncake
-  labels:
-    app.kubernetes.io/name: mooncake-master
-    app.kubernetes.io/component: master
-    app.kubernetes.io/part-of: mooncake
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: mooncake-master
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: mooncake-master
-        app.kubernetes.io/component: master
-        app.kubernetes.io/part-of: mooncake
-      annotations:
-        prometheus.io/scrape: "true"
-        prometheus.io/port: "9003"
-        prometheus.io/path: "/metrics"
-    spec:
-      containers:
-        - name: mooncake-master
-          image: ghcr.io/kvcache-ai/mooncake-master:latest
-          command: ["mooncake_master"]
-          args:
-            - "--config_path=/etc/mooncake/master.yaml"
-          ports:
-            - name: rpc
-              containerPort: 50051
-              protocol: TCP
-            - name: metrics
-              containerPort: 9003
-              protocol: TCP
-            - name: metadata
-              containerPort: 8080
-              protocol: TCP
-          readinessProbe:
-            httpGet:
-              path: /metrics
-              port: metrics
-            initialDelaySeconds: 5
-            periodSeconds: 10
-          livenessProbe:
-            httpGet:
-              path: /metrics
-              port: metrics
-            initialDelaySeconds: 10
-            periodSeconds: 15
-          resources:
-            requests:
-              cpu: "1"
-              memory: "1Gi"
-            limits:
-              cpu: "4"
-              memory: "4Gi"
-          volumeMounts:
-            - name: config
-              mountPath: /etc/mooncake
-              readOnly: true
-      volumes:
-        - name: config
-          configMap:
-            name: mooncake-master-config
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: mooncake-master
-  namespace: mooncake
-  labels:
-    app.kubernetes.io/name: mooncake-master
-    app.kubernetes.io/component: master
-    app.kubernetes.io/part-of: mooncake
-spec:
-  type: ClusterIP
-  selector:
-    app.kubernetes.io/name: mooncake-master
-  ports:
-    - name: rpc
-      port: 50051
-      targetPort: rpc
-      protocol: TCP
-    - name: metadata
-      port: 8080
-      targetPort: metadata
-      protocol: TCP
-    - name: metrics
-      port: 9003
-      targetPort: metrics
-      protocol: TCP