Skip to content

Commit 7cbb895

Browse files
storage: support local filesystem storage with localPath config
Support Milvus standalone with local storage (COMMON_STORAGETYPE=local) by mapping minio.localPath / minio.backupLocalPath to the host directory that backs Milvus localStorage.path. LocalClient now resolves keys against baseDir = localPath/bucket so all keys returned by ListPrefix are relative, matching the S3 client. The CI test dumps the milvus and backup volume trees around backup and restore, and keeps restore temp files, so failures expose what is actually on disk where Milvus expects to read it. Signed-off-by: huanghaoyuanhhy <haoyuan.huang@zilliz.com>
1 parent adae7f7 commit 7cbb895

12 files changed

Lines changed: 520 additions & 121 deletions

File tree

.github/workflows/main.yaml

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,220 @@ jobs:
972972
973973
974974
975+
test-backup-restore-local-storage:
976+
needs: unit-test-go
977+
name: Backup and restore with local storage
978+
runs-on: ubuntu-latest
979+
strategy:
980+
fail-fast: false
981+
matrix:
982+
image_tag: [v2.5.20, 2.6-latest]
983+
984+
steps:
985+
- uses: actions/checkout@v6
986+
987+
- name: Set up Python 3.10
988+
uses: actions/setup-python@v6
989+
with:
990+
python-version: '3.10'
991+
cache: pip
992+
993+
- uses: actions/setup-go@v6
994+
name: Set up Go ${{ env.go-version }}
995+
with:
996+
go-version: ${{ env.go-version }}
997+
cache: true
998+
999+
- name: Build
1000+
timeout-minutes: 5
1001+
shell: bash
1002+
run: |
1003+
go get
1004+
go build
1005+
1006+
- name: Install dependency
1007+
timeout-minutes: 5
1008+
working-directory: tests
1009+
shell: bash
1010+
run: |
1011+
pip install -r requirements.txt --trusted-host https://test.pypi.org
1012+
1013+
- name: Deploy Milvus with local storage
1014+
timeout-minutes: 15
1015+
shell: bash
1016+
working-directory: deployment/standalone
1017+
run: |
1018+
tag=$(python ../../scripts/get_image_tag_by_short_name.py --tag ${{ matrix.image_tag }}) && echo $tag
1019+
mkdir -p volumes/milvus
1020+
sudo chmod -R 777 volumes
1021+
1022+
# Create embedEtcd.yaml
1023+
cat > embedEtcd.yaml <<'ETCD'
1024+
listen-client-urls: http://0.0.0.0:2379
1025+
advertise-client-urls: http://0.0.0.0:2379
1026+
quota-backend-bytes: 4294967296
1027+
auto-compaction-mode: revision
1028+
auto-compaction-retention: '1000'
1029+
ETCD
1030+
1031+
# Create user.yaml for custom config
1032+
cat > user.yaml <<'USERCFG'
1033+
log:
1034+
level: debug
1035+
dataNode:
1036+
segment:
1037+
insertBufSize: 4096
1038+
USERCFG
1039+
cat user.yaml
1040+
1041+
# Match official standalone docker deployment:
1042+
# https://milvus.io/docs/install_standalone-docker.md
1043+
sudo docker run -d \
1044+
--name milvus-standalone \
1045+
--security-opt seccomp:unconfined \
1046+
-e ETCD_USE_EMBED=true \
1047+
-e ETCD_DATA_DIR=/var/lib/milvus/etcd \
1048+
-e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
1049+
-e COMMON_STORAGETYPE=local \
1050+
-e DEPLOY_MODE=STANDALONE \
1051+
-v $(pwd)/volumes/milvus:/var/lib/milvus \
1052+
-v $(pwd)/embedEtcd.yaml:/milvus/configs/embedEtcd.yaml \
1053+
-v $(pwd)/user.yaml:/milvus/configs/user.yaml \
1054+
-p 19530:19530 \
1055+
-p 9091:9091 \
1056+
-p 2379:2379 \
1057+
--health-cmd="curl -f http://localhost:9091/healthz" \
1058+
--health-interval=30s \
1059+
--health-start-period=90s \
1060+
--health-timeout=20s \
1061+
--health-retries=3 \
1062+
milvusdb/milvus:${tag} \
1063+
milvus run standalone
1064+
1065+
# Wait for healthy
1066+
for i in $(seq 1 60); do
1067+
status=$(sudo docker inspect --format='{{.State.Health.Status}}' milvus-standalone 2>/dev/null || echo "not ready")
1068+
echo "Attempt $i: $status"
1069+
if [ "$status" = "healthy" ]; then break; fi
1070+
sleep 5
1071+
done
1072+
sudo docker ps -a
1073+
# Fix permissions on Milvus data dir created by container root
1074+
sudo chmod -R 777 volumes
1075+
1076+
- name: Export container status after deploy
1077+
if: ${{ always() }}
1078+
shell: bash
1079+
working-directory: deployment/standalone
1080+
run: |
1081+
echo "=== Container Status ==="
1082+
sudo docker ps -a || true
1083+
echo "=== Standalone Container Logs ==="
1084+
sudo docker logs milvus-standalone 2>&1 | tail -100 || true
1085+
1086+
- name: Configure backup.yaml for local storage
1087+
timeout-minutes: 1
1088+
shell: bash
1089+
run: |
1090+
yq -i '.log.level = "debug"' configs/backup.yaml
1091+
yq -i '.minio.storageType = "local"' configs/backup.yaml
1092+
# Host path the milvus-backup binary uses to read/write local files.
1093+
yq -i '.minio.localPath = "deployment/standalone/volumes/milvus/data"' configs/backup.yaml
1094+
# Container path Milvus itself uses as localStorage.path. This is what
1095+
# we tell Milvus when we call bulk_insert, since its LocalChunkManager
1096+
# does not prepend RootPath to the requested path.
1097+
yq -i '.minio.milvusLocalPath = "/var/lib/milvus/data"' configs/backup.yaml
1098+
yq -i '.minio.bucketName = ""' configs/backup.yaml
1099+
yq -i '.minio.rootPath = ""' configs/backup.yaml
1100+
yq -i '.minio.backupStorageType = "local"' configs/backup.yaml
1101+
yq -i '.minio.backupLocalPath = "deployment/standalone/volumes/backup"' configs/backup.yaml
1102+
yq -i '.minio.backupBucketName = ""' configs/backup.yaml
1103+
yq -i '.minio.backupRootPath = "backup"' configs/backup.yaml
1104+
yq -i '.minio.crossStorage = true' configs/backup.yaml
1105+
# Keep restore temp files so we can inspect them if bulk insert fails.
1106+
yq -i '.backup.keepTempFiles = true' configs/backup.yaml
1107+
cat configs/backup.yaml
1108+
1109+
- name: Prepare data
1110+
timeout-minutes: 5
1111+
shell: bash
1112+
run: |
1113+
python example/prepare_data.py
1114+
1115+
- name: Fix permissions after data preparation
1116+
shell: bash
1117+
working-directory: deployment/standalone
1118+
run: |
1119+
sudo chmod -R 777 volumes
1120+
1121+
- name: List milvus volume before backup
1122+
shell: bash
1123+
working-directory: deployment/standalone
1124+
run: |
1125+
sudo apt-get install -y tree > /dev/null 2>&1 || true
1126+
echo "=== milvus volume tree (before backup) ==="
1127+
sudo tree volumes/milvus/data | head -500
1128+
1129+
- name: Backup
1130+
timeout-minutes: 5
1131+
shell: bash
1132+
run: |
1133+
./milvus-backup check
1134+
./milvus-backup list
1135+
./milvus-backup create -n my_backup
1136+
./milvus-backup list
1137+
1138+
- name: List backup volume after backup
1139+
if: ${{ always() }}
1140+
shell: bash
1141+
working-directory: deployment/standalone
1142+
run: |
1143+
echo "=== backup volume tree (after backup) ==="
1144+
sudo tree volumes/backup | head -500
1145+
1146+
- name: Restore backup
1147+
timeout-minutes: 5
1148+
shell: bash
1149+
run: |
1150+
./milvus-backup restore -n my_backup -s _recover
1151+
1152+
- name: List milvus volume after restore
1153+
if: ${{ always() }}
1154+
shell: bash
1155+
working-directory: deployment/standalone
1156+
run: |
1157+
echo "=== milvus volume tree (after restore) ==="
1158+
sudo tree volumes/milvus/data | head -500
1159+
1160+
- name: Verify data
1161+
timeout-minutes: 5
1162+
shell: bash
1163+
run: |
1164+
python example/verify_data.py
1165+
1166+
- name: Delete backup
1167+
timeout-minutes: 5
1168+
shell: bash
1169+
run: |
1170+
./milvus-backup delete -n my_backup
1171+
./milvus-backup list
1172+
1173+
- name: Export logs
1174+
if: ${{ always() }}
1175+
shell: bash
1176+
run: |
1177+
mkdir -p /tmp/ci_logs
1178+
sudo docker logs milvus-standalone > /tmp/ci_logs/standalone.log 2>&1 || true
1179+
1180+
- name: Upload logs
1181+
if: ${{ ! success() }}
1182+
uses: actions/upload-artifact@v7
1183+
with:
1184+
name: local-storage-logs-${{ matrix.image_tag }}
1185+
path: |
1186+
/tmp/ci_logs
1187+
./server.log
1188+
9751189
test-backup-restore-api:
9761190
name: Backup and restore api
9771191
runs-on: ubuntu-latest

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,7 @@ dist/
1111
.DS_Store
1212

1313
# Claude Code
14-
CLAUDE.local.md
14+
CLAUDE.local.md
15+
16+
# Git worktrees
17+
.worktrees/

configs/backup-local.yaml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Example config for Milvus Standalone with local storage.
2+
#
3+
# This matches the official docker deployment:
4+
# https://milvus.io/docs/install_standalone-docker.md
5+
#
6+
# The official script runs:
7+
# docker run ... -e COMMON_STORAGETYPE=local -v $(pwd)/volumes/milvus:/var/lib/milvus ...
8+
#
9+
# In local storage mode, Milvus uses localStorage.path as the chunk manager root.
10+
# Unlike remote mode, minio.rootPath ("files") is NOT used as a path prefix.
11+
# Files are stored directly under localStorage.path:
12+
# /var/lib/milvus/data / insert_log / <collID> / ...
13+
#
14+
# Set localPath to the HOST path that maps to Milvus localStorage.path.
15+
# Set bucketName and rootPath to empty strings (they are not used in local storage mode).
16+
17+
log:
18+
level: info
19+
console: true
20+
21+
milvus:
22+
address: localhost
23+
port: 19530
24+
25+
minio:
26+
# Milvus storage — match your Milvus local storage config
27+
storageType: "local"
28+
localPath: "volumes/milvus/data" # host path to Milvus localStorage.path
29+
milvusLocalPath: "/var/lib/milvus/data" # container path Milvus uses as localStorage.path
30+
bucketName: "" # not used in local storage mode
31+
rootPath: "" # not used in local storage mode
32+
33+
# Backup storage
34+
backupStorageType: "local"
35+
backupLocalPath: "volumes/backup" # separate host directory for backup data
36+
backupBucketName: ""
37+
backupRootPath: "backup"

configs/backup.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ minio:
5858
bucketName: "a-bucket" # Milvus Bucket name in MinIO/S3, make it the same as your milvus instance
5959
rootPath: "files" # Milvus storage root path in MinIO/S3, make it the same as your milvus instance
6060

61+
# Local storage path, only used when storageType is "local".
62+
# The HOST path the milvus-backup binary uses to read/write local files.
63+
# In docker deployments this is the host bind-mount that backs Milvus
64+
# localStorage.path. Full path resolved by milvus-backup: localPath/bucketName/key.
65+
# In local mode, set bucketName and rootPath to empty since Milvus local storage
66+
# does not use them — files are stored directly under localStorage.path.
67+
localPath: ""
68+
69+
# Absolute path Milvus itself uses as localStorage.path (the chunk-manager
70+
# root inside the Milvus process). milvus-backup prepends this to the path
71+
# it sends to bulk_insert during restore, since Milvus's LocalChunkManager
72+
# does not prepend its own root. In docker deployments this is the
73+
# CONTAINER path (e.g., /var/lib/milvus/data), not the host bind-mount.
74+
# Required when restoring to a local-storage Milvus.
75+
milvusLocalPath: ""
76+
6177
# Backup storage configs, the storage you want to put the backup data
6278
backupStorageType: "minio" # supported storage types: local, minio, s3, aws, gcp, ali(aliyun), azure, tc(tencent)
6379
backupAddress: localhost # Address of MinIO/S3
@@ -72,6 +88,10 @@ minio:
7288
backupRootPath: "backup" # Root path for backup data. Backup data is stored under backupBucketName/backupRootPath
7389
backupUseSSL: false # Access to MinIO/S3 with SSL
7490

91+
# Backup local storage path, only used when backupStorageType is "local".
92+
# Defaults to localPath if not set. Full path: backupLocalPath/backupBucketName/key
93+
backupLocalPath: ""
94+
7595
# If you need to back up or restore data between two different storage systems, direct client-side copying is not supported.
7696
# Set this option to true to enable data transfer through Milvus Backup.
7797
# Note: This option will be automatically set to true if `minio.storageType` and `minio.backupStorageType` differ.

core/restore/coll_task.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ type collTask struct {
5757
backupStorage storage.Client
5858

5959
milvusStorage storage.Client
60+
// milvusLocalPath is Milvus's localStorage.path (absolute path inside the
61+
// Milvus process). Empty unless Milvus is configured for local storage and
62+
// the user set minio.milvusLocalPath. Prepended to bulk_insert paths so
63+
// the Milvus LocalChunkManager can resolve restore temp files — its
64+
// WalkWithPrefix does not prepend its own root.
65+
milvusLocalPath string
6066

6167
grpcCli milvus.Grpc
6268
restfulCli milvus.Restful
@@ -91,8 +97,9 @@ type collTaskArgs struct {
9197
keepTempFiles bool
9298
crossStorage bool
9399

94-
backupStorage storage.Client
95-
milvusStorage storage.Client
100+
backupStorage storage.Client
101+
milvusStorage storage.Client
102+
milvusLocalPath string
96103

97104
copySem *semaphore.Weighted
98105
bulkInsertSem *semaphore.Weighted
@@ -134,8 +141,9 @@ func newCollTask(args collTaskArgs) *collTask {
134141
keepTempFiles: args.keepTempFiles,
135142
backupDir: args.backupDir,
136143

137-
backupStorage: args.backupStorage,
138-
milvusStorage: args.milvusStorage,
144+
backupStorage: args.backupStorage,
145+
milvusStorage: args.milvusStorage,
146+
milvusLocalPath: args.milvusLocalPath,
139147

140148
grpcCli: args.grpcCli,
141149
restfulCli: args.restfulCli,
@@ -390,11 +398,21 @@ func (ct *collTask) restoreNotL0SegV1(ctx context.Context, part *backuppb.Partit
390398
return nil
391399
}
392400

393-
func toPaths(dir partitionDir) []string {
401+
// toPaths builds the [insertLogDir, deltaLogDir] argument for bulk_insert.
402+
// When the target Milvus uses local storage, milvusLocalPath is prepended so
403+
// the Milvus LocalChunkManager can resolve the path — it does not prepend its
404+
// own RootPath, unlike the remote/S3 chunk manager.
405+
func (ct *collTask) toPaths(dir partitionDir) []string {
406+
prepend := func(p string) string {
407+
if p == "" || ct.milvusLocalPath == "" {
408+
return p
409+
}
410+
return path.Join(ct.milvusLocalPath, p)
411+
}
394412
if len(dir.insertLogDir) == 0 {
395-
return []string{dir.deltaLogDir}
413+
return []string{prepend(dir.deltaLogDir)}
396414
}
397-
return []string{dir.insertLogDir, dir.deltaLogDir}
415+
return []string{prepend(dir.insertLogDir), prepend(dir.deltaLogDir)}
398416
}
399417

400418
func (ct *collTask) restoreNotL0SegV2(ctx context.Context, part *backuppb.PartitionBackupInfo) error {
@@ -741,13 +759,13 @@ func (ct *collTask) bulkInsertViaGrpc(ctx context.Context, partitionName string,
741759
g.Go(func() error {
742760
defer ct.bulkInsertSem.Release(1)
743761

744-
paths := toPaths(dir)
762+
paths := ct.toPaths(dir)
745763
ct.logger.Info("start bulk insert via grpc", zap.Strings("paths", paths), zap.String("partition", partitionName))
746764
in := milvus.GrpcBulkInsertInput{
747765
DB: ct.targetNS.DBName(),
748766
CollectionName: ct.targetNS.CollName(),
749767
PartitionName: partitionName,
750-
Paths: toPaths(dir),
768+
Paths: paths,
751769
BackupTS: b.timestamp,
752770
IsL0: b.isL0,
753771
StorageVersion: b.storageVersion,
@@ -813,7 +831,7 @@ func (ct *collTask) checkBulkInsertViaRestful(ctx context.Context, jobID string)
813831

814832
func (ct *collTask) bulkInsertViaRestful(ctx context.Context, partition string, b batch) error {
815833
ct.logger.Info("start bulk insert via restful", zap.Int("batch_num", len(b.partitionDirs)), zap.String("partition", partition))
816-
paths := lo.Map(b.partitionDirs, func(dir partitionDir, _ int) []string { return toPaths(dir) })
834+
paths := lo.Map(b.partitionDirs, func(dir partitionDir, _ int) []string { return ct.toPaths(dir) })
817835
in := milvus.BulkInsertV2Input{
818836
DB: ct.targetNS.DBName(),
819837
CollectionName: ct.targetNS.CollName(),

0 commit comments

Comments
 (0)