Skip to content

Commit 69dc9c6

Browse files
committed
update db-dump-google-collections.sh to work with new mongodb dump directory pattern
1 parent 172e0e6 commit 69dc9c6

1 file changed

Lines changed: 87 additions & 30 deletions

File tree

render-ws-with-mongo-db/db-dump-google-collections.sh

Lines changed: 87 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -6,42 +6,99 @@ set -e
66
# Generate mongodb collection dump files from a render-ws-mongodb Google Cloud VM container.
77
#
88
# Dump files for the collections are written to:
9-
# /mnt/disks/mongodb_dump_fs/dump/render-ws-mongodb-16c-64gb-[dump-suffix]/collections/[collection-dir | run-time]
10-
11-
if [ $# -lt 3 ]; then
12-
echo "
13-
Usage: $0 <db> <dump-suffix> <collection-pattern> [collection-dir]
9+
# /mnt/disks/mongodb_dump_fs/dump/<location>/<stage>/<project>/<slab-group>/<db>
10+
#
11+
# For example:
12+
# /mnt/disks/mongodb_dump_fs/dump/google/01_match/w61_serial_110_to_119/s115_to_s119_r00/match
13+
# /mnt/disks/mongodb_dump_fs/dump/google/02_align/w61_serial_090_to_099/s094_r00/render
1414

15-
Examples: $0 render par '.*_par_.*' w61_s140_to_149_par
16-
$0 match match '.*_s15[5-9]_.*' w61_s155_to_159
17-
$0 render align '.*align.*' w61_s070_to_071_r00_align
18-
$0 render ic2d '.*ic2d.*' w61_s100_to_s109_r00_ic2d_nc4_hist
15+
BASE_DUMP_DIR="/mnt/disks/mongodb_dump_fs/dump"
1916

20-
$0 render mat '.*w60_s360_r00_(gc|gc_mat|gc_mat_render)__.*'
21-
"
17+
if [ ! -d "${BASE_DUMP_DIR}" ]; then
18+
echo "ERROR: ${BASE_DUMP_DIR} not found"
2219
exit 1
2320
fi
2421

25-
RUN_TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
22+
echo "
23+
Select database:"
24+
select DB in "render" "match"; do
25+
case "${DB}" in
26+
render|match) break ;;
27+
*) echo " Invalid selection, please enter 1 or 2." ;;
28+
esac
29+
done
2630

27-
DB="${1}"
28-
BASE_DUMP_SUFFIX="${2}"
29-
COLLECTION_PATTERN="${3}"
30-
COLLECTION_DIR="${4:-${RUN_TIMESTAMP}}"
31+
LOCATION="google"
3132

32-
BASE_DUMP_DIR="/mnt/disks/mongodb_dump_fs/dump"
33-
if [ ! -d "${BASE_DUMP_DIR}" ]; then
34-
echo "ERROR: ${BASE_DUMP_DIR} not found"
35-
exit 1
36-
fi
33+
echo "
34+
Select stage:"
35+
select STAGE_CHOICE in "00_par" "01_match" "02_align" "03_ic2d_nc4_hist_rs0p5" "timestamp"; do
36+
case "${STAGE_CHOICE}" in
37+
00_par|01_match|02_align|03_ic2d_nc4_hist_rs0p5)
38+
STAGE="${STAGE_CHOICE}"
39+
break
40+
;;
41+
timestamp)
42+
STAGE=$(date +"%Y%m%d_%H%M%S")
43+
break
44+
;;
45+
*) echo " Invalid selection, please enter a number from the list." ;;
46+
esac
47+
done
3748

38-
# /mnt/disks/mongodb_dump_fs/dump/render-ws-mongodb-16c-64gb-align/collections/w61_s100_to_101_r00_align
39-
FULL_DUMP_DIR="${BASE_DUMP_DIR}/render-ws-mongodb-16c-64gb-${BASE_DUMP_SUFFIX}/collections/${COLLECTION_DIR}"
40-
if [ -d "${FULL_DUMP_DIR}" ]; then
41-
echo "ERROR: ${FULL_DUMP_DIR} already exists"
49+
echo "
50+
Select project:"
51+
PROJECTS=()
52+
for i in $(seq 0 10 150); do
53+
PROJECTS+=("$(printf "w61_serial_%03d_to_%03d" "$i" "$((i+9))")")
54+
done
55+
select PROJECT in "${PROJECTS[@]}"; do
56+
if [[ -n "${PROJECT}" ]]; then
57+
break
58+
else
59+
echo " Invalid selection, please enter a number from the list."
60+
fi
61+
done
62+
63+
echo "
64+
Enter slab-group (e.g. s115_to_s119_r00 s094_r00 20260421_test ):"
65+
while true; do
66+
read -rp " Slab-group: " SLAB_GROUP
67+
if [[ -n "${SLAB_GROUP}" ]]; then
68+
break
69+
fi
70+
echo " Slab-group must not be empty."
71+
done
72+
73+
echo "
74+
Enter collection pattern regex (e.g. .*_par_.* .*_s11[5-9]_.* .*align.* ):"
75+
while true; do
76+
read -rp " Pattern: " COLLECTION_PATTERN
77+
if [[ -n "${COLLECTION_PATTERN}" ]]; then
78+
break
79+
fi
80+
echo " Pattern must not be empty."
81+
done
82+
83+
# ----------------------------------------------------------------------------
84+
# Build and validate the target dump directory
85+
86+
FULL_SLAB_GROUP_DIR="${BASE_DUMP_DIR}/${LOCATION}/${STAGE}/${PROJECT}/${SLAB_GROUP}"
87+
FULL_DB_DUMP_DIR="${FULL_SLAB_GROUP_DIR}/${DB}"
88+
89+
if [ -d "${FULL_DB_DUMP_DIR}" ]; then
90+
echo "ERROR: ${FULL_DB_DUMP_DIR} already exists"
4291
exit 1
92+
else
93+
echo "
94+
Dump directory will be:
95+
${FULL_DB_DUMP_DIR}
96+
"
4397
fi
4498

99+
# ----------------------------------------------------------------------------
100+
# Find matching collections
101+
45102
CONNECTION_URI="mongodb://localhost:27017/${DB}"
46103
EVAL_CMD="printjson(db.getCollectionNames().filter(c => /${COLLECTION_PATTERN}/.test(c)).sort());"
47104

@@ -112,13 +169,13 @@ if [[ ! $REPLY =~ ^[Yy]$ ]]; then
112169
exit 1
113170
fi
114171

115-
mkdir -p "${FULL_DUMP_DIR}"
172+
mkdir -p "${FULL_SLAB_GROUP_DIR}"
116173

117174
DUMP_WAIT_SECONDS=3
118175

119176
for COLLECTION in ${COLLECTIONS}; do
120177

121-
mongodump --uri="${CONNECTION_URI}" --db="${DB}" --collection="${COLLECTION}" --gzip --out="${FULL_DUMP_DIR}"
178+
mongodump --uri="${CONNECTION_URI}" --db="${DB}" --collection="${COLLECTION}" --gzip --out="${FULL_SLAB_GROUP_DIR}"
122179

123180
if [ "${DB}" == "match" ]; then
124181
echo "sleeping for ${DUMP_WAIT_SECONDS} seconds in attempt to avoid container crash on larger dumps"
@@ -133,13 +190,13 @@ if [[ -v SMD_QUERY ]]; then
133190
dumping admin__stack_meta_data with query:
134191
${SMD_QUERY}]}
135192
"
136-
echo "${SMD_QUERY}]}" > "${FULL_DUMP_DIR}/render/admin__stack_meta_data.query.txt"
137-
mongodump --uri="${CONNECTION_URI}" --db="${DB}" --collection=admin__stack_meta_data --query "${SMD_QUERY}]}" --gzip --out="${FULL_DUMP_DIR}"
193+
echo "${SMD_QUERY}]}" > "${FULL_DB_DUMP_DIR}/admin__stack_meta_data.query.txt"
194+
mongodump --uri="${CONNECTION_URI}" --db="${DB}" --collection=admin__stack_meta_data --query "${SMD_QUERY}]}" --gzip --out="${FULL_SLAB_GROUP_DIR}"
138195

139196
COLLECTION_COUNT=$((COLLECTION_COUNT+1))
140197
fi
141198

142199
echo "
143200
Dumped ${COLLECTION_COUNT} collections to:
144-
${FULL_DUMP_DIR}
201+
${FULL_DB_DUMP_DIR}
145202
"

0 commit comments

Comments
 (0)