-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paths3-sync2.sh
More file actions
executable file
·338 lines (328 loc) · 13.4 KB
/
s3-sync2.sh
File metadata and controls
executable file
·338 lines (328 loc) · 13.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/bin/bash
# Load every helper library shipped next to this script (src/*.sh). The
# directory is resolved from BASH_SOURCE so the lookup does not depend on the
# caller's working directory.
# shellcheck disable=SC1090
lib_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/src"
for f in "$lib_dir"/*.sh; do
  source "$f"
done
# Default arguments
#
# Settings that are always reset at start-up, regardless of what the calling
# environment contains. All of them are exported.
export AWS_CLI_CMD_SYNC_DOWN='' AWS_CLI_CMD_SYNC_UP=''
export AWS_CLI_OPTIONS='' AWS_CLI_SYNC_OPTIONS=''
export AWS_CLI_SYNC_OPTIONS_DOWN='' AWS_CLI_SYNC_OPTIONS_UP=''
export S3_BUCKET=''
export DFS_LOCK_FILE='' DFS_UID='' DFS_UID_HOSTNAME=0
export INIT_SYNC_DOWN=0 INIT_SYNC_UP=0
export MAX_FAILURES=3
export MD5_NOT_PATH_OPT='' MD5_SKIP_PATH=''
export NODE_UID=''
export KILLED=0 ONLY_DOWN=0 ONLY_UP=0

# Settings that may be pre-seeded by the caller: a non-empty value already
# present is kept as-is, otherwise the default below is exported.
[ -n "$LOCAL_PATH" ]            || export LOCAL_PATH=''
[ -n "$S3_URI" ]                || export S3_URI=''
[ -n "$CF_DISTRIBUTION_ID" ]    || export CF_DISTRIBUTION_ID=''
[ -n "$CF_INVALIDATION_PATHS" ] || export CF_INVALIDATION_PATHS='/*'
[ -n "$DEBUG" ]                 || export DEBUG='ERROR'
[ -n "$DFS" ]                   || export DFS=0
[ -n "$DFS_LOCK_TIMEOUT" ]      || export DFS_LOCK_TIMEOUT=60
[ -n "$DFS_LOCK_WAIT" ]         || export DFS_LOCK_WAIT=180
[ -n "$POLL_INTERVAL" ]         || export POLL_INTERVAL=30

# Script-private (non-exported) state: the direction of the most recent
# --sync-opt-* option seen while parsing, and the running failure counter.
last_aws_option=
sync_failures=0
# Script arguments

#######################################
# Print the s3-sync2 help text.
# Outputs: usage description on stdout
#######################################
_s3_sync2_usage() {
  echo " "
  echo " This script facilitates bidirectional synchronization between a local file "
  echo " system path and an Amazon S3 storage bucket by wrapping the (unidirectional) "
  echo "aws s3 sync CLI."
  echo " "
  echo " s3-sync2 [options] <LocalPath> <S3Uri>"
  echo " "
  echo " "
  echo " OPTIONS"
  echo " "
  echo " All standard aws s3 sync CLI options are supported, in addition to the "
  echo " following s3-sync2 specific options."
  echo " "
  echo " "
  echo " <LocalPath> Local directory to synchronize - e.g. /path/to/local/dir"
  echo " "
  echo " <S3Uri> Remote S3 URI - e.g. s3://mybucket/remote/dir"
  echo " "
  echo " --cf-dist-id | -c ID of a CloudFront distribution to trigger edge cache "
  echo " invalidations when local changes occur."
  echo " "
  echo " --cf-inval-paths Value for the aws cloudfront create-invalidation --paths"
  echo " argument. Default is invalidation of all cached objects: /*"
  echo " "
  echo " --debug Debug output level - one of ERROR (default), WARN, DEBUG"
  echo " or NONE"
  echo " "
  echo " --dfs | -d Run as a quasi distributed file system wherein multiple "
  echo " nodes can run this script concurrently. When enabled, an "
  echo " additional distributed locking step is required when "
  echo " synchronizing from <LocalPath> to <S3Uri>. To do so, S3's "
  echo " read-after-write consistency model is leveraged in "
  echo " conjunction with an object PUT operation where the object "
  echo " contains a unique identifier for the node acquiring the "
  echo " lock."
  echo " "
  echo " --dfs-lock-timeout | -t the maximum time (secs) permitted for a distributed lock "
  echo " by another node before it is considered to be stale and "
  echo " force released. Default is 60 (1 minute)"
  echo " "
  echo " --dfs-lock-wait | -w the maximum time (secs) to wait to acquire a distributed "
  echo " lock before exiting with an error. Default is 180 (3 minutes)"
  echo " "
  echo " --dfs-uid-hostname use hostname as the DFS UID instead of /etc/machine-id"
  echo " "
  echo " --init-sync-down | -i if set, aws s3 sync <S3Uri> <LocalPath> will be invoked when"
  echo " the script starts"
  echo " "
  echo " --init-sync-up | -u if set, aws s3 sync <LocalPath> <S3Uri> will be invoked when"
  echo " the script starts"
  echo " "
  echo " --max-failures | -x max synchronization failures before exiting (0 for infinite)."
  echo " Default is 3"
  echo " "
  echo " --md5-skip-path | -s by default, every file in <LocalPath> is used to generate "
  echo " md5 checksums determining when contents have changed. The "
  echo " script cannot translate --include/--exclude sync options"
  echo " to local file paths. Use this option to alter this behavior"
  echo " by specifying 1 or more paths in <LocalPath> to exclude"
  echo " from checksum calculations. Do not repeat this option - if"
  echo " multiple paths should be excluded, use pipes (|) to separate"
  echo " each. Each path designated should be a child of <LocalPath>."
  echo " Only directories may be specified and they should not "
  echo " include the trailing slash"
  echo " "
  echo " --only-down Only synchronize from <S3Uri> to <LocalPath>"
  echo " "
  echo " --only-up Only synchronize from <LocalPath> to <S3Uri>"
  echo " "
  echo " --poll | -p frequency in seconds to check for both local and remote "
  echo " changes and trigger the necessary synchronization - default "
  echo " is 30. Must be between 0 and 3600. If 0, then script will "
  echo " immediately exit after option validation and initial "
  echo " synchronization"
  echo " "
  echo " --sync-opt-down-* An aws s3 sync option that should only be applied when "
  echo " syncing down <S3Uri> to <LocalPath>. For example, to only "
  echo " apply the --delete flag in this direction, set this option "
  echo " --sync-opt-down-delete"
  echo " "
  echo " --sync-opt-up-* Same as above, but for syncing up <LocalPath> to <S3Uri>"
  echo " "
}

#######################################
# Parse the script's command line into the global configuration variables.
#
# Globals written (depending on the options given):
#   CF_DISTRIBUTION_ID, CF_INVALIDATION_PATHS, DEBUG, DFS, DFS_LOCK_TIMEOUT,
#   DFS_LOCK_WAIT, DFS_UID_HOSTNAME, INIT_SYNC_DOWN, INIT_SYNC_UP,
#   MAX_FAILURES, MD5_SKIP_PATH, MD5_NOT_PATH_OPT, ONLY_DOWN, ONLY_UP,
#   POLL_INTERVAL, AWS_CLI_OPTIONS, AWS_CLI_SYNC_OPTIONS,
#   AWS_CLI_SYNC_OPTIONS_DOWN, AWS_CLI_SYNC_OPTIONS_UP, LOCAL_PATH, S3_URI,
#   S3_BUCKET, DFS_LOCK_FILE, last_aws_option
# Arguments:
#   The script's command line ("$@")
# Notes:
#   - -h/--help prints the usage text and exits 0.
#   - Option values are consumed with shift / read $1 / shift rather than
#     "shift 2": when the value is missing, "shift 2" would shift nothing and
#     the loop would never terminate.
#######################################
_s3_sync2_parse_args() {
  local opt skip_path
  local -a skip_paths
  # Direction ("down"/"up") of the most recent --sync-opt-* option; a bare
  # value that follows is attached to that direction's option string.
  last_aws_option=
  while [ $# -gt 0 ]; do
    case "$1" in
      -h|--help)
        _s3_sync2_usage
        exit 0
        ;;
      --cf-dist-id|-c)
        shift
        CF_DISTRIBUTION_ID=$1
        shift
        ;;
      --cf-inval-paths)
        shift
        CF_INVALIDATION_PATHS=$1
        shift
        ;;
      --debug)
        shift
        DEBUG=$1
        shift
        ;;
      --dfs|-d)
        shift
        DFS=1
        # Keep lock objects out of downlink syncs.
        AWS_CLI_SYNC_OPTIONS_DOWN=" --exclude \"*/.s3-sync2.lock\"$AWS_CLI_SYNC_OPTIONS_DOWN"
        ;;
      --dfs-lock-timeout|-t)
        shift
        DFS_LOCK_TIMEOUT=$1
        shift
        ;;
      --dfs-lock-wait|-w)
        shift
        DFS_LOCK_WAIT=$1
        shift
        ;;
      --dfs-uid-hostname)
        shift
        DFS_UID_HOSTNAME=1
        ;;
      --init-sync-down|-i)
        shift
        INIT_SYNC_DOWN=1
        ;;
      --init-sync-up|-u)
        shift
        INIT_SYNC_UP=1
        ;;
      --max-failures|-x)
        shift
        MAX_FAILURES=$1
        shift
        ;;
      --md5-skip-path|-s)
        shift
        MD5_SKIP_PATH="${1//\~/$HOME}"
        # Split on "|" only, so paths containing whitespace or glob
        # characters survive intact. Each path is prepended, so the resulting
        # option string lists them in reverse order of appearance.
        skip_paths=()
        IFS='|' read -r -a skip_paths <<< "$MD5_SKIP_PATH"
        for skip_path in "${skip_paths[@]}"; do
          [ -n "$skip_path" ] || continue
          MD5_NOT_PATH_OPT=" -not -path \"$skip_path/*\"$MD5_NOT_PATH_OPT"
        done
        shift
        ;;
      --only-down)
        shift
        ONLY_DOWN=1
        ;;
      --only-up)
        shift
        ONLY_UP=1
        ;;
      --poll|-p)
        shift
        POLL_INTERVAL=$1
        shift
        ;;
      --endpoint-url|--color|--profile|--region|--ca-bundle|--cli-read-timeout|--cli-connect-timeout)
        # Global aws CLI options that take a value.
        opt="$1"
        shift
        export AWS_CLI_OPTIONS=" $opt $1$AWS_CLI_OPTIONS"
        shift
        ;;
      --no-verify-ssl|--no-sign-request)
        # Global aws CLI flags without a value.
        export AWS_CLI_OPTIONS=" $1$AWS_CLI_OPTIONS"
        shift
        ;;
      --sync-opt-down-*)
        # --sync-opt-down-foo becomes --foo, applied to downlink syncs only.
        AWS_CLI_SYNC_OPTIONS_DOWN="$AWS_CLI_SYNC_OPTIONS_DOWN ${1/sync\-opt\-down\-/}"
        last_aws_option=down
        shift
        ;;
      --sync-opt-up-*)
        # --sync-opt-up-foo becomes --foo, applied to uplink syncs only.
        AWS_CLI_SYNC_OPTIONS_UP="$AWS_CLI_SYNC_OPTIONS_UP ${1/sync\-opt\-up\-/}"
        last_aws_option=up
        shift
        ;;
      *)
        if [ "$1" = "--output" ]; then
          print_msg "aws --output option is not supported and will be ignored" warning s3-sync2.sh "$LINENO"
          # Drop --output here; the shared shift below drops its value.
          shift
        elif [ -z "$LOCAL_PATH" ] && [ -d "$1" ]; then
          # First existing directory is taken as <LocalPath>.
          LOCAL_PATH=$1
        elif [ -z "$S3_URI" ] && [ "${1:0:5}" = "s3://" ]; then
          S3_URI=$1
          # Bucket is everything between "s3://" and the next "/".
          S3_BUCKET=${S3_URI:5}
          S3_BUCKET=${S3_BUCKET%%/*}
          if [ "$S3_URI" = "s3://$S3_BUCKET" ]; then
            DFS_LOCK_FILE='.s3-sync2.lock'
          else
            DFS_LOCK_FILE="${S3_URI/s3:\/\/$S3_BUCKET\//}/.s3-sync2.lock"
          fi
        elif [ "${1:0:2}" = "--" ]; then
          # Any other long option is passed through to aws s3 sync.
          AWS_CLI_SYNC_OPTIONS="$AWS_CLI_SYNC_OPTIONS $1"
          last_aws_option=
        elif [ "$1" ] && [ "$last_aws_option" = "down" ]; then
          AWS_CLI_SYNC_OPTIONS_DOWN="$AWS_CLI_SYNC_OPTIONS_DOWN \"$1\""
        elif [ "$1" ] && [ "$last_aws_option" = "up" ]; then
          AWS_CLI_SYNC_OPTIONS_UP="$AWS_CLI_SYNC_OPTIONS_UP \"$1\""
        elif [ "$1" ]; then
          AWS_CLI_SYNC_OPTIONS="$AWS_CLI_SYNC_OPTIONS \"$1\""
        fi
        shift
        ;;
    esac
  done
}
_s3_sync2_parse_args "$@"
# Full aws cli commands for downlink and uplink synchronization

#######################################
# Assemble the command strings used for downlink and uplink synchronization.
#
# The strings are executed later through eval, so LOCAL_PATH and S3_URI are
# shell-escaped with printf %q before being spliced in; otherwise a path that
# contains whitespace would be split into several arguments. %q leaves
# ordinary paths unchanged, so the command text is identical to the unescaped
# form for them. Empty values are left empty rather than rendered as ''.
#
# Globals read:    AWS_CLI_OPTIONS, AWS_CLI_SYNC_OPTIONS,
#                  AWS_CLI_SYNC_OPTIONS_DOWN, AWS_CLI_SYNC_OPTIONS_UP,
#                  LOCAL_PATH, S3_URI
# Globals written: AWS_CLI_CMD_SYNC_DOWN, AWS_CLI_CMD_SYNC_UP
#######################################
_s3_sync2_build_sync_cmds() {
  local local_path_q='' s3_uri_q=''
  [ -z "$LOCAL_PATH" ] || printf -v local_path_q '%q' "$LOCAL_PATH"
  [ -z "$S3_URI" ] || printf -v s3_uri_q '%q' "$S3_URI"
  AWS_CLI_CMD_SYNC_DOWN="aws$AWS_CLI_OPTIONS s3 sync $s3_uri_q $local_path_q$AWS_CLI_SYNC_OPTIONS$AWS_CLI_SYNC_OPTIONS_DOWN"
  AWS_CLI_CMD_SYNC_UP="aws$AWS_CLI_OPTIONS s3 sync $local_path_q $s3_uri_q$AWS_CLI_SYNC_OPTIONS$AWS_CLI_SYNC_OPTIONS_UP"
}
_s3_sync2_build_sync_cmds

# Dump the effective runtime configuration at debug level.
print_msg "Initiating s3-sync2.sh [PID=$$] with the following runtime options:
[LOCAL_PATH=$LOCAL_PATH]
[S3_URI=$S3_URI]
[S3_BUCKET=$S3_BUCKET]
[CF_DISTRIBUTION_ID=$CF_DISTRIBUTION_ID]
[CF_INVALIDATION_PATHS=$CF_INVALIDATION_PATHS]
[DEBUG=$DEBUG]
[DFS=$DFS]
[DFS_LOCK_FILE=$DFS_LOCK_FILE]
[DFS_LOCK_TIMEOUT=$DFS_LOCK_TIMEOUT]
[DFS_LOCK_WAIT=$DFS_LOCK_WAIT]
[INIT_SYNC_DOWN=$INIT_SYNC_DOWN]
[INIT_SYNC_UP=$INIT_SYNC_UP]
[MAX_FAILURES=$MAX_FAILURES]
[MD5_NOT_PATH_OPT=$MD5_NOT_PATH_OPT]
[MD5_SKIP_PATH=$MD5_SKIP_PATH]
[POLL_INTERVAL=$POLL_INTERVAL]
[AWS_CLI_OPTIONS=$AWS_CLI_OPTIONS]
[AWS_CLI_CMD_SYNC_DOWN=$AWS_CLI_CMD_SYNC_DOWN]
[AWS_CLI_CMD_SYNC_UP=$AWS_CLI_CMD_SYNC_UP]" debug s3-sync2.sh "$LINENO"
# Run the startup validation/initialization routine before anything is synced
startup
# When polling is enabled (POLL_INTERVAL > 0), route SIGINT and SIGTERM to the
# cleanup handler (noted in the original source as setting KILLED=1) instead
# of letting them terminate the script immediately
if [ "$POLL_INTERVAL" -gt 0 ]; then
  trap cleanup SIGINT SIGTERM
fi
# Initialization synchronizations

#######################################
# Run the optional one-off synchronization requested at start-up.
#
# --init-sync-down (unless --only-up) runs the downlink command; otherwise
# --init-sync-up (unless --only-down) runs the uplink command. In DFS mode the
# uplink is wrapped in the distributed lock, and the lock is released whether
# or not the uplink command succeeded so a failed start-up does not leave
# other nodes waiting for the lock timeout.
# NOTE(review): when both --init-sync-down and --init-sync-up are given only
# the downlink runs (if/elif) - confirm this is intended.
#
# Globals read: INIT_SYNC_DOWN, INIT_SYNC_UP, ONLY_DOWN, ONLY_UP, DFS,
#               AWS_CLI_CMD_SYNC_DOWN, AWS_CLI_CMD_SYNC_UP, S3_BUCKET,
#               DFS_LOCK_FILE, DFS_LOCK_TIMEOUT, DFS_LOCK_WAIT, DFS_UID
# Exits:        with status 1 when a requested synchronization fails or the
#               distributed lock cannot be obtained
#######################################
_s3_sync2_init_sync() {
  local up_status=0
  # Perform downlink initialization if --init-sync-down set
  if [ "$INIT_SYNC_DOWN" -eq 1 ] && [ "$ONLY_UP" -ne 1 ]; then
    print_msg "Invoking downlink synchronization for --init-sync-down option" debug s3-sync2.sh "$LINENO"
    if eval "$AWS_CLI_CMD_SYNC_DOWN"; then
      print_msg "Downlink synchronization successful" debug s3-sync2.sh "$LINENO"
    else
      print_msg "Downlink synchronization failed" error s3-sync2.sh "$LINENO"
      exit 1
    fi
  # Perform uplink initialization if --init-sync-up set
  elif [ "$INIT_SYNC_UP" -eq 1 ] && [ "$ONLY_DOWN" -ne 1 ]; then
    print_msg "Invoking uplink synchronization for --init-sync-up option" debug s3-sync2.sh "$LINENO"
    # The lock helper is called directly (no eval) so every argument arrives
    # intact, exactly as it does for the unlock helper.
    if [ "$DFS" -ne 1 ] || s3_distributed_lock "$S3_BUCKET" "$DFS_LOCK_FILE" "$DFS_LOCK_TIMEOUT" "$DFS_LOCK_WAIT" "$DFS_UID"; then
      # The command string carries embedded quoting, hence eval.
      eval "$AWS_CLI_CMD_SYNC_UP" || up_status=$?
      # Release the lock on success and on failure alike.
      if [ "$DFS" -eq 1 ]; then
        s3_distributed_unlock "$S3_BUCKET" "$DFS_LOCK_FILE" "$DFS_UID"
      fi
      if [ "$up_status" -ne 0 ]; then
        print_msg "Uplink synchronization failed" error s3-sync2.sh "$LINENO"
        exit 1
      fi
      print_msg "Uplink synchronization successful" debug s3-sync2.sh "$LINENO"
    else
      print_msg "Uplink synchronization failed - unable to obtain DFS distributed lock" error s3-sync2.sh "$LINENO"
      exit 1
    fi
  fi
}
_s3_sync2_init_sync
# Use infinite loop to invoke synchronization every $POLL_INTERVAL seconds
interval=0
# Direction handed to s3_sync2: "down" or "up" when restricted by
# --only-down / --only-up, otherwise empty.
direction=
if [ "$ONLY_DOWN" -eq 1 ]; then
  direction=down
elif [ "$ONLY_UP" -eq 1 ]; then
  direction=up
fi
while :; do
  interval=$(( interval + 1 ))
  if [ "$POLL_INTERVAL" -eq 0 ]; then
    # --poll 0: nothing to poll, leave after the initial work done above
    print_msg "exiting due to --poll 0" debug s3-sync2.sh "$LINENO"
    exit
  elif [ "$KILLED" -eq 1 ]; then
    # Shutdown requested: push local state up once more (unless this node
    # only syncs down); the bare exit then carries that sync's status
    if [ "$ONLY_DOWN" -ne 1 ]; then
      print_msg "SIGINT or SIGTERM signal received - attempting 1 final uplink synchronization and exiting" warning s3-sync2.sh "$LINENO"
      s3_sync2 up
    fi
    exit
  elif ! s3_sync2 "$direction"; then
    sync_failures=$(( sync_failures + 1 ))
    print_msg "Synchronization failed [#$sync_failures of max $MAX_FAILURES]" error s3-sync2.sh "$LINENO"
    # MAX_FAILURES of 0 means retry forever
    if [ "$MAX_FAILURES" -gt 0 ] && [ "$sync_failures" -ge "$MAX_FAILURES" ]; then
      print_msg "Max failures threshold $MAX_FAILURES reached - exiting" error s3-sync2.sh "$LINENO"
      exit 1
    fi
  else
    sync_failures=0
    print_msg "Successfully invoked synchronization [#$interval] - sleeping $POLL_INTERVAL secs before next synchronization" debug s3-sync2.sh "$LINENO"
  fi
  # bash runs a trap handler only after the command in the foreground has
  # finished, so a signal delivered while s3_sync2 was running has already
  # set KILLED by now. Skip the sleep in that case so shutdown is handled on
  # the next iteration instead of a full POLL_INTERVAL later.
  if ! [ "$KILLED" -eq 1 ]; then
    # Sleep in the background and wait on it so that a trapped signal
    # interrupts the wait immediately.
    sleep "$POLL_INTERVAL" &
    wait
  fi
done