-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdata_preprocessing_gcp_sa.yaml
More file actions
33 lines (25 loc) · 1 KB
/
data_preprocessing_gcp_sa.yaml
File metadata and controls
33 lines (25 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Data-preprocessing task that authenticates to GCP with a service-account key.
#
# What this task does:
#   1. Mounts the bucket named by DATA_BUCKET_NAME at /data.
#   2. Copies a service-account JSON key to /tmp/gcp-service-account.json.
#   3. setup: installs the Google Cloud CLI and activates the service account.
#   4. run: writes ten 10 MiB files of random data into /data to simulate
#      preprocessing output.

resources:
  # At least one CPU.
  cpus: 1+

envs:
  # Bucket mounted at /data (see file_mounts).
  DATA_BUCKET_NAME: sky-demo-data-test
  # Store backend for the bucket.
  # NOTE(review): the default is `s3` although this task authenticates to
  # GCP - confirm this is intended or override it at launch time.
  DATA_BUCKET_STORE_TYPE: s3
  # Local path of the service-account JSON key. Left as null here;
  # presumably it must be supplied when the task is launched - TODO confirm.
  GCP_SERVICE_ACCOUNT_JSON_PATH: null

file_mounts:
  /data:
    name: $DATA_BUCKET_NAME
    store: $DATA_BUCKET_STORE_TYPE
  # Destination path is what `gcloud auth activate-service-account` reads
  # in the setup step below.
  /tmp/gcp-service-account.json: $GCP_SERVICE_ACCOUNT_JSON_PATH

setup: |
  echo "Setting up dependencies for data preprocessing..."
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page under the tarball's name.
  curl --fail -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz
  tar -xf google-cloud-cli-linux-x86_64.tar.gz
  ./google-cloud-sdk/install.sh --quiet --path-update true
  # Reload the shell profile so the PATH update made by install.sh applies.
  source ~/.bashrc
  gcloud auth activate-service-account --key-file=/tmp/gcp-service-account.json

run: |
  echo "Running data preprocessing on behalf of $(gcloud auth list --filter=status:ACTIVE --format="value(account)")..."
  # Generate a few files with random data to simulate data preprocessing
  for i in {0..9}; do
    dd if=/dev/urandom of="/data/file_$i" bs=1M count=10
  done
  echo "Data preprocessing completed, wrote to $DATA_BUCKET_NAME"