Skip to content

Commit e2a0b62

Browse files
authored
Implement automated ingestion with dual-path routing and multi-service CI/CD
2 parents e2b61cb + 82a255d commit e2a0b62

File tree

18 files changed

+515
-47
lines changed

18 files changed

+515
-47
lines changed

.dockerignore

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
1-
.git
2-
.github
3-
.vscode
1+
.git/
2+
.gitignore
3+
.gitattributes
4+
.github/
45

5-
runtime
6-
__pycache__
7-
*.pyc
8-
*.pyo
9-
*.pyd
6+
.venv/
7+
venv/
8+
env/
109

11-
.pytest_cache
12-
.ruff_cache
10+
__pycache__/
11+
*.py[cod]
12+
*$py.class
13+
.pytest_cache/
14+
.coverage
15+
.coveragerc
1316

14-
tests
15-
docs
17+
.vscode/
18+
.idea/
19+
pyrightconfig.json
1620

17-
data/published
18-
runtime
19-
data/run_artifact
20-
data/raw
21+
data/
22+
assets/
23+
docs/
24+
tests/
25+
26+
dev-requirements.txt

.github/workflows/cd-extract.yml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
name: Deploy Data Extract Job
2+
3+
# Run only when changes under data_extract/ are pushed (merged) to main
4+
on:
5+
push:
6+
branches:
7+
- main
8+
paths:
9+
- 'data_extract/**'
10+
11+
permissions:
12+
contents: 'read'
13+
id-token: 'write'
14+
15+
# Service configurations in GCP
16+
env:
17+
REGION: us-east1
18+
REPO_NAME: operations-artifacts
19+
IMAGE_NAME: extractor
20+
JOB_NAME: drive-extractor
21+
22+
jobs:
23+
build-and-deploy:
24+
runs-on: ubuntu-latest
25+
steps:
26+
- name: Checkout Code
27+
uses: actions/checkout@v4
28+
29+
# Authenticate to GCP using the Workload Identity provider and deployer service account
30+
- name: Authenticate to Google Cloud
31+
id: auth
32+
uses: google-github-actions/auth@v2
33+
with:
34+
workload_identity_provider: ${{ secrets.WIF_PROVIDER }}
35+
service_account: ${{ secrets.DEPLOYER_SA_EMAIL }}
36+
37+
- name: Set up Cloud SDK
38+
uses: google-github-actions/setup-gcloud@v2
39+
40+
# Configure Docker to authenticate against the regional Artifact Registry host
41+
- name: Configure Docker Auth
42+
run: gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet
43+
44+
45+
- name: Build and Push Docker Image
46+
run: |
47+
# Full image path, tagged with the Git commit SHA (github.sha)
48+
IMAGE_PATH="${{ env.REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
49+
50+
# Build with the repository root as the context so the Dockerfile's COPY paths resolve
51+
docker build -f data_extract/Dockerfile -t $IMAGE_PATH .
52+
53+
# Push to Artifact Registry
54+
docker push $IMAGE_PATH
55+
56+
57+
- name: Deploy to Cloud Run Job
58+
run: |
59+
IMAGE_PATH="${{ env.REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
60+
61+
gcloud run jobs deploy ${{ env.JOB_NAME }} \
62+
--image $IMAGE_PATH \
63+
--region ${{ env.REGION }} \
64+
--service-account ${{ secrets.EXTRACTOR_SA_EMAIL }}

.github/workflows/cd-pipeline.yml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
name: Deploy Data Pipeline Job
2+
3+
# Run only when changes under data_pipeline/ are pushed (merged) to main
4+
on:
5+
push:
6+
branches:
7+
- main
8+
paths:
9+
- 'data_pipeline/**'
10+
11+
permissions:
12+
contents: 'read'
13+
id-token: 'write'
14+
15+
# Service configurations in GCP
16+
env:
17+
REGION: us-east1
18+
REPO_NAME: operations-artifacts
19+
IMAGE_NAME: pipeline
20+
JOB_NAME: operations-pipeline
21+
22+
jobs:
23+
build-and-deploy:
24+
runs-on: ubuntu-latest
25+
steps:
26+
- name: Checkout Code
27+
uses: actions/checkout@v4
28+
29+
# Authenticate to GCP using the Workload Identity provider and deployer service account
30+
- name: Authenticate to Google Cloud
31+
id: auth
32+
uses: google-github-actions/auth@v2
33+
with:
34+
workload_identity_provider: ${{ secrets.WIF_PROVIDER }}
35+
service_account: ${{ secrets.DEPLOYER_SA_EMAIL }}
36+
37+
- name: Set up Cloud SDK
38+
uses: google-github-actions/setup-gcloud@v2
39+
40+
# Configure Docker to authenticate against the regional Artifact Registry host
41+
- name: Configure Docker Auth
42+
run: gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet
43+
44+
45+
- name: Build and Push Docker Image
46+
run: |
47+
# Full image path, tagged with the Git commit SHA (github.sha)
48+
IMAGE_PATH="${{ env.REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
49+
50+
# Build with the repository root as the context so the Dockerfile's COPY commands find the files
51+
docker build -f data_pipeline/Dockerfile -t $IMAGE_PATH .
52+
53+
# Push to Artifact Registry
54+
docker push $IMAGE_PATH
55+
56+
57+
- name: Deploy to Cloud Run Job
58+
run: |
59+
IMAGE_PATH="${{ env.REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
60+
61+
gcloud run jobs deploy ${{ env.JOB_NAME }} \
62+
--image $IMAGE_PATH \
63+
--region ${{ env.REGION }} \
64+
--service-account ${{ secrets.PIPELINE_SA_EMAIL }}

.github/workflows/ci.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: CI - Code Quality & Tests
22

33
on:
44
pull_request:
@@ -19,21 +19,21 @@ jobs:
1919
with:
2020
python-version: "3.11"
2121

22-
- name: Install dependencies
22+
- name: Install Dependencies
2323
run: |
2424
python -m pip install --upgrade pip
25-
pip install -r requirements.txt
25+
pip install -r dev-requirements.txt
2626
2727
- name: Set PYTHONPATH
2828
run: echo "PYTHONPATH=$PWD" >> $GITHUB_ENV
2929

30-
- name: Check formatting
30+
- name: Check Code Formatting
3131
run: black --check .
3232

33-
- name: Lint with ruff
33+
- name: Ruff Linting
3434
run: ruff check .
3535

36-
- name: Run tests with coverage enforcement
36+
- name: Run Tests with Coverage
3737
run: |
3838
pytest \
3939
--cov=data_pipeline \

.vscode/settings.json

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
],
55
"python.testing.unittestEnabled": false,
66
"python.testing.pytestEnabled": true,
7-
"python-envs.pythonProjects": [
8-
{
9-
"path": "",
10-
"envManager": "ms-python.python:conda",
11-
"packageManager": "ms-python.python:conda"
12-
}
13-
]
7+
"python-envs.defaultEnvManager": "ms-python.python:conda",
8+
"python-envs.defaultPackageManager": "ms-python.python:conda"
149
}

Dockerfile

Lines changed: 0 additions & 11 deletions
This file was deleted.
-110 KB
Loading
1.35 KB
Loading

data_extract/Dockerfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
FROM python:3.11-slim
2+
3+
ENV PYTHONDONTWRITEBYTECODE=1
4+
ENV PYTHONUNBUFFERED=1
5+
6+
WORKDIR /app
7+
8+
COPY data_extract/requirements.txt .
9+
10+
RUN pip install --no-cache-dir -r requirements.txt
11+
12+
COPY data_extract/ ./data_extract/
13+
14+
CMD ["python", "-m", "data_extract.run_extract"]

data_extract/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)