From 667d26dc581c29f91b8c96bf7d8ad90590332315 Mon Sep 17 00:00:00 2001 From: Adithyrao1 Date: Wed, 10 Jun 2026 07:24:15 +0000 Subject: [PATCH 01/10] Initialising --- Untitled.ipynb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Untitled.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 00000000..f695ebe1 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,22 @@ +{ + "metadata": { + "kernelspec": { + "display_name": "Jupyter Notebook", + "name": "jupyter" + } + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [ + { + "cell_type": "code", + "id": "29819d68-d8a2-4f82-b667-e7fbd790f073", + "metadata": { + "language": "python" + }, + "source": "", + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 71397988a71c40177f3bff885aea05e693380c2a Mon Sep 17 00:00:00 2001 From: Adithyrao1 Date: Wed, 10 Jun 2026 10:00:22 +0000 Subject: [PATCH 02/10] Implemented missing data fix --- .../hamburg_weather/notebooks/debugging.ipynb | 123 +++++++++++------- 1 file changed, 75 insertions(+), 48 deletions(-) diff --git a/module-1/hamburg_weather/notebooks/debugging.ipynb b/module-1/hamburg_weather/notebooks/debugging.ipynb index 0605b659..f85b332e 100644 --- a/module-1/hamburg_weather/notebooks/debugging.ipynb +++ b/module-1/hamburg_weather/notebooks/debugging.ipynb @@ -1,50 +1,77 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "3775908f-ca36-4846-8f38-5adca39217f2", - "metadata": { - "language": "sql", - "name": "cell1" - }, - "outputs": [], - "source": [ - "-- Set context\n", - "USE ROLE accountadmin;\n", - "USE DATABASE staging_tasty_bytes;\n", - "USE SCHEMA raw_pos;\n", - "USE WAREHOUSE compute_wh;" - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3775908f-ca36-4846-8f38-5adca39217f2", + "metadata": { + "language": "sql", + "name": "cell1", + "resultVariableName": "dataframe_1" + }, + "outputs": [], + "source": "%%sql -r dataframe_1\n-- Set context\nUSE ROLE accountadmin;\nUSE DATABASE staging_tasty_bytes;\nUSE SCHEMA raw_pos;\nUSE WAREHOUSE compute_wh;" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfd78d46-46f3-40a7-be43-4ab79cf72a9f", + "metadata": { + "language": "sql", + "name": "cell2", + "resultVariableName": "dataframe_2" + }, + "outputs": [], + "source": "%%sql -r dataframe_2\nSHOW TABLES IN staging_tasty_bytes.raw_pos;" + }, + { + "id": "bd4d16a5-b00c-4f2e-ac19-b3cc64dc0c98", + "cell_type": "code", + "metadata": { + "resultVariableName": "dataframe_4", + "language": "sql" + }, + "source": "CREATE OR ALTER TABLE STAGING_tasty_bytes.raw_pos.country\n(\n country_id NUMBER(18,0),\n country VARCHAR(16777216),\n iso_currency VARCHAR(3),\n iso_country VARCHAR(2),\n city VARCHAR(16777216),\n city_population VARCHAR(16777216),\n city_id NUMBER(19,0)\n);", + "outputs": [], + "execution_count": null + }, + { + "id": "4b6c9d8d-6981-41df-870f-b493aa90403f", + "cell_type": "code", + "metadata": { + "resultVariableName": "dataframe_3", + "language": "sql" + }, + "source": "-- country table load\nCOPY INTO STAGING_tasty_bytes.raw_pos.country\n(\n country_id,\n country,\n iso_currency,\n iso_country,\n city_id,\n city,\n city_population\n)\nFROM @STAGING_tasty_bytes.public.s3load/raw_pos/country/;", + "outputs": [], + "execution_count": null + }, + { + "id": "d8221689-4b7f-42b1-b9f2-667d6426e234", + "cell_type": "code", + "metadata": { + "resultVariableName": "dataframe_5", + "language": "sql" + }, + "source": "%%sql -r dataframe_5\n", + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Streamlit Notebook", + "name": "streamlit" + }, + "lastEditStatus": { + "authorEmail": "gilberto.hernandez@snowflake.com", + "authorId": "6002244633419", + "authorName": "GILBERTO", + "lastEditTime": 1741667434593, + "notebookId": "sd4b3qffhw3yzmzwpcr2", + "sessionId": "2ad2d56d-f367-4eba-abad-28f4dc5b5f95" + } }, - { - "cell_type": "code", - "execution_count": null, - "id": "dfd78d46-46f3-40a7-be43-4ab79cf72a9f", - "metadata": { - "language": "sql", - "name": "cell2" - }, - "outputs": [], - "source": [ - "SHOW TABLES IN staging_tasty_bytes.raw_pos;" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Streamlit Notebook", - "name": "streamlit" - }, - "lastEditStatus": { - "authorEmail": "gilberto.hernandez@snowflake.com", - "authorId": "6002244633419", - "authorName": "GILBERTO", - "lastEditTime": 1741667434593, - "notebookId": "sd4b3qffhw3yzmzwpcr2", - "sessionId": "2ad2d56d-f367-4eba-abad-28f4dc5b5f95" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From c996aa7ca4410a95a0846fab549968a5dbb95c9c Mon Sep 17 00:00:00 2001 From: Adithya Nanuvala Date: Wed, 10 Jun 2026 15:51:49 +0530 Subject: [PATCH 03/10] made changes missing data --- .../pipeline/data/load_tasty_bytes.sql | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql b/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql index 4565f538..a8c8dbc0 100644 --- a/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql +++ b/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql @@ -64,7 +64,8 @@ CREATE TABLE {{env}}_tasty_bytes.raw_pos.country iso_currency VARCHAR(3), iso_country VARCHAR(2), city VARCHAR(16777216), - city_population VARCHAR(16777216) + city_population VARCHAR(16777216), + city_id NUMBER(19,0) ); @@ -293,17 +294,17 @@ USE WAREHOUSE demo_build_wh; -- country table load --- COPY INTO {{env}}_tasty_bytes.raw_pos.country --- ( --- country_id, --- country, --- iso_currency, --- iso_country, --- city_id, --- city, --- city_population --- ) --- FROM @{{env}}_tasty_bytes.public.s3load/raw_pos/country/; +COPY INTO {{env}}_tasty_bytes.raw_pos.country +( + country_id, + country, + iso_currency, + iso_country, + city_id, + city, + city_population +) +FROM @{{env}}_tasty_bytes.public.s3load/raw_pos/country/; -- franchise table load From 3b14dadc85f784f85b3ab0ac88afcc709a2a2a69 Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 10:25:59 +0530 Subject: [PATCH 04/10] Add GitHub Actions workflow for data environment deployment --- .github/workflows/main.yml | 67 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..c2827b0b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,67 @@ +name: Deploy Templatized Data Environment + +on: + pull_request: + types: [closed] + branches: + - staging + - main + push: + branches: + - staging + - main + +jobs: + deploy_data_environment: + # Only run when PR is merged or on direct push to environment branches + if: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.merged == true) + runs-on: ubuntu-latest + + env: + # Read connection secret + SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} + SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} + SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} + + steps: + # Checkout step is necessary to access repository files + - name: Checkout repository + uses: actions/checkout@v4 + + # Determine environment based on target branch + - name: Set environment + id: set_env + run: | + TARGET_BRANCH="${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}" + + if [[ $TARGET_BRANCH == "staging" ]]; then + echo "DEPLOY_ENV=staging" >> $GITHUB_ENV + echo "Environment set to staging" + elif [[ $TARGET_BRANCH == "main" ]]; then + echo "DEPLOY_ENV=prod" >> $GITHUB_ENV + echo "Environment set to production" + else + echo "Unexpected branch: $TARGET_BRANCH" + exit 1 + fi + + # Install Snowflake CLI GitHub Action and point to config file + - name: Install SnowflakeCLI + uses: snowflakedb/snowflake-cli-action@v1.5 + with: + cli-version: "latest" + default-config-file-path: "config.toml" + + # Fetch latest changes from the repository to Snowflake + - name: Fetch latest changes to Snowflake + run: snow git fetch course_repo.public.advanced_data_engineering_snowflake + + # Deploy templates to the data environment - + - name: Deploy templates to data environment + run: | + # Export TARGET_BRANCH for use in this step + echo "Using branch: ${GITHUB_REF_NAME}" + snow git execute @advanced_data_engineering_snowflake/branches/{GITHUB_REF_NAME}/module-1/hamburg_weather/pipeline/data/ \ + -D "env='${{ ENV.DEPLOY_ENV }}'" \ + --database=COURSE_REPO \ + --schema=PUBLIC From a4eddc0dd7dc27dafa7208478a7b8469de8f0ff7 Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 10:55:06 +0530 Subject: [PATCH 05/10] Refactor GitHub Actions workflow for Snowflake Updated Snowflake CLI action version and improved comments. --- .github/workflows/main.yml | 49 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c2827b0b..2ee86135 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,50 +18,57 @@ jobs: runs-on: ubuntu-latest env: - # Read connection secret + # Snowflake connection secrets SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} steps: - # Checkout step is necessary to access repository files + # Checkout repository - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - # Determine environment based on target branch + # Determine deployment environment based on target branch - name: Set environment id: set_env + shell: bash run: | TARGET_BRANCH="${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}" - - if [[ $TARGET_BRANCH == "staging" ]]; then + + if [[ "$TARGET_BRANCH" == "staging" ]]; then echo "DEPLOY_ENV=staging" >> $GITHUB_ENV echo "Environment set to staging" - elif [[ $TARGET_BRANCH == "main" ]]; then + + elif [[ "$TARGET_BRANCH" == "main" ]]; then echo "DEPLOY_ENV=prod" >> $GITHUB_ENV echo "Environment set to production" + else echo "Unexpected branch: $TARGET_BRANCH" exit 1 fi - # Install Snowflake CLI GitHub Action and point to config file - - name: Install SnowflakeCLI - uses: snowflakedb/snowflake-cli-action@v1.5 + # Install Snowflake CLI + - name: Install Snowflake CLI + uses: snowflakedb/snowflake-cli-action@v1 with: - cli-version: "latest" - default-config-file-path: "config.toml" + cli-version: latest + default-config-file-path: config.toml - # Fetch latest changes from the repository to Snowflake + # Fetch latest repository changes into Snowflake - name: Fetch latest changes to Snowflake - run: snow git fetch course_repo.public.advanced_data_engineering_snowflake + run: | + snow git fetch course_repo.public.advanced_data_engineering_snowflake - # Deploy templates to the data environment - + # Execute SQL templates in the target environment - name: Deploy templates to data environment + shell: bash run: | - # Export TARGET_BRANCH for use in this step - echo "Using branch: ${GITHUB_REF_NAME}" - snow git execute @advanced_data_engineering_snowflake/branches/{GITHUB_REF_NAME}/module-1/hamburg_weather/pipeline/data/ \ - -D "env='${{ ENV.DEPLOY_ENV }}'" \ - --database=COURSE_REPO \ - --schema=PUBLIC + echo "Using Git branch: ${GITHUB_REF_NAME}" + echo "Deploying to environment: ${DEPLOY_ENV}" + + snow git execute \ + @advanced_data_engineering_snowflake/branches/${GITHUB_REF_NAME}/module-1/hamburg_weather/pipeline/data/ \ + -D "env='${DEPLOY_ENV}'" \ + --database COURSE_REPO \ + --schema PUBLIC From 5edda803b7c79b9c77dcff2bf7cc0d674038cf80 Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:17:40 +0530 Subject: [PATCH 06/10] Upgrade Snowflake CLI action to version 2 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2ee86135..97ee38e7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -50,7 +50,7 @@ jobs: # Install Snowflake CLI - name: Install Snowflake CLI - uses: snowflakedb/snowflake-cli-action@v1 + uses: snowflakedb/snowflake-cli-action@v2 with: cli-version: latest default-config-file-path: config.toml From 4894e31df14a9c56d848f0ba27793fb23543a4ac Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:18:24 +0530 Subject: [PATCH 07/10] Update main.yml From 1056987fa4d28e061667c9a32d318ad0e179c24d Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:32:36 +0530 Subject: [PATCH 08/10] Update GitHub Actions workflow for Snowflake deployment --- .github/workflows/main.yml | 47 ++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 97ee38e7..fb4cc62d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,10 +2,12 @@ name: Deploy Templatized Data Environment on: pull_request: - types: [closed] + types: + - closed branches: - staging - main + push: branches: - staging @@ -13,12 +15,15 @@ on: jobs: deploy_data_environment: - # Only run when PR is merged or on direct push to environment branches + # Run only for direct pushes or merged PRs if: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.merged == true) runs-on: ubuntu-latest env: - # Snowflake connection secrets + # Tell Snowflake CLI where config.toml exists + SNOWFLAKE_HOME: ${{ github.workspace }} + + # Snowflake connection credentials from GitHub secrets SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} @@ -28,9 +33,21 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 - # Determine deployment environment based on target branch - - name: Set environment - id: set_env + # Setup Python for Snowflake CLI installation + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + # Install Snowflake CLI + - name: Install Snowflake CLI + run: | + python -m pip install --upgrade pip + pip install snowflake-cli + snow --version + + # Determine deployment environment + - name: Set deployment environment shell: bash run: | TARGET_BRANCH="${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}" @@ -48,24 +65,24 @@ jobs: exit 1 fi - # Install Snowflake CLI - - name: Install Snowflake CLI - uses: snowflakedb/snowflake-cli-action@v2 - with: - cli-version: latest - default-config-file-path: config.toml + # Verify that Snowflake CLI can see the config file + - name: Verify Snowflake configuration + run: | + echo "SNOWFLAKE_HOME=${SNOWFLAKE_HOME}" + ls -la ${SNOWFLAKE_HOME} + cat ${SNOWFLAKE_HOME}/config.toml | head -20 - # Fetch latest repository changes into Snowflake + # Fetch latest Git repository changes into Snowflake - name: Fetch latest changes to Snowflake run: | snow git fetch course_repo.public.advanced_data_engineering_snowflake - # Execute SQL templates in the target environment + # Execute the templated SQL files - name: Deploy templates to data environment shell: bash run: | echo "Using Git branch: ${GITHUB_REF_NAME}" - echo "Deploying to environment: ${DEPLOY_ENV}" + echo "Deploying environment: ${DEPLOY_ENV}" snow git execute \ @advanced_data_engineering_snowflake/branches/${GITHUB_REF_NAME}/module-1/hamburg_weather/pipeline/data/ \ From 625bd21acd9dcb27e170c0a866f1aefd53ba1958 Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:40:47 +0530 Subject: [PATCH 09/10] Fix permissions for Snowflake config.toml Added step to fix permissions for Snowflake config.toml file. --- .github/workflows/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fb4cc62d..a2ebc266 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -72,6 +72,11 @@ jobs: ls -la ${SNOWFLAKE_HOME} cat ${SNOWFLAKE_HOME}/config.toml | head -20 + # Fix config.toml permissions + - name: Fix Snowflake config permissions + run: | + chmod 0600 ${SNOWFLAKE_HOME}/config.toml + # Fetch latest Git repository changes into Snowflake - name: Fetch latest changes to Snowflake run: | From efd82a54a0d956a7f4ebd6da82c9fa957476394f Mon Sep 17 00:00:00 2001 From: Adithyrao1 <121217911+Adithyrao1@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:42:59 +0530 Subject: [PATCH 10/10] Update main.yml