diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..e7413e0c
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,75 @@
+name: Deploy Templatized Data Environment
+
+# A merged pull request produces a push to its base branch, so the push
+# trigger alone covers both merges and direct pushes. Listening for
+# `pull_request: closed` as well would deploy every merge twice.
+on:
+  push:
+    branches:
+      - staging
+      - main
+
+# The job only needs to read the repository contents.
+permissions:
+  contents: read
+
+# Serialize deployments per branch so two runs never modify the same
+# Snowflake environment at the same time.
+concurrency:
+  group: deploy-${{ github.ref_name }}
+  cancel-in-progress: false
+
+jobs:
+  deploy_data_environment:
+    runs-on: ubuntu-latest
+
+    env:
+      # Snowflake connection credentials, supplied from repository secrets
+      SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
+      SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
+      SNOWFLAKE_CONNECTIONS_ADVANCED_DATA_ENGINEERING_SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
+
+    steps:
+      # Checkout step is necessary to access repository files
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Map the pushed branch to the deployment environment
+      - name: Set environment
+        id: set_env
+        run: |
+          case "${GITHUB_REF_NAME}" in
+            staging)
+              echo "DEPLOY_ENV=staging" >> "$GITHUB_ENV"
+              echo "Environment set to staging"
+              ;;
+            main)
+              echo "DEPLOY_ENV=prod" >> "$GITHUB_ENV"
+              echo "Environment set to production"
+              ;;
+            *)
+              echo "Unexpected branch: ${GITHUB_REF_NAME}"
+              exit 1
+              ;;
+          esac
+
+      # Install Snowflake CLI GitHub Action and point to config file
+      - name: Install SnowflakeCLI
+        uses: snowflakedb/snowflake-cli-action@v1.5
+        with:
+          cli-version: "latest"
+          default-config-file-path: "config.toml"
+
+      # Fetch latest changes from the repository to Snowflake
+      - name: Fetch latest changes to Snowflake
+        run: snow git fetch course_repo.public.advanced_data_engineering_snowflake
+
+      # Deploy templates to the data environment
+      - name: Deploy templates to data environment
+        run: |
+          # GITHUB_REF_NAME is the pushed branch (staging or main)
+          echo "Using branch: ${GITHUB_REF_NAME}"
+          snow git execute @advanced_data_engineering_snowflake/branches/${GITHUB_REF_NAME}/module-1/hamburg_weather/pipeline/data/ \
+            -D "env='${{ env.DEPLOY_ENV }}'" \
+            --database=COURSE_REPO \
+            --schema=PUBLIC
diff --git a/README.md b/README.md index 8d03450e..c9cd9ea3 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,71 @@ All of the code that you need to successfully complete the course is within this * **module-2** – Corresponds to "Module 2: Observability with Snowflake" in the course. -The course instructor will also be sure to reference the exact folder and name of the file to use throughout the course, so that you can follow along. -#### Reporting issues or errata -If you encounter technical issues with this code as you complete the course (i.e. typos, missing code, broken links, etc.), please report those issues in the course through Coursera. Ensure the issue contains sufficient detail so that it can be properly addressed. 
+### Module 1 - Steps followed + +```text + git remote set-url origin https://github.com/nilanka-weeraman/advanced-data-engineering-snowflake + +------------------------------------------------------ +-- correct the marketplace fetched data table names +------------------------------------------------------ +``` + +#### create git PAT secret, git api integration & setup repository on Snowflake IDE + +```text + +USE ROLE accountadmin; +CREATE DATABASE course_repo; +USE SCHEMA public; +-- Create credentials +CREATE OR REPLACE SECRET course_repo.public.github_pat + TYPE = password + USERNAME = 'nilanka-weeraman' + PASSWORD = 'xxxxx'; + +-- Create the API integration +CREATE OR REPLACE API INTEGRATION git_api_integration + API_PROVIDER = git_https_api + API_ALLOWED_PREFIXES = ('https://github.com/nilanka-weeraman') -- URL to your GitHub profile + ALLOWED_AUTHENTICATION_SECRETS = (github_pat) + ENABLED = TRUE; + +-- Create the git repository object +CREATE OR REPLACE GIT REPOSITORY course_repo.public.advanced_data_engineering_snowflake + API_INTEGRATION = git_api_integration -- Name of the API integration defined above + ORIGIN = 'https://github.com/nilanka-weeraman/advanced-data-engineering-snowflake.git' -- Insert URL of forked repo + GIT_CREDENTIALS = course_repo.public.github_pat; + +-- List the git repositories +SHOW GIT REPOSITORIES; +``` + +### create staging & prod database objects, tables, views, procs, streams using script in git + +```text + snow git fetch advanced_data_engineering_snowflake --database=course_repo --schema=Public + + + snow git execute @advanced_data_engineering_snowflake/branches/main/module-1/hamburg_weather/pipeline/objects/ -D "env='STAGING'" + --database=course_repo --schema=Public + snow git execute @advanced_data_engineering_snowflake/branches/main/module-1/hamburg_weather/pipeline/objects/ -D "env='PROD'" + --database=course_repo --schema=Public +``` + +#### fix the issue in the load_tasty_bytes.sql file for DDL command + +```text + 
git fetch + git switch fix-missing-data-2 + git branch + git status + git add -p + git commit -m "fix missing data" + git push origin fix-missing-data-2 +``` + +#### create a pull request in git from 'fix-missing-data-2' to 'staging' + diff --git a/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql b/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql index 4565f538..d71324db 100644 --- a/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql +++ b/module-1/hamburg_weather/pipeline/data/load_tasty_bytes.sql @@ -57,14 +57,15 @@ raw zone table build -- country table build -- todo: complete table build -CREATE TABLE {{env}}_tasty_bytes.raw_pos.country +CREATE OR ALTER TABLE {{env}}_tasty_bytes.raw_pos.country ( country_id NUMBER(18,0), country VARCHAR(16777216), iso_currency VARCHAR(3), iso_country VARCHAR(2), city VARCHAR(16777216), - city_population VARCHAR(16777216) + city_population VARCHAR(16777216), + city_id NUMBER(19,0) ); @@ -293,17 +294,17 @@ USE WAREHOUSE demo_build_wh; -- country table load --- COPY INTO {{env}}_tasty_bytes.raw_pos.country --- ( --- country_id, --- country, --- iso_currency, --- iso_country, --- city_id, --- city, --- city_population --- ) --- FROM @{{env}}_tasty_bytes.public.s3load/raw_pos/country/; +COPY INTO {{env}}_tasty_bytes.raw_pos.country +( + country_id, + country, + iso_currency, + iso_country, + city_id, + city, + city_population +) +FROM @{{env}}_tasty_bytes.public.s3load/raw_pos/country/; -- franchise table load diff --git a/module-1/hamburg_weather/pipeline/objects/views/views.sql b/module-1/hamburg_weather/pipeline/objects/views/views.sql index a0f9d593..bcc90a99 100644 --- a/module-1/hamburg_weather/pipeline/objects/views/views.sql +++ b/module-1/hamburg_weather/pipeline/objects/views/views.sql @@ -7,8 +7,11 @@ SELECT TO_VARCHAR(hd.date_valid_std, 'YYYY-MM') AS yyyy_mm, pc.city_name AS city, c.country AS country_desc -FROM WEATHER_SOURCE_LLC_FROSTBYTE.onpoint_id.history_day hd -JOIN 
WEATHER_SOURCE_LLC_FROSTBYTE.onpoint_id.postal_codes pc +-- FROM WEATHER_SOURCE_LLC_FROSTBYTE.onpoint_id.history_day hd +-- JOIN WEATHER_SOURCE_LLC_FROSTBYTE.onpoint_id.postal_codes pc +FROM FROSTBYTE_WEATHERSOURCE.onpoint_id.history_day hd +JOIN FROSTBYTE_WEATHERSOURCE.onpoint_id.postal_codes pc + ON pc.postal_code = hd.postal_code AND pc.country = hd.country JOIN {{env}}_tasty_bytes.raw_pos.country c