Skip to content

Commit f9ced38

Browse files
committed
update starter notebook and 02 notebook
1 parent e88149e commit f9ced38

File tree

2 files changed

+112
-50
lines changed

2 files changed

+112
-50
lines changed

FRED_0_START.ipynb

Lines changed: 105 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,14 @@
1818
"metadata": {
1919
"collapsed": false,
2020
"language": "sql",
21-
"name": "cell5",
22-
"vscode": {
23-
"languageId": "sql"
24-
}
21+
"name": "cell5"
2522
},
2623
"outputs": [],
2724
"source": [
2825
"SET MY_USER = CURRENT_USER();\n",
2926
"\n",
30-
"SET GITHUB_SECRET_USERNAME = 'yohanmarkose';\n",
27+
"-- TODO: set GITHUB_SECRET_USERNAME and GITHUB_SECRET_PASSWORD below before running this cell\n",
28+
"SET GITHUB_SECRET_USERNAME = '';\n",
3129
"SET GITHUB_SECRET_PASSWORD = '';\n",
3230
"SET GITHUB_URL_PREFIX = 'https://github.com/BigDataIA-Spring2025-4';\n",
3331
"SET GITHUB_REPO_ORIGIN = 'https://github.com/BigDataIA-Spring2025-4/DAMG7245_Assignment03_Part02.git';"
@@ -40,10 +38,7 @@
4038
"metadata": {
4139
"collapsed": false,
4240
"language": "sql",
43-
"name": "cell6",
44-
"vscode": {
45-
"languageId": "sql"
46-
}
41+
"name": "cell6"
4742
},
4843
"outputs": [],
4944
"source": [
@@ -55,7 +50,7 @@
5550
"GRANT ROLE FRED_ROLE TO ROLE SYSADMIN;\n",
5651
"GRANT ROLE FRED_ROLE TO USER IDENTIFIER($MY_USER);\n",
5752
"\n",
58-
"-- GRANT CREATE INTEGRATION ON ACCOUNT TO ROLE FRED_ROLE;\n",
53+
"\n",
5954
"GRANT CREATE INTEGRATION ON ACCOUNT TO ROLE FRED_ROLE;\n",
6055
"GRANT EXECUTE TASK ON ACCOUNT TO ROLE FRED_ROLE;\n",
6156
"GRANT EXECUTE MANAGED TASK ON ACCOUNT TO ROLE FRED_ROLE;\n",
@@ -68,7 +63,11 @@
6863
"\n",
6964
"-- Warehouses\n",
7065
"CREATE OR REPLACE WAREHOUSE FRED_WH WAREHOUSE_SIZE = XSMALL, AUTO_SUSPEND = 300, AUTO_RESUME= TRUE;\n",
71-
"GRANT OWNERSHIP ON WAREHOUSE FRED_WH TO ROLE FRED_ROLE;"
66+
"GRANT OWNERSHIP ON WAREHOUSE FRED_WH TO ROLE FRED_ROLE;\n",
67+
"\n",
68+
"USE ROLE FRED_ROLE;\n",
69+
"USE WAREHOUSE FRED_WH;\n",
70+
"USE DATABASE FRED_DB;"
7271
]
7372
},
7473
{
@@ -77,27 +76,23 @@
7776
"id": "5aafe8cf-869d-457c-bb98-750af5ed18ff",
7877
"metadata": {
7978
"language": "sql",
80-
"name": "cell7",
81-
"vscode": {
82-
"languageId": "sql"
83-
}
79+
"name": "cell7"
8480
},
8581
"outputs": [],
8682
"source": [
8783
"-- ----------------------------------------------------------------------------\n",
8884
"-- Create the database level objects\n",
8985
"-- ----------------------------------------------------------------------------\n",
90-
"USE ROLE FRED_ROLE;\n",
91-
"USE WAREHOUSE FRED_WH;\n",
92-
"USE DATABASE FRED_DB;\n",
86+
"\n",
9387
"\n",
9488
"-- Schemas\n",
95-
"CREATE OR REPLACE SCHEMA INTEGRATIONS;\n",
96-
"CREATE OR REPLACE SCHEMA DEV_SCHEMA;\n",
97-
"CREATE OR REPLACE SCHEMA PROD_SCHEMA;\n",
98-
"CREATE OR REPLACE SCHEMA RAW_FRED;\n",
99-
"CREATE OR REPLACE SCHEMA HARMONIZED;\n",
100-
"CREATE OR REPLACE SCHEMA ANALYTICS;\n",
89+
"CREATE OR REPLACE SCHEMA INTEGRATIONS;\n",
90+
"CREATE OR REPLACE SCHEMA DEV_RAW_FRED;\n",
91+
"CREATE OR REPLACE SCHEMA DEV_HARMONIZED;\n",
92+
"CREATE OR REPLACE SCHEMA DEV_ANALYTICS;\n",
93+
"CREATE OR REPLACE SCHEMA PROD_RAW_FRED;\n",
94+
"CREATE OR REPLACE SCHEMA PROD_HARMONIZED;\n",
95+
"CREATE OR REPLACE SCHEMA PROD_ANALYTICS;\n",
10196
"\n",
10297
"USE SCHEMA INTEGRATIONS;\n",
10398
"\n",
@@ -134,10 +129,7 @@
134129
"id": "951a7222-11dd-40d9-9dca-6e3b2c4c44be",
135130
"metadata": {
136131
"language": "sql",
137-
"name": "cell8",
138-
"vscode": {
139-
"languageId": "sql"
140-
}
132+
"name": "cell8"
141133
},
142134
"outputs": [],
143135
"source": [
@@ -156,11 +148,9 @@
156148
"execution_count": null,
157149
"id": "013fa734-d447-492c-87d5-ed37406162c0",
158150
"metadata": {
151+
"collapsed": false,
159152
"language": "sql",
160-
"name": "cell9",
161-
"vscode": {
162-
"languageId": "sql"
163-
}
153+
"name": "cell9"
164154
},
165155
"outputs": [],
166156
"source": [
@@ -169,27 +159,100 @@
169159
"USE SCHEMA FRED_DB.INTEGRATIONS;\n",
170160
"\n",
171161
"EXECUTE IMMEDIATE FROM @FRED_GIT_REPO/branches/main/scripts/deploy_notebooks.sql\n",
172-
" USING (env => 'DEV', branch => 'dev');"
162+
" USING (env => 'DEV', schema1 => 'RAW_FRED', schema2 => 'HARMONIZED', schema3 => 'ANALYTICS', branch => 'main');"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"id": "b38e7cc2-c7da-4c30-9403-33b4ba6491f3",
169+
"metadata": {
170+
"collapsed": false,
171+
"language": "sql",
172+
"name": "cell3"
173+
},
174+
"outputs": [],
175+
"source": [
176+
"EXECUTE NOTEBOOK FRED_DB.DEV_ANALYTICS.\"DEV_03_analytics_table_processing\"()"
177+
]
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": null,
182+
"id": "e1b75120-92d4-4b16-9c70-edab9a798eaf",
183+
"metadata": {
184+
"language": "python",
185+
"name": "cell2"
186+
},
187+
"outputs": [],
188+
"source": [
189+
"# To call the sproc\n",
190+
"# from snowflake.snowpark.context import get_active_session\n",
191+
"# session = get_active_session()\n",
192+
"\n",
193+
"# session.use_schema(\"FRED_DB.DEV_ANALYTICS\")\n",
194+
"# # env = schema_name[:3]\n",
195+
"# session.sql(f\"CALL merge_fred_updates_sp('FRED_DB', 'DEV_ANALYTICS', 'DEV')\").collect()"
196+
]
197+
},
198+
{
199+
"cell_type": "code",
200+
"execution_count": null,
201+
"id": "3faa73a1-e3c2-4300-8321-a3b143de1966",
202+
"metadata": {
203+
"language": "python",
204+
"name": "cell10"
205+
},
206+
"outputs": [],
207+
"source": [
208+
"# session.sql(f\"CALL create_analytical_tables_sp('DEV_ANALYTICS', 'FRED_10Y_2Y')\").collect()"
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": null,
214+
"id": "395bc402-be4a-4211-8e8b-c537212d72f6",
215+
"metadata": {
216+
"language": "sql",
217+
"name": "cell11"
218+
},
219+
"outputs": [],
220+
"source": [
221+
"-- select * from FRED_10Y_2Y where OBSERVATION_DATE > '2025-02-24';"
222+
]
223+
},
224+
{
225+
"cell_type": "code",
226+
"execution_count": null,
227+
"id": "5a73c7d9-6eaf-47b0-8c0b-e88bbce1d3ab",
228+
"metadata": {
229+
"collapsed": false,
230+
"language": "sql",
231+
"name": "cell1"
232+
},
233+
"outputs": [],
234+
"source": [
235+
"-- USE ROLE ACCOUNTADMIN;\n",
236+
"-- DROP ROLE FRED_ROLE;\n",
237+
"-- DROP API INTEGRATION FRED_GITHUB_API_INTEGRATION;\n",
238+
"-- DROP DATABASE FRED_DB;\n",
239+
"-- DROP WAREHOUSE FRED_WH;\n",
240+
"\n"
173241
]
174242
}
175243
],
176244
"metadata": {
177245
"kernelspec": {
178-
"display_name": "snowflake-demo",
179-
"language": "python",
180-
"name": "python3"
181-
},
182-
"language_info": {
183-
"name": "python",
184-
"version": "3.10.13"
246+
"display_name": "Streamlit Notebook",
247+
"name": "streamlit"
185248
},
186249
"lastEditStatus": {
187250
"authorEmail": "markose.y@northeastern.edu",
188251
"authorId": "6690310009356",
189252
"authorName": "YOHANMARKOSE",
190-
"lastEditTime": 1740438928741,
253+
"lastEditTime": 1740688224407,
191254
"notebookId": "ym2jm7mboxidom324g4k",
192-
"sessionId": "388e1396-b394-4106-92a5-561f533e7449"
255+
"sessionId": "ae5ae0eb-d657-4df5-a1e6-c07ee55fdbb2"
193256
}
194257
},
195258
"nbformat": 4,

notebooks/02_raw_to_harmonized/02_raw_to_harmonized.ipynb

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,23 +54,22 @@
5454
"env = schema_name[:3]\n",
5555
"def create_fred_view(session):\n",
5656
" raw_fred_10Y = session.table(f\"{database_name}.{env}_RAW_FRED.FRED_DGS10Y\").select(\n",
57-
" F.to_date(F.col('\"observation_date\"')).alias(\"OBSERVATION_DATE\"),\n",
58-
" F.col('\"DGS10\"').cast(\"float\").alias(\"10Y_YIELD\")\n",
57+
" F.to_date(F.col('\"date\"')).alias(\"OBSERVATION_DATE\"),\n",
58+
" F.col('\"value\"').cast(\"float\").alias(\"10Y_YIELD\")\n",
5959
" )\n",
6060
" \n",
6161
" raw_fred_2Y = session.table(f\"{database_name}.{env}_RAW_FRED.FRED_DGS2Y\").select(\n",
62-
" F.to_date(F.col('\"observation_date\"')).alias(\"OBSERVATION_DATE\"),\n",
63-
" F.col('\"DGS2\"').cast(\"float\").alias(\"2Y_YIELD\")\n",
62+
" F.to_date(F.col('\"date\"')).alias(\"OBSERVATION_DATE\"),\n",
63+
" F.col('\"value\"').cast(\"float\").alias(\"2Y_YIELD\")\n",
6464
" )\n",
6565
"\n",
66-
" harmonized_data = raw_fred_10Y.join(raw_fred_2Y, raw_fred_10Y['OBSERVATION_DATE'] == raw_fred_2Y['OBSERVATION_DATE']).select(\n",
67-
" raw_fred_10Y[\"OBSERVATION_DATE\"].alias(\"OBSERVATION_DATE\"), # Keep only one OBSERVATION_DATE\n",
66+
" harmonized_data = raw_fred_10Y.join(raw_fred_2Y, raw_fred_10Y[\"OBSERVATION_DATE\"] == raw_fred_2Y[\"OBSERVATION_DATE\"]).select(\n",
67+
" raw_fred_10Y[\"OBSERVATION_DATE\"].alias(\"OBSERVATION_DATE\"),\n",
6868
" F.col(\"10Y_YIELD\"),\n",
6969
" F.col(\"2Y_YIELD\")\n",
7070
" )\n",
7171
"\n",
72-
" # harmonized_data = harmonized_data.filter(\n",
73-
" # (F.col(\"YIELD_SPREAD\").isNotNull()))\n",
72+
"\n",
7473
" \n",
7574
" session.use_schema(f\"{database_name}.{schema_name}\")\n",
7675
" harmonized_data.write.mode(\"overwrite\").save_as_table('FRED_FLATTENED')\n",

0 commit comments

Comments
 (0)