Skip to content

Commit 6ed76fa

Browse files
Merge michael-green-demo into dev
2 parents da300b0 + 6dfcf9b commit 6ed76fa

3 files changed

Lines changed: 169 additions & 116 deletions

File tree

Lines changed: 116 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,116 +1,116 @@
1-
# Fabric notebook source
2-
3-
# METADATA ********************
4-
5-
# META {
6-
# META "kernel_info": {
7-
# META "name": "synapse_pyspark"
8-
# META },
9-
# META "dependencies": {
10-
# META "lakehouse": {
11-
# META "default_lakehouse": "c185283c-9dd9-4e40-a17c-aa6303e3a2e9",
12-
# META "default_lakehouse_name": "PatternsLakehouse",
13-
# META "default_lakehouse_workspace_id": "d7270f11-feba-4990-baa6-d45e47f23737",
14-
# META "known_lakehouses": [
15-
# META {
16-
# META "id": "c185283c-9dd9-4e40-a17c-aa6303e3a2e9"
17-
# META }
18-
# META ]
19-
# META }
20-
# META }
21-
# META }
22-
23-
# CELL ********************
24-
25-
# Import required libraries for schema definition
26-
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType, TimestampType
27-
from datetime import date, datetime
28-
29-
# Define table names for the Healthcare industry domain
30-
# These 3 tables form a relational model: patients and doctors linked through appointments
31-
PATIENTS_TABLE = "patients"
32-
DOCTORS_TABLE = "doctors"
33-
APPOINTMENTS_TABLE = "appointments"
34-
35-
# -------------------------------------------------------
36-
# Define schemas explicitly — reused for both table creation and data insertion
37-
# to ensure type consistency and avoid merge field errors
38-
# -------------------------------------------------------
39-
40-
patients_schema = StructType([
41-
StructField("patient_id", IntegerType(), False),
42-
StructField("first_name", StringType(), False),
43-
StructField("last_name", StringType(), False),
44-
StructField("date_of_birth", TimestampType(), True),
45-
StructField("gender", StringType(), True),
46-
StructField("phone_number", StringType(), True),
47-
StructField("email", StringType(), True)
48-
])
49-
50-
doctors_schema = StructType([
51-
StructField("doctor_id", IntegerType(), False),
52-
StructField("first_name", StringType(), False),
53-
StructField("last_name", StringType(), False),
54-
StructField("specialty", StringType(), True),
55-
StructField("phone_number", StringType(), True),
56-
StructField("email", StringType(), True)
57-
])
58-
59-
appointments_schema = StructType([
60-
StructField("appointment_id", IntegerType(), False),
61-
StructField("patient_id", IntegerType(), False),
62-
StructField("doctor_id", IntegerType(), False),
63-
StructField("appointment_date", TimestampType(), False),
64-
StructField("reason", StringType(), True),
65-
StructField("status", StringType(), True)
66-
])
67-
68-
# -------------------------------------------------------
69-
# Insert data using saveAsTable which writes Delta files AND registers
70-
# tables in the Lakehouse metastore. The default Lakehouse is attached
71-
# via the notebook's META dependencies block, so simple table names resolve
72-
# correctly. Per Fabric docs: df.write.mode("overwrite").format("delta").saveAsTable(name)
73-
# -------------------------------------------------------
74-
75-
patients_data = [
76-
(1, "Alice", "Johnson", datetime(1985, 3, 15), "Female", "555-0101", "alice.johnson@email.com"),
77-
(2, "Bob", "Smith", datetime(1990, 7, 22), "Male", "555-0102", "bob.smith@email.com"),
78-
(3, "Carol", "Williams", datetime(1978, 11, 8), "Female", "555-0103", "carol.williams@email.com"),
79-
(4, "David", "Brown", datetime(2001, 1, 30), "Male", "555-0104", "david.brown@email.com"),
80-
(5, "Eva", "Davis", datetime(1995, 6, 12), "Female", "555-0105", "eva.davis@email.com")
81-
]
82-
patients_df = spark.createDataFrame(patients_data, patients_schema)
83-
patients_df.write.format("delta").mode("overwrite").saveAsTable(PATIENTS_TABLE)
84-
print(f"Inserted {patients_df.count()} rows into '{PATIENTS_TABLE}'.")
85-
86-
doctors_data = [
87-
(1, "Sarah", "Mitchell", "Cardiology", "555-0201", "sarah.mitchell@hospital.com"),
88-
(2, "James", "Anderson", "Neurology", "555-0202", "james.anderson@hospital.com"),
89-
(3, "Emily", "Thompson", "Pediatrics", "555-0203", "emily.thompson@hospital.com")
90-
]
91-
doctors_df = spark.createDataFrame(doctors_data, doctors_schema)
92-
doctors_df.write.format("delta").mode("overwrite").saveAsTable(DOCTORS_TABLE)
93-
print(f"Inserted {doctors_df.count()} rows into '{DOCTORS_TABLE}'.")
94-
95-
appointments_data = [
96-
(1, 1, 1, datetime(2026, 4, 10), "Annual checkup", "Completed"),
97-
(2, 2, 2, datetime(2026, 4, 11), "Headache consultation", "Completed"),
98-
(3, 3, 3, datetime(2026, 4, 12), "Child wellness visit", "Scheduled"),
99-
(4, 4, 1, datetime(2026, 4, 15), "Chest pain follow-up", "Scheduled"),
100-
(5, 5, 2, datetime(2026, 4, 18), "Neurological evaluation", "Scheduled"),
101-
(6, 1, 3, datetime(2026, 4, 20), "Flu symptoms", "Scheduled")
102-
]
103-
appointments_df = spark.createDataFrame(appointments_data, appointments_schema)
104-
appointments_df.write.format("delta").mode("overwrite").saveAsTable(APPOINTMENTS_TABLE)
105-
print(f"Inserted {appointments_df.count()} rows into '{APPOINTMENTS_TABLE}'.")
106-
107-
print("All tables created and data loaded successfully.")
108-
109-
110-
111-
# METADATA ********************
112-
113-
# META {
114-
# META "language": "python",
115-
# META "language_group": "synapse_pyspark"
116-
# META }
1+
# Fabric notebook source
2+
3+
# METADATA ********************
4+
5+
# META {
6+
# META "kernel_info": {
7+
# META "name": "synapse_pyspark"
8+
# META },
9+
# META "dependencies": {
10+
# META "lakehouse": {
11+
# META "default_lakehouse": "c185283c-9dd9-4e40-a17c-aa6303e3a2e9",
12+
# META "default_lakehouse_name": "PatternsLakehouse",
13+
# META "default_lakehouse_workspace_id": "d7270f11-feba-4990-baa6-d45e47f23737",
14+
# META "known_lakehouses": [
15+
# META {
16+
# META "id": "c185283c-9dd9-4e40-a17c-aa6303e3a2e9"
17+
# META }
18+
# META ]
19+
# META }
20+
# META }
21+
# META }
22+
23+
# CELL ********************
24+
25+
# Import the Spark schema types and the datetime classes used for the sample rows
26+
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType, TimestampType
27+
from datetime import date, datetime
28+
29+
# Define table names for the Healthcare industry domain
30+
# These 3 tables form a relational model: patients and doctors linked through appointments
31+
PATIENTS_TABLE = "patients"  # unqualified table names; each is passed to saveAsTable further down
32+
DOCTORS_TABLE = "doctors"
33+
APPOINTMENTS_TABLE = "appointments"  # its schema carries patient_id and doctor_id columns linking the other two tables
34+
35+
# -------------------------------------------------------
36+
# Define schemas explicitly — each one is passed to spark.createDataFrame below so
37+
# the saved Delta tables get consistent column types and avoid merge field errors
38+
# -------------------------------------------------------
39+
40+
patients_schema = StructType([  # StructField arguments: (column name, data type, nullable)
41+
StructField("patient_id", IntegerType(), False),  # required; presumably the key referenced by appointments.patient_id
42+
StructField("first_name", StringType(), False),  # required
43+
StructField("last_name", StringType(), False),  # required
44+
StructField("date_of_birth", TimestampType(), True),  # NOTE(review): stored as a timestamp although DateType is imported and unused — confirm a date-only column is not intended
45+
StructField("gender", StringType(), True),  # optional
46+
StructField("phone_number", StringType(), True),  # optional
47+
StructField("email", StringType(), True)  # optional
48+
])
49+
50+
doctors_schema = StructType([  # StructField arguments: (column name, data type, nullable)
51+
StructField("doctor_id", IntegerType(), False),  # required; presumably the key referenced by appointments.doctor_id
52+
StructField("first_name", StringType(), False),  # required
53+
StructField("last_name", StringType(), False),  # required
54+
StructField("specialty", StringType(), True),  # optional
55+
StructField("phone_number", StringType(), True),  # optional
56+
StructField("email", StringType(), True)  # optional
57+
])
58+
59+
appointments_schema = StructType([  # StructField arguments: (column name, data type, nullable)
60+
StructField("appointment_id", IntegerType(), False),  # required
61+
StructField("patient_id", IntegerType(), False),  # required; same name and type as patients_schema.patient_id
62+
StructField("doctor_id", IntegerType(), False),  # required; same name and type as doctors_schema.doctor_id
63+
StructField("appointment_date", TimestampType(), False),  # required; sample rows supply datetime values with no time component
64+
StructField("reason", StringType(), True),  # optional free text
65+
StructField("status", StringType(), True)  # optional; sample rows use "Completed" and "Scheduled"
66+
])
67+
68+
# -------------------------------------------------------
69+
# Insert data using saveAsTable which writes Delta files AND registers
70+
# tables in the Lakehouse metastore. The default Lakehouse is attached
71+
# via the notebook's META dependencies block, so simple table names resolve
72+
# correctly. Per Fabric docs: df.write.mode("overwrite").format("delta").saveAsTable(name)
73+
# -------------------------------------------------------
74+
75+
patients_data = [  # one tuple per row, values in patients_schema column order
76+
(1, "Alice", "Johnson", datetime(1985, 3, 15), "Female", "555-0101", "alice.johnson@email.com"),
77+
(2, "Bob", "Smith", datetime(1990, 7, 22), "Male", "555-0102", "bob.smith@email.com"),
78+
(3, "Carol", "Williams", datetime(1978, 11, 8), "Female", "555-0103", "carol.williams@email.com"),
79+
(4, "David", "Brown", datetime(2001, 1, 30), "Male", "555-0104", "david.brown@email.com"),
80+
(5, "Eva", "Davis", datetime(1995, 6, 12), "Female", "555-0105", "eva.davis@email.com")
81+
]
82+
patients_df = spark.createDataFrame(patients_data, patients_schema)  # apply the explicit schema rather than inferring column types
83+
patients_df.write.format("delta").mode("overwrite").saveAsTable(PATIENTS_TABLE)  # overwrite mode replaces existing table data when the cell is re-run
84+
print(f"Inserted {patients_df.count()} rows into '{PATIENTS_TABLE}'.")  # count() is taken from the source DataFrame, not read back from the saved table
85+
86+
doctors_data = [  # one tuple per row, values in doctors_schema column order
87+
(1, "Sarah", "Mitchell", "Cardiology", "555-0201", "sarah.mitchell@hospital.com"),
88+
(2, "James", "Anderson", "Neurology", "555-0202", "james.anderson@hospital.com"),
89+
(3, "Emily", "Thompson", "Pediatrics", "555-0203", "emily.thompson@hospital.com")
90+
]
91+
doctors_df = spark.createDataFrame(doctors_data, doctors_schema)  # apply the explicit schema rather than inferring column types
92+
doctors_df.write.format("delta").mode("overwrite").saveAsTable(DOCTORS_TABLE)  # overwrite mode replaces existing table data when the cell is re-run
93+
print(f"Inserted {doctors_df.count()} rows into '{DOCTORS_TABLE}'.")  # count() is taken from the source DataFrame, not read back from the saved table
94+
95+
appointments_data = [  # one tuple per row: (appointment_id, patient_id, doctor_id, appointment_date, reason, status)
96+
(1, 1, 1, datetime(2026, 4, 10), "Annual checkup", "Completed"),
97+
(2, 2, 2, datetime(2026, 4, 11), "Headache consultation", "Completed"),
98+
(3, 3, 3, datetime(2026, 4, 12), "Child wellness visit", "Scheduled"),
99+
(4, 4, 1, datetime(2026, 4, 15), "Chest pain follow-up", "Scheduled"),
100+
(5, 5, 2, datetime(2026, 4, 18), "Neurological evaluation", "Scheduled"),
101+
(6, 1, 3, datetime(2026, 4, 20), "Flu symptoms", "Scheduled")  # patient 1 appears twice (rows 1 and 6)
102+
]
103+
appointments_df = spark.createDataFrame(appointments_data, appointments_schema)  # apply the explicit schema rather than inferring column types
104+
appointments_df.write.format("delta").mode("overwrite").saveAsTable(APPOINTMENTS_TABLE)  # overwrite mode replaces existing table data when the cell is re-run
105+
print(f"Inserted {appointments_df.count()} rows into '{APPOINTMENTS_TABLE}'.")  # count() is taken from the source DataFrame, not read back from the saved table
106+
107+
print("All tables created and data loaded successfully.")  # reached only if all three writes above completed without raising
108+
109+
110+
111+
# METADATA ********************
112+
113+
# META {
114+
# META "language": "python",
115+
# META "language_group": "synapse_pyspark"
116+
# META }
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"$schema": "https://developer.microsoft.com/json-schemas/fabric/gitIntegration/platformProperties/2.0.0/schema.json",
3+
"metadata": {
4+
"type": "Notebook",
5+
"displayName": "Patterns_Demo"
6+
},
7+
"config": {
8+
"version": "2.0",
9+
"logicalId": "04b930ef-2a7d-b5ff-46d9-486ef779a8da"
10+
}
11+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Fabric notebook source
2+
3+
# METADATA ********************
4+
5+
# META {
6+
# META "kernel_info": {
7+
# META "name": "synapse_pyspark"
8+
# META },
9+
# META "dependencies": {}
10+
# META }
11+
12+
# CELL ********************
13+
14+
# Fabric notebook source
15+
16+
# CELL ********************
17+
VARIABLE_LIBRARY_NAME = "Patterns_Variables"  # name of the variable library looked up below
18+
19+
variable_library = notebookutils.variableLibrary.getLibrary(VARIABLE_LIBRARY_NAME)  # notebookutils and spark are not imported in this cell — presumably supplied by the Fabric notebook runtime
20+
21+
target_workspace_id = variable_library.target_workspace_id  # read from the variable library rather than hard-coded
22+
target_lakehouse_id = variable_library.target_lakehouse_id  # read from the variable library rather than hard-coded
23+
target_table_name = "patients"  # hard-coded table name, unlike the two IDs above
24+
25+
print(f"Variable Library: {VARIABLE_LIBRARY_NAME}")
26+
print(f"Workspace ID: {target_workspace_id}")
27+
print(f"Lakehouse ID: {target_lakehouse_id}")
28+
29+
# Build the OneLake ABFS path: workspace ID as the container, then lakehouse ID / Tables / dbo / table name
30+
full_path = f"abfss://{target_workspace_id}@onelake.dfs.fabric.microsoft.com/{target_lakehouse_id}/Tables/dbo/{target_table_name}"  # NOTE(review): the dbo segment is hard-coded — presumably targets a schema-enabled Lakehouse; confirm
31+
32+
# Load the Delta table directly from the ABFS path and display its rows
33+
patients_df = spark.read.format("delta").load(full_path)  # path-based read; no default Lakehouse is attached (this notebook's META "dependencies" is empty)
34+
patients_df.show()
35+
36+
37+
# METADATA ********************
38+
39+
# META {
40+
# META "language": "python",
41+
# META "language_group": "synapse_pyspark"
42+
# META }

0 commit comments

Comments
 (0)