diff --git a/1- Databricks Lakehouse Platform/1.0 - Creating Clusters.py b/1- Databricks Lakehouse Platform/1.0 - Creating Clusters.py
new file mode 100644
index 0000000..210da5a
--- /dev/null
+++ b/1- Databricks Lakehouse Platform/1.0 - Creating Clusters.py
@@ -0,0 +1,75 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Creating Clusters
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Creating a Demo Cluster
+# MAGIC
+# MAGIC Create a cluster with the following configurations:
+# MAGIC
+# MAGIC | Setting | Instructions |
+# MAGIC |--|--|
+# MAGIC |Cluster name|**Demo Cluster**|
+# MAGIC |Cluster mode|**Signle node**|
+# MAGIC |Runtime version|Select the Databricks runtime version 13.3 LTS|
+# MAGIC |Photon Acceleration| Uncheck the option |
+# MAGIC |Node type|4 cores|
+# MAGIC |Auto termination|30 minutes|
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 1- Navigate to the **Compute** tab in the left side bar.
+# MAGIC
+# MAGIC 2- Under **All-purpose compute** tab, click **Create compute**.
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 3- On top, click on the default name to change it. Name your cluster as **Demo Cluster**
+# MAGIC
+# MAGIC 4- Select **Single node** cluster
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 5- Select the Databricks runtime version 13.3 LTS (Long Term Support)
+# MAGIC
+# MAGIC 6- Uncheck the option for the **Use Photon Acceleration**
+# MAGIC
+# MAGIC 7- Select a Node type of 4 cores
+# MAGIC
+# MAGIC 8- Set the auto termination of the cluster to 30 minutes
+# MAGIC
+# MAGIC 9- Lastly, click **Create compute**.
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
diff --git a/1- Databricks Lakehouse Platform/1.1 - Notebook Basics.py b/1- Databricks Lakehouse Platform/1.1 - Notebook Basics.py
index 65c4d54..aee2d60 100644
--- a/1- Databricks Lakehouse Platform/1.1 - Notebook Basics.py
+++ b/1- Databricks Lakehouse Platform/1.1 - Notebook Basics.py
@@ -12,31 +12,31 @@
# MAGIC # Title 1
# MAGIC ## Title 2
# MAGIC ### Title 3
-# MAGIC
+# MAGIC
# MAGIC text with a **bold** and *italicized* in it.
-# MAGIC
+# MAGIC
# MAGIC Ordered list
# MAGIC 1. first
# MAGIC 1. second
# MAGIC 1. third
-# MAGIC
+# MAGIC
# MAGIC Unordered list
# MAGIC * coffee
# MAGIC * tea
-# MAGIC * tea
-# MAGIC
-# MAGIC
+# MAGIC * milk
+# MAGIC
+# MAGIC
# MAGIC Images:
# MAGIC 
-# MAGIC
+# MAGIC
# MAGIC And of course, tables:
-# MAGIC
+# MAGIC
# MAGIC | user_id | user_name |
# MAGIC |---------|-----------|
# MAGIC | 1 | Adam |
# MAGIC | 2 | Sarah |
# MAGIC | 3 | John |
-# MAGIC
+# MAGIC
# MAGIC Links (or Embedded HTML): Managing Notebooks documentation
# COMMAND ----------
@@ -70,4 +70,9 @@
# COMMAND ----------
+print("End of the Notebook")
+
+# COMMAND ----------
+# MAGIC %md
+# MAGIC ## End of Notebook
diff --git a/1- Databricks Lakehouse Platform/1.2 - Understanding Delta Tables.sql b/1- Databricks Lakehouse Platform/1.2 - Understanding Delta Tables.sql
index e122db9..78942ab 100644
--- a/1- Databricks Lakehouse Platform/1.2 - Understanding Delta Tables.sql
+++ b/1- Databricks Lakehouse Platform/1.2 - Understanding Delta Tables.sql
@@ -1,16 +1,52 @@
-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC ## Creating Delta Lake Tables
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC **Note:** If your workspace does not support the `hive_metastore` catalog, switch to the **unity-catalog** branch in this Git Folder.
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore
+
+-- COMMAND ----------
+
CREATE TABLE employees
(id INT, name STRING, salary DOUBLE);
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Catalog Explorer
+-- MAGIC
+-- MAGIC Check the created **employees** table in the **Catalog** explorer.
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ## Inserting Data
+
+-- COMMAND ----------
+
INSERT INTO employees
VALUES
(1, "Adam", 3500.0),
- (2, "Sarah", 4020.5),
+ (2, "Sarah", 4020.5);
+
+INSERT INTO employees
+VALUES
(3, "John", 2999.3),
- (4, "Thomas", 4000.3),
- (5, "Anna", 2500.0),
+ (4, "Thomas", 4000.3);
+
+INSERT INTO employees
+VALUES
+ (5, "Anna", 2500.0);
+
+INSERT INTO employees
+VALUES
(6, "Kim", 6200.3)
-- COMMAND ----------
@@ -19,14 +55,29 @@ SELECT * FROM employees
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Exploring Table Metadata
+
+-- COMMAND ----------
+
DESCRIBE DETAIL employees
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Exploring Table Directory
+
+-- COMMAND ----------
+
-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Updating Table
+
+-- COMMAND ----------
+
UPDATE employees
SET salary = salary + 100
WHERE name LIKE "A%"
@@ -49,6 +100,11 @@ SELECT * FROM employees
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Exploring Table History
+
+-- COMMAND ----------
+
DESCRIBE HISTORY employees
-- COMMAND ----------
@@ -57,7 +113,7 @@ DESCRIBE HISTORY employees
-- COMMAND ----------
--- MAGIC %fs head 'dbfs:/user/hive/warehouse/employees/_delta_log/00000000000000000002.json'
+-- MAGIC %fs head 'dbfs:/user/hive/warehouse/employees/_delta_log/00000000000000000005.json'
-- COMMAND ----------
diff --git a/1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features (1).sql b/1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features.sql
similarity index 74%
rename from 1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features (1).sql
rename to 1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features.sql
index 13fe336..30cb2e0 100644
--- a/1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features (1).sql
+++ b/1- Databricks Lakehouse Platform/1.3 - Advanced Delta Lake Features.sql
@@ -1,14 +1,24 @@
-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Delta Time Travel
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore
+
+-- COMMAND ----------
+
DESCRIBE HISTORY employees
-- COMMAND ----------
SELECT *
-FROM employees VERSION AS OF 1
+FROM employees VERSION AS OF 4
-- COMMAND ----------
-SELECT * FROM employees@v1
+SELECT * FROM employees@v4
-- COMMAND ----------
@@ -20,7 +30,7 @@ SELECT * FROM employees
-- COMMAND ----------
-RESTORE TABLE employees TO VERSION AS OF 2
+RESTORE TABLE employees TO VERSION AS OF 5
-- COMMAND ----------
@@ -32,6 +42,12 @@ DESCRIBE HISTORY employees
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## OPTIMIZE Command
+
+-- COMMAND ----------
+
DESCRIBE DETAIL employees
-- COMMAND ----------
@@ -53,6 +69,12 @@ DESCRIBE HISTORY employees
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## VACUUM Command
+
+-- COMMAND ----------
+
VACUUM employees
-- COMMAND ----------
@@ -81,16 +103,18 @@ SELECT * FROM employees@v1
-- COMMAND ----------
-DROP TABLE employees
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Dropping Tables
-- COMMAND ----------
-SELECT * FROM employees
+DROP TABLE employees
-- COMMAND ----------
--- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'
+SELECT * FROM employees
-- COMMAND ----------
-
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'
diff --git a/1- Databricks Lakehouse Platform/1.4 - Databases and Tables on Databricks.sql b/1- Databricks Lakehouse Platform/1.4 - Databases and Tables on Databricks.sql
index 3bdd7ad..0c4eb86 100644
--- a/1- Databricks Lakehouse Platform/1.4 - Databases and Tables on Databricks.sql
+++ b/1- Databricks Lakehouse Platform/1.4 - Databases and Tables on Databricks.sql
@@ -1,4 +1,11 @@
-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC ## Managed Tables
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore;
+
CREATE TABLE managed_default
(width INT, length INT, height INT);
@@ -11,6 +18,12 @@ DESCRIBE EXTENDED managed_default
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## External Tables
+
+-- COMMAND ----------
+
CREATE TABLE external_default
(width INT, length INT, height INT)
LOCATION 'dbfs:/mnt/demo/external_default';
@@ -24,6 +37,12 @@ DESCRIBE EXTENDED external_default
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Dropping Tables
+
+-- COMMAND ----------
+
DROP TABLE managed_default
-- COMMAND ----------
@@ -40,6 +59,11 @@ DROP TABLE external_default
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Creating Schemas
+
+-- COMMAND ----------
+
CREATE SCHEMA new_default
-- COMMAND ----------
@@ -80,7 +104,7 @@ DROP TABLE external_new_default;
-- COMMAND ----------
--- MAGIC %fs ls 'dbfs:/user/hive/warehouse/managed_new_default'
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/new_default.db/managed_new_default'
-- COMMAND ----------
@@ -88,6 +112,11 @@ DROP TABLE external_new_default;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Creating Schemas in Custom Location
+
+-- COMMAND ----------
+
CREATE SCHEMA custom
LOCATION 'dbfs:/Shared/schemas/custom.db'
@@ -134,7 +163,3 @@ DROP TABLE external_custom;
-- COMMAND ----------
-- MAGIC %fs ls 'dbfs:/mnt/demo/external_custom'
-
--- COMMAND ----------
-
-
diff --git a/1- Databricks Lakehouse Platform/1.5A - Views.sql b/1- Databricks Lakehouse Platform/1.5A - Views.sql
index 16d569e..165aceb 100644
--- a/1- Databricks Lakehouse Platform/1.5A - Views.sql
+++ b/1- Databricks Lakehouse Platform/1.5A - Views.sql
@@ -1,4 +1,11 @@
-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC ## Preparing Sample Data
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore;
+
CREATE TABLE IF NOT EXISTS smartphones
(id INT, name STRING, brand STRING, year INT);
@@ -20,6 +27,11 @@ SHOW TABLES
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Creating Stored Views
+
+-- COMMAND ----------
+
CREATE VIEW view_apple_phones
AS SELECT *
FROM smartphones
@@ -35,6 +47,12 @@ SHOW TABLES;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Creating Temporary Views
+
+-- COMMAND ----------
+
CREATE TEMP VIEW temp_view_phones_brands
AS SELECT DISTINCT brand
FROM smartphones;
@@ -47,6 +65,12 @@ SHOW TABLES;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Creating Global Temporary Views
+
+-- COMMAND ----------
+
CREATE GLOBAL TEMP VIEW global_temp_view_latest_phones
AS SELECT * FROM smartphones
WHERE year > 2020
@@ -67,7 +91,3 @@ SHOW TABLES IN global_temp;
-- COMMAND ----------
SHOW TABLES
-
--- COMMAND ----------
-
-
diff --git a/1- Databricks Lakehouse Platform/1.5B - Views (Session 2).sql b/1- Databricks Lakehouse Platform/1.5B - Views (Session 2).sql
index bcb8b4e..e45e864 100644
--- a/1- Databricks Lakehouse Platform/1.5B - Views (Session 2).sql
+++ b/1- Databricks Lakehouse Platform/1.5B - Views (Session 2).sql
@@ -1,4 +1,8 @@
-- Databricks notebook source
+USE CATALOG hive_metastore;
+
+-- COMMAND ----------
+
SHOW TABLES;
-- COMMAND ----------
@@ -11,6 +15,12 @@ SELECT * FROM global_temp.global_temp_view_latest_phones;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Dropping Views
+
+-- COMMAND ----------
+
DROP TABLE smartphones;
DROP VIEW view_apple_phones;
diff --git a/2- ELT with Spark SQL and Python/2.1 - Querying Files.sql b/2- ELT with Spark SQL and Python/2.1 - Querying Files.sql
index 7d26b9d..dc26764 100644
--- a/2- ELT with Spark SQL and Python/2.1 - Querying Files.sql
+++ b/2- ELT with Spark SQL and Python/2.1 - Querying Files.sql
@@ -1,12 +1,17 @@
-- Databricks notebook source
-- MAGIC %md-sandbox
--- MAGIC
+-- MAGIC
-- MAGIC
--- MAGIC

+-- MAGIC

-- MAGIC
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Querying JSON
+
+-- COMMAND ----------
+
-- MAGIC %run ../Includes/Copy-Datasets
-- COMMAND ----------
@@ -39,14 +44,30 @@ SELECT count(*) FROM json.`${dataset.bookstore}/customers-json`
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Querying text Format
+
+-- COMMAND ----------
+
SELECT * FROM text.`${dataset.bookstore}/customers-json`
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Querying binaryFile Format
+
+-- COMMAND ----------
+
SELECT * FROM binaryFile.`${dataset.bookstore}/customers-json`
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Querying CSV
+
+-- COMMAND ----------
+
SELECT * FROM csv.`${dataset.bookstore}/books-csv`
-- COMMAND ----------
@@ -66,6 +87,12 @@ SELECT * FROM books_csv
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Limitations of Non-Delta Tables
+
+-- COMMAND ----------
+
DESCRIBE EXTENDED books_csv
-- COMMAND ----------
@@ -106,6 +133,11 @@ SELECT COUNT(*) FROM books_csv
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## CTAS Statements
+
+-- COMMAND ----------
+
CREATE TABLE customers AS
SELECT * FROM json.`${dataset.bookstore}/customers-json`;
@@ -137,7 +169,3 @@ SELECT * FROM books
-- COMMAND ----------
DESCRIBE EXTENDED books
-
--- COMMAND ----------
-
-
diff --git a/2- ELT with Spark SQL and Python/2.2 - Writing to Tables.sql b/2- ELT with Spark SQL and Python/2.2 - Writing to Tables.sql
index 91f3a23..ed17646 100644
--- a/2- ELT with Spark SQL and Python/2.2 - Writing to Tables.sql
+++ b/2- ELT with Spark SQL and Python/2.2 - Writing to Tables.sql
@@ -1,8 +1,8 @@
-- Databricks notebook source
-- MAGIC %md-sandbox
--- MAGIC
+-- MAGIC
-- MAGIC
--- MAGIC

+-- MAGIC

-- MAGIC
-- COMMAND ----------
@@ -20,6 +20,11 @@ SELECT * FROM orders
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Overwriting Tables
+
+-- COMMAND ----------
+
CREATE OR REPLACE TABLE orders AS
SELECT * FROM parquet.`${dataset.bookstore}/orders`
@@ -43,6 +48,11 @@ SELECT *, current_timestamp() FROM parquet.`${dataset.bookstore}/orders`
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Appending Data
+
+-- COMMAND ----------
+
INSERT INTO orders
SELECT * FROM parquet.`${dataset.bookstore}/orders-new`
@@ -52,6 +62,11 @@ SELECT count(*) FROM orders
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Merging Data
+
+-- COMMAND ----------
+
CREATE OR REPLACE TEMP VIEW customers_updates AS
SELECT * FROM json.`${dataset.bookstore}/customers-json-new`;
diff --git a/2- ELT with Spark SQL and Python/2.3 - Advanced Transformations.sql b/2- ELT with Spark SQL and Python/2.3 - Advanced Transformations.sql
index 27a0baa..d611285 100644
--- a/2- ELT with Spark SQL and Python/2.3 - Advanced Transformations.sql
+++ b/2- ELT with Spark SQL and Python/2.3 - Advanced Transformations.sql
@@ -1,8 +1,8 @@
-- Databricks notebook source
-- MAGIC %md-sandbox
--- MAGIC
+-- MAGIC
-- MAGIC
--- MAGIC

+-- MAGIC

-- MAGIC
-- COMMAND ----------
@@ -11,6 +11,12 @@
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Parsing JSON Data
+
+-- COMMAND ----------
+
SELECT * FROM customers
-- COMMAND ----------
@@ -65,11 +71,21 @@ FROM orders
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Explode Function
+
+-- COMMAND ----------
+
SELECT order_id, customer_id, explode(books) AS book
FROM orders
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Collecting Rows
+
+-- COMMAND ----------
+
SELECT customer_id,
collect_set(order_id) AS orders_set,
collect_set(books.book_id) AS books_set
@@ -78,6 +94,12 @@ GROUP BY customer_id
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ##Flatten Arrays
+
+-- COMMAND ----------
+
SELECT customer_id,
collect_set(books.book_id) As before_flatten,
array_distinct(flatten(collect_set(books.book_id))) AS after_flatten
@@ -86,6 +108,12 @@ GROUP BY customer_id
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ##Join Operations
+
+-- COMMAND ----------
+
CREATE OR REPLACE VIEW orders_enriched AS
SELECT *
FROM (
@@ -98,6 +126,11 @@ SELECT * FROM orders_enriched
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Set Operations
+
+-- COMMAND ----------
+
CREATE OR REPLACE TEMP VIEW orders_updates
AS SELECT * FROM parquet.`${dataset.bookstore}/orders-new`;
@@ -119,6 +152,11 @@ SELECT * FROM orders_updates
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## Reshaping Data with Pivot
+
+-- COMMAND ----------
+
CREATE OR REPLACE TABLE transactions AS
SELECT * FROM (
@@ -135,36 +173,3 @@ SELECT * FROM (
);
SELECT * FROM transactions
-
--- COMMAND ----------
-
-SELECT
- order_id,
- books,
- FILTER (books, i -> i.quantity >= 2) AS many_books
-FROM orders
-
--- COMMAND ----------
-
-SELECT order_id, many_books
-FROM (
- SELECT
- order_id,
- FILTER (books, i -> i.quantity >= 2) AS many_books
- FROM orders)
-WHERE size(many_books) > 0;
-
--- COMMAND ----------
-
-SELECT
- order_id,
- books,
- TRANSFORM (
- books,
- k -> CAST(k.subtotal * 0.8 AS INT)
- ) AS subtotal_after_discount
-FROM orders;
-
--- COMMAND ----------
-
-
diff --git a/2- ELT with Spark SQL and Python/2.4 - Higher Order Functions and SQL UDFs.sql b/2- ELT with Spark SQL and Python/2.4 - Higher Order Functions and SQL UDFs.sql
index 620fcf7..f0909d1 100644
--- a/2- ELT with Spark SQL and Python/2.4 - Higher Order Functions and SQL UDFs.sql
+++ b/2- ELT with Spark SQL and Python/2.4 - Higher Order Functions and SQL UDFs.sql
@@ -1,8 +1,8 @@
-- Databricks notebook source
-- MAGIC %md-sandbox
--- MAGIC
+-- MAGIC
-- MAGIC
--- MAGIC

+-- MAGIC

-- MAGIC
-- COMMAND ----------
@@ -15,6 +15,12 @@ SELECT * FROM orders
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Filtering Arrays
+
+-- COMMAND ----------
+
SELECT
order_id,
books,
@@ -33,6 +39,12 @@ WHERE size(multiple_copies) > 0;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Transforming Arrays
+
+-- COMMAND ----------
+
SELECT
order_id,
books,
@@ -44,6 +56,11 @@ FROM orders;
-- COMMAND ----------
+-- MAGIC %md
+-- MAGIC ## User Defined Functions (UDF)
+
+-- COMMAND ----------
+
CREATE OR REPLACE FUNCTION get_url(email STRING)
RETURNS STRING
@@ -82,7 +99,3 @@ FROM customers
DROP FUNCTION get_url;
DROP FUNCTION site_type;
-
--- COMMAND ----------
-
-
diff --git a/3- Incremental Data Processing/3.1 - Structured Streaming.py b/3- Incremental Data Processing/3.1 - Structured Streaming.py
index 91e8c07..b341907 100644
--- a/3- Incremental Data Processing/3.1 - Structured Streaming.py
+++ b/3- Incremental Data Processing/3.1 - Structured Streaming.py
@@ -1,8 +1,8 @@
# Databricks notebook source
# MAGIC %md-sandbox
-# MAGIC
+# MAGIC
# MAGIC
-# MAGIC

+# MAGIC

# MAGIC
# COMMAND ----------
@@ -11,6 +11,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Reading Stream
+
+# COMMAND ----------
+
(spark.readStream
.table("books")
.createOrReplaceTempView("books_streaming_tmp_vw")
@@ -18,11 +24,22 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Displaying Streaming Data
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC SELECT * FROM books_streaming_tmp_vw
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Applying Transformations
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC SELECT author, count(book_id) AS total_books
# MAGIC FROM books_streaming_tmp_vw
@@ -30,6 +47,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Unsupported Operations
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC SELECT *
# MAGIC FROM books_streaming_tmp_vw
@@ -37,6 +60,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Persisting Streaming Data
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC CREATE OR REPLACE TEMP VIEW author_counts_tmp_vw AS (
# MAGIC SELECT author, count(book_id) AS total_books
@@ -62,6 +91,11 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Adding New Data
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC INSERT INTO books
# MAGIC values ("B19", "Introduction to Modeling and Simulation", "Mark W. Spong", "Computer Science", 25),
@@ -70,6 +104,11 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Streaming in Batch Mode
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC INSERT INTO books
# MAGIC values ("B16", "Hands-On Deep Learning Algorithms with Python", "Sudharsan Ravichandiran", "Computer Science", 25),
@@ -92,7 +131,3 @@
# MAGIC %sql
# MAGIC SELECT *
# MAGIC FROM author_counts
-
-# COMMAND ----------
-
-
diff --git a/3- Incremental Data Processing/3.2 - Auto Loader.py b/3- Incremental Data Processing/3.2 - Auto Loader.py
index 1c6f1bb..c669291 100644
--- a/3- Incremental Data Processing/3.2 - Auto Loader.py
+++ b/3- Incremental Data Processing/3.2 - Auto Loader.py
@@ -1,8 +1,8 @@
# Databricks notebook source
# MAGIC %md-sandbox
-# MAGIC
+# MAGIC
# MAGIC
-# MAGIC

+# MAGIC

# MAGIC
# COMMAND ----------
@@ -11,11 +11,23 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Exploring The Source Directory
+
+# COMMAND ----------
+
files = dbutils.fs.ls(f"{dataset_bookstore}/orders-raw")
display(files)
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Auto Loader
+
+# COMMAND ----------
+
(spark.readStream
.format("cloudFiles")
.option("cloudFiles.format", "parquet")
@@ -38,6 +50,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Landing New Files
+
+# COMMAND ----------
+
load_new_data()
# COMMAND ----------
@@ -52,18 +70,26 @@
# COMMAND ----------
-# MAGIC %sql
-# MAGIC DESCRIBE HISTORY orders_updates
+# MAGIC %md
+# MAGIC
+# MAGIC ## Exploring Table History
# COMMAND ----------
# MAGIC %sql
-# MAGIC DROP TABLE orders_updates
+# MAGIC DESCRIBE HISTORY orders_updates
# COMMAND ----------
-dbutils.fs.rm("dbfs:/mnt/demo/orders_checkpoint", True)
+# MAGIC %md
+# MAGIC
+# MAGIC ## Cleaning Up
# COMMAND ----------
+# MAGIC %sql
+# MAGIC DROP TABLE orders_updates
+
+# COMMAND ----------
+dbutils.fs.rm("dbfs:/mnt/demo/orders_checkpoint", True)
diff --git a/3- Incremental Data Processing/3.3 - Multi-Hop Architecture.py b/3- Incremental Data Processing/3.3 - Multi-Hop Architecture.py
index 15777c4..772cd53 100644
--- a/3- Incremental Data Processing/3.3 - Multi-Hop Architecture.py
+++ b/3- Incremental Data Processing/3.3 - Multi-Hop Architecture.py
@@ -1,8 +1,8 @@
# Databricks notebook source
# MAGIC %md-sandbox
-# MAGIC
+# MAGIC
# MAGIC
-# MAGIC

+# MAGIC

# MAGIC
# COMMAND ----------
@@ -11,11 +11,23 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Exploring The Source dDirectory
+
+# COMMAND ----------
+
files = dbutils.fs.ls(f"{dataset_bookstore}/orders-raw")
display(files)
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Auto Loader
+
+# COMMAND ----------
+
(spark.readStream
.format("cloudFiles")
.option("cloudFiles.format", "parquet")
@@ -25,6 +37,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Enriching Raw Data
+
+# COMMAND ----------
+
# MAGIC %sql
# MAGIC CREATE OR REPLACE TEMPORARY VIEW orders_tmp AS (
# MAGIC SELECT *, current_timestamp() arrival_time, input_file_name() source_file
@@ -38,6 +56,11 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Creating Bronze Table
+
+# COMMAND ----------
+
(spark.table("orders_tmp")
.writeStream
.format("delta")
@@ -56,6 +79,12 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC #### Creating Static Lookup Table
+
+# COMMAND ----------
+
(spark.read
.format("json")
.load(f"{dataset_bookstore}/customers-json")
@@ -68,6 +97,11 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Creating Silver Table
+
+# COMMAND ----------
+
(spark.readStream
.table("orders_bronze")
.createOrReplaceTempView("orders_bronze_tmp"))
@@ -108,6 +142,11 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC ## Creating Gold Table
+
+# COMMAND ----------
+
(spark.readStream
.table("orders_silver")
.createOrReplaceTempView("orders_silver_tmp"))
@@ -142,11 +181,13 @@
# COMMAND ----------
+# MAGIC %md
+# MAGIC
+# MAGIC ## Stopping active streams
+
+# COMMAND ----------
+
for s in spark.streams.active:
print("Stopping stream: " + s.id)
s.stop()
s.awaitTermination()
-
-# COMMAND ----------
-
-
diff --git a/4- Production Pipelines/4.1 - Delta Live Tables.sql b/4- Production Pipelines/4.1 - Delta Live Tables.sql
index 1968c3b..6db0b5e 100644
--- a/4- Production Pipelines/4.1 - Delta Live Tables.sql
+++ b/4- Production Pipelines/4.1 - Delta Live Tables.sql
@@ -1,19 +1,23 @@
-- Databricks notebook source
-- MAGIC %md
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC # Delta Live Tables
-- COMMAND ----------
-- MAGIC %md-sandbox
--- MAGIC
+-- MAGIC
-- MAGIC
--- MAGIC

+-- MAGIC

-- MAGIC
-- COMMAND ----------
+SET datasets.path=dbfs:/mnt/demo-datasets/bookstore;
+
+-- COMMAND ----------
+
-- MAGIC %md
-- MAGIC ## Bronze Layer Tables
@@ -26,7 +30,7 @@
CREATE OR REFRESH STREAMING LIVE TABLE orders_raw
COMMENT "The raw books orders, ingested from orders-raw"
-AS SELECT * FROM cloud_files("${datasets_path}/orders-json-raw", "json",
+AS SELECT * FROM cloud_files("${datasets.path}/orders-json-raw", "json",
map("cloudFiles.inferColumnTypes", "true"))
-- COMMAND ----------
@@ -38,16 +42,16 @@ AS SELECT * FROM cloud_files("${datasets_path}/orders-json-raw", "json",
CREATE OR REFRESH LIVE TABLE customers
COMMENT "The customers lookup table, ingested from customers-json"
-AS SELECT * FROM json.`${datasets_path}/customers-json`
+AS SELECT * FROM json.`${datasets.path}/customers-json`
-- COMMAND ----------
-- MAGIC %md
--- MAGIC
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC ## Silver Layer Tables
--- MAGIC
+-- MAGIC
-- MAGIC #### orders_cleaned
-- COMMAND ----------
@@ -68,7 +72,7 @@ AS
-- MAGIC %md
-- MAGIC >> Constraint violation
--- MAGIC
+-- MAGIC
-- MAGIC | **`ON VIOLATION`** | Behavior |
-- MAGIC | --- | --- |
-- MAGIC | **`DROP ROW`** | Discard records that violate constraints |
@@ -78,8 +82,8 @@ AS
-- COMMAND ----------
-- MAGIC %md
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC ## Gold Tables
-- COMMAND ----------
diff --git a/4- Production Pipelines/4.1.2 - Books Pipeline.sql b/4- Production Pipelines/4.1.2 - Books Pipeline.sql
index 79b0e38..a61af84 100644
--- a/4- Production Pipelines/4.1.2 - Books Pipeline.sql
+++ b/4- Production Pipelines/4.1.2 - Books Pipeline.sql
@@ -1,22 +1,26 @@
-- Databricks notebook source
+SET datasets.path=dbfs:/mnt/demo-datasets/bookstore;
+
+-- COMMAND ----------
+
-- MAGIC %md
--- MAGIC
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC ## Bronze Layer Tables
-- COMMAND ----------
CREATE OR REFRESH STREAMING LIVE TABLE books_bronze
COMMENT "The raw books data, ingested from CDC feed"
-AS SELECT * FROM cloud_files("${datasets_path}/books-cdc", "json")
+AS SELECT * FROM cloud_files("${datasets.path}/books-cdc", "json")
-- COMMAND ----------
-- MAGIC %md
--- MAGIC
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC ## Silver Layer Tables
-- COMMAND ----------
@@ -33,8 +37,8 @@ APPLY CHANGES INTO LIVE.books_silver
-- COMMAND ----------
-- MAGIC %md
--- MAGIC
--- MAGIC
+-- MAGIC
+-- MAGIC
-- MAGIC ## Gold Layer Tables
-- COMMAND ----------
diff --git a/4- Production Pipelines/4.2 - Pipeline Results.py b/4- Production Pipelines/4.2 - Pipeline Results.py
index b53935b..09b7144 100644
--- a/4- Production Pipelines/4.2 - Pipeline Results.py
+++ b/4- Production Pipelines/4.2 - Pipeline Results.py
@@ -20,9 +20,9 @@
# COMMAND ----------
# MAGIC %sql
-# MAGIC SELECT * FROM demo_bookstore_dlt_db.cn_daily_customer_books
+# MAGIC SELECT * FROM hive_metastore.demo_bookstore_dlt_db.cn_daily_customer_books
# COMMAND ----------
# MAGIC %sql
-# MAGIC SELECT * FROM demo_bookstore_dlt_db.fr_daily_customer_books
+# MAGIC SELECT * FROM hive_metastore.demo_bookstore_dlt_db.fr_daily_customer_books
diff --git a/4- Production Pipelines/4.3 - Land New Data Task.py b/4- Production Pipelines/4.3 - Land New Data Task.py
index be10bfb..404285e 100644
--- a/4- Production Pipelines/4.3 - Land New Data Task.py
+++ b/4- Production Pipelines/4.3 - Land New Data Task.py
@@ -4,3 +4,8 @@
# COMMAND ----------
load_new_json_data()
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * from json.`${dataset.bookstore}/books-cdc/02.json`
diff --git a/5- Data Governance/5.1 - Managing Permissions.sql b/5- Data Governance/5.1 - Managing Permissions.sql
new file mode 100644
index 0000000..db643d4
--- /dev/null
+++ b/5- Data Governance/5.1 - Managing Permissions.sql
@@ -0,0 +1,28 @@
+CREATE DATABASE IF NOT EXISTS hive_metastore.hr_db
+LOCATION 'dbfs:/mnt/demo/hr_db.db';
+
+CREATE TABLE hive_metastore.hr_db.employees (id INT, name STRING, salary DOUBLE, city STRING);
+
+INSERT INTO hive_metastore.hr_db.employees
+VALUES (1, "Anna", 2500, "Paris"),
+ (2, "Thomas", 3000, "London"),
+ (3, "Bilal", 3500, "Paris"),
+ (4, "Maya", 2000, "Paris"),
+ (5, "Sophie", 2500, "London"),
+ (6, "Adam", 3500, "London"),
+ (7, "Ali", 3000, "Paris");
+
+CREATE VIEW hive_metastore.hr_db.paris_emplyees_vw
+AS SELECT * FROM hive_metastore.hr_db.employees WHERE city = 'Paris';
+
+------------------------------------------------------
+
+GRANT SELECT, MODIFY, READ_METADATA, CREATE ON SCHEMA hive_metastore.hr_db TO hr_team;
+
+GRANT USAGE ON SCHEMA hive_metastore.hr_db TO hr_team;
+
+GRANT SELECT ON VIEW hive_metastore.hr_db.paris_emplyees_vw TO `adam@derar.cloud`;
+
+SHOW GRANTS ON SCHEMA hive_metastore.hr_db;
+
+SHOW GRANTS ON VIEW hive_metastore.hr_db.paris_emplyees_vw;
diff --git a/Includes/Copy-Datasets.py b/Includes/Copy-Datasets.py
index 44dcb8e..f50fc24 100644
--- a/Includes/Copy-Datasets.py
+++ b/Includes/Copy-Datasets.py
@@ -4,7 +4,9 @@ def path_exists(path):
dbutils.fs.ls(path)
return True
except Exception as e:
- if 'java.io.FileNotFoundException' in str(e):
+ msg = str(e)
+ if ("com.databricks.sql.io.CloudFileNotFoundException" in msg
+ or "java.io.FileNotFoundException" in msg):
return False
else:
raise
@@ -23,9 +25,12 @@ def download_dataset(source, target):
# COMMAND ----------
-data_source_uri = "wasbs://course-resources@dalhussein.blob.core.windows.net/datasets/bookstore/v1/"
+data_source_uri = "s3://dalhussein-courses/datasets/bookstore/v1/"
dataset_bookstore = 'dbfs:/mnt/demo-datasets/bookstore'
+data_catalog = 'hive_metastore'
spark.conf.set(f"dataset.bookstore", dataset_bookstore)
+spark.conf.set("fs.s3a.endpoint", "s3.eu-west-3.amazonaws.com")
+spark.conf.set("fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider")
# COMMAND ----------
@@ -39,6 +44,11 @@ def get_index(dir):
# COMMAND ----------
+def set_current_catalog(catalog_name):
+ spark.sql(f"USE CATALOG {catalog_name}")
+
+# COMMAND ----------
+
# Structured Streaming
streaming_dir = f"{dataset_bookstore}/orders-streaming"
raw_dir = f"{dataset_bookstore}/orders-raw"
@@ -95,3 +105,4 @@ def load_new_json_data(all=False):
# COMMAND ----------
download_dataset(data_source_uri, dataset_bookstore)
+set_current_catalog(data_catalog)
diff --git a/Includes/Setup.py b/Includes/Setup.py
index 494d5a8..e081a0a 100644
--- a/Includes/Setup.py
+++ b/Includes/Setup.py
@@ -1,2 +1,2 @@
# Databricks notebook source
-full_name = "Derar Alhussein"
+full_name = "Siva Pattem"
diff --git a/Includes/images/bookstore_schema.png b/Includes/images/bookstore_schema.png
new file mode 100644
index 0000000..47c66c7
Binary files /dev/null and b/Includes/images/bookstore_schema.png differ
diff --git a/Labs/1- Databricks Lakehouse Platform/1.0L - Creating Clusters.py b/Labs/1- Databricks Lakehouse Platform/1.0L - Creating Clusters.py
new file mode 100644
index 0000000..1c9d89c
--- /dev/null
+++ b/Labs/1- Databricks Lakehouse Platform/1.0L - Creating Clusters.py
@@ -0,0 +1,21 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Creating Clusters
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1 - Creating a Demo Cluster
+# MAGIC
+# MAGIC Create a cluster with the following configurations:
+# MAGIC
+# MAGIC | Setting | Instructions |
+# MAGIC |--|--|
+# MAGIC |Cluster name|**Demo Cluster**|
+# MAGIC |Cluster mode|**Signle node**|
+# MAGIC |Runtime version|Select the Databricks runtime version 13.3 LTS|
+# MAGIC |Photon Acceleration| Uncheck the option |
+# MAGIC |Node type|4 cores|
+# MAGIC |Auto termination|30 minutes|
+# MAGIC
diff --git a/Labs/1- Databricks Lakehouse Platform/1.1L - Notebook Basics.py b/Labs/1- Databricks Lakehouse Platform/1.1L - Notebook Basics.py
new file mode 100644
index 0000000..0531ce7
--- /dev/null
+++ b/Labs/1- Databricks Lakehouse Platform/1.1L - Notebook Basics.py
@@ -0,0 +1,98 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Get started with Databricks Notebook
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1 - Renaming the Notebook
+# MAGIC
+# MAGIC Change the name of the current notebook to "1.1L - My first lab"
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Attaching a cluster
+# MAGIC
+# MAGIC Attach the cluster you created previously to this notebook
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Execute a Python code
+# MAGIC
+# MAGIC In the below cell, fill in the blank to print the result of adding the two variables x and y
+
+# COMMAND ----------
+
+x = 5
+y = 10
+result = x + y
+
+--------------------
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4 - Execute a SQL cell
+# MAGIC
+# MAGIC Change the language in the below cell to execute the SQL statement
+
+# COMMAND ----------
+
+_____________________
+
+SELECT 5 + 10
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q5 - Create a Markdown Cell
+# MAGIC
+# MAGIC 1. Insert a new cell below this one
+# MAGIC 1. In the new cell, add Markdown with a header and bullet points as shown in the following image
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q6 - Using %run command
+# MAGIC
+# MAGIC 1. Create a new Python notebook named **helper** in the current directory (i.e., in the **labs/1- Databricks Lakehouse Platform** folder)
+# MAGIC 1. In the **helper** notebook, declare a variable called **my_country** and assign it a value of your country name. For example:
+# MAGIC > my_country = "France"
+# MAGIC 1. In the following cell, execute a %run command to include the **helper** notebook into this current notebook
+# MAGIC > **Hint**: use a dot (**.**) to refer to the current directory
+
+# COMMAND ----------
+
+--------------------
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Now, run the following cell to test that you are able to print the **my_country** variable
+
+# COMMAND ----------
+
+print(my_country)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q7 - Functions definition
+# MAGIC 1. In the **helper** notebook, define a Python function named **addition()** that print the sum of two numbers
+# MAGIC 1. Execute again the above %run command to take the function definition into account
+# MAGIC 1. Run the below cell to call the function
+
+# COMMAND ----------
+
+num1 = 20
+num2 = 30
+
+addition(num1, num2)
diff --git a/Labs/1- Databricks Lakehouse Platform/1.2L - Delta Lake.sql b/Labs/1- Databricks Lakehouse Platform/1.2L - Delta Lake.sql
new file mode 100644
index 0000000..2aae47a
--- /dev/null
+++ b/Labs/1- Databricks Lakehouse Platform/1.2L - Delta Lake.sql
@@ -0,0 +1,97 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab: Delta Lake
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Creating a Delta Table
+-- MAGIC
+-- MAGIC Run the cell below to create the **persons** Delta Table, and apply some operations on it.
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore;
+
+CREATE OR REPLACE TABLE persons
+ (id INT, name STRING, age INT);
+
+INSERT INTO persons
+VALUES
+ (1, "Tom", 18),
+ (2, "Kumar", 25);
+
+INSERT INTO persons
+VALUES
+ (3, "Ali", 50),
+ (4, "Sandra", 35);
+
+INSERT INTO persons
+VALUES
+ (5, "Eric", 28),
+ (6, "Salma", 42);
+
+UPDATE persons
+SET age = age + 10
+WHERE id = 1;
+
+DELETE FROM persons
+WHERE name = "Eric";
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Querying Delta Lake table
+-- MAGIC
+-- MAGIC Query the data in the **persons** table using **SELECT** statement
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Checking table history
+-- MAGIC
+-- MAGIC Review the history of the table transactions using the **DESCRIBE HISTORY** command
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Checking table metadata
+-- MAGIC
+-- MAGIC Review the basic metadata information of the table using the **DESCRIBE DETAIL** command
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Exploring table directory
+-- MAGIC
+-- MAGIC Explore the table directory using the **%fs** magic command.
+-- MAGIC
+-- MAGIC >**Hint:** get the table location from the above metadata information
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5- Exploring the transactions log
+-- MAGIC
+-- MAGIC Explore the **_delta_log** subfolder in the table directory
+
+-- COMMAND ----------
+
+--------------------
diff --git a/Labs/1- Databricks Lakehouse Platform/1.3L - Databases and Tables on Databricks.sql b/Labs/1- Databricks Lakehouse Platform/1.3L - Databases and Tables on Databricks.sql
new file mode 100644
index 0000000..883814a
--- /dev/null
+++ b/Labs/1- Databricks Lakehouse Platform/1.3L - Databases and Tables on Databricks.sql
@@ -0,0 +1,190 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC ## Lab: Databases and Tables on Databricks
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Setting the default catalog
+-- MAGIC
+-- MAGIC Run the cell below to set the current catalog to **hive_metastore**
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore;
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Creating managed table
+-- MAGIC
+-- MAGIC In the default database, create a managed table named **movies_managed** that has the following schema:
+-- MAGIC
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | title | STRING |
+-- MAGIC | category | STRING |
+-- MAGIC | length | FLOAT |
+-- MAGIC | release_date | DATE |
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Review the extended metadata information of the table, and verify that:
+-- MAGIC 1. The table type is Managed
+-- MAGIC 1. The table is located under the default hive directory
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED movies_managed
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Creating external table
+-- MAGIC
+-- MAGIC In the default database, create an external Delta table named **actors_external**, and located under the directory:
+-- MAGIC **dbfs:/mnt/demo/actors_external**
+-- MAGIC
+-- MAGIC The schema for the table:
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | actor_id | INT |
+-- MAGIC | name | STRING |
+-- MAGIC | nationality | STRING |
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Checking table metadata
+-- MAGIC
+-- MAGIC Review the extended metadata information of the table, and verify that:
+-- MAGIC 1. The table type is External
+-- MAGIC 1. The table is located under the directory: **dbfs:/mnt/demo/actors_external**
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED actors_external
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Dropping manged table
+-- MAGIC
+-- MAGIC Drop the manged table **movies_managed**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Check that the directory of the managed table has been deleted
+-- MAGIC
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/movies_managed'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Drop external table
+-- MAGIC
+-- MAGIC Drop the external table **actors_external**
+
+-- COMMAND ----------
+
+DROP TABLE actors_external
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Check that the directory of the external table has **Not** been deleted
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/mnt/demo/actors_external'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5- Creating new schema
+-- MAGIC
+-- MAGIC Create a new schema named **db_cinema**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Review the extended metadata information of the database, and verify that the database is located under the default hive directory.
+-- MAGIC
+-- MAGIC Note that the database folder has the extenstion **.db**
+
+-- COMMAND ----------
+
+DESCRIBE DATABASE EXTENDED db_cinema
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Use the new schema to create the below **movies** table
+
+-- COMMAND ----------
+
+--------------------
+
+
+CREATE TABLE movies
+ (title STRING, category STRING, length INT, release_date DATE);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q6- Creating new schema in custom location
+-- MAGIC
+-- MAGIC Create a new schema named **cinema_custom** in the directory: **dbfs:/Shared/schemas/cinema_custom.db**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Use the new schema to create the below **movies** table
+
+-- COMMAND ----------
+
+USE cinema_custom;
+
+CREATE TABLE movies
+ (title STRING, category STRING, length INT, release_date DATE);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Finally, review the extended metadata information of the table **movies**, and verify that:
+-- MAGIC
+-- MAGIC 1. The table type is Managed
+-- MAGIC 1. The table is located in the new database defined under the custom location
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED movies
diff --git a/Labs/2- ELT with Spark SQL and Python/2.1L - Querying Files.sql b/Labs/2- ELT with Spark SQL and Python/2.1L - Querying Files.sql
new file mode 100644
index 0000000..49b5942
--- /dev/null
+++ b/Labs/2- ELT with Spark SQL and Python/2.1L - Querying Files.sql
@@ -0,0 +1,118 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab: Querying Files
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the following cell to setup the lab environment
+
+-- COMMAND ----------
+
+-- MAGIC %run ../Includes/Setup-Lab
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Extracting data directly from Parquet files
+-- MAGIC
+-- MAGIC Use a SELECT statement to directly query the content of the Parquet files in the directory **${dataset.school}/enrollments**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Use the above SELECT query in a CTAS statement to create the table **enrollments**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Run the below cell to ensure data was written as expected in the **enrollments** table
+
+-- COMMAND ----------
+
+SELECT * FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2- Registering Tables from JSON Files
+-- MAGIC
+-- MAGIC Use CTAS statement to create the table **students** from the json files in the directory: **${dataset.school}/students-json**
+-- MAGIC
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Run the below cell to ensure data was written as expected in the **students** table
+
+-- COMMAND ----------
+
+SELECT * FROM students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Registering Tables from CSV Files
+-- MAGIC
+-- MAGIC Create the temporary view **courses_tmp_vw** from the csv files in the directory: **${dataset.school}/courses-csv**
+-- MAGIC
+-- MAGIC Knowing that:
+-- MAGIC * The delimiter is semicolon (**;**)
+-- MAGIC * There is a header of column names in each file
+-- MAGIC
+-- MAGIC The schema for the view:
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | course_id | STRING |
+-- MAGIC | title | STRING |
+-- MAGIC | instructor | STRING |
+-- MAGIC | category | STRING |
+-- MAGIC | price | DOUBLE |
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Create the manged table **courses** from the temporary view **courses_tmp_vw**
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the below cell to ensure data was written as expected in the **courses** table
+
+-- COMMAND ----------
+
+SELECT * FROM courses
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Finally, review the metadata information of the table **courses**, and verify that the table type is Managed
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED courses
diff --git a/Labs/2- ELT with Spark SQL and Python/2.2L - Advanced ETL.sql b/Labs/2- ELT with Spark SQL and Python/2.2L - Advanced ETL.sql
new file mode 100644
index 0000000..186162f
--- /dev/null
+++ b/Labs/2- ELT with Spark SQL and Python/2.2L - Advanced ETL.sql
@@ -0,0 +1,102 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab: Advanced ETL
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the following cell to setup the lab environment
+
+-- COMMAND ----------
+
+-- MAGIC %run ../Includes/Setup-Lab
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Interacting with JSON data
+-- MAGIC
+-- MAGIC Review the nested data structures of the **profile** column in the **students** table created in the previous lab
+
+-- COMMAND ----------
+
+SELECT email, profile
+FROM students
+
+-- COMMAND ----------
+
+DESCRIBE students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Use the appropriate syntax to access the **last_name** and **city** information from the **profile** column
+
+-- COMMAND ----------
+
+SELECT email, ______________ AS student_surname, ______________ AS student_city
+FROM students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2- Higher Order Functions
+-- MAGIC
+-- MAGIC Review the array column **courses** in the **enrollments** table created in the previous lab
+
+-- COMMAND ----------
+
+SELECT enroll_id, courses
+FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Filter this array column to keep only course elements having subtotal greater than 40
+
+-- COMMAND ----------
+
+SELECT
+ enroll_id,
+ courses,
+ ______________ AS large_totals
+FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- SQL UDFs
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Define a UDF function named **get_letter_grade** that takes one parameter named **gpa** of type DOUBLE. It returns the corresponding letter grade as indicated in the following table:
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC | GPA (4.0 Scale) | Grade Letter |
+-- MAGIC |---------|-----------|
+-- MAGIC | 3.50 - 4.0 | A |
+-- MAGIC | 2.75 - 3.44 | B |
+-- MAGIC | 2.0 - 2.74 | C |
+-- MAGIC | 0.0 - 1.99 | F |
+
+-- COMMAND ----------
+
+--------------------
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Let's apply the above UDF on the **students** table created in the previous lab
+-- MAGIC
+-- MAGIC Fill in the below query to call the defined UDF on the **gpa** column
+
+-- COMMAND ----------
+
+SELECT student_id, gpa, _______________ as letter_grade
+FROM students
diff --git a/Labs/3- Incremental Data Processing/3.1L - Spark Structured Streaming.py b/Labs/3- Incremental Data Processing/3.1L - Spark Structured Streaming.py
new file mode 100644
index 0000000..1f7ee52
--- /dev/null
+++ b/Labs/3- Incremental Data Processing/3.1L - Spark Structured Streaming.py
@@ -0,0 +1,113 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Spark Structured Streaming
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the following cell to setup the lab environment
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Auto Loader
+# MAGIC
+# MAGIC Use Auto Loader to incrementally load enrollments json files from the directory **{dataset_school}/enrollments-json-raw** into a streaming view called **`enrollments_tmp_vw`**
+# MAGIC
+
+# COMMAND ----------
+
+dataset_source = f"{dataset_school}/enrollments-json-raw"
+schema_location = "dbfs:/mnt/DE-Associate/checkpoints/school/enrollments_stats"
+
+
+(spark
+ .readStream
+ .___________________
+ .___________________
+ .____________________
+ .load(dataset_source)
+ .createOrReplaceTempView("enrollments_tmp_vw"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Calculating aggregations on streaming data
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view against **`enrollments_tmp_vw`** to count the number of enrollments per **`student_id`**. Name the aggregated field: **`enrollments_counts`**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW enrollments_per_student_tmp_vw AS
+# MAGIC SELECT ___________________
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Writing stream data
+# MAGIC
+# MAGIC Stream the aggregated data from the **`enrollments_per_student_tmp_vw`** view to a Delta table called **`enrollments_stats`**.
+
+# COMMAND ----------
+
+checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/enrollments_stats"
+
+query = (spark._________________
+ ._________________
+ ._________________
+ ._________________
+ ._________________
+ )
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC
+# MAGIC Run the below cell to ensure data was written as expected in the **enrollments_stats** table
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the below cell to land a new json file of enrollments data
+
+# COMMAND ----------
+
+load_new_json_data()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Verify that the statistics have been updated in the table **enrollments_stats**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4 - Canceling streaming query
+# MAGIC
+# MAGIC Finally, cancel the above streaming query
diff --git a/Labs/3- Incremental Data Processing/3.2L - Multi-Hop Architecture.py b/Labs/3- Incremental Data Processing/3.2L - Multi-Hop Architecture.py
new file mode 100644
index 0000000..ef8c23b
--- /dev/null
+++ b/Labs/3- Incremental Data Processing/3.2L - Multi-Hop Architecture.py
@@ -0,0 +1,175 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Multi-Hop Architecture
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the following cell to setup the lab environment
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Declaring Bronze Table
+# MAGIC
+# MAGIC Use Auto Loader to incrementally load enrollments json files from the directory **{dataset_school}/enrollments-json-raw** to a Delta table called **`bronze`**
+# MAGIC
+
+# COMMAND ----------
+
+dataset_source = f"{dataset_school}/enrollments-json-raw"
+bronze_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/bronze"
+schema_location = bronze_checkpoint_path
+
+(spark.readStream
+ .___________________
+ .___________________
+ .___________________
+ .load(dataset_source)
+ .writeStream
+ .___________________
+ .___________________
+ .table("bronze")
+)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Create a streaming temporary view **bronze_tmp** from the bronze table in order to perform transformations using SQL.
+
+# COMMAND ----------
+
+(spark
+ .readStream
+ .table("bronze")
+ .___________________("bronze_tmp"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Data Cleansing & Enrichment
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view **`bronze_cleaned_tmp`** against **`bronze_tmp`** that does the following:
+# MAGIC * Remove records with **quantity** of 0 item
+# MAGIC * Add a column called **`processing_time`** containing the current timestamp using the **current_timestamp()** function
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW bronze_cleaned_tmp AS
+# MAGIC SELECT ___________________
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Declaring Silver Table
+# MAGIC
+# MAGIC Stream the data from **`bronze_cleaned_tmp`** to a table called **`silver`**.
+
+# COMMAND ----------
+
+silver_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/silver"
+
+(spark.table("bronze_cleaned_tmp")
+ .___________________
+ .___________________
+ .___________________
+ .table("silver")
+)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Let's create a streaming temporary view from the silver table in order to perform business-level aggregation using SQL
+
+# COMMAND ----------
+
+(spark
+ .readStream
+ .table("silver")
+ .createOrReplaceTempView("silver_tmp"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4- Declaring Gold Table
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view **`enrollments_per_student_tmp_vw`** against **`silver_tmp`** to count the number of enrollments per **`student`**. Name the aggregated field: **`enrollments_count`**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW enrollments_per_student_tmp_vw AS
+# MAGIC SELECT ___________________
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC
+# MAGIC Stream the aggregated data from the **`enrollments_per_student_tmp_vw`** view to a Delta table called **`gold_enrollments_stats`**.
+
+# COMMAND ----------
+
+gold_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/gold_enrollments_stats"
+
+query = (spark.table("enrollments_per_student_tmp_vw")
+ .writeStream
+ .___________________
+ .___________________
+ .table("gold_enrollments_stats"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Query the data in the **`gold_enrollments_stats`** table to ensure data was written as expected.
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM gold_enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the below cell to land new a json file of enrollments data
+
+# COMMAND ----------
+
+load_new_json_data()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Wait for the new data to be propagated, and then run the below query to verify that the statistics have been updated in the table **gold_enrollments_stats**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM gold_enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Finally, run the below cell for canceling the above streaming queries
+
+# COMMAND ----------
+
+for s in spark.streams.active:
+ print("Stopping stream: " + s.id)
+ s.stop()
+ s.awaitTermination()
diff --git a/Labs/4- Production Pipelines/4.1L - Delta Live Tables.sql b/Labs/4- Production Pipelines/4.1L - Delta Live Tables.sql
new file mode 100644
index 0000000..58a845c
--- /dev/null
+++ b/Labs/4- Production Pipelines/4.1L - Delta Live Tables.sql
@@ -0,0 +1,123 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab: implementing a DLT pipeline
+-- MAGIC
+-- MAGIC > This notebook is **not intended** to be executed interactively, but rather to be deployed as a DLT pipeline from the **workflows** tab
+-- MAGIC
+-- MAGIC
+-- MAGIC * Help: DLT syntax documentation.
+
+-- COMMAND ----------
+
+-- MAGIC %md-sandbox
+-- MAGIC
+-- MAGIC
+-- MAGIC

+-- MAGIC
+
+-- COMMAND ----------
+
+SET datasets.path=dbfs:/mnt/DE-Associate/datasets/school;
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Declaring Bronze Tables
+-- MAGIC
+-- MAGIC Declare a streaming live table, **`enrollments_bronze`**, that ingests JSON data incrementally using Auto Loader from the directory **"${datasets.path}/enrollments-json-raw"**
+
+-- COMMAND ----------
+
+CREATE ____________________
+AS SELECT * FROM cloud_files("${datasets.path}/enrollments-json-raw", "json",
+ map("cloudFiles.inferColumnTypes", "true"))
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Declare a live table, **`students_bronze`**, that load data directly from JSON files in the directory **"${datasets.path}/students-json"**
+
+-- COMMAND ----------
+
+CREATE ____________________
+AS SELECT * FROM json.`${datasets.path}/students-json`
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Declaring Silver Table
+-- MAGIC
+-- MAGIC Declare a streaming live table, **`enrollments_cleaned`**, that:
+-- MAGIC
+-- MAGIC 1. Enrich the **enrollments_bronze** data through an inner join with the **`students_bronze`** table on the common **`student_id`** field to obtain the student's country
+-- MAGIC 1. Implement quality control by applying a constraint to drop records with a null **`email`**
+-- MAGIC 1. The table will have the following schema:
+-- MAGIC
+-- MAGIC | Field | Type |
+-- MAGIC | --- | --- |
+-- MAGIC | **`enroll_id`** | **`STRING`** |
+-- MAGIC | **`total`** | **`DOUBLE`** |
+-- MAGIC | **`email`** | **`STRING`** |
+-- MAGIC | **`country`** | **`STRING`** |
+-- MAGIC
+
+-- COMMAND ----------
+
+CREATE OR REFRESH STREAMING LIVE TABLE enrollments_cleaned
+ (CONSTRAINT ____________________ ON VIOLATION ____________________ )
+AS SELECT enroll_id, total, email, profile:address:country as country
+ FROM ____________________ n
+ INNER ____________________ s
+ ON n.student_id = s.student_id
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q3- Declaring Gold Table
+-- MAGIC
+-- MAGIC Declare a live table, **`course_sales_per_country`** against **`enrollments_cleaned`** that calculate per **`country`** the following:
+-- MAGIC * **`enrollments_count`**: the number of enrollments
+-- MAGIC * **`enrollments_amount`**: the sum of the total amount of enrollments
+-- MAGIC
+-- MAGIC Add a comment to the table: "Course Sales Per Country"
+
+-- COMMAND ----------
+
+CREATE OR REFRESH LIVE TABLE course_sales_per_country
+ COMMENT ____________________
+AS SELECT ____________________
+
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q4- Deploying DLT pipeline
+-- MAGIC
+-- MAGIC From the **Workflows** button on the sidebar, under the **Delta Live Tables** tab, click **Create Pipeline**
+-- MAGIC
+-- MAGIC Configure the pipeline settings specified below:
+-- MAGIC
+-- MAGIC | Setting | Instructions |
+-- MAGIC |--|--|
+-- MAGIC | Pipeline name | School DLT |
+-- MAGIC | Product edition | Choose **Advanced** |
+-- MAGIC | Pipeline mode | Choose **Triggered** |
+-- MAGIC | Source code | Use the navigator to select this current notebook (4.1L - Delta Live Tables) |
+-- MAGIC | Storage location | dbfs:/mnt/DE-Associate/dlt/school |
+-- MAGIC | Target schema | DE_Associate_School_DLT |
+-- MAGIC | Cluster policy | Leave it **None**|
+-- MAGIC | Cluster mode | Choose **Fixed size**|
+-- MAGIC | Workers | Enter **0**|
+-- MAGIC | Photon Acceleration | Leave it unchecked |
+-- MAGIC | Advanced Configuration | Click **Add Configuration** and enter:
- Key: **datasets.path**
- Value: **dbfs:/mnt/DE-Associate/datasets/school** |
+-- MAGIC | Channel | Choose **Current**|
+-- MAGIC
+-- MAGIC Finally, click **Create**.
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q5 - Run your Pipeline
+-- MAGIC
+-- MAGIC Select **Development** mode and Click **Start** to begin the update to your pipeline's tables
diff --git a/Labs/4- Production Pipelines/4.2L - Jobs - Land New Data.py b/Labs/4- Production Pipelines/4.2L - Jobs - Land New Data.py
new file mode 100644
index 0000000..e5f60c9
--- /dev/null
+++ b/Labs/4- Production Pipelines/4.2L - Jobs - Land New Data.py
@@ -0,0 +1,91 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Creating a multi-task job
+# MAGIC
+# MAGIC In this lab, we will create a job that has 2 tasks:
+# MAGIC 1. The current notebook that lands a new batch of data in the lab dataset directory
+# MAGIC 1. The Delta Live Table pipeline created in the previous lab to processes this data
+# MAGIC
+# MAGIC * Help: Databricks Jobs documentation.
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Configuring Task 1 - Land New Data
+# MAGIC
+# MAGIC
+# MAGIC From the **Workflows** button on the sidebar, under the **Jobs** tab, click the **Create Job** button.
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 1. Set the job name in the top-left of the screen to **School Job**
+# MAGIC 1. Configure the first task as specified below:
+# MAGIC | Setting | Value |
+# MAGIC |--|--|
+# MAGIC | Task name | Enter **Land New Data** |
+# MAGIC | Type | Choose **Notebook** |
+# MAGIC | Source | Choose **Workspace** |
+# MAGIC | Path | Use the navigator to choose the current notebook (4.2L - Jobs - Land New Data) |
+# MAGIC | Cluster | Select your cluster from the dropdown, under **Existing All Purpose Clusters** |
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 3. Click the **Create** button
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2- Configuring Task 2 - DLT pipeline
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 1. Click the add button (**+**) to add a new **Delta Live Tables pipeline** task
+# MAGIC 1. Configure the task:
+# MAGIC
+# MAGIC | Setting | Value |
+# MAGIC |--|--|
+# MAGIC | Task name | Enter **DLT pipeline** |
+# MAGIC | Type | Choose **Delta Live Tables pipeline** |
+# MAGIC | Pipeline | Choose the DLT pipeline created in the previous lab |
+# MAGIC | Depends on | Choose **Land New Data**, which is the previous task we defined above |
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 3. Click the **Create task** button
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3- Run the job
+# MAGIC
+# MAGIC Click the **Run now** button in the top right to run this job. From the **Runs** tab, check your job run
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4- Review the finished job
+# MAGIC
+# MAGIC Once all tasks completed successfully, review the contents of each task to verify its result
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC > **Note**: The below cells are to be run as part of the **Task 1** to land new batch of data in the dataset directory
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+load_new_json_data()
diff --git a/Labs/4- Production Pipelines/4.3L - Databricks SQL.sql b/Labs/4- Production Pipelines/4.3L - Databricks SQL.sql
new file mode 100644
index 0000000..6a878b5
--- /dev/null
+++ b/Labs/4- Production Pipelines/4.3L - Databricks SQL.sql
@@ -0,0 +1,121 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab: Design a Dashboard with DBSQL
+-- MAGIC
+-- MAGIC In this lab, we will design a dashboard in DBSQL that has 2 graphs:
+-- MAGIC 1. Bar graph that shows the number of students per country
+-- MAGIC 1. Line graph that shows the daily enrollments amount
+-- MAGIC
+-- MAGIC * Help: Databricks SQL documentation.
+
+-- COMMAND ----------
+
+-- MAGIC %md-sandbox
+-- MAGIC
+-- MAGIC
+-- MAGIC

+-- MAGIC
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Working with queries in SQL Editor
+-- MAGIC
+-- MAGIC Run the the below query in **SQL Editor** in Databricks SQL, and then save it with the name **Student Counts**
+
+-- COMMAND ----------
+
+SELECT profile:address:country as country, count(student_id) AS students_count
+FROM hive_metastore.de_associate_school.students
+GROUP BY profile:address:country
+ORDER BY students_count DESC
+LIMIT 10
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Creating a Bar Graph Visualization
+-- MAGIC
+-- MAGIC Create a bar graph that shows the number of students per country
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC Steps:
+-- MAGIC 1. Click the Add butoon (**+**) next to the results tab, and select **Visualization** from the dialog box
+-- MAGIC 1. Select **`Bar`** for the **Visualization Type**
+-- MAGIC 1. Set **`country`** for the **X Column**
+-- MAGIC 1. Under **Y columns** click **Add column**, and set it to **`students_count`**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. Finally, set the title of the graph to **Student Counts Viz**
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3 - Creating a New Dashboard
+-- MAGIC
+-- MAGIC Add the above graph to a new dashboard named **Students Statistics**
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click the three vertical dots button at the top of the graph and select **Add to Dashboard**.
+-- MAGIC 1. Click the **Create new dashboard** option
+-- MAGIC 1. Name your dashboard **Students Statistics**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. With the new dashboard selected as the target, click **OK** to add your visualization
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4 - Creating a Line Plot Visualization
+-- MAGIC
+-- MAGIC 1. Run the the below query in a new query tab in the **SQL Editor**, and then save it with the name **Daily Sales**
+
+-- COMMAND ----------
+
+SELECT cast(from_unixtime(enroll_timestamp, 'yyyy-MM-dd HH:mm:ss') AS date) enroll_timestamp,
+ sum(total) AS enrollments_amount
+FROM hive_metastore.de_associate_school.enrollments n
+INNER JOIN hive_metastore.de_associate_school.students s ON s.student_id = n.student_id
+GROUP BY enroll_timestamp
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC 2. Create a Line Plot Visualization that shows the daily enrollments amount
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click the **Add Visualization** button
+-- MAGIC 1. Select **`Line`** for the **Visualization Type**
+-- MAGIC 1. Set **`enroll_timestamp`** for the **X Column**
+-- MAGIC 1. Under **Y columns** click **Add column**, and set it to **`enrollments_amount`**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. Finally, set the title of the graph to **Daily Sales Viz**
+-- MAGIC 1. Click the three vertical dots button at the top of the graph and select **Add to Dashboard**.
+-- MAGIC 1. Select the dashboard **Students Statistics** created above
+-- MAGIC 1. Click **OK** to add your visualization
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5 - Review your Dashboard
+-- MAGIC
+-- MAGIC Open your Dashboard and refresh its underlaying data
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click on the **Dashboards** button on left side bar
+-- MAGIC 1. Find the dashboard **Students Statistics** created earlier. Click to open it
+-- MAGIC 1. Click the **Refresh** button to update your dashboard
diff --git a/Labs/Includes/Setup-Lab.py b/Labs/Includes/Setup-Lab.py
new file mode 100644
index 0000000..ac61e86
--- /dev/null
+++ b/Labs/Includes/Setup-Lab.py
@@ -0,0 +1,137 @@
+# Databricks notebook source
+data_source_uri = "s3://dalhussein-courses/datasets/school/v1/"
+dataset_school = 'dbfs:/mnt/DE-Associate/datasets/school'
+checkpoint_path = 'dbfs:/mnt/DE-Associate/checkpoints/school'
+dlt_path = 'dbfs:/mnt/DE-Associate/dlt/school'
+db_name = 'DE_Associate_School'
+dlt_db_name = 'DE_Associate_School_DLT'
+spark.conf.set(f"dataset.school", dataset_school)
+
+# COMMAND ----------
+
+def clean_up():
+ print("Removing Checkpoints ...")
+ dbutils.fs.rm(checkpoint_path, True)
+ print("Removing DLT storage location ...")
+ dbutils.fs.rm(dlt_path, True)
+ print("Dropping Database ...")
+ spark.sql(f"DROP SCHEMA IF EXISTS {db_name} CASCADE")
+ print("Dropping DLT database ...")
+ spark.sql(f"DROP SCHEMA IF EXISTS {dlt_db_name} CASCADE")
+ print("Removing Dataset ...")
+ dbutils.fs.rm(dataset_school, True)
+ print("Done")
+
+# COMMAND ----------
+
+try:
+ clean = int(dbutils.widgets.get("clean"))
+except:
+ clean = 0
+
+if clean:
+ clean_up()
+
+# COMMAND ----------
+
+def path_exists(path):
+ try:
+ dbutils.fs.ls(path)
+ return True
+ except Exception as e:
+ msg = str(e)
+ if ("com.databricks.sql.io.CloudFileNotFoundException" in msg
+ or "java.io.FileNotFoundException" in msg):
+ return False
+ else:
+ raise
+
+# COMMAND ----------
+
+def download_dataset(source, target):
+ files = dbutils.fs.ls(source)
+
+ for f in files:
+ source_path = f"{source}/{f.name}"
+ target_path = f"{target}/{f.name}"
+ if not path_exists(target_path):
+ print(f"Copying {f.name} ...")
+ dbutils.fs.cp(source_path, target_path, True)
+
+# COMMAND ----------
+
+def get_index(dir):
+ files = dbutils.fs.ls(dir)
+ index = 0
+ if files:
+ file = max(files).name
+ index = int(file.rsplit('.', maxsplit=1)[0])
+ return index+1
+
+# COMMAND ----------
+
+def set_current_schema(schema_name, catalog_name='hive_metastore'):
+ spark.sql(f"USE CATALOG {catalog_name}")
+ spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
+ spark.sql(f"USE {schema_name}")
+ print(f"Schema for the hands-on labs: {catalog_name}.{schema_name}")
+
+# COMMAND ----------
+
+# Structured Streaming
+streaming_dir = f"{dataset_school}/enrollments-streaming"
+raw_dir = f"{dataset_school}/enrollments-raw"
+
+def load_file(current_index):
+ latest_file = f"{str(current_index).zfill(2)}.parquet"
+ print(f"Loading {latest_file} file to the school dataset")
+ dbutils.fs.cp(f"{streaming_dir}/{latest_file}", f"{raw_dir}/{latest_file}")
+
+
+def load_new_data(all=False):
+ index = get_index(raw_dir)
+ if index >= 10:
+ print("No more data to load\n")
+
+ elif all == True:
+ while index <= 10:
+ load_file(index)
+ index += 1
+ else:
+ load_file(index)
+ index += 1
+
+# COMMAND ----------
+
+# DLT
+streaming_enrollments_dir = f"{dataset_school}/enrollments-json-streaming"
+streaming_courses_dir = f"{dataset_school}/courses-streaming"
+
+raw_enrollments_dir = f"{dataset_school}/enrollments-json-raw"
+raw_courses_dir = f"{dataset_school}/courses-cdc"
+
+def load_json_file(current_index):
+ latest_file = f"{str(current_index).zfill(2)}.json"
+ print(f"Loading {latest_file} enrollments file to the school dataset")
+ dbutils.fs.cp(f"{streaming_enrollments_dir}/{latest_file}", f"{raw_enrollments_dir}/{latest_file}")
+ #print(f"Loading {latest_file} courses file to the school dataset")
+ #dbutils.fs.cp(f"{streaming_courses_dir}/{latest_file}", f"{raw_courses_dir}/{latest_file}")
+
+
+def load_new_json_data(all=False):
+ index = get_index(raw_enrollments_dir)
+ if index >= 10:
+ print("No more data to load\n")
+
+ elif all == True:
+ while index <= 10:
+ load_json_file(index)
+ index += 1
+ else:
+ load_json_file(index)
+ index += 1
+
+# COMMAND ----------
+
+download_dataset(data_source_uri, dataset_school)
+set_current_schema(db_name)
diff --git a/Labs/Includes/images/bar_graph.png b/Labs/Includes/images/bar_graph.png
new file mode 100644
index 0000000..013c7c2
Binary files /dev/null and b/Labs/Includes/images/bar_graph.png differ
diff --git a/Labs/Includes/images/cluster_par1.png b/Labs/Includes/images/cluster_par1.png
new file mode 100644
index 0000000..801f94e
Binary files /dev/null and b/Labs/Includes/images/cluster_par1.png differ
diff --git a/Labs/Includes/images/cluster_par2.png b/Labs/Includes/images/cluster_par2.png
new file mode 100644
index 0000000..5a10f40
Binary files /dev/null and b/Labs/Includes/images/cluster_par2.png differ
diff --git a/Labs/Includes/images/cluster_par3.png b/Labs/Includes/images/cluster_par3.png
new file mode 100644
index 0000000..bf77236
Binary files /dev/null and b/Labs/Includes/images/cluster_par3.png differ
diff --git a/Labs/Includes/images/dashboard.png b/Labs/Includes/images/dashboard.png
new file mode 100644
index 0000000..e8a8d2c
Binary files /dev/null and b/Labs/Includes/images/dashboard.png differ
diff --git a/Labs/Includes/images/line_graph.png b/Labs/Includes/images/line_graph.png
new file mode 100644
index 0000000..901d4f9
Binary files /dev/null and b/Labs/Includes/images/line_graph.png differ
diff --git a/Labs/Includes/images/markdown.png b/Labs/Includes/images/markdown.png
new file mode 100644
index 0000000..0c93afa
Binary files /dev/null and b/Labs/Includes/images/markdown.png differ
diff --git a/Labs/Includes/images/school_job.png b/Labs/Includes/images/school_job.png
new file mode 100644
index 0000000..66179b2
Binary files /dev/null and b/Labs/Includes/images/school_job.png differ
diff --git a/Labs/Includes/images/school_schema.png b/Labs/Includes/images/school_schema.png
new file mode 100644
index 0000000..2945a2f
Binary files /dev/null and b/Labs/Includes/images/school_schema.png differ
diff --git a/Labs/Solutions/1- Databricks Lakehouse Platform/1.0L Solution - Creating Clusters.py b/Labs/Solutions/1- Databricks Lakehouse Platform/1.0L Solution - Creating Clusters.py
new file mode 100644
index 0000000..ebcb29f
--- /dev/null
+++ b/Labs/Solutions/1- Databricks Lakehouse Platform/1.0L Solution - Creating Clusters.py
@@ -0,0 +1,75 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab: Creating Clusters
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1 - Creating a Demo Cluster
+# MAGIC
+# MAGIC Create a cluster with the following configurations:
+# MAGIC
+# MAGIC | Setting | Instructions |
+# MAGIC |--|--|
+# MAGIC |Cluster name|**Demo Cluster**|
+# MAGIC |Cluster mode|**Signle node**|
+# MAGIC |Runtime version|Select the Databricks runtime version 13.3 LTS|
+# MAGIC |Photon Acceleration| Uncheck the option |
+# MAGIC |Node type|4 cores|
+# MAGIC |Auto termination|30 minutes|
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 1- Navigate to the **Compute** tab in the left side bar.
+# MAGIC
+# MAGIC 2- Under **All-purpose compute** tab, click **Create compute**.
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 3- On top, click on the default name to change it. Name your cluster as **Demo Cluster**
+# MAGIC
+# MAGIC 4- Select **Single node** cluster
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC 5- Select the Databricks runtime version 13.3 LTS (Long Term Support)
+# MAGIC
+# MAGIC 6- Uncheck the option for the **Use Photon Acceleration**
+# MAGIC
+# MAGIC 7- Select a Node type of 4 cores
+# MAGIC
+# MAGIC 8- Set the auto termination of the cluster to 30 minutes
+# MAGIC
+# MAGIC 9- Lastly, click **Create compute**.
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
diff --git a/Labs/Solutions/1- Databricks Lakehouse Platform/1.1L Solution - Notebook Basics.py b/Labs/Solutions/1- Databricks Lakehouse Platform/1.1L Solution - Notebook Basics.py
new file mode 100644
index 0000000..82424c2
--- /dev/null
+++ b/Labs/Solutions/1- Databricks Lakehouse Platform/1.1L Solution - Notebook Basics.py
@@ -0,0 +1,112 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab Solution: Get started with Databricks Notebook
+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1 - Renaming the Notebook
+# MAGIC
+# MAGIC Change the name of the current notebook to "1.1L - My first lab"
+# MAGIC
+# MAGIC **Anwser:** to change the name of the notebook, click on the name at the top of this page, then make changes to the name
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Attaching a cluster
+# MAGIC
+# MAGIC Attach the cluster you created previously to this notebook
+# MAGIC
+# MAGIC **Anwser:** to attach a cluster to this notebook, click the dropdown near the top-right corner of this page. Select your cluster.
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Execute a Python code
+# MAGIC
+# MAGIC In the below cell, fill in the blank to print the result of adding the two variables x and y
+
+# COMMAND ----------
+
+x = 5
+y = 10
+result = x + y
+
+# Answer
+print(result)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4 - Execute a SQL cell
+# MAGIC
+# MAGIC Change the language in the below cell to execute the SQL statement
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT 5 + 10
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q5 - Create a Markdown Cell
+# MAGIC
+# MAGIC 1. Insert a new cell below this one
+# MAGIC 1. In the new cell, add Markdown with a header and bullet points as shown in the following image
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC
+# MAGIC # Animals
+# MAGIC
+# MAGIC * Cats
+# MAGIC * Dogs
+# MAGIC * Birds
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q6 - Using %run command
+# MAGIC
+# MAGIC 1. Create a new Python notebook named **helper** in the current directory (i.e., in the **labs/1- Databricks Lakehouse Platform** folder)
+# MAGIC 1. In the **helper** notebook, declare a variable called **my_country** and assign it a value of your country name. For example:
+# MAGIC > my_country = "France"
+# MAGIC 1. In the following cell, execute a %run command to include the **helper** notebook into this current notebook
+# MAGIC > **Hint**: use a dot (**.**) to refer to the current directory
+
+# COMMAND ----------
+
+# MAGIC %run ./helper
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Now, run the following cell to test that you are able to print the **my_country** variable
+
+# COMMAND ----------
+
+print(my_country)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q7 - Functions definition
+# MAGIC 1. In the **helper** notebook, define a Python function named **addition()** that print the sum of two numbers
+# MAGIC 1. Execute again the above %run command to take the function definition into account
+# MAGIC 1. Run the below cell to call the function
+
+# COMMAND ----------
+
+num1 = 20
+num2 = 30
+
+addition(num1, num2)
diff --git a/Labs/Solutions/1- Databricks Lakehouse Platform/1.2L Solution - Delta Lake.sql b/Labs/Solutions/1- Databricks Lakehouse Platform/1.2L Solution - Delta Lake.sql
new file mode 100644
index 0000000..8278576
--- /dev/null
+++ b/Labs/Solutions/1- Databricks Lakehouse Platform/1.2L Solution - Delta Lake.sql
@@ -0,0 +1,100 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab Solution: Delta Lake
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Creating a Delta Table
+-- MAGIC
+-- MAGIC Run the cell below to create the **persons** Delta Table, and apply some operations on it.
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore;
+
+CREATE OR REPLACE TABLE persons
+ (id INT, name STRING, age INT);
+
+INSERT INTO persons
+VALUES
+ (1, "Tom", 18),
+ (2, "Kumar", 25);
+
+INSERT INTO persons
+VALUES
+ (3, "Ali", 50),
+ (4, "Sandra", 35);
+
+INSERT INTO persons
+VALUES
+ (5, "Eric", 28),
+ (6, "Salma", 42);
+
+UPDATE persons
+SET age = age + 10
+WHERE id = 1;
+
+DELETE FROM persons
+WHERE name = "Eric";
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Querying Delta Lake table
+-- MAGIC
+-- MAGIC Query the data in the **persons** table using **SELECT** statement
+
+-- COMMAND ----------
+
+-- Answer
+SELECT * FROM persons
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Checking table history
+-- MAGIC
+-- MAGIC Review the history of the table transactions using the **DESCRIBE HISTORY** command
+
+-- COMMAND ----------
+
+-- Answer
+DESCRIBE HISTORY persons
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Checking table metadata
+-- MAGIC
+-- MAGIC Review the basic metadata information of the table using the **DESCRIBE DETAIL** command
+
+-- COMMAND ----------
+
+--Answer
+DESCRIBE DETAIL persons
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Exploring table directory
+-- MAGIC
+-- MAGIC Explore the table directory using the **%fs** magic command.
+-- MAGIC
+-- MAGIC **Hint:** get the table location from the above metadata information
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/persons'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5- Exploring the transactions log
+-- MAGIC
+-- MAGIC Explore the **_delta_log** subfolder in the table directory
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/persons/_delta_log'
diff --git a/Labs/Solutions/1- Databricks Lakehouse Platform/1.3L Solution - Databases and Tables on Databricks.sql b/Labs/Solutions/1- Databricks Lakehouse Platform/1.3L Solution - Databases and Tables on Databricks.sql
new file mode 100644
index 0000000..5d69479
--- /dev/null
+++ b/Labs/Solutions/1- Databricks Lakehouse Platform/1.3L Solution - Databases and Tables on Databricks.sql
@@ -0,0 +1,199 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC ## Lab Solution: Databases and Tables on Databricks
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Setting the default catalog
+-- MAGIC
+-- MAGIC Run the cell below to set the current catalog to **hive_metastore**
+
+-- COMMAND ----------
+
+USE CATALOG hive_metastore
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Creating managed table
+-- MAGIC
+-- MAGIC In the default database, create a managed table named **movies_managed** that has the following schema:
+-- MAGIC
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | title | STRING |
+-- MAGIC | category | STRING |
+-- MAGIC | length | FLOAT |
+-- MAGIC | release_date | DATE |
+
+-- COMMAND ----------
+
+-- Answer
+CREATE TABLE movies_managed
+ (title STRING, category STRING, length INT, release_date DATE);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Review the extended metadata information of the table, and verify that:
+-- MAGIC 1. The table type is Managed
+-- MAGIC 1. The table is located under the default hive directory
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED movies_managed
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Creating external table
+-- MAGIC
+-- MAGIC In the default database, create an external Delta table named **actors_external**, and located under the directory:
+-- MAGIC **dbfs:/mnt/demo/actors_external**
+-- MAGIC
+-- MAGIC The schema for the table:
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | actor_id | INT |
+-- MAGIC | name | STRING |
+-- MAGIC | nationality | STRING |
+
+-- COMMAND ----------
+
+-- Answer
+CREATE OR REPLACE TABLE actors_external
+ (actor_id INT, name STRING, nationality STRING)
+LOCATION 'dbfs:/mnt/demo/actors_external';
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Checking table metadata
+-- MAGIC
+-- MAGIC Review the extended metadata information of the table, and verify that:
+-- MAGIC 1. The table type is External
+-- MAGIC 1. The table is located under the directory: **dbfs:/mnt/demo/actors_external**
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED actors_external
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Dropping manged table
+-- MAGIC
+-- MAGIC Drop the manged table **movies_managed**
+
+-- COMMAND ----------
+
+-- Answer
+DROP TABLE movies_managed
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Check that the directory of the managed table has been deleted
+-- MAGIC
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/movies_managed'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4- Drop external table
+-- MAGIC
+-- MAGIC Drop the external table **actors_external**
+
+-- COMMAND ----------
+
+DROP TABLE actors_external
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Check that the directory of the external table has **Not** been deleted
+
+-- COMMAND ----------
+
+-- MAGIC %fs ls 'dbfs:/mnt/demo/actors_external'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5- Creating new schema
+-- MAGIC
+-- MAGIC Create a new schema named **db_cinema**
+
+-- COMMAND ----------
+
+-- Answer
+CREATE SCHEMA db_cinema
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Review the extended metadata information of the database, and verify that the database is located under the default hive directory.
+-- MAGIC
+-- MAGIC Note that the database folder has the extenstion **.db**
+
+-- COMMAND ----------
+
+DESCRIBE DATABASE EXTENDED db_cinema
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Use the new schema to create the below **movies** table
+
+-- COMMAND ----------
+
+-- Answer
+USE db_cinema;
+
+CREATE TABLE movies
+ (title STRING, category STRING, length INT, release_date DATE);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q6- Creating new schema in custom location
+-- MAGIC
+-- MAGIC Create a new schema named **cinema_custom** in the directory: **dbfs:/Shared/schemas/cinema_custom.db**
+
+-- COMMAND ----------
+
+-- Answer
+CREATE SCHEMA cinema_custom
+LOCATION 'dbfs:/Shared/schemas/cinema_custom.db'
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Use the new schema to create the below **movies** table
+
+-- COMMAND ----------
+
+USE cinema_custom;
+
+CREATE TABLE movies
+ (title STRING, category STRING, length INT, release_date DATE);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Finally, review the extended metadata information of the table **movies**, and verify that:
+-- MAGIC
+-- MAGIC 1. The table type is Managed
+-- MAGIC 1. The table is located in the new database defined under the custom location
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED movies
diff --git a/Labs/Solutions/1- Databricks Lakehouse Platform/helper.py b/Labs/Solutions/1- Databricks Lakehouse Platform/helper.py
new file mode 100644
index 0000000..00fb308
--- /dev/null
+++ b/Labs/Solutions/1- Databricks Lakehouse Platform/helper.py
@@ -0,0 +1,7 @@
+# Databricks notebook source
+my_country = "France"
+
+# COMMAND ----------
+
+def addition(a, b):
+ print(a + b)
diff --git a/Labs/Solutions/2- ELT with Spark SQL and Python/2.1L Solution - Querying Files.sql b/Labs/Solutions/2- ELT with Spark SQL and Python/2.1L Solution - Querying Files.sql
new file mode 100644
index 0000000..ce81033
--- /dev/null
+++ b/Labs/Solutions/2- ELT with Spark SQL and Python/2.1L Solution - Querying Files.sql
@@ -0,0 +1,134 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab Solution: Querying Files
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the following cell to setup the lab environment
+
+-- COMMAND ----------
+
+-- MAGIC %run ../Includes/Setup-Lab
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Extracting data directly from Parquet files
+-- MAGIC
+-- MAGIC Use a SELECT statement to directly query the content of the Parquet files in the directory **${dataset.school}/enrollments**
+
+-- COMMAND ----------
+
+-- ANSWER
+SELECT * FROM parquet.`${dataset.school}/enrollments`
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Use the above SELECT query in a CTAS statement to create the table **enrollments**
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE TABLE enrollments AS
+SELECT * FROM parquet.`${dataset.school}/enrollments`
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Run the below cell to ensure data was written as expected in the **enrollments** table
+
+-- COMMAND ----------
+
+SELECT * FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2- Registering Tables from JSON Files
+-- MAGIC
+-- MAGIC Use CTAS statement to create the table **students** from the json files in the directory: **${dataset.school}/students-json**
+-- MAGIC
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE TABLE students AS
+SELECT * FROM json.`${dataset.school}/students-json`;
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the below cell to ensure data was written as expected in the **students** table
+
+-- COMMAND ----------
+
+SELECT * FROM students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- Registering Tables from CSV Files
+-- MAGIC
+-- MAGIC Create the temporary view **courses_tmp_vw** from the csv files in the directory: **${dataset.school}/courses-csv**
+-- MAGIC
+-- MAGIC Knowing that:
+-- MAGIC * The delimiter is semicolon (**;**)
+-- MAGIC * There is a header of column names in each file
+-- MAGIC
+-- MAGIC The schema for the view:
+-- MAGIC
+-- MAGIC | Column Name | Column Type |
+-- MAGIC | --- | --- |
+-- MAGIC | course_id | STRING |
+-- MAGIC | title | STRING |
+-- MAGIC | instructor | STRING |
+-- MAGIC | category | STRING |
+-- MAGIC | price | DOUBLE |
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE TEMP VIEW courses_tmp_vw
+ (course_id STRING, title STRING, instructor STRING, category STRING, price DOUBLE)
+USING CSV
+OPTIONS (
+ path = "${dataset.school}/courses-csv",
+ header = "true",
+ delimiter = ";"
+);
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Create the manged table **courses** from the temporary view **courses_tmp_vw**
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE TABLE courses AS
+ SELECT * FROM courses_tmp_vw;
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Query the data in the **courses** table to ensure data was written as expected.
+
+-- COMMAND ----------
+
+-- ANSWER
+SELECT * FROM courses
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Finally, review the metadata information of the table **courses**, and verify that the table type is Managed
+
+-- COMMAND ----------
+
+DESCRIBE EXTENDED courses
diff --git a/Labs/Solutions/2- ELT with Spark SQL and Python/2.2L Solution - Advanced ETL.sql b/Labs/Solutions/2- ELT with Spark SQL and Python/2.2L Solution - Advanced ETL.sql
new file mode 100644
index 0000000..fa79f84
--- /dev/null
+++ b/Labs/Solutions/2- ELT with Spark SQL and Python/2.2L Solution - Advanced ETL.sql
@@ -0,0 +1,117 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab Solution: Advanced ETL
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Run the following cell to setup the lab environment
+
+-- COMMAND ----------
+
+-- MAGIC %run ../Includes/Setup-Lab
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Interacting with JSON data
+-- MAGIC
+-- MAGIC Review the nested data structures of the **profile** column in the **students** table created in the previous lab
+
+-- COMMAND ----------
+
+SELECT email, profile
+FROM students
+
+-- COMMAND ----------
+
+DESCRIBE students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Use the appropriate syntax to access the **last_name** and **city** information from the **profile** column
+
+-- COMMAND ----------
+
+-- ANSWER
+SELECT email, profile:last_name AS student_surname, profile:address:city AS student_city
+FROM students
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2- Higher Order Functions
+-- MAGIC
+-- MAGIC Review the array column **courses** in the **enrollments** table created in the previous lab
+
+-- COMMAND ----------
+
+SELECT enroll_id, courses
+FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Filter this array column to keep only course elements having subtotal greater than 40
+
+-- COMMAND ----------
+
+-- ANSWER
+SELECT
+ enroll_id,
+ courses,
+ FILTER (courses, i -> i.subtotal > 40) AS large_totals
+FROM enrollments
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3- SQL UDFs
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Define a UDF function named **get_letter_grade** that takes one parameter named **gpa** of type DOUBLE. It returns the corresponding letter grade as indicated in the following table:
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC | GPA (4.0 Scale) | Grade Letter |
+-- MAGIC |---------|-----------|
+-- MAGIC | 3.50 - 4.0 | A |
+-- MAGIC | 2.75 - 3.44 | B |
+-- MAGIC | 2.0 - 2.74 | C |
+-- MAGIC | 0.0 - 1.99 | F |
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE FUNCTION get_letter_grade(gpa DOUBLE)
+RETURNS STRING
+RETURN CASE
+ WHEN gpa >= 3.5 THEN "A"
+ WHEN gpa >= 2.75 AND gpa < 3.5 THEN "B"
+ WHEN gpa >= 2 AND gpa < 2.75 THEN "C"
+ ELSE "F"
+ END
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC Let's apply the above UDF on the **students** table created in the previous lab
+-- MAGIC
+-- MAGIC Fill in the below query to call the defined UDF on the **gpa** column
+
+-- COMMAND ----------
+
+-- ANSWER
+SELECT student_id, gpa, get_letter_grade(gpa) as letter_grade
+FROM students
+
+-- COMMAND ----------
+
+
diff --git a/Labs/Solutions/3- Incremental Data Processing/3.1L Solution - Spark Structured Streaming.py b/Labs/Solutions/3- Incremental Data Processing/3.1L Solution - Spark Structured Streaming.py
new file mode 100644
index 0000000..ef51253
--- /dev/null
+++ b/Labs/Solutions/3- Incremental Data Processing/3.1L Solution - Spark Structured Streaming.py
@@ -0,0 +1,114 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab Solution: Spark Structured Streaming
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the following cell to setup the lab environment
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Auto Loader
+# MAGIC
+# MAGIC Use Auto Loader to incrementally load enrollments json files from the directory **{dataset_school}/enrollments-json-raw** into a streaming view called **`enrollments_tmp_vw`**
+# MAGIC
+
+# COMMAND ----------
+
+dataset_source = f"{dataset_school}/enrollments-json-raw"
+schema_location = "dbfs:/mnt/DE-Associate/checkpoints/school/enrollments_stats"
+
+# ANSWER
+(spark.readStream
+ .format("cloudFiles")
+ .option("cloudFiles.format", "json")
+ .option("cloudFiles.schemaLocation", schema_location)
+ .load(dataset_source)
+ .createOrReplaceTempView("enrollments_tmp_vw"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Calculating aggregations on streaming data
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view against **`enrollments_tmp_vw`** to count the number of enrollments per **`student_id`**. Name the aggregated field: **`enrollments_counts`**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC -- ANSWER
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW enrollments_per_student_tmp_vw AS
+# MAGIC SELECT
+# MAGIC student_id, count(enroll_id) AS enrollments_count
+# MAGIC FROM enrollments_tmp_vw
+# MAGIC GROUP BY student_id
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Writing stream data
+# MAGIC
+# MAGIC Stream the aggregated data from the **`enrollments_per_student_tmp_vw`** view to a Delta table called **`enrollments_stats`**.
+
+# COMMAND ----------
+
+checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/enrollments_stats"
+
+# ANSWER
+query = (spark.table("enrollments_per_student_tmp_vw")
+ .writeStream
+ .option("checkpointLocation", checkpoint_path)
+ .outputMode("complete")
+ .table("enrollments_stats")
+ )
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Query the data in the **`enrollments_stats`** table to ensure data was written as expected.
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the below cell to land a new json file of enrollments data
+
+# COMMAND ----------
+
+load_new_json_data()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Verify that the statistics have been updated in the table **enrollments_stats**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4 - Canceling streaming query
+# MAGIC
+# MAGIC Finally, cancel the above streaming query
diff --git a/Labs/Solutions/3- Incremental Data Processing/3.2L Solution - Multi-Hop Architecture.py b/Labs/Solutions/3- Incremental Data Processing/3.2L Solution - Multi-Hop Architecture.py
new file mode 100644
index 0000000..4d222c1
--- /dev/null
+++ b/Labs/Solutions/3- Incremental Data Processing/3.2L Solution - Multi-Hop Architecture.py
@@ -0,0 +1,188 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab Solution: Multi-Hop Architecture
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the following cell to setup the lab environment
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Declaring Bronze Table
+# MAGIC
+# MAGIC Use Auto Loader to incrementally load enrollments json files from the directory **{dataset_school}/enrollments-json-raw** to a Delta table called **`bronze`**
+# MAGIC
+
+# COMMAND ----------
+
+dataset_source = f"{dataset_school}/enrollments-json-raw"
+bronze_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/bronze"
+schema_location = bronze_checkpoint_path
+
+# ANSWER
+(spark.readStream
+ .format("cloudFiles")
+ .option("cloudFiles.format", "json")
+ .option("cloudFiles.schemaLocation", schema_location)
+ .load(dataset_source)
+ .writeStream
+ .option("checkpointLocation", bronze_checkpoint_path)
+ .outputMode("append")
+ .table("bronze")
+)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Let's create a streaming temporary view from the bronze table in order to perform transformations using SQL.
+
+# COMMAND ----------
+
+(spark
+ .readStream
+ .table("bronze")
+ .createOrReplaceTempView("bronze_tmp"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2 - Data Cleansing & Enrichment
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view **`bronze_cleaned_tmp`** against **`bronze_tmp`** that does the following:
+# MAGIC * Remove records with **quantity** of 0 item
+# MAGIC * Add a column called **`processing_time`** containing the current timestamp using the **current_timestamp()** function
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC -- ANSWER
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW bronze_cleaned_tmp AS
+# MAGIC SELECT
+# MAGIC *, current_timestamp() processing_time
+# MAGIC FROM bronze_tmp
+# MAGIC WHERE quantity > 0
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3 - Declaring Silver Table
+# MAGIC
+# MAGIC Stream the data from **`bronze_cleaned_tmp`** to a table called **`silver`**.
+
+# COMMAND ----------
+
+silver_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/silver"
+
+# ANSWER
+(spark.table("bronze_cleaned_tmp")
+ .writeStream
+ .option("checkpointLocation", silver_checkpoint_path)
+ .outputMode("append")
+ .table("silver")
+)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Let's create a streaming temporary view from the silver table in order to perform business-level aggregation using SQL
+
+# COMMAND ----------
+
+(spark
+ .readStream
+ .table("silver")
+ .createOrReplaceTempView("silver_tmp"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4- Declaring Gold Table
+# MAGIC
+# MAGIC Using CTAS syntax, define a new streaming view **`enrollments_per_student_tmp_vw`** against **`silver_tmp`** to count the number of enrollments per **`student`**. Name the aggregated field: **`enrollments_count`**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC -- ANSWER
+# MAGIC CREATE OR REPLACE TEMPORARY VIEW enrollments_per_student_tmp_vw AS
+# MAGIC SELECT
+# MAGIC student_id, count(enroll_id) AS enrollments_count
+# MAGIC FROM silver_tmp
+# MAGIC GROUP BY student_id
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC
+# MAGIC Stream the aggregated data from the **`enrollments_per_student_tmp_vw`** view to a Delta table called **`gold_enrollments_stats`**.
+
+# COMMAND ----------
+
+gold_checkpoint_path = "dbfs:/mnt/DE-Associate/checkpoints/school/gold_enrollments_stats"
+
+# ANSWER
+query = (spark.table("enrollments_per_student_tmp_vw")
+ .writeStream
+ .option("checkpointLocation", gold_checkpoint_path)
+ .outputMode("complete")
+ .table("gold_enrollments_stats"))
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Query the data in the **`gold_enrollments_stats`** table to ensure data was written as expected.
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM gold_enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Run the below cell to land new a json file of enrollments data
+
+# COMMAND ----------
+
+load_new_json_data()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Wait for the new data to be propagated, and then run the below query to verify that the statistics have been updated in the table **gold_enrollments_stats**
+
+# COMMAND ----------
+
+# MAGIC %sql
+# MAGIC SELECT * FROM gold_enrollments_stats
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC Finally, run the below cell for canceling the above streaming queries
+
+# COMMAND ----------
+
+for s in spark.streams.active:
+ print("Stopping stream: " + s.id)
+ s.stop()
+ s.awaitTermination()
+
+# COMMAND ----------
+
+
diff --git a/Labs/Solutions/4- Production Pipelines/4.1L Solution - Delta Live Tables.sql b/Labs/Solutions/4- Production Pipelines/4.1L Solution - Delta Live Tables.sql
new file mode 100644
index 0000000..a7cd3e8
--- /dev/null
+++ b/Labs/Solutions/4- Production Pipelines/4.1L Solution - Delta Live Tables.sql
@@ -0,0 +1,130 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab Solution: implementing a DLT pipeline
+-- MAGIC
+-- MAGIC > This notebook is **not intended** to be executed interactively, but rather to be deployed as a DLT pipeline from the **workflows** tab
+-- MAGIC
+-- MAGIC
+-- MAGIC * Help: DLT syntax documentation.
+
+-- COMMAND ----------
+
+-- MAGIC %md-sandbox
+-- MAGIC
+-- MAGIC
+-- MAGIC

+-- MAGIC
+
+-- COMMAND ----------
+
+SET datasets.path=dbfs:/mnt/DE-Associate/datasets/school;
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1- Declaring Bronze Tables
+-- MAGIC
+-- MAGIC Declare a streaming live table, **`enrollments_bronze`**, that ingests JSON data incrementally using Auto Loader from the directory **"${datasets.path}/enrollments-json-raw"**
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE OR REFRESH STREAMING LIVE TABLE enrollments_bronze
+AS SELECT * FROM cloud_files("${datasets.path}/enrollments-json-raw", "json",
+ map("cloudFiles.inferColumnTypes", "true"))
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC Declare a live table, **`students_bronze`**, that load data directly from JSON files in the directory **"${datasets.path}/students-json"**
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE OR REFRESH LIVE TABLE students_bronze
+AS SELECT * FROM json.`${datasets.path}/students-json`
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Declaring Silver Table
+-- MAGIC
+-- MAGIC Declare a streaming live table, **`enrollments_cleaned`**, that:
+-- MAGIC
+-- MAGIC 1. Enrich the **enrollments_bronze** data through an inner join with the **`students_bronze`** table on the common **`student_id`** field to obtain the student's country
+-- MAGIC 1. Implement quality control by applying a constraint to drop records with a null **`email`**
+-- MAGIC 1. The table will have the following schema:
+-- MAGIC
+-- MAGIC | Field | Type |
+-- MAGIC | --- | --- |
+-- MAGIC | **`enroll_id`** | **`STRING`** |
+-- MAGIC | **`total`** | **`DOUBLE`** |
+-- MAGIC | **`email`** | **`STRING`** |
+-- MAGIC | **`country`** | **`STRING`** |
+-- MAGIC
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE OR REFRESH STREAMING LIVE TABLE enrollments_cleaned (
+ CONSTRAINT valid_email EXPECT (email IS NOT NULL) ON VIOLATION DROP ROW
+)
+AS SELECT enroll_id, total, email, profile:address:country as country
+ FROM STREAM(LIVE.enrollments_bronze) n
+ LEFT JOIN LIVE.students_bronze s
+ ON n.student_id = s.student_id
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q3- Declaring Gold Table
+-- MAGIC
+-- MAGIC Declare a live table, **`course_sales_per_country`** against **`enrollments_cleaned`** that calculate per **`country`** the following:
+-- MAGIC * **`enrollments_count`**: the number of enrollments
+-- MAGIC * **`enrollments_amount`**: the sum of the total amount of enrollments
+-- MAGIC
+-- MAGIC Add a comment to the table: "Course Sales Per Country"
+
+-- COMMAND ----------
+
+-- ANSWER
+CREATE OR REFRESH LIVE TABLE course_sales_per_country
+COMMENT "Course Sales Per Country"
+AS
+ SELECT country, count(enroll_id) AS enrollments_count, sum(total) AS enrollments_amount
+ FROM LIVE.enrollments_cleaned
+ GROUP BY country
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q4- Deploying DLT pipeline
+-- MAGIC
+-- MAGIC From the **Workflows** button on the sidebar, under the **Delta Live Tables** tab, click **Create Pipeline**
+-- MAGIC
+-- MAGIC Configure the pipeline settings specified below:
+-- MAGIC
+-- MAGIC | Setting | Instructions |
+-- MAGIC |--|--|
+-- MAGIC | Pipeline name | School DLT |
+-- MAGIC | Product edition | Choose **Advanced** |
+-- MAGIC | Pipeline mode | Choose **Triggered** |
+-- MAGIC | Source code | Use the navigator to select this current notebook (4.1L - Delta Live Tables) |
+-- MAGIC | Storage location | dbfs:/mnt/DE-Associate/dlt/school |
+-- MAGIC | Target schema | DE_Associate_School_DLT |
+-- MAGIC | Cluster policy | Leave it **None**|
+-- MAGIC | Cluster mode | Choose **Fixed size**|
+-- MAGIC | Workers | Enter **0**|
+-- MAGIC | Photon Acceleration | Leave it unchecked |
+-- MAGIC | Advanced Configuration | Click **Add Configuration** and enter:
- Key: **datasets.path**
- Value: **dbfs:/mnt/DE-Associate/datasets/school** |
+-- MAGIC | Channel | Choose **Current**|
+-- MAGIC
+-- MAGIC Finally, click **Create**.
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC ### Q5 - Run your Pipeline
+-- MAGIC
+-- MAGIC Select **Development** mode and Click **Start** to begin the update to your pipeline's tables
diff --git a/Labs/Solutions/4- Production Pipelines/4.2L Solution - Jobs - Land New Data.py b/Labs/Solutions/4- Production Pipelines/4.2L Solution - Jobs - Land New Data.py
new file mode 100644
index 0000000..4a3af21
--- /dev/null
+++ b/Labs/Solutions/4- Production Pipelines/4.2L Solution - Jobs - Land New Data.py
@@ -0,0 +1,91 @@
+# Databricks notebook source
+# MAGIC %md
+# MAGIC
+# MAGIC ## Lab Solution: Creating a multi-task job
+# MAGIC
+# MAGIC In this lab, we will create a job that has 2 tasks:
+# MAGIC 1. The current notebook that lands a new batch of data in the lab dataset directory
+# MAGIC 1. The Delta Live Table pipeline created in the previous lab to processes this data
+# MAGIC
+# MAGIC * Help: Databricks Jobs documentation.
+
+# COMMAND ----------
+
+# MAGIC %md-sandbox
+# MAGIC
+# MAGIC
+# MAGIC

+# MAGIC
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q1- Configuring Task 1 - Land New Data
+# MAGIC
+# MAGIC
+# MAGIC From the **Workflows** button on the sidebar, under the **Jobs** tab, click the **Create Job** button.
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 1. Set the job name in the top-left of the screen to **School Job**
+# MAGIC 1. Configure the first task as specified below:
+# MAGIC | Setting | Value |
+# MAGIC |--|--|
+# MAGIC | Task name | Enter **Land New Data** |
+# MAGIC | Type | Choose **Notebook** |
+# MAGIC | Source | Choose **Workspace** |
+# MAGIC | Path | Use the navigator to choose the current notebook (4.2L - Jobs - Land New Data) |
+# MAGIC | Cluster | Select your cluster from the dropdown, under **Existing All Purpose Clusters** |
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 3. Click the **Create** button
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q2- Configuring Task 2 - DLT pipeline
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 1. Click the add button (**+**) to add a new **Delta Live Tables pipeline** task
+# MAGIC 1. Configure the task:
+# MAGIC
+# MAGIC | Setting | Value |
+# MAGIC |--|--|
+# MAGIC | Task name | Enter **DLT pipeline** |
+# MAGIC | Type | Choose **Delta Live Tables pipeline** |
+# MAGIC | Pipeline | Choose the DLT pipeline created in the previous lab |
+# MAGIC | Depends on | Choose **Land New Data**, which is the previous task we defined above |
+# MAGIC
+# MAGIC
+# MAGIC
+# MAGIC 3. Click the **Create task** button
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q3- Run the job
+# MAGIC
+# MAGIC Click the **Run now** button in the top right to run this job. From the **Runs** tab, check your job run
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC #### Q4- Review the finished job
+# MAGIC
+# MAGIC Once all tasks completed successfully, review the contents of each task to verify its result
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC > **Note**: The below cells are to be run as part of the **Task 1** to land new batch of data in the dataset directory
+
+# COMMAND ----------
+
+# MAGIC %run ../Includes/Setup-Lab
+
+# COMMAND ----------
+
+load_new_json_data()
diff --git a/Labs/Solutions/4- Production Pipelines/4.3L Solution - Databricks SQL.sql b/Labs/Solutions/4- Production Pipelines/4.3L Solution - Databricks SQL.sql
new file mode 100644
index 0000000..c5b32dd
--- /dev/null
+++ b/Labs/Solutions/4- Production Pipelines/4.3L Solution - Databricks SQL.sql
@@ -0,0 +1,121 @@
+-- Databricks notebook source
+-- MAGIC %md
+-- MAGIC
+-- MAGIC ## Lab Solution: Design a Dashboard with DBSQL
+-- MAGIC
+-- MAGIC In this lab, we will design a dashboard in DBSQL that has 2 graphs:
+-- MAGIC 1. Bar graph that shows the number of students per country
+-- MAGIC 1. Line graph that shows the daily enrollments amount
+-- MAGIC
+-- MAGIC * Help: Databricks SQL documentation.
+
+-- COMMAND ----------
+
+-- MAGIC %md-sandbox
+-- MAGIC
+-- MAGIC
+-- MAGIC

+-- MAGIC
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q1 - Working with queries in SQL Editor
+-- MAGIC
+-- MAGIC Run the the below query in **SQL Editor** in Databricks SQL, and then save it with the name **Student Counts**
+
+-- COMMAND ----------
+
+SELECT profile:address:country as country, count(student_id) AS students_count
+FROM hive_metastore.de_associate_school.students
+GROUP BY profile:address:country
+ORDER BY students_count DESC
+LIMIT 10
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q2 - Creating a Bar Graph Visualization
+-- MAGIC
+-- MAGIC Create a bar graph that shows the number of students per country
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC Steps:
+-- MAGIC 1. Click the Add butoon (**+**) next to the results tab, and select **Visualization** from the dialog box
+-- MAGIC 1. Select **`Bar`** for the **Visualization Type**
+-- MAGIC 1. Set **`country`** for the **X Column**
+-- MAGIC 1. Under **Y columns** click **Add column**, and set it to **`students_count`**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. Finally, set the title of the graph to **Student Counts Viz**
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q3 - Creating a New Dashboard
+-- MAGIC
+-- MAGIC Add the above graph to a new dashboard named **Students Statistics**
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click the three vertical dots button at the top of the graph and select **Add to Dashboard**.
+-- MAGIC 1. Click the **Create new dashboard** option
+-- MAGIC 1. Name your dashboard **Students Statistics**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. With the new dashboard selected as the target, click **OK** to add your visualization
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q4 - Creating a Line Plot Visualization
+-- MAGIC
+-- MAGIC 1. Run the the below query in a new query tab in the **SQL Editor**, and then save it with the name **Daily Sales**
+
+-- COMMAND ----------
+
+SELECT cast(from_unixtime(enroll_timestamp, 'yyyy-MM-dd HH:mm:ss') AS date) enroll_timestamp,
+ sum(total) AS enrollments_amount
+FROM hive_metastore.de_associate_school.enrollments n
+INNER JOIN hive_metastore.de_associate_school.students s ON s.student_id = n.student_id
+GROUP BY enroll_timestamp
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC
+-- MAGIC 2. Create a Line Plot Visualization that shows the daily enrollments amount
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click the **Add Visualization** button
+-- MAGIC 1. Select **`Line`** for the **Visualization Type**
+-- MAGIC 1. Set **`enroll_timestamp`** for the **X Column**
+-- MAGIC 1. Under **Y columns** click **Add column**, and set it to **`enrollments_amount`**
+-- MAGIC 1. Click **Save**
+-- MAGIC 1. Finally, set the title of the graph to **Daily Sales Viz**
+-- MAGIC 1. Click the three vertical dots button at the top of the graph and select **Add to Dashboard**.
+-- MAGIC 1. Select the dashboard **Students Statistics** created above
+-- MAGIC 1. Click **OK** to add your visualization
+
+-- COMMAND ----------
+
+-- MAGIC %md
+-- MAGIC #### Q5 - Review your Dashboard
+-- MAGIC
+-- MAGIC Open your Dashboard and refresh its underlaying data
+-- MAGIC
+-- MAGIC ##### Anwser:
+-- MAGIC
+-- MAGIC Steps:
+-- MAGIC 1. Click on the **Dashboards** button on left side bar
+-- MAGIC 1. Find the dashboard **Students Statistics** created earlier. Click to open it
+-- MAGIC 1. Click the **Refresh** button to update your dashboard
diff --git a/Labs/Solutions/Includes/Setup-Lab.py b/Labs/Solutions/Includes/Setup-Lab.py
new file mode 100644
index 0000000..ac61e86
--- /dev/null
+++ b/Labs/Solutions/Includes/Setup-Lab.py
@@ -0,0 +1,137 @@
+# Databricks notebook source
+data_source_uri = "s3://dalhussein-courses/datasets/school/v1/"
+dataset_school = 'dbfs:/mnt/DE-Associate/datasets/school'
+checkpoint_path = 'dbfs:/mnt/DE-Associate/checkpoints/school'
+dlt_path = 'dbfs:/mnt/DE-Associate/dlt/school'
+db_name = 'DE_Associate_School'
+dlt_db_name = 'DE_Associate_School_DLT'
+spark.conf.set(f"dataset.school", dataset_school)
+
+# COMMAND ----------
+
+def clean_up():
+ print("Removing Checkpoints ...")
+ dbutils.fs.rm(checkpoint_path, True)
+ print("Removing DLT storage location ...")
+ dbutils.fs.rm(dlt_path, True)
+ print("Dropping Database ...")
+ spark.sql(f"DROP SCHEMA IF EXISTS {db_name} CASCADE")
+ print("Dropping DLT database ...")
+ spark.sql(f"DROP SCHEMA IF EXISTS {dlt_db_name} CASCADE")
+ print("Removing Dataset ...")
+ dbutils.fs.rm(dataset_school, True)
+ print("Done")
+
+# COMMAND ----------
+
+try:
+ clean = int(dbutils.widgets.get("clean"))
+except:
+ clean = 0
+
+if clean:
+ clean_up()
+
+# COMMAND ----------
+
+def path_exists(path):
+ try:
+ dbutils.fs.ls(path)
+ return True
+ except Exception as e:
+ msg = str(e)
+ if ("com.databricks.sql.io.CloudFileNotFoundException" in msg
+ or "java.io.FileNotFoundException" in msg):
+ return False
+ else:
+ raise
+
+# COMMAND ----------
+
+def download_dataset(source, target):
+ files = dbutils.fs.ls(source)
+
+ for f in files:
+ source_path = f"{source}/{f.name}"
+ target_path = f"{target}/{f.name}"
+ if not path_exists(target_path):
+ print(f"Copying {f.name} ...")
+ dbutils.fs.cp(source_path, target_path, True)
+
+# COMMAND ----------
+
+def get_index(dir):
+ files = dbutils.fs.ls(dir)
+ index = 0
+ if files:
+ file = max(files).name
+ index = int(file.rsplit('.', maxsplit=1)[0])
+ return index+1
+
+# COMMAND ----------
+
+def set_current_schema(schema_name, catalog_name='hive_metastore'):
+ spark.sql(f"USE CATALOG {catalog_name}")
+ spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
+ spark.sql(f"USE {schema_name}")
+ print(f"Schema for the hands-on labs: {catalog_name}.{schema_name}")
+
+# COMMAND ----------
+
+# Structured Streaming
+streaming_dir = f"{dataset_school}/enrollments-streaming"
+raw_dir = f"{dataset_school}/enrollments-raw"
+
+def load_file(current_index):
+ latest_file = f"{str(current_index).zfill(2)}.parquet"
+ print(f"Loading {latest_file} file to the school dataset")
+ dbutils.fs.cp(f"{streaming_dir}/{latest_file}", f"{raw_dir}/{latest_file}")
+
+
+def load_new_data(all=False):
+ index = get_index(raw_dir)
+ if index >= 10:
+ print("No more data to load\n")
+
+ elif all == True:
+ while index <= 10:
+ load_file(index)
+ index += 1
+ else:
+ load_file(index)
+ index += 1
+
+# COMMAND ----------
+
+# DLT
+streaming_enrollments_dir = f"{dataset_school}/enrollments-json-streaming"
+streaming_courses_dir = f"{dataset_school}/courses-streaming"
+
+raw_enrollments_dir = f"{dataset_school}/enrollments-json-raw"
+raw_courses_dir = f"{dataset_school}/courses-cdc"
+
+def load_json_file(current_index):
+ latest_file = f"{str(current_index).zfill(2)}.json"
+ print(f"Loading {latest_file} enrollments file to the school dataset")
+ dbutils.fs.cp(f"{streaming_enrollments_dir}/{latest_file}", f"{raw_enrollments_dir}/{latest_file}")
+ #print(f"Loading {latest_file} courses file to the school dataset")
+ #dbutils.fs.cp(f"{streaming_courses_dir}/{latest_file}", f"{raw_courses_dir}/{latest_file}")
+
+
+def load_new_json_data(all=False):
+ index = get_index(raw_enrollments_dir)
+ if index >= 10:
+ print("No more data to load\n")
+
+ elif all == True:
+ while index <= 10:
+ load_json_file(index)
+ index += 1
+ else:
+ load_json_file(index)
+ index += 1
+
+# COMMAND ----------
+
+download_dataset(data_source_uri, dataset_school)
+set_current_schema(db_name)
diff --git a/README.md b/README.md
index 07e748f..c81d2d5 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,16 @@
# Databricks Certified Data Engineer Associate
+
This repository contains the resources of the preparation course for Databricks Data Engineer Associate certification exam on Udemy:
+
+https://www.udemy.com/course/databricks-certified-data-engineer-associate/?referralCode=F0FA48E9A0546C975F14.
+
+
-https://www.udemy.com/course/databricks-certified-data-engineer-associate/?referralCode=F0FA48E9A0546C975F14.
+## Practice Exams
-To import these resources into your Databricks workspace, clone this repository via Databricks Repos.
+
+Practice exams for this certification are available in the following Udemy course:
+
+https://www.udemy.com/course/practice-exams-databricks-certified-data-engineer-associate/?referralCode=9AA679C03D1F51B2C956.