Skip to content
Open

Dev #58

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
f66bdda
Merge pull request #1 from derar-alhussein/dev
derar-alhussein Nov 15, 2022
5607297
Rename notebook
Mar 5, 2023
4eef1e4
Move transformations code
Jul 16, 2023
cc0b614
Add images folder
Sep 11, 2023
346f0df
Update images links
Sep 11, 2023
cd3ec26
Insert employees in multiple transactions
Sep 16, 2023
f9981ab
Update the number of transaction log file
Sep 16, 2023
b0947c0
Update table version numbers
Sep 16, 2023
27c53ba
Add comments
Sep 17, 2023
425bd51
Add labs
Sep 19, 2023
c9d643b
Add comments
Sep 26, 2023
edeeaea
Update comments
Sep 26, 2023
1b7f32a
Use two-level parameter for DLT pipelines
Dec 4, 2023
4a986d4
Fix table path
Dec 28, 2023
89a7145
Add SQL queries of Managing Permissions hands-on
Mar 10, 2024
bb88c92
Update lab datasets path
Mar 10, 2024
967b9ca
Fix typo
Apr 10, 2024
105e69d
Add use catalog commands
Jul 14, 2024
a206f12
Use three-level namespace
Jul 14, 2024
bfe5559
Add CDC select query
Jul 14, 2024
115e5e3
Merge pull request #24 from derar-alhussein/use_catalog_hive_metastore
derar-alhussein Jul 14, 2024
30d6753
Use catalog hive_metastore
Jul 15, 2024
5f532c6
Merge pull request #25 from derar-alhussein/update_lab
derar-alhussein Jul 15, 2024
f73ef31
Define function to set catalog
Jul 15, 2024
8cf8ae8
Add instructions for creating clusters
Jul 15, 2024
7cd1884
Specify the runtime version
Jul 15, 2024
156b181
Update source
Nov 24, 2024
fa15f8b
Update lab source
Dec 4, 2024
482d8a3
Update lab source
Dec 4, 2024
993cd7d
Update README.md
derar-alhussein Feb 17, 2025
ea79933
Configure S3 for anonymous access
Apr 7, 2025
3fcb0a2
Remove comments
derar-alhussein Dec 4, 2025
b1a2217
Update exception handling
derar-alhussein Dec 12, 2025
c3a519a
Add note about Unity Catalog
derar-alhussein Dec 26, 2025
376a1ed
updates
sivakishnapattem Feb 6, 2026
04ebbfd
Merge branch 'derar-alhussein:dev' into dev
sivakishnapattem Feb 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions 1- Databricks Lakehouse Platform/1.0 - Creating Clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Databricks notebook source
# MAGIC %md
# MAGIC
# MAGIC ## Creating Clusters

# COMMAND ----------

# MAGIC %md
# MAGIC #### Creating a Demo Cluster
# MAGIC
# MAGIC Create a cluster with the following configurations:
# MAGIC
# MAGIC | Setting | Instructions |
# MAGIC |--|--|
# MAGIC |Cluster name|**Demo Cluster**|
# MAGIC |Cluster mode|**Single node**|
# MAGIC |Runtime version|Select the Databricks runtime version 13.3 LTS|
# MAGIC |Photon Acceleration| Uncheck the option |
# MAGIC |Node type|4 cores|
# MAGIC |Auto termination|30 minutes|
# MAGIC

# COMMAND ----------

# MAGIC %md
# MAGIC 1- Navigate to the **Compute** tab in the left sidebar.
# MAGIC
# MAGIC 2- Under the **All-purpose compute** tab, click **Create compute**.
# MAGIC

# COMMAND ----------

# MAGIC %md-sandbox
# MAGIC
# MAGIC <div style="text-align: center; line-height: 0; padding-top: 9px;">
# MAGIC <img src="https://raw.githubusercontent.com/derar-alhussein/Databricks-Certified-Data-Engineer-Associate/main/Labs/Includes/images/cluster_par1.png">
# MAGIC </div>

# COMMAND ----------

# MAGIC %md
# MAGIC 3- At the top, click the default name to change it. Name your cluster **Demo Cluster**.
# MAGIC
# MAGIC 4- Select **Single node** cluster
# MAGIC

# COMMAND ----------

# MAGIC %md-sandbox
# MAGIC
# MAGIC <div style="text-align: center; line-height: 0; padding-top: 9px;">
# MAGIC <img src="https://raw.githubusercontent.com/derar-alhussein/Databricks-Certified-Data-Engineer-Associate/main/Labs/Includes/images/cluster_par2.png">
# MAGIC </div>

# COMMAND ----------

# MAGIC %md
# MAGIC 5- Select the Databricks runtime version 13.3 LTS (Long Term Support)
# MAGIC
# MAGIC 6- Uncheck the **Use Photon Acceleration** option
# MAGIC
# MAGIC 7- Select a Node type of 4 cores
# MAGIC
# MAGIC 8- Set the auto termination of the cluster to 30 minutes
# MAGIC
# MAGIC 9- Lastly, click **Create compute**.
# MAGIC

# COMMAND ----------

# MAGIC %md-sandbox
# MAGIC
# MAGIC <div style="text-align: center; line-height: 0; padding-top: 9px;">
# MAGIC <img src="https://raw.githubusercontent.com/derar-alhussein/Databricks-Certified-Data-Engineer-Associate/main/Labs/Includes/images/cluster_par3.png">
# MAGIC </div>
23 changes: 14 additions & 9 deletions 1- Databricks Lakehouse Platform/1.1 - Notebook Basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,31 @@
# MAGIC # Title 1
# MAGIC ## Title 2
# MAGIC ### Title 3
# MAGIC
# MAGIC
# MAGIC Text with **bold** and *italicized* words in it.
# MAGIC
# MAGIC
# MAGIC Ordered list
# MAGIC 1. first
# MAGIC 1. second
# MAGIC 1. third
# MAGIC
# MAGIC
# MAGIC Unordered list
# MAGIC * coffee
# MAGIC * tea
# MAGIC * tea
# MAGIC
# MAGIC
# MAGIC * milk
# MAGIC
# MAGIC
# MAGIC Images:
# MAGIC ![Associate-badge](https://www.databricks.com/wp-content/uploads/2022/04/associate-badge-eng.svg)
# MAGIC
# MAGIC
# MAGIC And of course, tables:
# MAGIC
# MAGIC
# MAGIC | user_id | user_name |
# MAGIC |---------|-----------|
# MAGIC | 1 | Adam |
# MAGIC | 2 | Sarah |
# MAGIC | 3 | John |
# MAGIC
# MAGIC
# MAGIC Links (or Embedded HTML): <a href="https://docs.databricks.com/notebooks/notebooks-manage.html" target="_blank"> Managing Notebooks documentation</a>

# COMMAND ----------
Expand Down Expand Up @@ -70,4 +70,9 @@

# COMMAND ----------

print("End of the Notebook")

# COMMAND ----------

# MAGIC %md
# MAGIC ## End of Notebook
Original file line number Diff line number Diff line change
@@ -1,16 +1,52 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC ## Creating Delta Lake Tables

-- COMMAND ----------

-- MAGIC %md
-- MAGIC **Note:** If your workspace does not support the `hive_metastore` catalog, switch to the **unity-catalog** branch in this Git Folder.

-- COMMAND ----------

USE CATALOG hive_metastore

-- COMMAND ----------

-- Demo table used throughout this notebook: one row per employee,
-- with an integer id, a string name, and a double-precision salary.
CREATE TABLE employees (
  id INT,
  name STRING,
  salary DOUBLE
);

-- COMMAND ----------

-- MAGIC %md
-- MAGIC
-- MAGIC ## Catalog Explorer
-- MAGIC
-- MAGIC Check the created **employees** table in the **Catalog** explorer.

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Inserting Data

-- COMMAND ----------

INSERT INTO employees
VALUES
(1, "Adam", 3500.0),
(2, "Sarah", 4020.5),
(2, "Sarah", 4020.5);

INSERT INTO employees
VALUES
(3, "John", 2999.3),
(4, "Thomas", 4000.3),
(5, "Anna", 2500.0),
(4, "Thomas", 4000.3);

INSERT INTO employees
VALUES
(5, "Anna", 2500.0);

INSERT INTO employees
VALUES
(6, "Kim", 6200.3)

-- COMMAND ----------
Expand All @@ -19,14 +55,29 @@ SELECT * FROM employees

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Exploring Table Metadata

-- COMMAND ----------

DESCRIBE DETAIL employees

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Exploring Table Directory

-- COMMAND ----------

-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Updating Table

-- COMMAND ----------

UPDATE employees
SET salary = salary + 100
WHERE name LIKE "A%"
Expand All @@ -49,6 +100,11 @@ SELECT * FROM employees

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Exploring Table History

-- COMMAND ----------

DESCRIBE HISTORY employees

-- COMMAND ----------
Expand All @@ -57,7 +113,7 @@ DESCRIBE HISTORY employees

-- COMMAND ----------

-- MAGIC %fs head 'dbfs:/user/hive/warehouse/employees/_delta_log/00000000000000000002.json'
-- MAGIC %fs head 'dbfs:/user/hive/warehouse/employees/_delta_log/00000000000000000005.json'

-- COMMAND ----------

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC
-- MAGIC ## Delta Time Travel

-- COMMAND ----------

USE CATALOG hive_metastore

-- COMMAND ----------

DESCRIBE HISTORY employees

-- COMMAND ----------

SELECT *
FROM employees VERSION AS OF 1
FROM employees VERSION AS OF 4

-- COMMAND ----------

SELECT * FROM employees@v1
SELECT * FROM employees@v4

-- COMMAND ----------

Expand All @@ -20,7 +30,7 @@ SELECT * FROM employees

-- COMMAND ----------

RESTORE TABLE employees TO VERSION AS OF 2
RESTORE TABLE employees TO VERSION AS OF 5

-- COMMAND ----------

Expand All @@ -32,6 +42,12 @@ DESCRIBE HISTORY employees

-- COMMAND ----------

-- MAGIC %md
-- MAGIC
-- MAGIC ## OPTIMIZE Command

-- COMMAND ----------

DESCRIBE DETAIL employees

-- COMMAND ----------
Expand All @@ -53,6 +69,12 @@ DESCRIBE HISTORY employees

-- COMMAND ----------

-- MAGIC %md
-- MAGIC
-- MAGIC ## VACUUM Command

-- COMMAND ----------

VACUUM employees

-- COMMAND ----------
Expand Down Expand Up @@ -81,16 +103,18 @@ SELECT * FROM employees@v1

-- COMMAND ----------

DROP TABLE employees
-- MAGIC %md
-- MAGIC
-- MAGIC ## Dropping Tables

-- COMMAND ----------

SELECT * FROM employees
DROP TABLE employees

-- COMMAND ----------

-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'
SELECT * FROM employees

-- COMMAND ----------


-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/employees'
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC ## Managed Tables

-- COMMAND ----------

USE CATALOG hive_metastore;

-- Managed-table example: created without a LOCATION clause.
CREATE TABLE managed_default (
  width INT,
  length INT,
  height INT
);

Expand All @@ -11,6 +18,12 @@ DESCRIBE EXTENDED managed_default

-- COMMAND ----------

-- MAGIC %md
-- MAGIC
-- MAGIC ## External Tables

-- COMMAND ----------

-- External-table example: same columns as the managed table, but the
-- LOCATION clause points the table at an explicit DBFS path.
CREATE TABLE external_default (
  width INT,
  length INT,
  height INT
)
LOCATION 'dbfs:/mnt/demo/external_default';
Expand All @@ -24,6 +37,12 @@ DESCRIBE EXTENDED external_default

-- COMMAND ----------

-- MAGIC %md
-- MAGIC
-- MAGIC ## Dropping Tables

-- COMMAND ----------

DROP TABLE managed_default

-- COMMAND ----------
Expand All @@ -40,6 +59,11 @@ DROP TABLE external_default

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Creating Schemas

-- COMMAND ----------

CREATE SCHEMA new_default

-- COMMAND ----------
Expand Down Expand Up @@ -80,14 +104,19 @@ DROP TABLE external_new_default;

-- COMMAND ----------

-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/managed_new_default'
-- MAGIC %fs ls 'dbfs:/user/hive/warehouse/new_default.db/managed_new_default'

-- COMMAND ----------

-- MAGIC %fs ls 'dbfs:/mnt/demo/external_new_default'

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Creating Schemas in Custom Location

-- COMMAND ----------

CREATE SCHEMA custom
LOCATION 'dbfs:/Shared/schemas/custom.db'

Expand Down Expand Up @@ -134,7 +163,3 @@ DROP TABLE external_custom;
-- COMMAND ----------

-- MAGIC %fs ls 'dbfs:/mnt/demo/external_custom'

-- COMMAND ----------


Loading