Skip to content

Commit 07b153a

Browse files
committed
fix: update SparkSession retrieval method in Databricks adapter
### Description This commit modifies the `DatabricksSessionHandle` and `SessionPythonJobHelper` classes to ensure the retrieval of the SparkSession uses the shared SparkContext instead of creating a new one. This change enhances compatibility with Databricks environments and improves error handling during SparkSession initialization.
1 parent 2dfac34 commit 07b153a

3 files changed

Lines changed: 20 additions & 17 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = "1.10.15d"
1+
version = "1.10.15-2"

dbt/adapters/databricks/python_models/python_submissions.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -846,25 +846,26 @@ def __init__(self, parsed_model: dict,
846846
f"[Session Python] Could not get SparkSession from __main__.spark: {e}"
847847
)
848848

849-
# Method 4: Create a new SparkSession on Databricks
850-
# If no existing session found, create one using local[*] master
851-
# This works on Databricks because the Spark runtime is already configured
849+
# Method 4: Get the shared SparkContext on Databricks and create SparkSession from it
850+
# On Databricks, we must use the shared SparkContext - we cannot create a new one
852851
if spark is None:
853852
databricks_runtime = os.getenv("DATABRICKS_RUNTIME_VERSION")
854853
if databricks_runtime:
855854
logger.debug(
856855
f"[Session Python] No existing SparkSession found. "
857-
f"Creating new SparkSession on Databricks runtime {databricks_runtime}"
856+
f"Getting shared SparkContext on Databricks runtime {databricks_runtime}"
858857
)
859858
try:
860-
spark = (SparkSession.builder.master("local[*]").appName(
861-
"dbt-databricks-session-python").getOrCreate())
859+
# On Databricks, we MUST use SparkContext.getOrCreate() to get the shared context
860+
# Creating a new SparkContext is not allowed
861+
sc = SparkContext.getOrCreate()
862+
spark = SparkSession(sc)
862863
logger.debug(
863-
"[Session Python] Created new SparkSession for session mode"
864+
"[Session Python] Created SparkSession from shared SparkContext"
864865
)
865866
except Exception as create_error:
866867
raise DbtRuntimeError(
867-
f"[Session Python] Failed to create SparkSession on Databricks: "
868+
f"[Session Python] Failed to get shared SparkContext on Databricks: "
868869
f"{create_error}. Databricks runtime version: {databricks_runtime}"
869870
) from create_error
870871
else:

dbt/adapters/databricks/session.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -212,23 +212,25 @@ def create(
212212
logger.debug(
213213
f"Could not get SparkSession from __main__.spark: {e}")
214214

215-
# Method 4: Create a new SparkSession on Databricks
216-
# If no existing session found, create one using local[*] master
217-
# This works on Databricks because the Spark runtime is already configured
215+
# Method 4: Get the shared SparkContext on Databricks and create SparkSession from it
216+
# On Databricks, we must use the shared SparkContext - we cannot create a new one
218217
if spark is None:
219218
databricks_runtime = os.getenv("DATABRICKS_RUNTIME_VERSION")
220219
if databricks_runtime:
221220
logger.debug(
222221
f"No existing SparkSession found. "
223-
f"Creating new SparkSession on Databricks runtime {databricks_runtime}"
222+
f"Getting shared SparkContext on Databricks runtime {databricks_runtime}"
224223
)
225224
try:
226-
spark = (SparkSession.builder.master("local[*]").appName(
227-
"dbt-databricks-session").getOrCreate())
228-
logger.debug("Created new SparkSession for session mode")
225+
# On Databricks, we MUST use SparkContext.getOrCreate() to get the shared context
226+
# Creating a new SparkContext is not allowed
227+
sc = SparkContext.getOrCreate()
228+
spark = SparkSession(sc)
229+
logger.debug(
230+
"Created SparkSession from shared SparkContext")
229231
except Exception as create_error:
230232
raise DbtRuntimeError(
231-
f"Failed to create SparkSession on Databricks: {create_error}. "
233+
f"Failed to get shared SparkContext on Databricks: {create_error}. "
232234
f"Databricks runtime version: {databricks_runtime}"
233235
) from create_error
234236
else:

0 commit comments

Comments
 (0)