Skip to content

Commit 2e133b3

Browse files
committed
fix: update error handling for SparkSession retrieval in Databricks adapter
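### Description

This commit updates the error messages in the `DatabricksSessionHandle` and `SessionPythonJobHelper` classes to provide clearer guidance when a SparkSession is not found. It also removes the previous fallback that tried to build a SparkSession from the shared SparkContext (`SparkContext.getOrCreate()`); when no existing SparkSession is found, session mode now raises a `DbtRuntimeError` immediately. The new messages emphasize the compatibility requirements for session mode execution in Databricks, ensuring users are informed about the necessary environments and configurations.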
1 parent 07b153a commit 2e133b3

3 files changed

Lines changed: 36 additions & 42 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = "1.10.15-2"
1+
version = "1.10.15-3"

dbt/adapters/databricks/python_models/python_submissions.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -846,33 +846,30 @@ def __init__(self, parsed_model: dict,
846846
f"[Session Python] Could not get SparkSession from __main__.spark: {e}"
847847
)
848848

849-
# Method 4: Get the shared SparkContext on Databricks and create SparkSession from it
850-
# On Databricks, we must use the shared SparkContext - we cannot create a new one
849+
# If no existing SparkSession found, provide a clear error message
851850
if spark is None:
852851
databricks_runtime = os.getenv("DATABRICKS_RUNTIME_VERSION")
853852
if databricks_runtime:
854-
logger.debug(
855-
f"[Session Python] No existing SparkSession found. "
856-
f"Getting shared SparkContext on Databricks runtime {databricks_runtime}"
857-
)
858-
try:
859-
# On Databricks, we MUST use SparkContext.getOrCreate() to get the shared context
860-
# Creating a new SparkContext is not allowed
861-
sc = SparkContext.getOrCreate()
862-
spark = SparkSession(sc)
863-
logger.debug(
864-
"[Session Python] Created SparkSession from shared SparkContext"
865-
)
866-
except Exception as create_error:
867-
raise DbtRuntimeError(
868-
f"[Session Python] Failed to get shared SparkContext on Databricks: "
869-
f"{create_error}. Databricks runtime version: {databricks_runtime}"
870-
) from create_error
853+
raise DbtRuntimeError(
854+
"[Session Python] Could not find an existing SparkSession. "
855+
"This typically happens when using the native 'dbt task' in Databricks Jobs, "
856+
"which does not provide a SparkSession context.\n\n"
857+
"Session mode is only compatible with:\n"
858+
" - Databricks Notebooks (where 'spark' is pre-initialized)\n"
859+
" - Python tasks that initialize SparkSession before running dbt\n"
860+
" - Environments where SparkSession is already available\n\n"
861+
"For the native dbt task, use DBSQL mode instead (the default):\n"
862+
" - Set 'method: dbsql' in your profile (or omit 'method' entirely)\n"
863+
" - Configure 'host' and 'http_path' to connect to a SQL warehouse or cluster\n\n"
864+
f"Databricks runtime version: {databricks_runtime}")
871865
else:
872866
raise DbtRuntimeError(
873-
"[Session Python] Session mode requires a Databricks cluster environment. "
867+
"[Session Python] Session mode requires a Databricks cluster environment "
868+
"with an active SparkSession. "
874869
"DATABRICKS_RUNTIME_VERSION environment variable not found. "
875-
"Ensure you are running on a Databricks cluster.")
870+
"Ensure you are running on a Databricks cluster in a context where "
871+
"SparkSession is available (e.g., Notebook or Python task with Spark initialized)."
872+
)
876873

877874
self._spark = spark
878875
logger.debug(

dbt/adapters/databricks/session.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -212,32 +212,29 @@ def create(
212212
logger.debug(
213213
f"Could not get SparkSession from __main__.spark: {e}")
214214

215-
# Method 4: Get the shared SparkContext on Databricks and create SparkSession from it
216-
# On Databricks, we must use the shared SparkContext - we cannot create a new one
215+
# If no existing SparkSession found, provide a clear error message
217216
if spark is None:
218217
databricks_runtime = os.getenv("DATABRICKS_RUNTIME_VERSION")
219218
if databricks_runtime:
220-
logger.debug(
221-
f"No existing SparkSession found. "
222-
f"Getting shared SparkContext on Databricks runtime {databricks_runtime}"
223-
)
224-
try:
225-
# On Databricks, we MUST use SparkContext.getOrCreate() to get the shared context
226-
# Creating a new SparkContext is not allowed
227-
sc = SparkContext.getOrCreate()
228-
spark = SparkSession(sc)
229-
logger.debug(
230-
"Created SparkSession from shared SparkContext")
231-
except Exception as create_error:
232-
raise DbtRuntimeError(
233-
f"Failed to get shared SparkContext on Databricks: {create_error}. "
234-
f"Databricks runtime version: {databricks_runtime}"
235-
) from create_error
219+
raise DbtRuntimeError(
220+
"Session mode could not find an existing SparkSession. "
221+
"This typically happens when using the native 'dbt task' in Databricks Jobs, "
222+
"which does not provide a SparkSession context.\n\n"
223+
"Session mode is only compatible with:\n"
224+
" - Databricks Notebooks (where 'spark' is pre-initialized)\n"
225+
" - Python tasks that initialize SparkSession before running dbt\n"
226+
" - Environments where SparkSession is already available\n\n"
227+
"For the native dbt task, use DBSQL mode instead (the default):\n"
228+
" - Set 'method: dbsql' in your profile (or omit 'method' entirely)\n"
229+
" - Configure 'host' and 'http_path' to connect to a SQL warehouse or cluster\n\n"
230+
f"Databricks runtime version: {databricks_runtime}")
236231
else:
237232
raise DbtRuntimeError(
238-
"Session mode requires a Databricks cluster environment. "
233+
"Session mode requires a Databricks cluster environment with an active SparkSession. "
239234
"DATABRICKS_RUNTIME_VERSION environment variable not found. "
240-
"Ensure you are running on a Databricks cluster.")
235+
"Ensure you are running on a Databricks cluster in a context where "
236+
"SparkSession is available (e.g., Notebook or Python task with Spark initialized)."
237+
)
241238

242239
# Set catalog if provided
243240
if catalog:

0 commit comments

Comments
 (0)