Describe the bug
The error was raised when I tried to start a DLT pipeline from a Databricks notebook, which I created just to start experimenting with DLT.
The primary library was Mosaic, which is installed prior to importing, as instructed.
The code is roughly as follows:
$ %pip install databricks-mosaic
import mosaic as mos
mos.enable_mosaic(spark, dbutils) # Error line
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *
@dlt.table(comment="Testing a DLT table for area coverage")
def area():
return spark.read.table("area")
The following error was raised
java.lang.RuntimeException: Failed to execute python command for notebook '/Users/email@gmail.com/test DLT' with id RunnableCommandId(66333709513xxxxxxxx) and error AnsiResult(---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
File <command--1>:3
1 import mosaic as mos
----> 3 mos.enable_mosaic(spark, dbutils)
5 import dlt
6 # import pyspark.sql.functions as
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/api/enable.py:47, in enable_mosaic(spark, dbutils)
14 """
15 Enable Mosaic functions.
16
(...)
44
45 """
46 config.mosaic_spark = spark
---> 47 _ = MosaicLibraryHandler(config.mosaic_spark)
48 config.mosaic_context = MosaicContext(config.mosaic_spark)
50 # Register SQL functions
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/core/library_handler.py:18, in MosaicLibraryHandler.__init__(self, spark)
16 self.spark = spark
17 self.sc = spark.sparkContext
---> 18 self.sc.setLogLevel("info")
19 log4jLogger = self.sc._jvm.org.apache.log4j
20 LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__)
File /databricks/spark/python/pyspark/context.py:575, in SparkContext.setLogLevel(self, logLevel)
559 def setLogLevel(self, logLevel: str) -> None:
560 """
561 Control our logLevel. This overrides any user-defined log settings.
562 Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
(...)
573 >>> sc.setLogLevel("WARN") # doctest :+SKIP
574 """
--> 575 self._jsc.setLogLevel(logLevel)
File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
1315 command = proto.CALL_COMMAND_NAME +\
1316 self.command_header +\
1317 args_command +\
1318 proto.END_COMMAND_PART
1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
1324 for temp_arg in temp_args:
1325 temp_arg._detach()
File /databricks/spark/python/pyspark/errors/exceptions.py:228, in capture_sql_exception.<locals>.deco(*a, **kw)
226 def deco(*a: Any, **kw: Any) -> Any:
227 try:
--> 228 return f(*a, **kw)
229 except Py4JJavaError as e:
230 converted = convert_exception(e.java_exception)
File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
--> 330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
336 format(target_id, ".", name))
Py4JError: An error occurred while calling o425.setLogLevel. Trace:
py4j.security.Py4JSecurityException: Method public void org.apache.spark.api.java.JavaSparkContext.setLogLevel(java.lang.String) is not whitelisted on class class org.apache.spark.api.java.JavaSparkContext
at py4j.security.WhitelistingPy4JSecurityManager.checkCall(WhitelistingPy4JSecurityManager.java:473)
at py4j.Gateway.invoke(Gateway.java:305)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
at java.lang.Thread.run(Thread.java:750)
,None,Map(),Map(),List(),List(),Map())
The following is the DLT pipeline settings JSON:
{
"id": "ce6e63a5-bef4-405c-90f9-02cd9b890b18",
"pipeline_type": "WORKSPACE",
"clusters": [
{
"label": "default",
"node_type_id": "m5d.large",
"driver_node_type_id": "m5d.large",
"custom_tags": {
"type": "test"
},
"num_workers": 1
},
{
"label": "maintenance",
"custom_tags": {
"type": "test"
}
}
],
"development": true,
"continuous": false,
"channel": "CURRENT",
"photon": true,
"libraries": [
{
"notebook": {
"path": "/Users/email@gmail.com/test DLT"
}
}
],
"name": "areaCov",
"edition": "CORE",
"catalog": "workspace",
"target": "default",
"data_sampling": false
}
Describe the bug
The error was raised when I tried to start a DLT pipeline from a Databricks notebook, which I created just to start experimenting with DLT.
The primary library was Mosaic, which is installed prior to importing, as instructed.
The code is roughly as follows:
The following error was raised
The following is the DLT pipeline settings JSON:
{ "id": "ce6e63a5-bef4-405c-90f9-02cd9b890b18", "pipeline_type": "WORKSPACE", "clusters": [ { "label": "default", "node_type_id": "m5d.large", "driver_node_type_id": "m5d.large", "custom_tags": { "type": "test" }, "num_workers": 1 }, { "label": "maintenance", "custom_tags": { "type": "test" } } ], "development": true, "continuous": false, "channel": "CURRENT", "photon": true, "libraries": [ { "notebook": { "path": "/Users/email@gmail.com/test DLT" } } ], "name": "areaCov", "edition": "CORE", "catalog": "workspace", "target": "default", "data_sampling": false }