Skip to content

Error with mosaic.enable_mosaic() when creating a Databricks DLT Pipeline with Mosaic #538

@slothPete7773

Description

@slothPete7773

Describe the bug
The error was raised when I tried to start a DLT pipeline from a Databricks notebook, which I created just to start experimenting with DLT.
The primary library is Mosaic, which, as instructed, is installed prior to importing.
The code is roughly as follows:

$ %pip install databricks-mosaic
import mosaic as mos
mos.enable_mosaic(spark, dbutils) # Error line 
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *

@dlt.table(comment="Testing a DLT table for area coverage")
def area():
    return spark.read.table("area")

The following error was raised:

java.lang.RuntimeException: Failed to execute python command for notebook '/Users/email@gmail.com/test DLT' with id RunnableCommandId(66333709513xxxxxxxx) and error AnsiResult(---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
File <command--1>:3
      1 import mosaic as mos
----> 3 mos.enable_mosaic(spark, dbutils)
      5 import dlt
      6 # import pyspark.sql.functions as 

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/api/enable.py:47, in enable_mosaic(spark, dbutils)
     14 """
     15 Enable Mosaic functions.
     16 
   (...)
     44 
     45 """
     46 config.mosaic_spark = spark
---> 47 _ = MosaicLibraryHandler(config.mosaic_spark)
     48 config.mosaic_context = MosaicContext(config.mosaic_spark)
     50 # Register SQL functions

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/core/library_handler.py:18, in MosaicLibraryHandler.__init__(self, spark)
     16 self.spark = spark
     17 self.sc = spark.sparkContext
---> 18 self.sc.setLogLevel("info")
     19 log4jLogger = self.sc._jvm.org.apache.log4j
     20 LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__)

File /databricks/spark/python/pyspark/context.py:575, in SparkContext.setLogLevel(self, logLevel)
    559 def setLogLevel(self, logLevel: str) -> None:
    560     """
    561     Control our logLevel. This overrides any user-defined log settings.
    562     Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
   (...)
    573     >>> sc.setLogLevel("WARN")  # doctest :+SKIP
    574     """
--> 575     self._jsc.setLogLevel(logLevel)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
   1315 command = proto.CALL_COMMAND_NAME +\
   1316     self.command_header +\
   1317     args_command +\
   1318     proto.END_COMMAND_PART
   1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
   1322     answer, self.gateway_client, self.target_id, self.name)
   1324 for temp_arg in temp_args:
   1325     temp_arg._detach()

File /databricks/spark/python/pyspark/errors/exceptions.py:228, in capture_sql_exception.<locals>.deco(*a, **kw)
    226 def deco(*a: Any, **kw: Any) -> Any:
    227     try:
--> 228         return f(*a, **kw)
    229     except Py4JJavaError as e:
    230         converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
    326         raise Py4JJavaError(
    327             "An error occurred while calling {0}{1}{2}.\n".
    328             format(target_id, ".", name), value)
    329     else:
--> 330         raise Py4JError(
    331             "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332             format(target_id, ".", name, value))
    333 else:
    334     raise Py4JError(
    335         "An error occurred while calling {0}{1}{2}".
    336         format(target_id, ".", name))

Py4JError: An error occurred while calling o425.setLogLevel. Trace:
py4j.security.Py4JSecurityException: Method public void org.apache.spark.api.java.JavaSparkContext.setLogLevel(java.lang.String) is not whitelisted on class class org.apache.spark.api.java.JavaSparkContext
	at py4j.security.WhitelistingPy4JSecurityManager.checkCall(WhitelistingPy4JSecurityManager.java:473)
	at py4j.Gateway.invoke(Gateway.java:305)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)

,None,Map(),Map(),List(),List(),Map())

The following is the DLT pipeline settings JSON:

{
    "id": "ce6e63a5-bef4-405c-90f9-02cd9b890b18",
    "pipeline_type": "WORKSPACE",
    "clusters": [
        {
            "label": "default",
            "node_type_id": "m5d.large",
            "driver_node_type_id": "m5d.large",
            "custom_tags": {
                "type": "test"
            },
            "num_workers": 1
        },
        {
            "label": "maintenance",
            "custom_tags": {
                "type": "test"
            }
        }
    ],
    "development": true,
    "continuous": false,
    "channel": "CURRENT",
    "photon": true,
    "libraries": [
        {
            "notebook": {
                "path": "/Users/email@gmail.com/test DLT"
            }
        }
    ],
    "name": "areaCov",
    "edition": "CORE",
    "catalog": "workspace",
    "target": "default",
    "data_sampling": false
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions