Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dependencies = [
"databricks-switch-plugin~=0.1.7", # Temporary, until Switch is migrated to be a transpiler (LSP) plugin.
"requests>=2.28.1,<3", # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts.
"pandas~=2.3.1", # Required for new configure assessment
"libcst>=1.4.0,<2",
]

[project.urls]
Expand Down
16 changes: 14 additions & 2 deletions src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,18 @@
from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
from databricks.labs.lakebridge.transpiler.describe import TranspilersDescription
from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile
from databricks.labs.lakebridge.transpiler.glue.glue_engine import GlueEngine
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
from databricks.labs.lakebridge.transpiler.switch_runner import SwitchRunner
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

# Built-in engines: a '--transpiler-config-path' value equal to one of these keys selects the mapped engine class directly, so no config file needs to exist on disk.
_BUILTIN_ENGINES: dict[str, type[TranspileEngine]] = {
"glue": GlueEngine,
}

from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity
from databricks.labs.switch.lsp import get_switch_dialects

Expand Down Expand Up @@ -243,6 +249,9 @@ def __init__(
@staticmethod
def _validate_transpiler_config_path(transpiler_config_path: str, msg: str) -> None:
"""Validate the transpiler config path: it must be a valid path that exists."""
# Built-in engine sentinels don't require a file on disk.
if transpiler_config_path in _BUILTIN_ENGINES:
return
# Note: the content is not validated here, but during loading of the engine.
if not Path(transpiler_config_path).exists():
raise_validation_exception(msg)
Expand Down Expand Up @@ -508,8 +517,11 @@ def _check_lsp_engine(self) -> TranspileEngine:
transpiler_config_path,
f"Error: Invalid value for '--transpiler-config-path': '{str(transpiler_config_path)}', file does not exist.",
)
path = Path(transpiler_config_path)
engine = LSPEngine.from_config_path(path)
if transpiler_config_path in _BUILTIN_ENGINES:
engine = _BUILTIN_ENGINES[transpiler_config_path]()
else:
path = Path(transpiler_config_path)
engine = LSPEngine.from_config_path(path)
else:
engine = None
del transpiler_config_path
Expand Down
Empty file.
110 changes: 110 additions & 0 deletions src/databricks/labs/lakebridge/transpiler/glue/glue_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from __future__ import annotations

import ast as _ast
import logging
from collections.abc import Mapping, Sequence
from pathlib import Path

from databricks.labs.lakebridge.config import TranspileConfig, TranspileResult
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
from databricks.labs.lakebridge.transpiler.transpile_status import (
ErrorKind,
ErrorSeverity,
TranspileError,
)
from databricks.labs.lakebridge.transpiler.glue.glue_transformer import GlueTransformer

logger = logging.getLogger(__name__)

_DEFAULT_ARGS_STYLE = "argparse"


def _extract_options(config: TranspileConfig) -> tuple[str | None, str]:
    """Read the Glue engine settings from ``config.transpiler_options``.

    Recognised keys:

    * ``"catalog"``: optional catalog name. A missing, falsy or blank value
      yields ``None``; any other value is returned as a stripped string.
    * ``"args-style"``: ``"argparse"`` or ``"dbutils"``, matched ignoring case
      and surrounding whitespace. A missing, ``None`` or blank value silently
      selects the default; any other value logs a warning and selects the
      default.

    Args:
        config: The transpile configuration whose ``transpiler_options`` is read.

    Returns:
        A ``(catalog, args_style)`` tuple. When ``transpiler_options`` is not a
        mapping the result is ``(None, _DEFAULT_ARGS_STYLE)``.
    """
    opts = config.transpiler_options
    if not isinstance(opts, Mapping):
        return None, _DEFAULT_ARGS_STYLE

    # Coerce to str so the declared return type holds even for non-string values.
    raw_catalog = opts.get("catalog")
    catalog = (str(raw_catalog).strip() if raw_catalog else "") or None

    # An explicit None (e.g. an empty YAML value) means "not set": do not
    # stringify it to "None" and then warn about an unknown style.
    raw_style = opts.get("args-style")
    args_style = "" if raw_style is None else str(raw_style).strip().lower()
    if not args_style:
        return catalog, _DEFAULT_ARGS_STYLE

    if args_style not in ("argparse", "dbutils"):
        logger.warning("Unknown args-style %r, falling back to %r.", raw_style, _DEFAULT_ARGS_STYLE)
        args_style = _DEFAULT_ARGS_STYLE
    return catalog, args_style


class GlueEngine(TranspileEngine):
    """Transpiles AWS Glue PySpark scripts to Databricks PySpark.

    The engine runs in-process: each file is handed to a ``GlueTransformer``
    and the generated code is parsed once with ``ast`` as a sanity check.
    Transformer options (``catalog`` and ``args-style``) are read from the
    transpile configuration in :meth:`initialize`.
    """

    def __init__(self) -> None:
        # Defaults only; initialize() replaces both from the transpile config.
        self._catalog: str | None = None
        self._args_style: str = _DEFAULT_ARGS_STYLE

    @property
    def transpiler_name(self) -> str:
        """Return the name of this engine, ``"glue"``."""
        return "glue"

    @property
    def supported_dialects(self) -> Sequence[str]:
        """Return the source dialects this engine handles (only ``"glue"``)."""
        return ["glue"]

    def is_supported_file(self, file: Path) -> bool:
        """Return True when *file* has a ``.py`` suffix (compared case-insensitively)."""
        return file.suffix.lower() == ".py"

    async def initialize(self, config: TranspileConfig) -> None:
        """Load the catalog and args-style options from *config*."""
        self._catalog, self._args_style = _extract_options(config)

    async def shutdown(self) -> None:
        """Do nothing; the engine holds no resources that need releasing."""

    async def transpile(
        self,
        source_dialect: str,
        target_dialect: str,
        source_code: str,
        file_path: Path,
    ) -> TranspileResult:
        """Transpile one Glue script and report any problems as ``TranspileError`` entries.

        Args:
            source_dialect: Not used by this method.
            target_dialect: Not used by this method.
            source_code: Text of the Glue script to convert.
            file_path: Path of the script; passed to the transformer and attached
                to every reported error.

        Returns:
            On success, the generated code with a success count of 1 and one
            ``GLUE_WARNING`` entry per warning (including a warning when the
            generated code does not parse). On failure, the unmodified
            ``source_code`` with a success count of 0 and a single error entry.
            This method does not raise: every ``Exception`` is converted into
            an error entry.
        """
        try:
            transformer = GlueTransformer(
                file_path,
                catalog=self._catalog,
                args_style=self._args_style,
            )
            transpiled_code, transformer_warnings = transformer.transform(source_code)
            # Work on a copy: appending below must neither mutate the
            # transformer's own list nor fail if it returned a tuple.
            warnings = list(transformer_warnings)

            try:
                _ast.parse(transpiled_code)
            except (SyntaxError, ValueError) as parse_err:
                # ValueError is what compile() raises for null bytes on some
                # Python versions; treat it like a syntax error so the generated
                # output is still returned instead of being discarded below.
                warnings.append(f"Generated code contains a syntax error: {parse_err}")

            errors = [
                TranspileError(
                    code="GLUE_WARNING",
                    kind=ErrorKind.GENERATION,
                    severity=ErrorSeverity.WARNING,
                    path=file_path,
                    message=msg,
                )
                for msg in warnings
            ]
            return TranspileResult(transpiled_code, 1, errors)

        except SyntaxError as err:
            # Presumably raised by the transformer when the *source* script is
            # not valid Python (generated-code errors are handled above).
            error = TranspileError(
                code="SYNTAX_ERROR",
                kind=ErrorKind.PARSING,
                severity=ErrorSeverity.ERROR,
                path=file_path,
                message=f"Python syntax error in source: {err}",
            )
            return TranspileResult(source_code, 0, [error])

        except Exception as err:  # pylint: disable=broad-exception-caught
            # Engine boundary: log the traceback and report the failure for this
            # file instead of propagating it to the caller.
            logger.exception("Unexpected error transpiling %s", file_path)
            error = TranspileError(
                code="GLUE_TRANSPILE_ERROR",
                kind=ErrorKind.GENERATION,
                severity=ErrorSeverity.ERROR,
                path=file_path,
                message=f"Unexpected transpilation error: {err}",
            )
            return TranspileResult(source_code, 0, [error])
Loading