Commit 5cdb0c5

Authored by Quentin Ambard and Claude
Improve pipeline tools: wait for completion, concise errors, better defaults (#366)
* Add early validation for catalog/schema/volume existence
  - Add _validate_volume_path() to check schema and volume exist before LLM work
  - Fail fast with a clear ValueError instead of stalling on upload failures
  - Add timing logs for LLM calls to help debug performance
  - Reduces wasted time/cost when the destination doesn't exist

* feat: Make upload_folder follow cp -r semantics for folder name handling
  - Without trailing slash: preserves source folder name (pipeline → dest/pipeline/)
  - With trailing slash or /*: copies contents only (pipeline/ → dest/)
  - Supports both Unix (/) and Windows (\) path separators

* fix: Reduce get_pipeline_events default from 100 to 5
  Events contain verbose stack traces that can overflow output buffers. A default
  of 5 captures the recent update lifecycle (INITIALIZING → FAILED) with error
  details without overwhelming the response.

* Improve get_pipeline_events defaults and add filtering options
  - Reduce default max_results from 100 to 5 (events contain verbose stack traces)
  - Add filter parameter with default "level='ERROR'" to focus on errors
  - Add update_id parameter to filter events by a specific pipeline update
  - Update docstrings with filter examples and usage guidance

* Add wait option to start_update with default=True
  - start_update now waits for pipeline completion by default (wait=True)
  - Returns state, success, duration_seconds, and errors on completion
  - On failure, automatically fetches ERROR/WARN events for that update_id
  - Default timeout is 5 minutes (configurable)
  - Change get_pipeline_events default filter to include WARN level

* Improve get_update to auto-fetch errors and hide verbose config
  - get_update now returns a simplified dict with state, success, cause, errors
  - Auto-fetches ERROR/WARN events when an update fails (like start_update)
  - Add include_config=False (default) to hide verbose pipeline configuration
  - Fix get_pipeline_events update_id filter to use client-side filtering
    (the API doesn't support origin.update_id in filter expressions)

* Add error_summary for concise error messages in pipeline tools
  - Add _extract_error_summary() to get clean error messages without stack traces
  - start_update and get_update now return error_summary by default
  - Add full_error_details=False parameter to get full stack traces if needed
  - Falls back to event.message if no exception message is found

  Example output:
      {"state": "FAILED", "error_summary": ["SparkException: File '...' was not found..."]}

* Shorten MCP tool docstrings for pipelines
  Reduce verbosity in tool descriptions: the LLM will read the return values
  anyway, so detailed return schemas aren't needed in docstrings.

* Simplify pipeline MCP tool docs and use event_log_level
  - Remove verbose SDK option lists from extra_settings docs
  - Replace filter param with event_log_level (ERROR, WARN, INFO)
  - WARN includes ERROR, INFO includes all events

* Add SDP acronym to create_pipeline docstring

* fix: Remove unused imports

* fix: Line too long lint errors

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Quentin Ambard <quentin.ambard@databricks.com>
Co-authored-by: Claude <noreply@anthropic.com>
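The wait-for-completion behavior described above can be approximated with a generic polling loop. This is an illustrative sketch only: `wait_for_update`, the `get_state` callable, and the returned dict shape are stand-ins for the real `_start_update` internals, which are not shown in this commit.

```python
import time


def wait_for_update(get_state, timeout: int = 300, poll_interval: float = 1.0) -> dict:
    """Poll get_state() until a terminal state or timeout.

    get_state is a callable returning the current update state string
    (hypothetical stand-in for a Databricks pipelines get_update call).
    Mirrors the commit's defaults: 300 s timeout, success on COMPLETED.
    """
    terminal = {"COMPLETED", "FAILED", "CANCELED"}
    start = time.monotonic()
    while True:
        state = get_state()
        if state in terminal:
            return {
                "state": state,
                "success": state == "COMPLETED",
                "duration_seconds": round(time.monotonic() - start, 1),
            }
        if time.monotonic() - start > timeout:
            # Timed out before reaching a terminal state
            return {"state": state, "success": False, "timed_out": True}
        time.sleep(poll_interval)
```

On failure the real tool additionally fetches ERROR/WARN events for the update_id; that step is omitted here.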
1 parent 357d85f commit 5cdb0c5

File tree

8 files changed: +376 −115 lines changed

databricks-mcp-server/databricks_mcp_server/tools/file.py

Lines changed: 6 additions & 1 deletion

```diff
@@ -23,8 +23,13 @@ def upload_folder(
     Uses parallel uploads with ThreadPoolExecutor for performance.
     Automatically handles all file types.
 
+    Follows `cp -r` semantics:
+    - With trailing slash or /* (e.g., "pipeline/" or "pipeline/*"): copies contents into workspace_folder
+    - Without trailing slash (e.g., "pipeline"): creates workspace_folder/pipeline/
+
     Args:
-        local_folder: Path to local folder to upload
+        local_folder: Path to local folder to upload. Add trailing slash to copy
+            contents only, omit to preserve folder name.
         workspace_folder: Target path in Databricks workspace
             (e.g., "/Workspace/Users/user@example.com/my-project")
         max_workers: Maximum parallel upload threads (default: 10)
```
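A quick way to see where files land under these `cp -r` semantics. `effective_destination` is a hypothetical helper written for this example, not part of the codebase; it only computes the resulting workspace path.

```python
import os


def effective_destination(local_folder: str, workspace_folder: str) -> str:
    """Compute where uploaded files land under cp -r semantics (illustrative).

    With a trailing "/" or "/*" the contents go straight into
    workspace_folder; otherwise the source folder name is appended.
    """
    copy_contents_only = local_folder.endswith(("/", "/*"))
    # Strip any "*" then trailing "/" before taking the basename
    clean = local_folder.rstrip("*").rstrip("/")
    dest = workspace_folder.rstrip("/")
    if not copy_contents_only:
        dest = f"{dest}/{os.path.basename(clean)}"
    return dest


# "pipeline"  -> .../dest/pipeline ; "pipeline/" or "pipeline/*" -> .../dest
```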

databricks-mcp-server/databricks_mcp_server/tools/pdf.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -11,7 +11,7 @@
 from ..server import mcp
 
 
-@mcp.tool
+@mcp.tool(timeout=300.0)
 def generate_and_upload_pdfs(
     catalog: str,
     schema: str,
@@ -101,7 +101,7 @@ def generate_and_upload_pdfs(
     }
 
 
-@mcp.tool
+@mcp.tool(timeout=60.0)
 def generate_and_upload_pdf(
     title: str,
     description: str,
```

databricks-mcp-server/databricks_mcp_server/tools/pipelines.py

Lines changed: 71 additions & 89 deletions

```diff
@@ -37,20 +37,18 @@ def create_pipeline(
     extra_settings: Dict[str, Any] = None,
 ) -> Dict[str, Any]:
     """
-    Create a new Spark Declarative Pipeline (Unity Catalog, serverless by default).
+    Create a new Spark Declarative Pipeline (SDP). Unity Catalog, serverless by default.
 
     Args:
         name: Pipeline name
-        root_path: Root folder for source code (added to Python sys.path for imports)
+        root_path: Root folder for source code (added to sys.path)
         catalog: Unity Catalog name
         schema: Schema name for output tables
-        workspace_file_paths: List of workspace file paths (raw .sql or .py files)
-        extra_settings: Optional dict with additional pipeline settings (clusters,
-            continuous, development, photon, edition, channel, event_log, configuration,
-            notifications, tags, serverless, etc.). Explicit parameters take precedence.
+        workspace_file_paths: List of workspace .sql or .py file paths
+        extra_settings: Additional pipeline settings dict
 
     Returns:
-        Dictionary with pipeline_id of the created pipeline.
+        Dict with pipeline_id.
     """
     # Auto-inject default tags into extra_settings; user tags take precedence
     extra_settings = extra_settings or {}
@@ -116,13 +114,11 @@ def update_pipeline(
         root_path: New root folder for source code
         catalog: New catalog name
         schema: New schema name
-        workspace_file_paths: New list of file paths (raw .sql or .py files)
-        extra_settings: Optional dict with additional pipeline settings (clusters,
-            continuous, development, photon, edition, channel, event_log, configuration,
-            notifications, tags, serverless, etc.). Explicit parameters take precedence.
+        workspace_file_paths: New list of .sql or .py file paths
+        extra_settings: Additional pipeline settings dict
 
     Returns:
-        Dictionary with status message.
+        Dict with status.
     """
     _update_pipeline(
         pipeline_id=pipeline_id,
@@ -157,51 +153,70 @@ def delete_pipeline(pipeline_id: str) -> Dict[str, str]:
     return {"status": "deleted"}
 
 
-@mcp.tool
+@mcp.tool(timeout=300)
 def start_update(
     pipeline_id: str,
     refresh_selection: List[str] = None,
     full_refresh: bool = False,
     full_refresh_selection: List[str] = None,
     validate_only: bool = False,
-) -> Dict[str, str]:
+    wait: bool = True,
+    timeout: int = 300,
+    full_error_details: bool = False,
+) -> Dict[str, Any]:
     """
-    Start a pipeline update or dry-run validation.
+    Start a pipeline update. Waits for completion by default.
 
     Args:
         pipeline_id: Pipeline ID
-        refresh_selection: List of table names to refresh
-        full_refresh: If True, performs full refresh of all tables
-        full_refresh_selection: List of table names for full refresh
-        validate_only: If True, validates without updating data (dry run)
+        refresh_selection: Table names to refresh
+        full_refresh: Full refresh all tables
+        full_refresh_selection: Table names for full refresh
+        validate_only: Dry run without updating data
+        wait: Wait for completion (default: True)
+        timeout: Max wait time in seconds (default: 300)
+        full_error_details: Include full stack traces (default: False)
 
     Returns:
-        Dictionary with update_id for polling status.
+        Dict with update_id, state, success, error_summary if failed.
     """
-    update_id = _start_update(
+    return _start_update(
        pipeline_id=pipeline_id,
        refresh_selection=refresh_selection,
        full_refresh=full_refresh,
        full_refresh_selection=full_refresh_selection,
        validate_only=validate_only,
+        wait=wait,
+        timeout=timeout,
+        full_error_details=full_error_details,
     )
-    return {"update_id": update_id}
 
 
 @mcp.tool
-def get_update(pipeline_id: str, update_id: str) -> Dict[str, Any]:
+def get_update(
+    pipeline_id: str,
+    update_id: str,
+    include_config: bool = False,
+    full_error_details: bool = False,
+) -> Dict[str, Any]:
     """
-    Get pipeline update status and results.
+    Get pipeline update status. Auto-fetches errors if failed.
 
     Args:
         pipeline_id: Pipeline ID
         update_id: Update ID from start_update
+        include_config: Include full pipeline config (default: False)
+        full_error_details: Include full stack traces (default: False)
 
     Returns:
-        Dictionary with update status (QUEUED, RUNNING, COMPLETED, FAILED, etc.)
+        Dict with update_id, state, success, error_summary if failed.
     """
-    result = _get_update(pipeline_id=pipeline_id, update_id=update_id)
-    return result.as_dict() if hasattr(result, "as_dict") else vars(result)
+    return _get_update(
+        pipeline_id=pipeline_id,
+        update_id=update_id,
+        include_config=include_config,
+        full_error_details=full_error_details,
+    )
 
 
 @mcp.tool
@@ -222,21 +237,33 @@ def stop_pipeline(pipeline_id: str) -> Dict[str, str]:
 @mcp.tool
 def get_pipeline_events(
     pipeline_id: str,
-    max_results: int = 100,
+    max_results: int = 5,
+    event_log_level: str = "WARN",
+    update_id: str = None,
 ) -> List[Dict[str, Any]]:
     """
-    Get pipeline events, issues, and error messages.
-
-    Use this to debug pipeline failures.
+    Get pipeline events for debugging. Returns ERROR/WARN by default.
 
     Args:
         pipeline_id: Pipeline ID
-        max_results: Maximum number of events to return (default: 100)
+        max_results: Max events to return (default: 5)
+        event_log_level: ERROR, WARN (includes ERROR), or INFO (all events)
+        update_id: Filter to specific update
 
     Returns:
-        List of event dictionaries with error details.
+        List of event dicts with error details.
     """
-    events = _get_pipeline_events(pipeline_id=pipeline_id, max_results=max_results)
+    # Convert log level to filter expression
+    level_filters = {
+        "ERROR": "level='ERROR'",
+        "WARN": "level in ('ERROR', 'WARN')",
+        "INFO": "",  # No filter = all events
+    }
+    filter_expr = level_filters.get(event_log_level.upper(), level_filters["WARN"])
+
+    events = _get_pipeline_events(
+        pipeline_id=pipeline_id, max_results=max_results, filter=filter_expr, update_id=update_id
+    )
     return [e.as_dict() if hasattr(e, "as_dict") else vars(e) for e in events]
 
 
@@ -254,67 +281,22 @@ def create_or_update_pipeline(
     extra_settings: Dict[str, Any] = None,
 ) -> Dict[str, Any]:
     """
-    Create a new pipeline or update an existing one with the same name.
-
-    This is the main tool for pipeline management. It:
-    1. Searches for an existing pipeline with the same name (or uses 'id' from extra_settings)
-    2. Creates a new pipeline or updates the existing one
-    3. Optionally starts a pipeline run with full refresh
-    4. Optionally waits for the run to complete and returns detailed results
-
-    Uses Unity Catalog and serverless compute by default.
+    Create or update a pipeline by name, optionally run it. Uses Unity Catalog + serverless.
 
     Args:
         name: Pipeline name (used for lookup and creation)
-        root_path: Root folder for source code (added to Python sys.path for imports)
-        catalog: Unity Catalog name for output tables
+        root_path: Root folder for source code (added to sys.path)
+        catalog: Unity Catalog name
         schema: Schema name for output tables
-        workspace_file_paths: List of workspace file paths (raw .sql or .py files)
-        start_run: If True, start a pipeline update after create/update (default: False)
-        wait_for_completion: If True, wait for run to complete (default: False)
-        full_refresh: If True, perform full refresh when starting (default: True)
-        timeout: Maximum wait time in seconds (default: 1800 = 30 minutes)
-        extra_settings: Optional dict with additional pipeline settings. Supports all SDK
-            options: clusters, continuous, development, photon, edition, channel, event_log,
-            configuration, notifications, tags, serverless, etc.
-            If 'id' is provided, the pipeline will be updated instead of created.
-            Explicit parameters (name, root_path, catalog, schema) take precedence.
+        workspace_file_paths: List of workspace .sql or .py file paths
+        start_run: Start pipeline update after create/update
+        wait_for_completion: Wait for run to complete
+        full_refresh: Full refresh when starting (default: True)
+        timeout: Max wait time in seconds (default: 1800)
+        extra_settings: Additional pipeline settings dict
 
     Returns:
-        Dictionary with detailed status:
-        - pipeline_id: The pipeline ID
-        - pipeline_name: The pipeline name
-        - created: True if newly created, False if updated
-        - success: True if all operations succeeded
-        - state: Final state if run was started (COMPLETED, FAILED, etc.)
-        - duration_seconds: Time taken if waited
-        - error_message: Error message if failed
-        - errors: List of detailed errors if failed
-        - message: Human-readable status message
-
-    Example usage:
-        # Just create/update the pipeline
-        create_or_update_pipeline(name="my_pipeline", ...)
-
-        # Create/update and run immediately
-        create_or_update_pipeline(name="my_pipeline", ..., start_run=True)
-
-        # Create/update, run, and wait for completion
-        create_or_update_pipeline(
-            name="my_pipeline", ...,
-            start_run=True,
-            wait_for_completion=True
-        )
-
-        # Create with custom settings (non-serverless, development mode)
-        create_or_update_pipeline(
-            name="my_pipeline", ...,
-            extra_settings={
-                "serverless": False,
-                "development": True,
-                "clusters": [{"label": "default", "num_workers": 2}]
-            }
-        )
+        Dict with pipeline_id, created (bool), success, state, error_summary if failed.
     """
     # Auto-inject default tags into extra_settings; user tags take precedence
     extra_settings = extra_settings or {}
```
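The commit message mentions `_extract_error_summary()` but the diff excerpt above does not show its body. The following is a plausible sketch of the described behavior (prefer the exception message, fall back to `event.message`, skip stack traces); the event dict shape and field names are assumptions, not the actual implementation.

```python
def extract_error_summary(events: list) -> list:
    """Pull concise error messages out of pipeline event dicts.

    Assumed event shape: {"message": ..., "error": {"exceptions":
    [{"message": ...}, ...]}}. Stack traces are simply never copied.
    """
    summary = []
    for event in events:
        error = event.get("error") or {}
        exceptions = error.get("exceptions") or []
        if exceptions and exceptions[0].get("message"):
            summary.append(exceptions[0]["message"])
        elif event.get("message"):
            # Fall back to the event-level message when no exception message exists
            summary.append(event["message"])
    return summary
```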

databricks-mcp-server/databricks_mcp_server/tools/workspace.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -6,7 +6,6 @@
 from typing import Any, Dict, List, Optional
 
 from databricks_tools_core.auth import (
-    clear_active_workspace,
     get_active_workspace,
     get_workspace_client,
     set_active_workspace,
```

databricks-tools-core/databricks_tools_core/file/workspace.py

Lines changed: 35 additions & 8 deletions

```diff
@@ -146,10 +146,15 @@ def upload_folder(
     Uses parallel uploads with ThreadPoolExecutor for performance.
     Automatically handles all file types using ImportFormat.AUTO.
 
+    Follows `cp -r` semantics:
+    - With trailing slash or /* (e.g., "pipeline/" or "pipeline/*"): copies contents into workspace_folder
+    - Without trailing slash (e.g., "pipeline"): creates workspace_folder/pipeline/
+
     Args:
-        local_folder: Path to local folder to upload
+        local_folder: Path to local folder to upload. Add trailing slash to copy
+            contents only, omit to preserve folder name.
         workspace_folder: Target path in Databricks workspace
-            (e.g., "/Users/user@example.com/my-project")
+            (e.g., "/Workspace/Users/user@example.com/my-project")
         max_workers: Maximum number of parallel upload threads (default: 10)
         overwrite: Whether to overwrite existing files (default: True)
@@ -161,25 +166,47 @@ def upload_folder(
        ValueError: If local folder is not a directory
 
     Example:
+        >>> # Copy folder preserving name: creates /Workspace/.../dest/my-project/
         >>> result = upload_folder(
         ...     local_folder="/path/to/my-project",
-        ...     workspace_folder="/Users/me@example.com/my-project"
+        ...     workspace_folder="/Workspace/Users/me@example.com/dest"
+        ... )
+        >>> # Copy contents only: files go directly into /Workspace/.../dest/
+        >>> result = upload_folder(
+        ...     local_folder="/path/to/my-project/",
+        ...     workspace_folder="/Workspace/Users/me@example.com/dest"
         ... )
         >>> print(f"Uploaded {result.successful}/{result.total_files} files")
         >>> if not result.success:
         ...     for failed in result.get_failed_uploads():
         ...         print(f"Failed: {failed.local_path} - {failed.error}")
     """
+    # Check if user wants to copy contents only (trailing slash or /*) or preserve folder name
+    # Supports: "folder/", "folder/*", "folder\\*" (Windows)
+    copy_contents_suffixes = ("/", os.sep, "/*", os.sep + "*")
+    copy_contents_only = local_folder.endswith(copy_contents_suffixes)
+
+    # Strip /* or * suffix before validation
+    clean_local_folder = local_folder.rstrip("*").rstrip("/").rstrip(os.sep)
+
     # Validate local folder
-    local_folder = os.path.abspath(local_folder)
-    if not os.path.exists(local_folder):
-        raise FileNotFoundError(f"Local folder not found: {local_folder}")
-    if not os.path.isdir(local_folder):
-        raise ValueError(f"Path is not a directory: {local_folder}")
+    local_folder_abs = os.path.abspath(clean_local_folder)
+    if not os.path.exists(local_folder_abs):
+        raise FileNotFoundError(f"Local folder not found: {local_folder_abs}")
+    if not os.path.isdir(local_folder_abs):
+        raise ValueError(f"Path is not a directory: {local_folder_abs}")
 
     # Normalize workspace path (remove trailing slash)
     workspace_folder = workspace_folder.rstrip("/")
 
+    # If not copying contents only, append the source folder name to destination
+    if not copy_contents_only:
+        folder_name = os.path.basename(local_folder_abs)
+        workspace_folder = f"{workspace_folder}/{folder_name}"
+
+    # Use absolute path for file collection
+    local_folder = local_folder_abs
+
     # Initialize client
     w = get_workspace_client()
```
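The suffix handling in this diff can be exercised in isolation. `split_copy_semantics` is a hypothetical distillation written for illustration; it uses a literal `"\\"` where the real code builds its suffixes from `os.sep`, so on Unix the real code would not treat backslashes specially.

```python
def split_copy_semantics(local_folder: str):
    """Return (clean_path, copy_contents_only) per the cp -r rules above.

    A trailing "/", "\\", "/*", or "\\*" means copy contents only;
    otherwise the source folder name is preserved at the destination.
    """
    copy_contents_only = local_folder.endswith(("/", "\\", "/*", "\\*"))
    # Strip "*" first, then either flavor of trailing separator
    clean = local_folder.rstrip("*").rstrip("/").rstrip("\\")
    return clean, copy_contents_only
```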

0 commit comments