databrickslabs · simone-dbx-labs · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 11, 2026
@@ -37,7 +37,7 @@ Usage:
     databricks labs lakebridge analyze [flags]
 
 Flags:
-    --generate-json true|false    (Optional) Generate JSON file alongside the Excel report
+    --generate-json true|false    (Optional) Generate a JSON file alongside the Excel report (default: true); pass false to skip it
     -h, --help                help for analyze
     --report-file path        (Optional) Local filesystem path of the analysis report file to write
     --source-directory path   (Optional) Local filesystem path of a directory containing sources to analyze
@@ -62,7 +62,7 @@ Below is an explanation of the settings needed for the analyzer:
  - _Source directory_: The path of the folder containing the artifacts to analyze.
  - _Report file_: The path of the Excel file into which the analyzer results will be written. (The parent directory of this file must already exist.)
  - _Source technology_: The underlying technology platform of the files in the source directory that need to be analyzed.
- - _Generate JSON_ (optional): When set to `true`, generates a JSON file alongside the Excel report. This is useful for programmatic processing and integration with other tools.
+ - _Generate JSON_ (optional): Defaults to `true`, so a JSON file is written next to the Excel report (for programmatic use, migration estimators, and the UI). Set it to `false` if you only need the spreadsheet.
 
 ## Execution
 Execute the below command to start the analyzer:
@@ -72,14 +72,15 @@ Execute the below command to start the analyzer:
 
 _Note: Any settings that are not provided as arguments will trigger a prompt for the missing setting(s)._
 
-To generate both Excel and JSON output:
+By default, both Excel and JSON are produced. To skip JSON:
+
 ```bash
- databricks labs lakebridge analyze [--source-directory <path>] [--report-file <path>] [--source-tech <string>] --generate-json true
+ databricks labs lakebridge analyze [--source-directory <path>] [--report-file <path>] [--source-tech <string>] --generate-json false
 ```
 
 ### Output Files
 - **Excel Report** (`.xlsx`) - Always generated. Contains formatted analysis results with multiple worksheets for different aspects of the analysis.
-- **JSON Report** (`.json`) - Generated when `--generate-json true` is specified. Contains the same analysis data in JSON format for programmatic access and integration with other tools.
+- **JSON Report** (`.json`) - Generated by default alongside the Excel report. Contains the same analysis data in JSON format for programmatic access and integration with other tools. Pass `--generate-json false` to skip it.
 
 Both files will be written to the location specified by `--report-file` with their respective extensions.
 

@@ -18,8 +18,8 @@ commands:
       - name: source-tech
         description: (Optional) Name of the Source System Technology you want to analyze
       - name: generate-json
-        description: (Optional) Generate JSON file alongside the Excel report (`true|false`)
-        default: "false"
+        description: (Optional) Emit JSON alongside the Excel report (`true|false`; default `true`). Pass `false` for Excel only.
+        default: "true"
 
   - name: transpile
     description: Transpile SQL/ETL sources to Databricks-compatible code

@@ -63,7 +63,7 @@ def create(cls, is_debug: bool = False) -> "AnalyzerRunner":
         return cls(Analyzer.analyze, is_debug)
 
     def run(
-        self, source_dir: Path, results_file_path: Path, platform: str, generate_json: bool = False
+        self, source_dir: Path, results_file_path: Path, platform: str, generate_json: bool = True
     ) -> AnalyzerResult:
         logger.debug(f"Starting analyzer execution for {platform}: {source_dir}")
 
@@ -125,7 +125,7 @@ def run_analyzer(
         source: str | None = None,
         report_file: str | None = None,
         platform: str | None = None,
-        generate_json: bool = False,
+        generate_json: bool = True,
     ) -> AnalyzerResult:
         source_dir = self._prompts.get_source_directory() if source is None else Path(source)
         results_file_path = self._prompts.get_result_file_path() if report_file is None else Path(report_file)

@@ -847,9 +847,12 @@ def analyze(
     source_directory: str | None = None,
     report_file: str | None = None,
     source_tech: str | None = None,
-    generate_json: bool = False,
+    generate_json: bool = True,
 ):
-    """Run the Analyzer"""
+    """Run the Analyzer.
+
+    JSON is emitted alongside Excel by default (for estimators and UI). Use --generate-json false to skip.
+    """
     ctx = ApplicationContext(w)
     try:
         result = ctx.analyzer.run_analyzer(source_directory, report_file, source_tech, generate_json)

@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 
 import pytest
@@ -14,10 +15,12 @@
 
 
 def _mock_analyze(
-    _directory: Path, result: Path, _platform: str, _is_debug: bool = False, _json_result: Path | None = None
+    _directory: Path, result: Path, _platform: str, _is_debug: bool = False, json_result: Path | None = None
 ) -> None:
-    # Nothing really needed here, except a result needs to be created.
+    """Stand in for Bladespector: create Excel path; when JSON is requested, create that file too."""
     result.touch()
+    if json_result is not None:
+        json_result.write_text(json.dumps({"mock": True}), encoding="utf-8")
 
 
 @pytest.mark.parametrize(
@@ -28,7 +31,20 @@ def _mock_analyze(
     ),
     ids=str,
 )
-def test_analyze_arguments_return(tmp_path: Path, report_file: Path) -> None:
+@pytest.mark.parametrize(
+    "omit_generate_json_kw, expect_json_file",
+    (
+        (True, True),  # default: omit kwarg → JSON on
+        (False, False),  # explicit generate_json=False
+    ),
+    ids=("default_json", "skip_json"),
+)
+def test_analyze_json_output(
+    tmp_path: Path,
+    report_file: Path,
+    omit_generate_json_kw: bool,
+    expect_json_file: bool,
+) -> None:
     path = tmp_path / "in"
     file = tmp_path / report_file
     mock_prompts = MockPrompts({})
@@ -37,9 +53,18 @@ def test_analyze_arguments_return(tmp_path: Path, report_file: Path) -> None:
     expected_result = AnalyzerResult(source_directory=path, report_path=file, source_system=str("Synapse"))
 
     analyzer = LakebridgeAnalyzer(AnalyzerPrompts(mock_prompts), runner)
-    result = analyzer.run_analyzer(source=str(path), report_file=str(file), platform="Synapse")
+    if omit_generate_json_kw:
+        result = analyzer.run_analyzer(source=str(path), report_file=str(file), platform="Synapse")
+    else:
+        result = analyzer.run_analyzer(source=str(path), report_file=str(file), platform="Synapse", generate_json=False)
 
     assert result == expected_result
+    assert file.exists()
+    json_path = file.with_suffix(".json")
+    if expect_json_file:
+        assert json_path.exists(), "JSON should be produced by default alongside Excel"
+    else:
+        assert not json_path.exists(), "JSON should not be produced when generate_json is False"
 
 
 def test_analyze_prompts_result(tmp_path: Path):
@@ -77,9 +102,34 @@ def test_analyze_prompt_relative_result_path(tmp_path: Path) -> None:
         _test_analyze_prompt(mock_prompts, expected_result)
 
 
-def _test_analyze_prompt(mock_prompts: MockPrompts, expected_result: AnalyzerResult) -> None:
+def test_analyze_prompts_skip_json(tmp_path: Path) -> None:
+    first_tech = next(iter(sorted(Analyzer.supported_source_technologies(), key=str.casefold)))
+    input_path = tmp_path / "in"
+    report_file = tmp_path / "report-no-json.xlsx"
+    mock_prompts = MockPrompts(
+        {
+            "Select the source technology": "0",
+            "Enter the path of the directory containing sources to analyze": str(input_path),
+            "Enter the path of the report file for analyzer results": str(report_file),
+        }
+    )
+    expected_result = AnalyzerResult(source_directory=input_path, report_path=report_file, source_system=first_tech)
+    _test_analyze_prompt(mock_prompts, expected_result, generate_json=False)
+
+
+def _test_analyze_prompt(
+    mock_prompts: MockPrompts, expected_result: AnalyzerResult, *, generate_json: bool | None = None
+) -> None:
     runner = AnalyzerRunner(runnable=_mock_analyze, is_debug=True)
     analyzer = LakebridgeAnalyzer(AnalyzerPrompts(mock_prompts), runner)
 
-    result = analyzer.run_analyzer()
+    if generate_json is None:
+        result = analyzer.run_analyzer()
+    else:
+        result = analyzer.run_analyzer(generate_json=generate_json)
     assert result == expected_result
+    json_path = expected_result.report_path.with_suffix(".json")
+    if generate_json is False:
+        assert not json_path.exists(), "JSON should not be produced when generate_json is False"
+    else:
+        assert json_path.exists(), "JSON should be produced by default alongside Excel"
@@ -41,6 +41,8 @@ def test_analyze_arguments(
         source_tech="Informatica - PC",
     )
 
+    assert report_path.with_suffix(".json").exists(), "JSON report should be generated by default"
+
     with report_path.open("rb") as f:
         header = f.read(4)
     # Excel files are .zip files, so we can check they have the zip header.
@@ -62,15 +64,18 @@ def test_analyze_arguments_wrong_tech(
         }
     )
 
+    report_path = tmp_path / "sample.xlsx"
     with patch.object(ApplicationContext, "prompts", mock_prompts):
         input_path = test_resources / "functional" / "informatica"
         cli.analyze(
             w=mock_workspace_client,
             source_directory=str(input_path),
-            report_file=str(tmp_path / "sample.xlsx"),
+            report_file=str(report_path),
             source_tech="Informatica",
         )
 
+    assert report_path.with_suffix(".json").exists(), "JSON should be generated by default"
+
 
 def test_analyze_generate_json(
     mock_workspace_client: WorkspaceClient,
@@ -86,7 +91,6 @@ def test_analyze_generate_json(
         source_directory=str(input_path),
         report_file=str(report_path),
         source_tech="Snowflake",
-        generate_json=True,
     )
 
     assert report_path.exists(), "Excel report was not created"
@@ -96,6 +100,27 @@ def test_analyze_generate_json(
     assert isinstance(data, dict), "JSON report is not a valid JSON object"
 
 
+def test_analyze_skip_json(
+    mock_workspace_client: WorkspaceClient,
+    test_resources: Path,
+    tmp_path: Path,
+) -> None:
+    input_path = test_resources / "functional" / "snowflake" / "integration"
+    report_path = tmp_path / "report-skip-json.xlsx"
+    expected_json = tmp_path / "report-skip-json.json"
+
+    cli.analyze(
+        w=mock_workspace_client,
+        source_directory=str(input_path),
+        report_file=str(report_path),
+        source_tech="Snowflake",
+        generate_json=False,
+    )
+
+    assert report_path.exists(), "Excel report was not created"
+    assert not expected_json.exists(), "JSON report should not be created when generate_json is false"
+
+
 def test_analyze_prompts(mock_workspace_client: WorkspaceClient, test_resources: Path, tmp_path: Path) -> None:
 
     supported_tech = sorted(Analyzer.supported_source_technologies(), key=str.casefold)
@@ -113,3 +138,30 @@ def test_analyze_prompts(mock_workspace_client: WorkspaceClient, test_resources:
     )
     with patch.object(ApplicationContext, "prompts", mock_prompts):
         cli.analyze(w=mock_workspace_client)
+
+    assert report_path.exists(), "Excel report was not created"
+    assert report_path.with_suffix(".json").exists(), "JSON should be generated by default when using prompts"
+
+
+def test_analyze_prompts_skip_json(
+    mock_workspace_client: WorkspaceClient, test_resources: Path, tmp_path: Path
+) -> None:
+    supported_tech = sorted(Analyzer.supported_source_technologies(), key=str.casefold)
+    tech_enum = next((i for i, tech in enumerate(supported_tech) if tech == "Informatica - PC"), 12)
+
+    source_dir = test_resources / "functional" / "informatica"
+    report_path = tmp_path / "results-no-json.xlsx"
+    expected_json = report_path.with_suffix(".json")
+
+    mock_prompts = MockPrompts(
+        {
+            "Select the source technology": str(tech_enum),
+            "Enter the path of the directory containing sources to analyze": str(source_dir),
+            "Enter the path of the report file for analyzer results": str(report_path),
+        }
+    )
+    with patch.object(ApplicationContext, "prompts", mock_prompts):
+        cli.analyze(w=mock_workspace_client, generate_json=False)
+
+    assert report_path.exists(), "Excel report was not created"
+    assert not expected_json.exists(), "JSON should not be created when generate_json is false"