OneOffTech
diff --git a/‎docs/howto/batch_processing.md‎
Lines changed: 34 additions & 0 deletions b/‎docs/howto/batch_processing.md‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎docs/tutorials/using_cli.md‎
Lines changed: 5 additions & 15 deletions b/‎docs/tutorials/using_cli.md‎
Lines changed: 5 additions & 15 deletions
diff --git a/‎src/parxy_cli/commands/parse.py‎
Lines changed: 44 additions & 166 deletions b/‎src/parxy_cli/commands/parse.py‎
Lines changed: 44 additions & 166 deletions
@@ -101,6 +101,40 @@ When `stop_on_error=True`:
 - Only completed results (including the failed one) are returned
 
 
+## Circuit Breaker
+
+Batch processing includes a built-in circuit breaker that detects systemic driver failures and short-circuits remaining tasks for the affected driver. This prevents wasting API calls and time when a driver is guaranteed to fail (e.g., invalid API key, exhausted quota).
+
+The circuit breaker trips immediately (after a single failure) for these exception types:
+
+| Exception | Meaning |
+|---|---|
+| `AuthenticationException` | API key or token is invalid |
+| `QuotaExceededException` | Account balance or credits exhausted |
+| `RateLimitException` | Rate limit hit |
+
+Per-file errors like `FileNotFoundException` or `ParsingException` do **not** trip the circuit, since they are specific to individual files and don't indicate a driver-wide problem.
+
+The circuit breaker is **per-driver**: if LlamaParse fails with an authentication error, PyMuPDF tasks continue unaffected. Short-circuited results carry the original tripping exception in `BatchResult.exception` and `BatchResult.error`.
+
+A new circuit breaker is created for each `batch()` / `batch_iter()` call, so previous failures do not carry over between calls.
+
+```python
+results = Parxy.batch(
+    tasks=['doc1.pdf', 'doc2.pdf', 'doc3.pdf'],
+    drivers=['llamaparse', 'pymupdf'],
+)
+
+for result in results:
+    if result.failed:
+        # If llamaparse authentication fails on doc1, the llamaparse tasks
+        # for doc2 and doc3 are short-circuited immediately (no additional
+        # API calls), while the pymupdf tasks continue unaffected.
+        print(f'{result.file} ({result.driver}): {result.error}')
+    else:
+        print(f'{result.file} ({result.driver}): OK')
+```
+
+
 ## Advanced: Per-File Configuration with BatchTask
 
 For more control, use `BatchTask` objects to specify per-file configuration:
 
@@ -26,13 +26,13 @@ The `parse` command is a powerful tool for extracting text from documents with e
 
 ### Basic Usage
 
-Parse a single document using the default settings (PyMuPDF driver, markdown output):
+Parse a single document using the default settings (PyMuPDF driver, JSON output):
 
 ```bash
 parxy parse document.pdf
 ```
 
-This creates a `document.md` file in the same directory as the source file.
+This creates a `pymupdf-document.json` file in the same directory as the source file. Parxy always prefixes the output file name with the driver name.
 
 ### Processing Multiple Files and Folders
 
@@ -103,29 +103,19 @@ Specify a driver with the `--driver` (`-d`) option:
 
 ```bash
 parxy parse --driver llamaparse document.pdf
+# output will be saved as llamaparse-document.json
 ```
 
 ### Using Multiple Drivers for Comparison
 
-Parse the same document(s) with multiple drivers by specifying `--driver` multiple times:
+Parse the same document(s) with multiple drivers by specifying `--driver` (or `-d` for short) multiple times:
 
 ```bash
 parxy parse document.pdf -d pymupdf -d llamaparse
 ```
 
-When using multiple drivers, Parxy automatically appends the driver name to the output filenames:
-- `document_pymupdf.md`
-- `document_llamaparse.md`
+Parxy always prepends the driver name to the output filenames, so each driver writes its own file, e.g. `pymupdf-document.json`, `llamaparse-document.json`. This is particularly useful for comparing extraction quality across different parsers.
 
-This is particularly useful for comparing extraction quality across different parsers.
-
-### Showing Output in Console
-
-By default, output is only saved to files. To also display content in the console, use the `--show` (`-s`) flag:
-
-```bash
-parxy parse document.pdf --show
-```
 
 ### Progress Tracking
 
 
@@ -1,14 +1,13 @@
 """Command line interface for Parxy document processing."""
 
-import os
 from datetime import timedelta
 from pathlib import Path
 from typing import Optional, List, Annotated
 
 import typer
 
 from parxy_core.facade import Parxy
-from parxy_core.models import Document, BatchTask, BatchResult
+from parxy_core.models import Document, BatchResult
 
 from parxy_cli.models import Level, OutputMode
 from parxy_cli.console.console import Console
@@ -108,67 +107,6 @@ def get_content(doc: Document, mode: OutputMode) -> str:
         return doc.text()
 
 
-def process_file_with_driver(
-    file_path: Path,
-    driver: str,
-    level: Level,
-    mode: OutputMode,
-    output_dir: Optional[Path],
-    show: bool,
-    use_driver_suffix: bool = False,
-) -> tuple[str, int]:
-    """
-    Process a single file with a single driver.
-
-    Args:
-        file_path: Path to file to process
-        driver: Driver name to use
-        level: Extraction level
-        mode: Output mode
-        output_dir: Optional output directory
-        show: Whether to show content in console
-        use_driver_suffix: Whether to append driver name to output filename
-
-    Returns:
-        Tuple of (output_path, page_count)
-    """
-    # Parse the document
-    doc = Parxy.parse(
-        file=str(file_path),
-        level=level.value,
-        driver_name=driver,
-    )
-
-    # Get content
-    content = get_content(doc, mode)
-
-    # Determine output path
-    if output_dir:
-        output_dir.mkdir(parents=True, exist_ok=True)
-        base_name = file_path.stem
-    else:
-        # Save in same directory as source file
-        output_dir = file_path.parent
-        base_name = file_path.stem
-
-    # If multiple drivers, append driver name to filename
-    if use_driver_suffix and driver:
-        base_name = f'{base_name}-{driver}'
-
-    extension = get_output_extension(mode)
-    output_path = output_dir / f'{base_name}{extension}'
-
-    # Save to file
-    output_path.write_text(content, encoding='utf-8')
-
-    # Show in console if requested
-    if show:
-        console.print(content)
-        console.newline()
-
-    return str(output_path), len(doc.pages)
-
-
 def format_timedelta(td):
     days = td.days
     milliseconds = td.microseconds // 1000
@@ -195,7 +133,7 @@ def save_batch_result(
     mode: OutputMode,
     output_dir: Optional[Path],
     show: bool,
-    use_driver_suffix: bool = False,
+    use_driver_prefix: bool = True,
 ) -> tuple[str, int]:
     """
     Save a BatchResult to file.
@@ -205,7 +143,7 @@ def save_batch_result(
         mode: Output mode
         output_dir: Optional output directory
         show: Whether to show content in console
-        use_driver_suffix: Whether to append driver name to output filename
+        use_driver_prefix: Whether to prepend driver name to output filename
 
     Returns:
         Tuple of (output_path, page_count)
@@ -226,8 +164,8 @@ def save_batch_result(
         base_name = file_path.stem
 
     # Prepend the driver name to the filename when use_driver_prefix is set
-    if use_driver_suffix and result.driver:
-        base_name = f'{base_name}-{result.driver}'
+    if use_driver_prefix and result.driver:
+        base_name = f'{result.driver}-{base_name}'
 
     extension = get_output_extension(mode)
     output_path = output_dir / f'{base_name}{extension}'
@@ -314,23 +252,15 @@ def parse(
             help='Stop processing files immediately if an error occurs with any file',
         ),
     ] = False,
-    parallel: Annotated[
-        bool,
-        typer.Option(
-            '--parallel',
-            '-p',
-            help='Process files in parallel using multiple workers',
-        ),
-    ] = False,
     workers: Annotated[
-        Optional[int],
+        int,
         typer.Option(
             '--workers',
             '-w',
-            help='Number of parallel workers to use (only applies with --parallel). Defaults to CPU count.',
+            help='Number of parallel workers to use. Defaults to 2.',
             min=1,
         ),
-    ] = None,
+    ] = 2,
 ):
     """
     Parse documents using one or more drivers.
@@ -361,8 +291,8 @@ def parse(
         # Output as JSON and show in console
         parxy parse document.pdf -m json --show
 
-        # Process files in parallel with 4 workers
-        parxy parse /path/to/folder --parallel --workers 4
+        # Process files with 4 workers
+        parxy parse /path/to/folder --workers 4
     """
     console.action('Parse files', space_after=False)
     # Collect all files
@@ -382,21 +312,8 @@ def parse(
     # Calculate total tasks
     total_tasks = len(files) * len(drivers)
 
-    # Determine if we should use driver suffix (when multiple drivers are used)
-    use_driver_suffix = len(drivers) > 1
-
-    if use_driver_suffix:
-        console.info(
-            'You have specified more than one driver. Driver name will be added as suffix to the file name while saving.'
-        )
-
     error_count = 0
 
-    # Determine number of workers for parallel processing
-    if parallel:
-        max_workers = workers if workers else (os.cpu_count() or 2)
-        console.info(f'Using parallel processing with {max_workers} workers')
-
     # Show info
     with console.shimmer(
         f'Processing {len(files)} file{"s" if len(files) > 1 else ""} with {len(drivers)} driver{"s" if len(drivers) > 1 else ""}...'
@@ -405,83 +322,44 @@ def parse(
         with console.progress('Processing documents') as progress:
             task = progress.add_task('', total=total_tasks)
 
-            if parallel:
-                # Parallel processing using batch_iter
-                batch_tasks = [str(f) for f in files]
-
-                for result in Parxy.batch_iter(
-                    tasks=batch_tasks,
-                    drivers=drivers,
-                    level=level.value,
-                    workers=max_workers,
-                ):
-                    file_name = (
-                        Path(result.file).name
-                        if isinstance(result.file, str)
-                        else 'document'
+            batch_tasks = [str(f) for f in files]
+
+            for result in Parxy.batch_iter(
+                tasks=batch_tasks,
+                drivers=drivers,
+                level=level.value,
+                workers=workers,
+            ):
+                file_name = (
+                    Path(result.file).name
+                    if isinstance(result.file, str)
+                    else 'document'
+                )
+
+                if result.success:
+                    output_file, page_count = save_batch_result(
+                        result=result,
+                        mode=mode,
+                        output_dir=output_path,
+                        show=show,
                     )
+                    console.print(
+                        f'[faint]⎿ [/faint] {file_name} via {result.driver} to [success]{output_file}[/success] [faint]({page_count} pages)[/faint]'
+                    )
+                else:
+                    console.print(
+                        f'[faint]⎿ [/faint] {file_name} via {result.driver} error. [error]{result.error}[/error]'
+                    )
+                    error_count += 1
 
-                    if result.success:
-                        output_file, page_count = save_batch_result(
-                            result=result,
-                            mode=mode,
-                            output_dir=output_path,
-                            show=show,
-                            use_driver_suffix=use_driver_suffix,
-                        )
-                        console.print(
-                            f'[faint]⎿ [/faint] {file_name} via {result.driver} to [success]{output_file}[/success] [faint]({page_count} pages)[/faint]'
+                    if stop_on_failure:
+                        console.newline()
+                        console.info(
+                            'Stopping due to error (--stop-on-failure flag is set)'
                         )
-                    else:
-                        console.print(
-                            f'[faint]⎿ [/faint] {file_name} via {result.driver} error. [error]{result.error}[/error]'
-                        )
-                        error_count += 1
-
-                        if stop_on_failure:
-                            console.newline()
-                            console.info(
-                                'Stopping due to error (--stop-on-failure flag is set)'
-                            )
-                            raise typer.Exit(1)
+                        raise typer.Exit(1)
 
-                    progress.update(task, advance=1)
-            else:
-                # Sequential processing
-                for file_path in files:
-                    for driver in drivers:
-                        try:
-                            output_file, page_count = process_file_with_driver(
-                                file_path=file_path,
-                                driver=driver,
-                                level=level,
-                                mode=mode,
-                                output_dir=output_path,
-                                show=show,
-                                use_driver_suffix=use_driver_suffix,
-                            )
-
-                            # Update progress
-                            console.print(
-                                f'[faint]⎿ [/faint] {file_path.name} via {driver} to [success]{output_file}[/success] [faint]({page_count} pages)[/faint]'
-                            )
-                            progress.update(task, advance=1)
-
-                        except Exception as e:
-                            console.print(
-                                f'[faint]⎿ [/faint] {file_path.name} via {driver} error. [error]{str(e)}[/error]'
-                            )
-                            progress.update(task, advance=1)
-                            error_count += 1
-
-                            if stop_on_failure:
-                                console.newline()
-                                console.info(
-                                    'Stopping due to error (--stop-on-failure flag is set)'
-                                )
-                                raise typer.Exit(1)
-
-                            continue
+                progress.update(task, advance=1)
 
             elapsed_time = format_timedelta(
                 timedelta(seconds=max(0, progress.tasks[0].elapsed))