88from rich .console import Console
99from rich .table import Table
1010
11- from a4d .pipeline .patient import process_patient_tables , run_patient_pipeline
11+ from a4d .pipeline .patient import discover_tracker_files , process_patient_tables , run_patient_pipeline
1212from a4d .tables .logs import create_table_logs
1313
1414app = typer .Typer (
@@ -69,30 +69,36 @@ def process_patient_cmd(
6969 ),
7070 ] = None ,
7171 workers : Annotated [
72- int , typer .Option ("--workers" , "-w" , help = "Number of parallel workers (1 = sequential )" )
73- ] = 1 ,
72+ int | None , typer .Option ("--workers" , "-w" , help = "Number of parallel workers (default: A4D_MAX_WORKERS )" )
73+ ] = None ,
7474 skip_tables : Annotated [
7575 bool , typer .Option ("--skip-tables" , help = "Skip table creation (only extract + clean)" )
7676 ] = False ,
7777 force : Annotated [
7878 bool , typer .Option ("--force" , help = "Force reprocessing (ignore existing outputs)" )
7979 ] = False ,
80- clean : Annotated [
81- bool ,
82- typer .Option ("--clean " , help = "Wipe output directory before running (default when --file is used )" ),
83- ] = False ,
80+ data_root : Annotated [
81+ Path | None ,
82+ typer .Option ("--data-root " , "-d" , help = "Directory containing tracker files (default: from config )" ),
83+ ] = None ,
8484 output_root : Annotated [
8585 Path | None , typer .Option ("--output" , "-o" , help = "Output directory (default: from config)" )
8686 ] = None ,
8787):
8888 """Process patient data pipeline.
8989
9090 \b
91+ Output is always cleaned before each run so tables reflect only the
92+ current run's files.
93+
9194 Examples:
92- # Process all trackers in data_root
95+ # Process all trackers in data_root (from config)
9396 uv run a4d process-patient
9497
95- # Process specific file (output is always cleaned first)
98+ # Process all trackers in a specific directory
99+ uv run a4d process-patient --data-root /path/to/trackers
100+
101+ # Process specific file
96102 uv run a4d process-patient --file /path/to/tracker.xlsx
97103
98104 # Parallel processing with 8 workers
@@ -101,25 +107,45 @@ def process_patient_cmd(
101107 # Just extract + clean, skip tables
102108 uv run a4d process-patient --skip-tables
103109 """
104- console . print ( " \n [bold blue]A4D Patient Pipeline[/bold blue] \n " )
110+ from a4d . config import settings as _settings
105111
106- # Prepare tracker files list
107- tracker_files = [file ] if file else None
112+ console .print ("\n [bold blue]A4D Patient Pipeline[/bold blue]\n " )
108113
109- # Single-file mode always cleans first — there's no reason to keep stale
110- # outputs from previous runs when testing a specific file.
111- clean_output = clean or (file is not None )
114+ if file :
115+ tracker_files = [file ]
116+ data_root_display = f"{ file } (single file)"
117+ elif data_root :
118+ tracker_files = discover_tracker_files (data_root )
119+ if not tracker_files :
120+ console .print (f"[bold red]Error: No tracker files found in { data_root } [/bold red]\n " )
121+ raise typer .Exit (1 )
122+ data_root_display = str (data_root )
123+ else :
124+ tracker_files = None # pipeline uses settings.data_root
125+ data_root_display = str (_settings .data_root )
126+
127+ _output_root = output_root or _settings .output_root
128+ _workers = workers if workers is not None else _settings .max_workers
129+
130+ console .print (f"Data root: { data_root_display } " )
131+ console .print (f"Output root: { _output_root } " )
132+ console .print (f"Workers: { _workers } " )
133+ if skip_tables :
134+ console .print ("Tables: skipped" )
135+ if force :
136+ console .print ("Force: yes" )
137+ console .print ()
112138
113139 # Step 1: Extract + clean (table creation handled below for visible progress)
114140 console .print ("[bold]Step 1/3:[/bold] Extracting and cleaning tracker files..." )
115141 try :
116142 result = run_patient_pipeline (
117143 tracker_files = tracker_files ,
118- max_workers = workers ,
144+ max_workers = _workers ,
119145 output_root = output_root ,
120146 skip_tables = True , # tables created below with console feedback
121147 force = force ,
122- clean_output = clean_output ,
148+ clean_output = True ,
123149 show_progress = True ,
124150 console_log_level = "ERROR" ,
125151 )
@@ -130,9 +156,6 @@ def process_patient_cmd(
130156 # Step 2+3: Table and log creation with console feedback
131157 tables : dict [str , Path ] = {}
132158 if not skip_tables and result .successful_trackers > 0 :
133- from a4d .config import settings as _settings
134-
135- _output_root = output_root or _settings .output_root
136159 cleaned_dir = _output_root / "patient_data_cleaned"
137160 tables_dir = _output_root / "tables"
138161 logs_dir = _output_root / "logs"
@@ -483,14 +506,18 @@ def upload_output_cmd(
483506@app .command ("run-pipeline" )
484507def run_pipeline_cmd (
485508 workers : Annotated [
486- int , typer .Option ("--workers" , "-w" , help = "Number of parallel workers (1 = sequential )" )
487- ] = 4 ,
509+ int | None , typer .Option ("--workers" , "-w" , help = "Number of parallel workers (default: A4D_MAX_WORKERS )" )
510+ ] = None ,
488511 force : Annotated [
489512 bool , typer .Option ("--force" , help = "Force reprocessing (ignore existing outputs)" )
490513 ] = False ,
514+ skip_download : Annotated [
515+ bool ,
516+ typer .Option ("--skip-download" , help = "Skip GCS download (use files already in data_root)" ),
517+ ] = False ,
491518 skip_upload : Annotated [
492519 bool ,
493- typer .Option ("--skip-upload" , help = "Skip GCS and BigQuery uploads (local testing) " ),
520+ typer .Option ("--skip-upload" , help = "Skip GCS and BigQuery upload steps " ),
494521 ] = False ,
495522):
496523 """Run the full end-to-end A4D pipeline.
@@ -506,28 +533,33 @@ def run_pipeline_cmd(
506533
507534 \b
508535 Examples:
509- # Full pipeline with 4 workers
536+ # Full pipeline (download + process + upload)
510537 uv run a4d run-pipeline
511538
512- # Force reprocess all files
513- uv run a4d run-pipeline --force
514-
515- # Local testing without GCS/BigQuery uploads
539+ # Download latest files, process locally, skip upload
516540 uv run a4d run-pipeline --skip-upload
541+
542+ # Process local files only, no download or upload
543+ uv run a4d run-pipeline --skip-download --skip-upload
517544 """
518545 from a4d .config import settings
519546 from a4d .gcp .bigquery import load_pipeline_tables
520547 from a4d .gcp .storage import download_tracker_files , upload_output
521548
549+ _workers = workers if workers is not None else settings .max_workers
550+
522551 console .print ("\n [bold blue]A4D Full Pipeline[/bold blue]\n " )
523552 console .print (f"Data root: { settings .data_root } " )
524553 console .print (f"Output root: { settings .output_root } " )
525- console .print (f"Workers: { workers } " )
554+ console .print (f"Workers: { _workers } " )
526555 console .print (f"Project: { settings .project_id } " )
527- console .print (f"Dataset: { settings .dataset } \n " )
556+ console .print (f"Dataset: { settings .dataset } " )
557+ console .print (f"Download: { 'yes' if not skip_download else 'skipped (--skip-download)' } " )
558+ console .print (f"Upload: { 'yes' if not skip_upload else 'skipped (--skip-upload)' } " )
559+ console .print ()
528560
529561 # Step 1 – Download tracker files from GCS
530- if not skip_upload :
562+ if not skip_download :
531563 console .print ("[bold]Step 1/5:[/bold] Downloading tracker files from GCS..." )
532564 try :
533565 downloaded = download_tracker_files (destination = settings .data_root )
@@ -536,13 +568,13 @@ def run_pipeline_cmd(
536568 console .print (f"\n [bold red]Error during download: { e } [/bold red]\n " )
537569 raise typer .Exit (1 ) from e
538570 else :
539- console .print ("[bold]Step 1/5:[/bold] Skipping GCS download (--skip-upload )\n " )
571+ console .print ("[bold]Step 1/5:[/bold] Skipping GCS download (--skip-download )\n " )
540572
541573 # Step 2+3 – Extract, clean and build tables
542574 console .print ("[bold]Steps 2–3/5:[/bold] Processing tracker files...\n " )
543575 try :
544576 result = run_patient_pipeline (
545- max_workers = workers ,
577+ max_workers = _workers ,
546578 force = force ,
547579 show_progress = True ,
548580 console_log_level = "WARNING" ,
0 commit comments