@@ -38,10 +38,10 @@ def main():
3838 epilog = """
3939Examples:
4040 # Run hash partitioning shuffle benchmark in Spark mode
41- python run_benchmark.py --data /path/to/data --mode spark --benchmark shuffle-hash
41+ python run_benchmark.py --data /path/to/data --shuffle-mode spark --benchmark shuffle-hash
4242
4343 # Run round-robin shuffle benchmark in Comet native mode
44- python run_benchmark.py --data /path/to/data --mode native --benchmark shuffle-roundrobin
44+ python run_benchmark.py --data /path/to/data --shuffle-mode native --benchmark shuffle-roundrobin
4545
4646 # List all available benchmarks
4747 python run_benchmark.py --list-benchmarks
@@ -52,7 +52,7 @@ def main():
5252 help = "Path to input parquet data"
5353 )
5454 parser .add_argument (
55- "--mode" , "-m" ,
55+ "--shuffle- mode" , "-m" ,
5656 choices = ["spark" , "jvm" , "native" ],
5757 help = "Shuffle mode being tested"
5858 )
@@ -81,25 +81,25 @@ def main():
8181 print (f" { name :25s} - { description } " )
8282 return 0
8383
84- # Handle --print-configs (requires --benchmark and --mode)
84+ # Handle --print-configs (requires --benchmark and --shuffle-mode)
8585 if args .print_configs :
86- if not args .mode :
87- parser .error ("--mode is required when using --print-configs" )
86+ if not args .shuffle_mode :
87+ parser .error ("--shuffle- mode is required when using --print-configs" )
8888 try :
8989 benchmark_cls = get_benchmark (args .benchmark )
9090 except KeyError as e :
9191 print (f"Error: { e } " , file = sys .stderr )
9292 return 1
93- configs = benchmark_cls .get_spark_configs (args .mode )
93+ configs = benchmark_cls .get_spark_configs (args .shuffle_mode )
9494 for key , value in configs .items ():
9595 print (f"--conf { key } ={ value } " )
9696 return 0
9797
9898 # Validate required arguments
9999 if not args .data :
100100 parser .error ("--data is required when running a benchmark" )
101- if not args .mode :
102- parser .error ("--mode is required when running a benchmark" )
101+ if not args .shuffle_mode :
102+ parser .error ("--shuffle- mode is required when running a benchmark" )
103103
104104 # Get the benchmark class
105105 try :
@@ -110,17 +110,17 @@ def main():
110110 return 1
111111
112112 # Get benchmark-specific configs
113- benchmark_configs = benchmark_cls .get_spark_configs (args .mode )
113+ benchmark_configs = benchmark_cls .get_spark_configs (args .shuffle_mode )
114114
115115 # Create Spark session with benchmark-specific configs
116- builder = SparkSession .builder .appName (f"{ benchmark_cls .name ()} -{ args .mode .upper ()} " )
116+ builder = SparkSession .builder .appName (f"{ benchmark_cls .name ()} -{ args .shuffle_mode .upper ()} " )
117117 for key , value in benchmark_configs .items ():
118118 builder = builder .config (key , value )
119119 spark = builder .getOrCreate ()
120120
121121 try :
122122 # Create and run the benchmark
123- benchmark = benchmark_cls (spark , args .data , args .mode )
123+ benchmark = benchmark_cls (spark , args .data , args .shuffle_mode )
124124 results = benchmark .execute_timed ()
125125
126126 print ("\n Check Spark UI for shuffle sizes and detailed metrics" )
0 commit comments