def format_ticks(x, pos):
    """Format a tick value as a string with two decimal places.

    Args:
        x: Numeric tick value to format.
        pos: Tick position. Accepted but not used by this function
            (presumably required by the matplotlib tick-formatter callback
            signature -- confirm at the call site).

    Returns:
        ``x`` rendered with exactly two digits after the decimal point and
        no surrounding whitespace.
    """
    # No padding inside the replacement field or the literal: a trailing
    # space would end up in every rendered tick label.
    return f'{x:.2f}'
1313
14- def main (json_file , pdf_filename , alpha ):
14+ def main (json_file , pdf_filename , alpha , bucketer ):
1515 # Load the data from the provided JSON file
1616 with open (json_file , 'r' ) as f :
1717 records = [json .loads (line ) for line in f ]
@@ -27,13 +27,13 @@ def main(json_file, pdf_filename, alpha):
2727 # Define configurations for filtering
2828 configurations = [
2929
30- ((df ['avg_partition_size' ] == "0" ) & (df ['num_partitions' ] == "0" ) & (df ['dense_partitioning' ] == "false" )
30+ ((df ['avg_partition_size' ] == "0" ) & (df ['num_partitions' ] == "0" ) & (df ['dense_partitioning' ] == "false" ) & ( df [ 'bucketer_type' ] == bucketer )
3131 , "SINGLE" ),
3232
33- ((df ['avg_partition_size' ] != "0" ) & (df ['num_partitions' ] != "0" ) & (df ['dense_partitioning' ] == "false" )
33+ ((df ['avg_partition_size' ] != "0" ) & (df ['num_partitions' ] != "0" ) & (df ['dense_partitioning' ] == "false" ) & ( df [ 'bucketer_type' ] == bucketer )
3434 , "PARTITIONED" ),
3535
36- ((df ['avg_partition_size' ] != "0" ) & (df ['num_partitions' ] != "0" ) & (df ['dense_partitioning' ] == "true" )
36+ ((df ['avg_partition_size' ] != "0" ) & (df ['num_partitions' ] != "0" ) & (df ['dense_partitioning' ] == "true" ) & ( df [ 'bucketer_type' ] == bucketer )
3737 , "DENSE-PARTITIONED" )
3838 ]
3939
@@ -50,7 +50,8 @@ def main(json_file, pdf_filename, alpha):
5050 # Group by the specified fields
5151 grouped_avg = filtered_df .groupby ([
5252 'n' , 'lambda' , 'alpha' , 'minimal' ,
53- 'bucketer_type' , 'avg_partition_size' ,
53+ 'bucketer_type' ,
54+ 'avg_partition_size' ,
5455 'num_partitions' , 'dense_partitioning' , 'seed' , 'num_threads' ,
5556 'external_memory' , 'encoder_type'
5657 ])['bits_per_key' ].mean ().reset_index ()
@@ -68,23 +69,28 @@ def main(json_file, pdf_filename, alpha):
6869 # Define different marker symbols for each encoder_type
6970 # marker_symbols = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X']
7071 colors = plt .get_cmap ('tab20' , 12 ) # Use 'tab20'
71-
7272 # Create a new PDF file to save plots
7373 with PdfPages (pdf_filename ) as pdf :
7474 fig = plt .figure (figsize = (20 , 8 ))
7575 gs = gridspec .GridSpec (1 , 4 , width_ratios = [3 , 3 , 3 , 1 ])
7676 axs = [fig .add_subplot (gs [i ]) for i in range (3 )]
7777
78- inter_mono_handles = []
79- other_encoder_handles = []
78+ encoder_handles = []
79+ encoder_color_map = {}
80+ i = 0
81+ for (grouped_avg , _ ) in grouped_data :
82+ for e in sorted (grouped_avg ['encoder_type' ].unique ()):
83+ if e not in encoder_color_map .keys ():
84+ encoder_color_map [e ] = i
85+ i += 1
8086
8187 for ax , (grouped_avg , title ) in zip (axs , grouped_data ):
8288
8389 encoder_types = sorted (grouped_avg ['encoder_type' ].unique ())
8490
85- for i , encoder_type in enumerate ( encoder_types ) :
91+ for encoder_type in encoder_types :
8692
87- encoder_color = colors (i )
93+ encoder_color = colors (encoder_color_map [ encoder_type ] )
8894 for alpha_value in sorted (grouped_avg ['alpha' ].unique (), reverse = True ):
8995
9096 if alpha_value == alpha : # filter on specific alpha
@@ -100,11 +106,8 @@ def main(json_file, pdf_filename, alpha):
100106 color = encoder_color ,
101107 linestyle = 'none' )[0 ]
102108
103- if 'inter' in encoder_type or 'mono' in encoder_type :
104- inter_mono_handles .append (handle )
105- else :
106- if not any (encoder_type == l .get_label () for l in other_encoder_handles ):
107- other_encoder_handles .append (handle )
109+ if not encoder_type in [l .get_label () for l in encoder_handles ]:
110+ encoder_handles .append (handle )
108111
109112 # Set plot labels and title with LaTeX formatting
110113 ax .set_xlabel (r'$\lambda$' , fontsize = 14 )
@@ -119,23 +122,19 @@ def main(json_file, pdf_filename, alpha):
119122 ax .tick_params (axis = 'both' , which = 'major' , labelsize = 12 )
120123
121124 # Collect the label of every encoder handle for the single shared legend
122- other_encoder_labels = [h .get_label () for h in other_encoder_handles ]
123- inter_mono_labels = [h .get_label () for h in inter_mono_handles ]
125+ encoder_labels = [h .get_label () for h in encoder_handles ]
124126
125127 # Create a new axis for the legend at the bottom of the main figure
126128 legend_ax = fig .add_subplot (gs [- 1 , :]) # Use GridSpec to create the legend axis
127129 legend_ax .axis ('off' )
128130
129131 # Place the single encoder legend in the upper-right corner of the legend axis
130- other_legend = legend_ax .legend (other_encoder_handles , other_encoder_labels , loc = 'upper right' ,
131- title = 'SINGLE and PARTITIONED' ) # Move further right
132- inter_mono_legend = legend_ax .legend (inter_mono_handles , inter_mono_labels , loc = 'center right' ,
133- bbox_to_anchor = (1 , 0.45 ),
134- title = 'DENSE-PARTITIONED' ) # Adjusted position
132+ other_legend = legend_ax .legend (encoder_handles , encoder_labels , loc = 'upper right' ,
133+ title = 'Encoders' ) # Move further right
135134
136135 # Add the legends to the axis
137136 legend_ax .add_artist (other_legend )
138- legend_ax .add_artist (inter_mono_legend )
137+ # legend_ax.add_artist(inter_mono_legend)
139138
140139 # Adjust the layout of the main figure
141140 plt .tight_layout ()
@@ -154,9 +153,10 @@ def main(json_file, pdf_filename, alpha):
154153 parser .add_argument ('-i' , '--input_json_filename' , required = True , type = str , help = 'Path to the input JSON file.' )
155154 parser .add_argument ('-o' , '--output_pdf_filename' , required = True , type = str , help = 'Path for the output PDF file.' )
156155 parser .add_argument ('-a' , '--alpha' , required = True , type = float , help = 'Value of alpha (a float).' )
156+ parser .add_argument ('-b' , '--bucketer' , required = True , type = str , help = 'Bucketer type: values are "skew" or "opt".' )
157157
158158 # Parse the arguments
159159 args = parser .parse_args ()
160160
161161 # Call the main function with parsed arguments
162- main (args .input_json_filename , args .output_pdf_filename , args .alpha )
162+ main (args .input_json_filename , args .output_pdf_filename , args .alpha , args . bucketer )
0 commit comments