Skip to content

Commit 246ddd6

Browse files
committed
fixed script
1 parent 66e8783 commit 246ddd6

3 files changed

Lines changed: 53 additions & 57 deletions

File tree

script/plot_avg_building_time.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
def format_ticks(x, pos):
1212
return f'{x:.2f}'
1313

14-
def main(json_file, pdf_filename, alpha=None):
14+
def main(json_file, pdf_filename, bucketer, alpha=None):
1515
# Load the data from the provided JSON file
1616
with open(json_file, 'r') as f:
1717
records = [json.loads(line) for line in f] # Read each line as a separate JSON object
@@ -27,13 +27,13 @@ def main(json_file, pdf_filename, alpha=None):
2727
# Define configurations for filtering
2828
configurations = [
2929

30-
((df['avg_partition_size'] == "0") & (df['num_partitions'] == "0") & (df['dense_partitioning'] == "false")
30+
((df['avg_partition_size'] == "0") & (df['num_partitions'] == "0") & (df['dense_partitioning'] == "false") & (df['bucketer_type'] == bucketer)
3131
, "SINGLE"),
3232

33-
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "false")
33+
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "false") & (df['bucketer_type'] == bucketer)
3434
, "PARTITIONED"),
3535

36-
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "true")
36+
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "true") & (df['bucketer_type'] == bucketer)
3737
, "DENSE-PARTITIONED")
3838

3939
]
@@ -67,8 +67,8 @@ def main(json_file, pdf_filename, alpha=None):
6767
min_y = min(all_y_values)
6868
max_y = max(all_y_values)
6969

70-
colors = plt.get_cmap('tab20', 12) # Use 'tab20'
7170
alpha_handles = []
71+
alpha_color_map = {}
7272

7373
# Create a new PDF file to save plots
7474
with PdfPages(pdf_filename) as pdf:
@@ -78,21 +78,31 @@ def main(json_file, pdf_filename, alpha=None):
7878
gs = gridspec.GridSpec(1, 4, width_ratios=[3, 3, 3, 1])
7979
axs = [fig.add_subplot(gs[i]) for i in range(3)]
8080

81+
i = 0
82+
for (grouped_avg, _) in grouped_data:
83+
for alpha_value in sorted(grouped_avg['alpha'].unique(), reverse=True):
84+
if alpha_value not in alpha_color_map.keys():
85+
alpha_color_map[alpha_value] = i
86+
i += 1
87+
88+
colors = plt.get_cmap('tab20', len(alpha_color_map)) # Use 'tab20'
89+
8190
for ax, (grouped_avg, title) in zip(axs, grouped_data):
8291
# Scatter plot for each unique alpha value
83-
for i, alpha_value in enumerate(sorted(grouped_avg['alpha'].unique(), reverse=True)):
92+
for alpha_value in sorted(grouped_avg['alpha'].unique(), reverse=True):
8493

8594
if alpha == None or alpha_value == alpha: # filter on specific alpha
8695

96+
color_alpha = colors(alpha_color_map[alpha_value])
8797
subset = grouped_avg[grouped_avg['alpha'] == alpha_value]
8898

8999
label = rf'$\alpha$ = {float(alpha_value):.2f}'
90100
if not any(label in l.get_label() for l in alpha_handles):
91-
alpha_handles.append(ax.plot([], [], label=label, color=colors(i))[0])
101+
alpha_handles.append(ax.plot([], [], label=label, color=color_alpha)[0])
92102

93103
ax.plot(subset['lambda'], subset['total_seconds'],
94104
marker='o', # marker_symbols[i],
95-
markersize=6, color=colors(i))
105+
markersize=6, color=color_alpha)
96106

97107
# Set plot labels and title with LaTeX formatting
98108
ax.set_xlabel(r'$\lambda$', fontsize=14)
@@ -112,10 +122,7 @@ def main(json_file, pdf_filename, alpha=None):
112122
legend_ax.axis('off')
113123

114124
alpha_labels = [h.get_label() for h in alpha_handles]
115-
116-
# Adjusting the `bbox_to_anchor` to move legends further to the right
117-
alpha_legend = legend_ax.legend(alpha_handles, alpha_labels, loc='upper right')
118-
legend_ax.add_artist(alpha_legend)
125+
plt.legend(alpha_handles, alpha_labels, loc='upper right', fontsize=14)
119126

120127
# Adjust the layout of the main figure
121128
plt.tight_layout()
@@ -133,10 +140,11 @@ def main(json_file, pdf_filename, alpha=None):
133140
# Define the expected arguments
134141
parser.add_argument('-i', '--input_json_filename', required=True, type=str, help='Path to the input JSON file.')
135142
parser.add_argument('-o', '--output_pdf_filename', required=True, type=str, help='Path for the output PDF file.')
143+
parser.add_argument('-b', '--bucketer', required=True, type=str, help='Bucketer type: values are "skew" or "opt".')
136144
parser.add_argument('-a', '--alpha', required=False, type=float, help='Value of alpha (a float).')
137145

138146
# Parse the arguments
139147
args = parser.parse_args()
140148

141149
# Call the main function with parsed arguments
142-
main(args.input_json_filename, args.output_pdf_filename, args.alpha)
150+
main(args.input_json_filename, args.output_pdf_filename, args.bucketer, args.alpha)

script/plot_avg_query_time.py

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
def format_ticks(x, pos):
1212
return f'{x:.2f}'
1313

14-
def main(json_file, pdf_filename, alpha):
14+
def main(json_file, pdf_filename, alpha, bucketer):
1515
# Load the data from the provided JSON file
1616
with open(json_file, 'r') as f:
1717
records = [json.loads(line) for line in f]
@@ -27,13 +27,13 @@ def main(json_file, pdf_filename, alpha):
2727
# Define configurations for filtering
2828
configurations = [
2929

30-
((df['avg_partition_size'] == "0") & (df['num_partitions'] == "0") & (df['dense_partitioning'] == "false")
30+
((df['avg_partition_size'] == "0") & (df['num_partitions'] == "0") & (df['dense_partitioning'] == "false") & (df['bucketer_type'] == bucketer)
3131
, "SINGLE"),
3232

33-
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "false")
33+
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "false") & (df['bucketer_type'] == bucketer)
3434
, "PARTITIONED"),
3535

36-
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "true")
36+
((df['avg_partition_size'] != "0") & (df['num_partitions'] != "0") & (df['dense_partitioning'] == "true") & (df['bucketer_type'] == bucketer)
3737
, "DENSE-PARTITIONED")
3838

3939
]
@@ -52,7 +52,8 @@ def main(json_file, pdf_filename, alpha):
5252
grouped_avg = filtered_df.groupby([
5353
'n', 'lambda', 'alpha', 'minimal',
5454
'bucketer_type', 'avg_partition_size',
55-
'num_partitions', 'dense_partitioning', 'seed', 'num_threads',
55+
'num_partitions', 'dense_partitioning',
56+
'seed', 'num_threads',
5657
'external_memory', 'encoder_type'
5758
])['nanosec_per_key'].mean().reset_index()
5859

@@ -64,26 +65,30 @@ def main(json_file, pdf_filename, alpha):
6465
min_y = min(all_y_values)
6566
max_y = max(all_y_values)
6667

67-
# Define different marker symbols for each encoder_type
68-
# marker_symbols = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X']
69-
colors = plt.get_cmap('tab20', 12) # Use 'tab20'
70-
7168
# Create a new PDF file to save plots
7269
with PdfPages(pdf_filename) as pdf:
7370
fig = plt.figure(figsize=(20, 8))
74-
gs = gridspec.GridSpec(1, 4, width_ratios=[3, 3, 3, 1])
71+
gs = gridspec.GridSpec(1, 4, width_ratios=[3, 3, 3, 0.2])
7572
axs = [fig.add_subplot(gs[i]) for i in range(3)]
7673

77-
inter_mono_handles = []
78-
other_encoder_handles = []
74+
encoder_handles = []
75+
encoder_color_map = {}
76+
i = 0
77+
for (grouped_avg, _) in grouped_data:
78+
for e in sorted(grouped_avg['encoder_type'].unique()):
79+
if e not in encoder_color_map.keys():
80+
encoder_color_map[e] = i
81+
i += 1
82+
83+
colors = plt.get_cmap('tab20', len(encoder_color_map)) # Use 'tab20'
7984

8085
for ax, (grouped_avg, title) in zip(axs, grouped_data):
8186

8287
encoder_types = sorted(grouped_avg['encoder_type'].unique())
8388

84-
for i, encoder_type in enumerate(encoder_types):
89+
for encoder_type in encoder_types:
8590

86-
encoder_color = colors(i)
91+
encoder_color = colors(encoder_color_map[encoder_type])
8792
for alpha_value in sorted(grouped_avg['alpha'].unique(), reverse=True):
8893

8994
if alpha_value == alpha: # filter on specific alpha
@@ -99,11 +104,8 @@ def main(json_file, pdf_filename, alpha):
99104
color=encoder_color,
100105
linestyle='none')[0]
101106

102-
if 'inter' in encoder_type or 'mono' in encoder_type:
103-
inter_mono_handles.append(handle)
104-
else:
105-
if not any(encoder_type == l.get_label() for l in other_encoder_handles):
106-
other_encoder_handles.append(handle)
107+
if not encoder_type in [l.get_label() for l in encoder_handles]:
108+
encoder_handles.append(handle)
107109

108110
# Set plot labels and title with LaTeX formatting
109111
ax.set_xlabel(r'$\lambda$', fontsize=14)
@@ -118,23 +120,14 @@ def main(json_file, pdf_filename, alpha):
118120
ax.tick_params(axis='both', which='major', labelsize=12)
119121

120122
# Collect the legend labels from the handles gathered above
121-
other_encoder_labels = [h.get_label() for h in other_encoder_handles]
122-
inter_mono_labels = [h.get_label() for h in inter_mono_handles]
123+
encoder_labels = [h.get_label() for h in encoder_handles]
123124

124125
# Create a new axis for the legend at the bottom of the main figure
125126
legend_ax = fig.add_subplot(gs[-1, :]) # Use GridSpec to create the legend axis
126127
legend_ax.axis('off')
127128

128129
# Place the encoder legend on the right-hand side of the figure
129-
other_legend = legend_ax.legend(other_encoder_handles, other_encoder_labels, loc='upper right',
130-
title='SINGLE and PARTITIONED') # Move further right
131-
inter_mono_legend = legend_ax.legend(inter_mono_handles, inter_mono_labels, loc='center right',
132-
bbox_to_anchor=(1, 0.45),
133-
title='DENSE-PARTITIONED') # Adjusted position
134-
135-
# Add the legends to the axis
136-
legend_ax.add_artist(other_legend)
137-
legend_ax.add_artist(inter_mono_legend)
130+
plt.legend(encoder_handles, encoder_labels, loc='upper right', fontsize=14)
138131

139132
# Adjust the layout of the main figure
140133
plt.tight_layout()
@@ -153,9 +146,10 @@ def main(json_file, pdf_filename, alpha):
153146
parser.add_argument('-i', '--input_json_filename', required=True, type=str, help='Path to the input JSON file.')
154147
parser.add_argument('-o', '--output_pdf_filename', required=True, type=str, help='Path for the output PDF file.')
155148
parser.add_argument('-a', '--alpha', required=True, type=float, help='Value of alpha (a float).')
149+
parser.add_argument('-b', '--bucketer', required=True, type=str, help='Bucketer type: values are "skew" or "opt".')
156150

157151
# Parse the arguments
158152
args = parser.parse_args()
159153

160154
# Call the main function with parsed arguments
161-
main(args.input_json_filename, args.output_pdf_filename, args.alpha)
155+
main(args.input_json_filename, args.output_pdf_filename, args.alpha, args.bucketer)

script/plot_space.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ def main(json_file, pdf_filename, alpha, bucketer):
5050
# Group by the specified fields
5151
grouped_avg = filtered_df.groupby([
5252
'n', 'lambda', 'alpha', 'minimal',
53-
'bucketer_type',
54-
'avg_partition_size',
55-
'num_partitions', 'dense_partitioning', 'seed', 'num_threads',
53+
'bucketer_type', 'avg_partition_size',
54+
'num_partitions', 'dense_partitioning',
55+
'seed', 'num_threads',
5656
'external_memory', 'encoder_type'
5757
])['bits_per_key'].mean().reset_index()
5858

@@ -66,13 +66,10 @@ def main(json_file, pdf_filename, alpha, bucketer):
6666
if max_y > 5.0:
6767
max_y = 5.0 # saturate to 5 bits/key (that's enough!)
6868

69-
# Define different marker symbols for each encoder_type
70-
# marker_symbols = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X']
71-
colors = plt.get_cmap('tab20', 12) # Use 'tab20'
7269
# Create a new PDF file to save plots
7370
with PdfPages(pdf_filename) as pdf:
7471
fig = plt.figure(figsize=(20, 8))
75-
gs = gridspec.GridSpec(1, 4, width_ratios=[3, 3, 3, 1])
72+
gs = gridspec.GridSpec(1, 4, width_ratios=[3, 3, 3, 0.3])
7673
axs = [fig.add_subplot(gs[i]) for i in range(3)]
7774

7875
encoder_handles = []
@@ -84,6 +81,8 @@ def main(json_file, pdf_filename, alpha, bucketer):
8481
encoder_color_map[e] = i
8582
i += 1
8683

84+
colors = plt.get_cmap('tab20', len(encoder_color_map)) # Use 'tab20'
85+
8786
for ax, (grouped_avg, title) in zip(axs, grouped_data):
8887

8988
encoder_types = sorted(grouped_avg['encoder_type'].unique())
@@ -129,12 +128,7 @@ def main(json_file, pdf_filename, alpha, bucketer):
129128
legend_ax.axis('off')
130129

131130
# Place the encoder legend in the upper-right corner
132-
other_legend = legend_ax.legend(encoder_handles, encoder_labels, loc='upper right',
133-
title='Encoders') # Move further right
134-
135-
# Add the legends to the axis
136-
legend_ax.add_artist(other_legend)
137-
# legend_ax.add_artist(inter_mono_legend)
131+
plt.legend(encoder_handles, encoder_labels, loc='upper right', fontsize=14)
138132

139133
# Adjust the layout of the main figure
140134
plt.tight_layout()

0 commit comments

Comments
 (0)