Skip to content

Commit c5fb22a

Browse files
adds plotting script
1 parent a682c9b commit c5fb22a

1 file changed

Lines changed: 104 additions & 0 deletions

File tree

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
"""Draw horizontal bar charts of the percentage of evaluation tasks solved.

One panel is drawn per environment and one bar per experiment, with
standard-error bars overlaid on each bar.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams

# Set font aesthetics: text drawn later defaults to DejaVu Sans at 16 pt
# unless a call passes an explicit size.
rcParams['font.family'] = 'DejaVu Sans'
rcParams['font.size'] = 16
# Define the data for each environment.
#
# Every table lists the same experiments in the same order, so the labels are
# written once here and copied into each table (each table gets its own list,
# so editing one table never affects another).
#
# NOTE(review): later code multiplies NUM_SOLVED by 10 to obtain a percentage
# and divides NUM_SOLVED_STDDEV by sqrt(5), so these look like mean counts out
# of 10 tasks with standard deviations over 5 runs -- confirm with the
# experiment logs.
EXPERIMENT_IDS = (
    'INTERPRET', 'Ours', 'Ours-no-geo', 'Ours-no-invent',
    'Ours-no-subselection', 'Ours-no-visual', 'Ours-vlm-subselection',
    'VILA-pure', 'VILA-with-fewshot',
)

data_combo_burger = {
    'EXPERIMENT_ID': list(EXPERIMENT_IDS),
    'NUM_SOLVED': [0.00, 8.20, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 3.80],
    'NUM_SOLVED_STDDEV': [0.00, 1.17, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.40],
}

data_fatter_burger = {
    'EXPERIMENT_ID': list(EXPERIMENT_IDS),
    'NUM_SOLVED': [0.00, 9.60, 1.20, 0.00, 0.00, 1.20, 3.00, 0.80, 3.80],
    'NUM_SOLVED_STDDEV': [0.00, 0.80, 2.40, 0.00, 0.00, 2.40, 1.41, 0.40, 0.40],
}

data_more_stacks = {
    'EXPERIMENT_ID': list(EXPERIMENT_IDS),
    'NUM_SOLVED': [0.00, 9.40, 0.00, 0.00, 0.00, 0.00, 3.60, 0.80, 3.80],
    'NUM_SOLVED_STDDEV': [0.00, 0.80, 0.00, 0.00, 0.00, 0.00, 2.24, 1.17, 0.40],
}

data_kitchen_boil_kettle = {
    'EXPERIMENT_ID': list(EXPERIMENT_IDS),
    'NUM_SOLVED': [0.00, 9.80, 9.80, 0.00, 0.00, 9.80, 1.00, 6.60, 10.00],
    'NUM_SOLVED_STDDEV': [0.00, 0.40, 0.40, 0.00, 0.00, 0.40, 2.00, 1.02, 0.00],
}
# Order in which experiments are listed on the charts; it differs from the
# order in which the raw tables list them.
custom_order = [
    'Ours', 'Ours-no-geo', 'Ours-no-invent', 'Ours-no-subselection',
    'Ours-no-visual', 'Ours-vlm-subselection', 'INTERPRET', 'VILA-pure',
    'VILA-with-fewshot'
]

# Sample size used to turn a standard deviation into a standard error.
# NOTE(review): presumably the number of runs/seeds per experiment -- confirm.
NUM_RUNS = 5

# Factor that turns NUM_SOLVED into a percentage.
# NOTE(review): implies each evaluation has 10 tasks -- confirm.
PERCENT_SCALE = 10


def _prepare_results(raw):
    """Return a plot-ready DataFrame built from one raw results table.

    The frame is sorted into ``custom_order``, ``NUM_SOLVED`` is rescaled to a
    percentage, and a ``NUM_SOLVED_SE`` column holds the standard error on the
    same percentage scale.

    Raises:
        ValueError: if the table names an experiment missing from
            ``custom_order``; ``pd.Categorical`` would otherwise turn that
            label into NaN without any warning.
    """
    unknown = set(raw['EXPERIMENT_ID']) - set(custom_order)
    if unknown:
        raise ValueError(f"Experiments missing from custom_order: {sorted(unknown)}")

    frame = pd.DataFrame(raw)
    # Apply Categorical ordering before any transformations so that sorting
    # follows custom_order rather than alphabetical order.
    frame['EXPERIMENT_ID'] = pd.Categorical(
        frame['EXPERIMENT_ID'], categories=custom_order, ordered=True
    )
    frame = frame.sort_values('EXPERIMENT_ID')

    # Convert 'NUM_SOLVED' to percentages and calculate standard error.
    frame['NUM_SOLVED'] = frame['NUM_SOLVED'] * PERCENT_SCALE
    frame['NUM_SOLVED_SE'] = frame['NUM_SOLVED_STDDEV'] / np.sqrt(NUM_RUNS) * PERCENT_SCALE
    return frame


# Convert each dataset to a DataFrame
df_combo_burger = _prepare_results(data_combo_burger)
df_fatter_burger = _prepare_results(data_fatter_burger)
df_more_stacks = _prepare_results(data_more_stacks)
df_kitchen_boil_kettle = _prepare_results(data_kitchen_boil_kettle)
# Initialize subplots: one row of four panels that share the y-axis.
fig, axes = plt.subplots(1, 4, figsize=(18, 6), sharey=True)

# Assign a larger color palette for the bars, so that each bar has a unique color
# (one pastel color per row of the table).
# NOTE(review): seaborn >= 0.13 reportedly warns when `palette` is passed
# without `hue`; confirm against the installed version before changing the call.
unique_palette = sns.color_palette("pastel", n_colors=len(df_combo_burger))

# Panels from left to right: 'Boil Kettle', 'More Stacks', 'Bigger Burger', then 'Combo Burger'
environments = [df_kitchen_boil_kettle, df_more_stacks, df_fatter_burger, df_combo_burger]
titles = ["Kitchen Boil Kettle", "More Burger Stacks", "Bigger Burger", "Combo Burger"]

for ax, df, title in zip(axes, environments, titles):
    # Horizontal bars: experiments on the y-axis, percentage solved on the x-axis.
    sns.barplot(
        data=df, y='EXPERIMENT_ID', x='NUM_SOLVED', ax=ax, palette=unique_palette, capsize=0.1
    )
    # Overlay the standard-error bars computed in the NUM_SOLVED_SE column.
    ax.errorbar(
        df['NUM_SOLVED'], df['EXPERIMENT_ID'],
        xerr=df['NUM_SOLVED_SE'], fmt='none', c='black', capsize=5, capthick=1
    )
    ax.set_title(title, fontsize=20)  # Panel titles are larger than the base font
    ax.set_xlabel('')  # Clear individual x-labels; one shared label is added below
    ax.set_ylabel('', fontsize=16)  # No y-axis label; the tick labels name the experiments
    ax.tick_params(axis='both', labelsize=14)  # Explicit tick label size
    ax.grid(True, linestyle='--', alpha=0.6)  # Add gridlines for clarity

# Set shared x-label, centered along the bottom edge of the figure
fig.text(0.5, 0.01, '% Evaluation Tasks Solved', ha='center', fontsize=18)

# Adjust layout with tighter spacing; the rect leaves a strip at the bottom
# of the figure free for the shared x-label.
plt.tight_layout(rect=[0.02, 0.05, 1, 1])
plt.show()

0 commit comments

Comments
 (0)