|
11 | 11 | # Define the data for each environment |
12 | 12 | data_combo_burger = { |
13 | 13 | 'EXPERIMENT_ID': [ |
14 | | - 'INTERPRET', 'Ours', 'Ours-no-geo', 'Ours-no-invent', |
15 | | - 'Ours-no-subselection', 'Ours-no-visual', 'Ours-vlm-subselection', |
16 | | - 'VILA-pure', 'VILA-with-fewshot' |
| 14 | + 'VLM feat. pred', 'Ours', 'No feat.', 'No invent', |
| 15 | + 'No subselect', 'No visual', 'VLM subselect', |
| 16 | + 'ViLa', 'ViLa fewshot' |
17 | 17 | ], |
18 | 18 | 'NUM_SOLVED': [0.00, 8.20, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 3.80], |
19 | 19 | 'NUM_SOLVED_STDDEV': [0.00, 1.17, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.40] |
20 | 20 | } |
21 | 21 |
|
22 | 22 | data_fatter_burger = { |
23 | 23 | 'EXPERIMENT_ID': [ |
24 | | - 'INTERPRET', 'Ours', 'Ours-no-geo', 'Ours-no-invent', |
25 | | - 'Ours-no-subselection', 'Ours-no-visual', 'Ours-vlm-subselection', |
26 | | - 'VILA-pure', 'VILA-with-fewshot' |
| 24 | + 'VLM feat. pred', 'Ours', 'No feat.', 'No invent', |
| 25 | + 'No subselect', 'No visual', 'VLM subselect', |
| 26 | + 'ViLa', 'ViLa fewshot' |
27 | 27 | ], |
28 | 28 | 'NUM_SOLVED': [0.00, 9.60, 1.20, 0.00, 0.00, 1.20, 3.00, 0.80, 3.80], |
29 | 29 | 'NUM_SOLVED_STDDEV': [0.00, 0.80, 2.40, 0.00, 0.00, 2.40, 1.41, 0.40, 0.40] |
30 | 30 | } |
31 | 31 |
|
32 | 32 | data_more_stacks = { |
33 | 33 | 'EXPERIMENT_ID': [ |
34 | | - 'INTERPRET', 'Ours', 'Ours-no-geo', 'Ours-no-invent', |
35 | | - 'Ours-no-subselection', 'Ours-no-visual', 'Ours-vlm-subselection', |
36 | | - 'VILA-pure', 'VILA-with-fewshot' |
| 34 | + 'VLM feat. pred', 'Ours', 'No feat.', 'No invent', |
| 35 | + 'No subselect', 'No visual', 'VLM subselect', |
| 36 | + 'ViLa', 'ViLa fewshot' |
37 | 37 | ], |
38 | 38 | 'NUM_SOLVED': [0.00, 9.40, 0.00, 0.00, 0.00, 0.00, 3.60, 0.80, 3.80], |
39 | 39 | 'NUM_SOLVED_STDDEV': [0.00, 0.80, 0.00, 0.00, 0.00, 0.00, 2.24, 1.17, 0.40] |
40 | 40 | } |
41 | 41 |
|
42 | 42 | data_kitchen_boil_kettle = { |
43 | 43 | 'EXPERIMENT_ID': [ |
44 | | - 'INTERPRET', 'Ours', 'Ours-no-geo', 'Ours-no-invent', |
45 | | - 'Ours-no-subselection', 'Ours-no-visual', 'Ours-vlm-subselection', |
46 | | - 'VILA-pure', 'VILA-with-fewshot' |
| 44 | + 'VLM feat. pred', 'Ours', 'No feat.', 'No invent', |
| 45 | + 'No subselect', 'No visual', 'VLM subselect', |
| 46 | + 'ViLa', 'ViLa fewshot' |
47 | 47 | ], |
48 | 48 | 'NUM_SOLVED': [0.00, 9.80, 9.80, 0.00, 0.00, 9.80, 1.00, 6.60, 10.00], |
49 | 49 | 'NUM_SOLVED_STDDEV': [0.00, 0.40, 0.40, 0.00, 0.00, 0.40, 2.00, 1.02, 0.00] |
|
57 | 57 |
|
58 | 58 | # Reorder the 'EXPERIMENT_ID' column to match 'custom_order' |
59 | 59 | custom_order = [ |
60 | | - 'Ours', 'Ours-no-geo', 'Ours-no-invent', 'Ours-no-subselection', |
61 | | - 'Ours-no-visual', 'Ours-vlm-subselection', 'INTERPRET', 'VILA-pure', |
62 | | - 'VILA-with-fewshot' |
| 60 | + 'Ours', 'VLM subselect', 'No subselect', 'No feat.', 'No visual', 'No invent', |
| 61 | + 'VLM feat. pred', 'ViLa', 'ViLa fewshot' |
63 | 62 | ] |
64 | 63 |
|
65 | 64 | # Apply Categorical ordering before any transformations |
|
0 commit comments