|
40 | 40 | import CreateNewRunConfigDialog from "$lib/ui/run_config_component/create_new_run_config_dialog.svelte" |
41 | 41 | import SavedRunConfigurationsDropdown from "$lib/ui/run_config_component/saved_run_configs_dropdown.svelte" |
42 | 42 | import RunEval from "$lib/components/run_eval.svelte" |
| 43 | + import FloatingMenu from "$lib/ui/floating_menu.svelte" |
| 44 | + import type { FloatingMenuItem } from "$lib/ui/floating_menu_types" |
43 | 45 |
|
44 | 46 | import { agentInfo } from "$lib/agent" |
45 | 47 | $: project_id = $page.params.project_id! |
|
53 | 55 | // State management |
54 | 56 | let columns = 2 // Start with 2 columns |
55 | 57 | let selectedModels: (string | null)[] = [null, null] // Track selected model for each column |
| 58 | + let hiddenEvalIds: string[] = [] // Eval IDs hidden by the user (kiln_cost_section is never hideable) |
56 | 59 |
|
57 | 60 | // Run configs state |
58 | 61 | let loading_run_configs = true |
|
94 | 97 |
|
95 | 98 | // Initialize selectedModels array with correct length |
96 | 99 | selectedModels = new Array(columns).fill(null) |
| 100 | +
|
| 101 | + // Hidden evals can be restored before run configs are loaded - they are just IDs. |
| 102 | + // Defensive: drop the cost section ID + dedupe in case a hand-edited URL is messy. |
| 103 | + const urlHidden = urlParams.get("hidden_evals") |
| 104 | + if (urlHidden) { |
| 105 | + hiddenEvalIds = [ |
| 106 | + ...new Set( |
| 107 | + urlHidden |
| 108 | + .split(",") |
| 109 | + .map((id) => id.trim()) |
| 110 | + .filter((id) => id.length > 0 && id !== "kiln_cost_section"), |
| 111 | + ), |
| 112 | + ] |
| 113 | + } |
97 | 114 | } |
98 | 115 |
|
99 | 116 | // Restore model selections from URL after data is loaded |
|
137 | 154 | ) |
138 | 155 | urlParams.set("models", modelIds.join(",")) |
139 | 156 |
|
| 157 | + // Update hidden evals (omit param when none are hidden to keep URL clean) |
| 158 | + if (hiddenEvalIds.length > 0) { |
| 159 | + urlParams.set("hidden_evals", hiddenEvalIds.join(",")) |
| 160 | + } else { |
| 161 | + urlParams.delete("hidden_evals") |
| 162 | + } |
| 163 | +
|
140 | 164 | // Use replace to avoid creating new history entries |
141 | 165 | const newURL = `${$page.url.pathname}?${urlParams.toString()}` |
142 | 166 | goto(newURL, { replaceState: true }) |
143 | 167 | } |
144 | 168 |
|
145 | 169 | // Reactive statements to update URL when state changes |
146 | | - $: if (!isInitializing && (columns || selectedModels)) { |
| 170 | + $: if (!isInitializing && (columns || selectedModels || hiddenEvalIds)) { |
147 | 171 | updateURL() |
148 | 172 | } |
149 | 173 |
|
|
323 | 347 | eval_scores_cache, |
324 | 348 | ) |
325 | 349 |
|
// Returns the feature sections that should be rendered, dropping every section
// whose `eval_id` appears in `hidden`. The "kiln_cost_section" section is never
// hideable and is always kept, even if its ID is present in `hidden`.
//
// `hidden` is passed in as a parameter (rather than read via closure) so that
// Svelte's reactive `$:` statements track it as a dependency and re-run when it
// changes.
//
// When nothing is hidden the input array itself is returned (no copy).
function filterVisibleFeatures<T extends { eval_id: string }>(
  features: T[],
  hidden: string[],
): T[] {
  if (hidden.length === 0) return features
  // Build the lookup once so each section costs a constant-time membership
  // check instead of a linear scan of `hidden`.
  const hiddenIds = new Set(hidden)
  return features.filter(
    (section) =>
      section.eval_id === "kiln_cost_section" ||
      !hiddenIds.has(section.eval_id),
  )
}
| 364 | +
|
| 365 | + $: visibleComparisonFeatures = filterVisibleFeatures( |
| 366 | + comparisonFeatures, |
| 367 | + hiddenEvalIds, |
| 368 | + ) |
| 369 | + $: visibleChartComparisonFeatures = filterVisibleFeatures( |
| 370 | + chartComparisonFeatures, |
| 371 | + hiddenEvalIds, |
| 372 | + ) |
| 373 | +
|
// Display info ({ eval_id, category }) for every currently-hidden eval; this
// feeds the "show hidden" dropdown. Names are resolved from
// chartComparisonFeatures only: it is built from ALL run configs for the task
// and is a superset of comparisonFeatures (which only covers selected models).
// A hidden ID with no matching feature falls back to the "Unknown eval" label.
$: hiddenEvalsInfo = hiddenEvalIds
  .filter((hiddenId) => hiddenId !== "kiln_cost_section")
  .map((hiddenId) => ({
    eval_id: hiddenId,
    category:
      chartComparisonFeatures.find(
        (candidate) => candidate.eval_id === hiddenId,
      )?.category ?? "Unknown eval",
  }))
| 384 | +
|
// Adds `evalId` to the hidden list. No-op for the cost section (never
// hideable) and for IDs that are already hidden, so the list stays free of
// duplicates.
function hideEval(evalId: string) {
  const isCostSection = evalId === "kiln_cost_section"
  if (isCostSection || hiddenEvalIds.includes(evalId)) return
  hiddenEvalIds = hiddenEvalIds.concat(evalId)
}
| 390 | +
|
// Un-hides a single eval by reassigning hiddenEvalIds to a copy without
// `evalId`. IDs that are not currently hidden leave the contents unchanged.
function showEval(evalId: string) {
  const stillHidden = hiddenEvalIds.filter((hiddenId) => hiddenId !== evalId)
  hiddenEvalIds = stillHidden
}
| 394 | +
|
// Un-hides every eval at once by resetting the hidden list to empty.
function showAllHiddenEvals() {
  const noneHidden: string[] = []
  hiddenEvalIds = noneHidden
}
| 398 | +
|
// Items for the "Hidden Evals" floating menu, in order:
//   1. a "Show Eval" header row,
//   2. one row per hidden eval (clicking it un-hides that eval),
//   3. a "Show All" row, only when more than one eval is hidden.
$: hiddenEvalsMenuItems = [
  { label: "Show Eval", header: true },
  ...hiddenEvalsInfo.map(
    ({ eval_id, category }): FloatingMenuItem => ({
      label: category,
      onclick: () => showEval(eval_id),
    }),
  ),
  ...(hiddenEvalsInfo.length > 1
    ? [{ label: "Show All", onclick: showAllHiddenEvals }]
    : []),
] as FloatingMenuItem[]
| 411 | +
|
326 | 412 | // Reactively fetch eval templates for sections |
327 | 413 | $: { |
328 | 414 | comparisonFeatures.forEach((section) => { |
|
688 | 774 | <div class="text-gray-600">Loading evaluation scores...</div> |
689 | 775 | </div> |
690 | 776 | {:else} |
691 | | - <!-- Add Column Button - positioned above table on the right --> |
692 | | - <div class="flex justify-end mb-4"> |
| 777 | + <!-- Table action buttons - positioned above table on the right --> |
| 778 | + <div class="flex justify-end gap-2 mb-4"> |
| 779 | + {#if hiddenEvalsInfo.length > 0} |
| 780 | + <div class="hidden-evals-dropdown"> |
| 781 | + <FloatingMenu items={hiddenEvalsMenuItems} width="w-72"> |
| 782 | + <button |
| 783 | + slot="trigger" |
| 784 | + type="button" |
| 785 | + class="btn btn-sm btn-outline" |
| 786 | + > |
| 787 | + Hidden Evals ({hiddenEvalsInfo.length}) |
| 788 | + </button> |
| 789 | + </FloatingMenu> |
| 790 | + </div> |
| 791 | + {/if} |
693 | 792 | {#if columns < MAX_COLUMNS} |
694 | 793 | <button |
695 | 794 | on:click={addColumn} |
|
808 | 907 |
|
809 | 908 | <!-- Comparison Data - only show if models are selected --> |
810 | 909 | {#if validSelectedModels.length > 0} |
811 | | - {#each comparisonFeatures as section} |
| 910 | + {#each visibleComparisonFeatures as section} |
812 | 911 | <!-- Section Header --> |
813 | | - <div class="bg-gray-50 px-6 py-3 border-b border-gray-200"> |
| 912 | + <div |
| 913 | + class="bg-gray-50 px-6 py-3 border-b border-gray-200 flex items-center justify-between gap-2" |
| 914 | + > |
814 | 915 | <h4 |
815 | 916 | class="text-sm font-semibold text-gray-900 uppercase tracking-wide" |
816 | 917 | > |
817 | 918 | {section.category} |
818 | 919 | </h4> |
{#if section.eval_id !== "kiln_cost_section"}
  <!-- Per-section hide control. The cost section never gets one because it
       is not hideable. type="button" keeps the click from acting as a form
       submit, and aria-label gives the glyph-only button an accessible name. -->
  <button
    type="button"
    on:click={() => hideEval(section.eval_id)}
    class="w-6 h-6 rounded-full flex items-center justify-center text-gray-500 hover:bg-gray-200 hover:text-gray-900 transition-colors"
    title="Hide this eval"
    aria-label="Hide this eval"
  >
    ✕
  </button>
{/if}
819 | 929 | </div> |
820 | 930 |
|
821 | 931 | {#if section.items.length == 0} |
|
1032 | 1142 | {#if validSelectedModels.length > 0} |
1033 | 1143 | <div class="mt-16"> |
1034 | 1144 | <CompareRadarChart |
1035 | | - {comparisonFeatures} |
| 1145 | + comparisonFeatures={visibleComparisonFeatures} |
1036 | 1146 | {getModelValueRaw} |
1037 | 1147 | run_configs={current_task_run_configs || []} |
1038 | 1148 | model_info={$model_info} |
|
1046 | 1156 |
|
1047 | 1157 | <div class="mt-16"> |
1048 | 1158 | <CompareChart |
1049 | | - comparisonFeatures={chartComparisonFeatures} |
| 1159 | + comparisonFeatures={visibleChartComparisonFeatures} |
1050 | 1160 | {getModelValueRaw} |
1051 | 1161 | run_configs={current_task_run_configs || []} |
1052 | 1162 | model_info={$model_info} |
|
1084 | 1194 | } |
1085 | 1195 | }} |
1086 | 1196 | /> |
| 1197 | + |
| 1198 | +<style> |
| 1199 | + .hidden-evals-dropdown :global(ul.menu li > button), |
| 1200 | + .hidden-evals-dropdown :global(ul.menu li > a) { |
| 1201 | + font-size: 0.875rem; |
| 1202 | + font-weight: 500; |
| 1203 | + color: rgb(17 24 39); |
| 1204 | + } |
| 1205 | +
|
| 1206 | + /* Render a gray "+" prefix on eval rows only. Excludes the header |
| 1207 | + (first-child) and the "Show All" footer (last-child at position 4+). */ |
| 1208 | + .hidden-evals-dropdown |
| 1209 | + :global( |
| 1210 | + ul.menu |
| 1211 | + li:not(:first-child):not(:last-child:nth-child(n + 4)) |
| 1212 | + > button::before |
| 1213 | + ) { |
| 1214 | + content: "+"; |
| 1215 | + color: rgb(107 114 128); |
| 1216 | + margin-right: 0.375rem; |
| 1217 | + font-weight: 400; |
| 1218 | + } |
| 1219 | +
|
| 1220 | + /* "Show All" footer styling — gray-500. */ |
| 1221 | + .hidden-evals-dropdown |
| 1222 | + :global(ul.menu li:last-child:nth-child(n + 4) > button) { |
| 1223 | + color: rgb(107 114 128); |
| 1224 | + } |
| 1225 | +
|
| 1226 | + /* Divider before the "Show All" footer. nth-child(n+4) ensures we |
| 1227 | + only render it when the list has header + 2+ evals + "Show All" footer. */ |
| 1228 | + .hidden-evals-dropdown :global(ul.menu li:last-child:nth-child(n + 4)) { |
| 1229 | + border-top: 1px solid rgb(209 213 219); |
| 1230 | + margin-top: 0.5rem; |
| 1231 | + padding-top: 0.5rem; |
| 1232 | + } |
| 1233 | +</style> |
0 commit comments