Skip to content

Commit 568402a

Browse files
authored
Merge pull request #1411 from Kiln-AI/mike/add-hide-button-to-compare
Add Hide/Show button for evals on the compare screen
2 parents bbf5aa6 + f5b9400 commit 568402a

3 files changed

Lines changed: 183 additions & 29 deletions

File tree

app/web_ui/src/lib/ui/floating_menu.svelte

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -104,28 +104,34 @@
104104
<Float {placement} strategy="fixed">
105105
<ul class="menu bg-base-100 rounded-box p-2 shadow z-[1] {width}">
106106
{#each visibleItems as item}
107-
<li>
108-
{#if item.href}
109-
<a
110-
href={item.href}
111-
target={item.target}
112-
rel={item.rel}
113-
on:click|stopPropagation={() => {
114-
item.onclick?.()
115-
close()
116-
}}
117-
>
118-
{item.label}
119-
</a>
120-
{:else}
121-
<button
122-
type="button"
123-
on:click={(e) => handleItemClick(e, item)}
124-
>
125-
{item.label}
126-
</button>
127-
{/if}
128-
</li>
107+
{#if item.header}
108+
<li class="menu-title">
109+
<span>{item.label}</span>
110+
</li>
111+
{:else}
112+
<li>
113+
{#if item.href}
114+
<a
115+
href={item.href}
116+
target={item.target}
117+
rel={item.rel}
118+
on:click|stopPropagation={() => {
119+
item.onclick?.()
120+
close()
121+
}}
122+
>
123+
{item.label}
124+
</a>
125+
{:else}
126+
<button
127+
type="button"
128+
on:click={(e) => handleItemClick(e, item)}
129+
>
130+
{item.label}
131+
</button>
132+
{/if}
133+
</li>
134+
{/if}
129135
{/each}
130136
</ul>
131137
</Float>

app/web_ui/src/lib/ui/floating_menu_types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ export type FloatingMenuItem = {
55
rel?: string
66
onclick?: () => void
77
hidden?: boolean
8+
header?: boolean
89
}

app/web_ui/src/routes/(app)/specs/[project_id]/[task_id]/compare/+page.svelte

Lines changed: 154 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
import CreateNewRunConfigDialog from "$lib/ui/run_config_component/create_new_run_config_dialog.svelte"
4141
import SavedRunConfigurationsDropdown from "$lib/ui/run_config_component/saved_run_configs_dropdown.svelte"
4242
import RunEval from "$lib/components/run_eval.svelte"
43+
import FloatingMenu from "$lib/ui/floating_menu.svelte"
44+
import type { FloatingMenuItem } from "$lib/ui/floating_menu_types"
4345
4446
import { agentInfo } from "$lib/agent"
4547
$: project_id = $page.params.project_id!
@@ -53,6 +55,7 @@
5355
// State management
5456
let columns = 2 // Start with 2 columns
5557
let selectedModels: (string | null)[] = [null, null] // Track selected model for each column
58+
let hiddenEvalIds: string[] = [] // Eval IDs hidden by the user (kiln_cost_section is never hideable)
5659
5760
// Run configs state
5861
let loading_run_configs = true
@@ -94,6 +97,20 @@
9497
9598
// Initialize selectedModels array with correct length
9699
selectedModels = new Array(columns).fill(null)
100+
101+
// Hidden evals can be restored before run configs are loaded - they are just IDs.
102+
// Defensive: drop the cost section ID + dedupe in case a hand-edited URL is messy.
103+
const urlHidden = urlParams.get("hidden_evals")
104+
if (urlHidden) {
105+
hiddenEvalIds = [
106+
...new Set(
107+
urlHidden
108+
.split(",")
109+
.map((id) => id.trim())
110+
.filter((id) => id.length > 0 && id !== "kiln_cost_section"),
111+
),
112+
]
113+
}
97114
}
98115
99116
// Restore model selections from URL after data is loaded
@@ -137,13 +154,20 @@
137154
)
138155
urlParams.set("models", modelIds.join(","))
139156
157+
// Update hidden evals (omit param when none are hidden to keep URL clean)
158+
if (hiddenEvalIds.length > 0) {
159+
urlParams.set("hidden_evals", hiddenEvalIds.join(","))
160+
} else {
161+
urlParams.delete("hidden_evals")
162+
}
163+
140164
// Use replace to avoid creating new history entries
141165
const newURL = `${$page.url.pathname}?${urlParams.toString()}`
142166
goto(newURL, { replaceState: true })
143167
}
144168
145169
// Reactive statements to update URL when state changes
146-
$: if (!isInitializing && (columns || selectedModels)) {
170+
$: if (!isInitializing && (columns || selectedModels || hiddenEvalIds)) {
147171
updateURL()
148172
}
149173
@@ -323,6 +347,68 @@
323347
eval_scores_cache,
324348
)
325349
350+
// Filter out user-hidden evals (cost section is never hideable). hiddenEvalIds is
351+
// passed in as a parameter (rather than read via closure) so that Svelte's reactive
352+
// `$:` statements track it as a dependency and re-run when it changes.
353+
function filterVisibleFeatures<T extends { eval_id: string }>(
354+
features: T[],
355+
hidden: string[],
356+
): T[] {
357+
if (hidden.length === 0) return features
358+
return features.filter(
359+
(section) =>
360+
section.eval_id === "kiln_cost_section" ||
361+
!hidden.includes(section.eval_id),
362+
)
363+
}
364+
365+
$: visibleComparisonFeatures = filterVisibleFeatures(
366+
comparisonFeatures,
367+
hiddenEvalIds,
368+
)
369+
$: visibleChartComparisonFeatures = filterVisibleFeatures(
370+
chartComparisonFeatures,
371+
hiddenEvalIds,
372+
)
373+
374+
// Names of currently-hidden evals (used for the "show hidden" dropdown).
375+
// chartComparisonFeatures is built from ALL run configs for the task and is a
376+
// superset of comparisonFeatures (which only covers selected models), so it
377+
// alone is enough to resolve display names.
378+
$: hiddenEvalsInfo = hiddenEvalIds
379+
.filter((id) => id !== "kiln_cost_section")
380+
.map((evalId) => {
381+
const feature = chartComparisonFeatures.find((s) => s.eval_id === evalId)
382+
return { eval_id: evalId, category: feature?.category ?? "Unknown eval" }
383+
})
384+
385+
function hideEval(evalId: string) {
386+
if (evalId === "kiln_cost_section") return
387+
if (hiddenEvalIds.includes(evalId)) return
388+
hiddenEvalIds = [...hiddenEvalIds, evalId]
389+
}
390+
391+
function showEval(evalId: string) {
392+
hiddenEvalIds = hiddenEvalIds.filter((id) => id !== evalId)
393+
}
394+
395+
function showAllHiddenEvals() {
396+
hiddenEvalIds = []
397+
}
398+
399+
$: hiddenEvalsMenuItems = [
400+
{ label: "Show Eval", header: true },
401+
...hiddenEvalsInfo.map(
402+
(info): FloatingMenuItem => ({
403+
label: info.category,
404+
onclick: () => showEval(info.eval_id),
405+
}),
406+
),
407+
...(hiddenEvalsInfo.length > 1
408+
? [{ label: "Show All", onclick: showAllHiddenEvals }]
409+
: []),
410+
] as FloatingMenuItem[]
411+
326412
// Reactively fetch eval templates for sections
327413
$: {
328414
comparisonFeatures.forEach((section) => {
@@ -688,8 +774,21 @@
688774
<div class="text-gray-600">Loading evaluation scores...</div>
689775
</div>
690776
{:else}
691-
<!-- Add Column Button - positioned above table on the right -->
692-
<div class="flex justify-end mb-4">
777+
<!-- Table action buttons - positioned above table on the right -->
778+
<div class="flex justify-end gap-2 mb-4">
779+
{#if hiddenEvalsInfo.length > 0}
780+
<div class="hidden-evals-dropdown">
781+
<FloatingMenu items={hiddenEvalsMenuItems} width="w-72">
782+
<button
783+
slot="trigger"
784+
type="button"
785+
class="btn btn-sm btn-outline"
786+
>
787+
Hidden Evals ({hiddenEvalsInfo.length})
788+
</button>
789+
</FloatingMenu>
790+
</div>
791+
{/if}
693792
{#if columns < MAX_COLUMNS}
694793
<button
695794
on:click={addColumn}
@@ -808,14 +907,25 @@
808907

809908
<!-- Comparison Data - only show if models are selected -->
810909
{#if validSelectedModels.length > 0}
811-
{#each comparisonFeatures as section}
910+
{#each visibleComparisonFeatures as section}
812911
<!-- Section Header -->
813-
<div class="bg-gray-50 px-6 py-3 border-b border-gray-200">
912+
<div
913+
class="bg-gray-50 px-6 py-3 border-b border-gray-200 flex items-center justify-between gap-2"
914+
>
814915
<h4
815916
class="text-sm font-semibold text-gray-900 uppercase tracking-wide"
816917
>
817918
{section.category}
818919
</h4>
920+
{#if section.eval_id !== "kiln_cost_section"}
921+
<button
922+
on:click={() => hideEval(section.eval_id)}
923+
class="w-6 h-6 rounded-full flex items-center justify-center text-gray-500 hover:bg-gray-200 hover:text-gray-900 transition-colors"
924+
title="Hide this eval"
925+
>
926+
927+
</button>
928+
{/if}
819929
</div>
820930

821931
{#if section.items.length == 0}
@@ -1032,7 +1142,7 @@
10321142
{#if validSelectedModels.length > 0}
10331143
<div class="mt-16">
10341144
<CompareRadarChart
1035-
{comparisonFeatures}
1145+
comparisonFeatures={visibleComparisonFeatures}
10361146
{getModelValueRaw}
10371147
run_configs={current_task_run_configs || []}
10381148
model_info={$model_info}
@@ -1046,7 +1156,7 @@
10461156

10471157
<div class="mt-16">
10481158
<CompareChart
1049-
comparisonFeatures={chartComparisonFeatures}
1159+
comparisonFeatures={visibleChartComparisonFeatures}
10501160
{getModelValueRaw}
10511161
run_configs={current_task_run_configs || []}
10521162
model_info={$model_info}
@@ -1084,3 +1194,40 @@
10841194
}
10851195
}}
10861196
/>
1197+
1198+
<style>
1199+
.hidden-evals-dropdown :global(ul.menu li > button),
1200+
.hidden-evals-dropdown :global(ul.menu li > a) {
1201+
font-size: 0.875rem;
1202+
font-weight: 500;
1203+
color: rgb(17 24 39);
1204+
}
1205+
1206+
/* Render a gray "+" prefix on eval rows only. Excludes the header
1207+
(first-child) and the "Show All" footer (last-child at position 4+). */
1208+
.hidden-evals-dropdown
1209+
:global(
1210+
ul.menu
1211+
li:not(:first-child):not(:last-child:nth-child(n + 4))
1212+
> button::before
1213+
) {
1214+
content: "+";
1215+
color: rgb(107 114 128);
1216+
margin-right: 0.375rem;
1217+
font-weight: 400;
1218+
}
1219+
1220+
/* "Show All" footer styling — gray-500. */
1221+
.hidden-evals-dropdown
1222+
:global(ul.menu li:last-child:nth-child(n + 4) > button) {
1223+
color: rgb(107 114 128);
1224+
}
1225+
1226+
/* Divider before the "Show All" footer. nth-child(n+4) ensures we
1227+
only render it when the list has header + 2+ evals + show-all footer. */
1228+
.hidden-evals-dropdown :global(ul.menu li:last-child:nth-child(n + 4)) {
1229+
border-top: 1px solid rgb(209 213 219);
1230+
margin-top: 0.5rem;
1231+
padding-top: 0.5rem;
1232+
}
1233+
</style>

0 commit comments

Comments
 (0)