@@ -314,7 +314,7 @@ function renderFrontierChart(data) {
314314 bodyFont : { family : "'Fira Code', monospace" , size : 16 } ,
315315 callbacks : {
316316 title : ( items ) => labels [ items [ 0 ] . dataIndex ] || '' ,
317- label : c2 => `${ c2 . dataset . label } : ${ c2 . parsed . y !== null ? c2 . parsed . y . toFixed ( 1 ) : '-' } ` ,
317+ label : c2 => `${ getAgentDisplayLabel ( data , c2 . dataset . label ) } : ${ c2 . parsed . y !== null ? c2 . parsed . y . toFixed ( 1 ) : '-' } ` ,
318318 } ,
319319 } ,
320320 } ,
@@ -348,8 +348,6 @@ function renderFrontierChart(data) {
348348
349349 // Custom HTML legend with logos
350350 const legendEl = document . getElementById ( 'chart-legend' ) ;
351- const agentModelLabels = { } ;
352- data . agents . forEach ( agent => { agentModelLabels [ agent ] = getAgentModelLabel ( data , agent ) ; } ) ;
353351 legendEl . innerHTML = frontierChart . data . datasets . map ( ( ds , i ) => {
354352 const logo = getAgentLogo ( ds . label ) ;
355353 const logoHtml = logo ? `<img src="${ logo } " alt="">` : '' ;
@@ -359,12 +357,24 @@ function renderFrontierChart(data) {
359357 if ( ds . label . startsWith ( 'Human' ) ) {
360358 return `<div class="chart-legend-item"><span class="chart-legend-swatch dashed" style="border-color:${ ds . borderColor } "></span>${ ds . label } </div>` ;
361359 }
362- const modelLabel = agentModelLabels [ ds . label ] ;
360+ const displayLabel = getAgentDisplayLabel ( data , ds . label ) ;
361+ const modelLabel = getAgentSecondaryLabel ( data , ds . label ) ;
363362 const textHtml = modelLabel
364- ? `<span class="chart-legend-text"><span>${ esc ( ds . label ) } </span><span class="chart-legend-model">${ esc ( modelLabel ) } </span></span>`
365- : `<span>${ esc ( ds . label ) } </span>` ;
363+ ? `<span class="chart-legend-text"><span>${ esc ( displayLabel ) } </span><span class="chart-legend-model">${ esc ( modelLabel ) } </span></span>`
364+ : `<span>${ esc ( displayLabel ) } </span>` ;
366365 return `<div class="chart-legend-item">${ logoHtml } <span class="chart-legend-swatch" style="background:${ ds . borderColor } "></span>${ textHtml } </div>` ;
367366 } ) . join ( '' ) ;
367+
368+ const card = ctx . closest ( '.card' ) ;
369+ if ( card ) {
370+ let noteEl = card . querySelector ( '.dashboard-footnote.frontier-footnote' ) ;
371+ if ( ! noteEl ) {
372+ noteEl = document . createElement ( 'div' ) ;
373+ noteEl . className = 'dashboard-footnote frontier-footnote' ;
374+ ctx . parentElement . insertAdjacentElement ( 'afterend' , noteEl ) ;
375+ }
376+ noteEl . innerHTML = researchHarnessFootnoteHtml ( ) ;
377+ }
368378}
369379
370380function renderLeaderboard ( data ) {
@@ -396,11 +406,12 @@ function renderLeaderboard(data) {
396406 if ( ! costText ) return `<span class="leaderboard-cell-meta"><span>${ timeText } </span></span>` ;
397407 return `<span class="leaderboard-cell-meta"><span>${ costText } </span><span>${ timeText } </span></span>` ;
398408 }
/**
 * Render one leaderboard score cell as a `<td>` HTML string.
 *
 * @param {{score: number, run_id?: string}|null|undefined} entry - Score entry for the cell.
 * @param {boolean} clickable - When true, the cell gets an onclick that calls goToRun(run_id).
 * @param {string} [extraClass=''] - Extra class appended to the cell (e.g. a group divider class).
 * @returns {string} Markup for a single table cell.
 */
function renderScoreBlock(entry, clickable, extraClass = '') {
  const tdClass = `leaderboard-score-td${extraClass ? ` ${extraClass}` : ''}`;

  // Every path returns a <td>: callers append the result straight into a <tr>,
  // so a bare <span> here would be invalid table content and would drop the
  // divider class for this column.
  if (!entry || !Number.isFinite(entry.score)) {
    return `<td class="${tdClass}"><span class="score-cell score-cell-empty">-</span></td>`;
  }

  const scoreHtml = `<span class="score-cell" style="${cellStyle(entry.score)}">${entry.score.toFixed(1)}</span>`;
  const inner = `<div class="leaderboard-score-wrap">${scoreHtml}${renderMetricLines(entry)}</div>`;

  // NOTE(review): run_id is interpolated unescaped into an inline handler;
  // presumably run ids never contain quotes — confirm against the data source.
  const clickAttr = clickable ? ` onclick="goToRun('${entry.run_id}')"` : '';
  return `<td class="${tdClass}"${clickAttr}>${inner}</td>`;
}
405416 function averageEntry ( entries ) {
406417 const scored = entries . filter ( e => Number . isFinite ( e ?. score ) ) ;
@@ -417,6 +428,26 @@ function renderLeaderboard(data) {
417428 . filter ( Boolean )
418429 . reduce ( ( best , entry ) => ! best || entry . score > best . score ? entry : best , null ) ;
419430 }
/**
 * Average score of one agent across every task it has an entry for.
 *
 * @param {string} agent - Key into `data.scores`.
 * @returns {number} The averaged score, or -Infinity when `averageEntry`
 *   yields nothing (so such agents sort last in a descending sort).
 */
function averageScoreForAgent(agent) {
  const entries = data.tasks
    .map(task => data.scores[agent]?.[task])
    .filter(Boolean);
  return averageEntry(entries)?.score ?? -Infinity;
}
/**
 * Partition agent names into regular agents and ResearchHarness-backed LLMs.
 *
 * Regular agents keep their incoming order. LLMs are sorted by average score
 * (highest first), with ties broken by display label.
 *
 * @param {string[]} list - Agent names to partition.
 * @returns {{agents: string[], llms: string[]}} The two groups.
 */
function splitAgentGroups(list) {
  const agents = [];
  const llms = [];
  list.forEach(name => {
    (isResearchHarnessAgent(name) ? llms : agents).push(name);
  });

  // Compute each average once instead of on every comparator call.
  const averages = new Map(llms.map(name => [name, averageScoreForAgent(name)]));

  llms.sort((a, b) => {
    const diff = averages.get(b) - averages.get(a);
    // A zero or NaN diff (e.g. both -Infinity) falls through to the label tie-break.
    if (diff) return diff;
    return getAgentDisplayLabel(data, a).localeCompare(getAgentDisplayLabel(data, b));
  });

  return { agents, llms };
}
420451 function renderSummaryCell ( entry ) {
421452 if ( ! entry || ! Number . isFinite ( entry . score ) ) return '<td class="no-score leaderboard-static-cell">-</td>' ;
422453 const scoreHtml = `<span class="score-cell" style="${ cellStyle ( entry . score ) } ">${ entry . score . toFixed ( 1 ) } </span>` ;
@@ -450,52 +481,68 @@ function renderLeaderboard(data) {
450481 ) ;
451482 return { agent, overall, domains : domainsMap } ;
452483 } ) ;
453- rows . sort ( ( a , b ) => {
484+ const sortRows = rowsToSort => rowsToSort . sort ( ( a , b ) => {
454485 const av = Number . isFinite ( a . overall ?. score ) ? a . overall . score : - Infinity ;
455486 const bv = Number . isFinite ( b . overall ?. score ) ? b . overall . score : - Infinity ;
456487 if ( bv !== av ) return bv - av ;
457- return a . agent . localeCompare ( b . agent ) ;
488+ return getAgentDisplayLabel ( data , a . agent ) . localeCompare ( getAgentDisplayLabel ( data , b . agent ) ) ;
458489 } ) ;
459- return { domains, rows } ;
490+ return {
491+ domains,
492+ agentRows : sortRows ( rows . filter ( row => ! isResearchHarnessAgent ( row . agent ) ) ) ,
493+ llmRows : sortRows ( rows . filter ( row => isResearchHarnessAgent ( row . agent ) ) ) ,
494+ } ;
460495 }
461496
497+ const groupedAgents = splitAgentGroups ( data . agents ) ;
498+ const orderedTaskAgents = [ ...groupedAgents . agents , ...groupedAgents . llms ] ;
499+ const firstLlmAgent = groupedAgents . agents . length && groupedAgents . llms . length ? groupedAgents . llms [ 0 ] : '' ;
462500 const domainSummary = summarizeByDomain ( ) ;
463501
464- let summaryHtml = '<table class="leaderboard leaderboard-summary"><thead><tr><th>Agent</th><th>Overall</th>' ;
502+ let summaryHtml = '<table class="leaderboard leaderboard-summary"><thead><tr><th>Agent/LLM </th><th>Overall</th>' ;
465503 domainSummary . domains . forEach ( domain => {
466504 summaryHtml += `<th>${ esc ( domain ) } </th>` ;
467505 } ) ;
468506 summaryHtml += '</tr></thead><tbody>' ;
469- domainSummary . rows . forEach ( ( row , index ) => {
470- const modelLabel = getAgentModelLabel ( data , row . agent ) ;
471- const modelHtml = modelLabel ? `<span class="leaderboard-agent-model">${ esc ( modelLabel ) } </span>` : '' ;
472- const medal = Number . isFinite ( row . overall ?. score ) && index < 3 ? [ '🥇' , '🥈' , '🥉' ] [ index ] : '' ;
473- const medalHtml = medal ? `<span class="leaderboard-medal" aria-hidden="true">${ medal } </span>` : '' ;
474- summaryHtml += `<tr><td><div class="leaderboard-agent-row"><span class="leaderboard-agent-name">${ medalHtml } ${ agentLogoHtml ( row . agent , 18 ) } <span>${ esc ( row . agent ) } </span></span>${ modelHtml } </div></td>` ;
475- summaryHtml += renderSummaryCell ( row . overall ) ;
476- domainSummary . domains . forEach ( domain => {
477- summaryHtml += renderSummaryCell ( row . domains [ domain ] ) ;
/**
 * Append one `<tr>` per summary row to the enclosing `summaryHtml` string.
 *
 * Medals are assigned by position within the given `rows` list, so each group
 * passed in is ranked independently.
 *
 * @param {Array<{agent: string, overall: object, domains: object}>} rows - Rows already in display order.
 * @param {boolean} addDivider - When true, the first row gets the group-start class.
 */
function appendSummaryRows(rows, addDivider) {
  const medals = ['🥇', '🥈', '🥉'];
  rows.forEach((row, index) => {
    const rowClass = addDivider && index === 0 ? ' class="leaderboard-group-start-row"' : '';
    const displayLabel = getAgentDisplayLabel(data, row.agent);
    const modelLabel = getAgentSecondaryLabel(data, row.agent);
    const modelHtml = modelLabel ? `<span class="leaderboard-agent-model">${esc(modelLabel)}</span>` : '';
    // Only rows with a finite overall score are eligible for a medal.
    const medal = Number.isFinite(row.overall?.score) && index < medals.length ? medals[index] : '';
    const medalHtml = medal ? `<span class="leaderboard-medal" aria-hidden="true">${medal}</span>` : '';
    summaryHtml += `<tr${rowClass}><td><div class="leaderboard-agent-row"><span class="leaderboard-agent-name">${medalHtml}${agentLogoHtml(row.agent, 18)}<span>${esc(displayLabel)}</span></span>${modelHtml}</div></td>`;
    summaryHtml += renderSummaryCell(row.overall);
    domainSummary.domains.forEach(domain => {
      summaryHtml += renderSummaryCell(row.domains[domain]);
    });
    summaryHtml += '</tr>';
  });
}
523+ appendSummaryRows ( domainSummary . agentRows , false ) ;
524+ appendSummaryRows ( domainSummary . llmRows , domainSummary . agentRows . length > 0 ) ;
481525 summaryHtml += '</tbody></table>' ;
482526
483527 let taskHtml = '<table class="leaderboard"><thead><tr><th>Task</th>' ;
484- data . agents . forEach ( a => {
485- const modelLabel = getAgentModelLabel ( data , a ) ;
528+ orderedTaskAgents . forEach ( a => {
529+ const displayLabel = getAgentDisplayLabel ( data , a ) ;
530+ const modelLabel = getAgentSecondaryLabel ( data , a ) ;
486531 const modelHtml = modelLabel ? `<span class="leaderboard-agent-model">${ esc ( modelLabel ) } </span>` : '' ;
487- taskHtml += `<th><div class="leaderboard-agent-head">${ agentLogoHtml ( a , 20 ) } <span class="leaderboard-agent-name">${ esc ( a ) } </span>${ modelHtml } </div></th>` ;
532+ const dividerClass = a === firstLlmAgent ? ' class="leaderboard-group-divider-left"' : '' ;
533+ taskHtml += `<th${ dividerClass } ><div class="leaderboard-agent-head">${ agentLogoHtml ( a , 20 ) } <span class="leaderboard-agent-name">${ esc ( displayLabel ) } </span>${ modelHtml } </div></th>` ;
488534 } ) ;
489535 taskHtml += '<th>Frontier</th></tr></thead><tbody>' ;
490536
491537 data . tasks . forEach ( task => {
492538 taskHtml += `<tr><td>${ esc ( task ) } </td>` ;
493- data . agents . forEach ( agent => {
539+ orderedTaskAgents . forEach ( agent => {
494540 const entry = data . scores [ agent ] ?. [ task ] ;
541+ const dividerClass = agent === firstLlmAgent ? 'leaderboard-group-divider-left' : '' ;
495542 if ( entry ) {
496- taskHtml += renderScoreBlock ( entry , true ) ;
543+ taskHtml += renderScoreBlock ( entry , true , dividerClass ) ;
497544 } else {
498- taskHtml += ' <td class="no-score">-</td>' ;
545+ taskHtml += ` <td class="no-score${ dividerClass ? ` ${ dividerClass } ` : '' } ">-</td>` ;
499546 }
500547 } ) ;
501548 const frontier = frontierEntry ( task ) ;
@@ -509,13 +556,14 @@ function renderLeaderboard(data) {
509556
510557 // Average row — only count tasks that have scores
511558 taskHtml += '<tr class="frontier-row"><td>Average</td>' ;
512- data . agents . forEach ( agent => {
559+ orderedTaskAgents . forEach ( agent => {
513560 const avgEntry = averageEntry ( data . tasks . map ( t => data . scores [ agent ] ?. [ t ] ) . filter ( Boolean ) ) ;
561+ const dividerClass = agent === firstLlmAgent ? 'leaderboard-group-divider-left' : '' ;
514562 if ( ! avgEntry ) {
515- taskHtml += ' <td class="no-score">-</td>' ;
563+ taskHtml += ` <td class="no-score${ dividerClass ? ` ${ dividerClass } ` : '' } ">-</td>` ;
516564 return ;
517565 }
518- taskHtml += renderScoreBlock ( avgEntry , false ) ;
566+ taskHtml += renderScoreBlock ( avgEntry , false , dividerClass ) ;
519567 } ) ;
520568 const frontierAvgEntry = averageEntry ( data . tasks . map ( frontierEntry ) . filter ( Boolean ) ) ;
521569 if ( frontierAvgEntry ) {
@@ -529,7 +577,8 @@ function renderLeaderboard(data) {
529577 <div class="leaderboard-stack">
530578 ${ renderSection ( 'summary' , 'By Domain' , summaryHtml , 'Slide to view more domains' ) }
531579 ${ renderSection ( 'task' , 'By Task' , taskHtml , 'Slide to view more agents' , '<span class="leaderboard-note-icon" aria-hidden="true">👉</span> Click any scored cell to jump to run details' ) }
532- </div>` ;
580+ </div>
581+ <div class="dashboard-footnote leaderboard-footnote">${ researchHarnessFootnoteHtml ( ) } </div>` ;
533582
534583 container . innerHTML = html ;
535584 syncLeaderboardScrollbars ( ) ;
@@ -1784,8 +1833,28 @@ function getAgentBaseLabel(name) {
17841833 return m ? m [ 1 ] : String ( name ) ;
17851834}
17861835
/**
 * Map a model name to the logo asset of its vendor, based on the name's prefix.
 *
 * @param {string|null|undefined} model - Model name such as "GPT-5" or "Claude Opus".
 * @returns {string} Relative logo path, or '' when the name is empty or unrecognized.
 */
function getModelLogo(model) {
  // Trim so stray surrounding whitespace does not defeat the ^ anchors.
  const label = String(model || '').trim();
  if (!label) return '';
  const mappings = [
    [/^GPT\b/i, 'static/logos/openai.svg'],
    [/^Claude\b/i, 'static/logos/anthropic.svg'],
    // No word boundary: names such as "Qwen3" continue directly with a digit.
    [/^Qwen/i, 'static/logos/qwen.png'],
    [/^GLM\b/i, 'static/logos/glm.webp'],
    [/^Kimi\b/i, 'static/logos/kimi.png'],
    [/^MiMo\b/i, 'static/logos/mimo.png'],
    [/^Grok\b/i, 'static/logos/grok.png'],
  ];
  const match = mappings.find(([pattern]) => pattern.test(label));
  return match ? match[1] : '';
}
1851+
/**
 * Resolve the logo path for an agent name.
 *
 * Lookup order:
 *  1. an exact entry in `state.agentLogos`;
 *  2. for ResearchHarness agents: the wrapped model's logo, then the
 *     base-label entry in `state.agentLogos`;
 *  3. for all other agents: the base-label entry, then a model logo derived
 *     from the name itself.
 *
 * @param {string} name - Agent name as used in the dataset labels.
 * @returns {string} Logo path, or '' when nothing matches.
 */
function getAgentLogo(name) {
  const exact = state.agentLogos[name];
  if (exact) return exact;

  const baseLogo = state.agentLogos[getAgentBaseLabel(name)];
  if (isResearchHarnessAgent(name)) {
    // NOTE(review): no dataset is available here, so null is passed as `data`;
    // confirm getResearchHarnessModelName's fallback path tolerates that.
    return getModelLogo(getResearchHarnessModelName(null, name)) || baseLogo || '';
  }
  return baseLogo || getModelLogo(name) || '';
}
17901859
17911860function agentLogoHtml ( name , size = 16 ) {
@@ -1833,6 +1902,33 @@ function getAgentModelLabel(data, agent) {
18331902 return labels [ 0 ] ;
18341903}
18351904
/**
 * Tell whether an agent name denotes a ResearchHarness run.
 *
 * The match is case-sensitive and requires a word boundary right after the
 * "ResearchHarness" prefix.
 *
 * @param {string|null|undefined} name - Agent name to test.
 * @returns {boolean} True when the name starts with the word "ResearchHarness".
 */
function isResearchHarnessAgent(name) {
  return /^ResearchHarness\b/.test(String(name || ''));
}
1908+
/**
 * Extract the model name from a "ResearchHarness(<model>)" agent name.
 *
 * When the name carries no parenthesized model, falls back to
 * `getAgentModelLabel(data, agent)`; without a dataset there is nothing to
 * look up, so '' is returned.
 *
 * @param {object|null|undefined} data - Dashboard dataset used by the fallback lookup; may be null.
 * @param {string|null|undefined} agent - Agent name.
 * @returns {string} The model name, or '' when none can be determined.
 */
function getResearchHarnessModelName(data, agent) {
  // Optional whitespace around the parentheses is tolerated so both
  // "ResearchHarness(GPT-5)" and "ResearchHarness (GPT-5)" resolve.
  const match = String(agent || '').match(/^ResearchHarness\s*\((.+)\)\s*$/);
  if (match) return match[1].trim();
  // Callers may pass null when no dataset is at hand; skip the lookup then
  // rather than forwarding null to getAgentModelLabel.
  if (data == null) return '';
  return getAgentModelLabel(data, agent) || '';
}
1914+
/**
 * Label shown to the user for an agent.
 *
 * "Frontier", names starting with "Human", and non-ResearchHarness agents are
 * shown under their own name. ResearchHarness agents are shown under the model
 * name, falling back to the agent name when no model name is found.
 *
 * @param {object|null|undefined} data - Dashboard dataset, forwarded to the model-name lookup.
 * @param {string|null|undefined} agent - Agent name.
 * @returns {string} Display label ('' for an empty or missing agent).
 */
function getAgentDisplayLabel(data, agent) {
  const name = String(agent || '');
  const keepsOwnName = name === 'Frontier'
    || name.startsWith('Human')
    || !isResearchHarnessAgent(agent);
  if (keepsOwnName) return name;
  return getResearchHarnessModelName(data, agent) || name;
}
1920+
/**
 * Secondary (model) label shown next to an agent's display label.
 *
 * ResearchHarness agents get none, because their display label already is the
 * model name. Other agents get their model label unless it is missing or would
 * merely repeat the display label.
 *
 * @param {object} data - Dashboard dataset passed to the label lookups.
 * @param {string} agent - Agent name.
 * @returns {string} The model label, or '' when nothing should be shown.
 */
function getAgentSecondaryLabel(data, agent) {
  if (isResearchHarnessAgent(agent)) return '';
  const modelLabel = getAgentModelLabel(data, agent);
  const repeatsDisplayLabel = modelLabel === getAgentDisplayLabel(data, agent);
  return modelLabel && !repeatsDisplayLabel ? modelLabel : '';
}
1927+
/**
 * HTML for the footnote explaining how standalone LLM results were produced.
 *
 * The markup is a fixed string with no interpolated data, so callers can
 * assign it to innerHTML as-is.
 *
 * @returns {string} Footnote markup containing a link to the ResearchHarness repository.
 */
function researchHarnessFootnoteHtml() {
  return 'Note: All standalone LLM results below are evaluated with <a href="https://github.com/black-yt/ResearchHarness" target="_blank" rel="noopener noreferrer">ResearchHarness</a>.';
}
1931+
18361932let _durationTimer = null ;
18371933let _durationStart = null ;
18381934
0 commit comments