Skip to content

Commit 41e7b69

Browse files
committed
separating html out to shortcodes sourcebench
1 parent 18a94fa commit 41e7b69

9 files changed

Lines changed: 1204 additions & 298 deletions

File tree

content/posts/sourcebench.md

Lines changed: 12 additions & 298 deletions
Large diffs are not rendered by default.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<div class="chart-wrapper">
2+
<canvas id="deepseekChart"></canvas>
3+
</div>
4+
<div class="caption">Figure 3: DeepSeek experiment results.</div>
5+
</div>
6+
7+
<script>
  // Draws the Figure 3 DeepSeek comparison as a horizontal bar chart in #deepseekChart.
  //
  // Everything lives inside an IIFE: top-level `const` bindings are shared by every
  // classic <script> on the page, and declaring the same name twice is a SyntaxError
  // that stops the later script from running at all. Only what this chart needs is
  // declared here.
  (() => {
    // Chart.js global defaults; these affect every chart created afterwards on the page.
    Chart.defaults.font.family = "'Inter', sans-serif";
    Chart.defaults.color = '#475569';

    // One entry per bar: display name, score, and bar fill colour.
    const deepSeekData = [
      { name: 'Chat + Low Search', score: 70.1, color: '#cbd5e1' },
      { name: 'Reason + Low Search', score: 75.8, color: '#94a3b8' },
      { name: 'Chat + High Search', score: 75.9, color: '#8b5cf6' },
    ];

    new Chart(document.getElementById('deepseekChart'), {
      type: 'bar',
      data: {
        labels: deepSeekData.map(d => d.name),
        datasets: [{
          label: 'Score',
          data: deepSeekData.map(d => d.score),
          backgroundColor: deepSeekData.map(d => d.color),
          borderRadius: 4,
          barThickness: 24
        }]
      },
      options: {
        indexAxis: 'y', // categories on the y axis, so the bars run horizontally
        responsive: true,
        maintainAspectRatio: false, // height comes from the wrapping element
        plugins: { legend: { display: false } },
        // The value axis is clipped to 65-80 rather than starting at zero.
        scales: { x: { min: 65, max: 80, grid: { color: '#f1f5f9' } }, y: { grid: { display: false } } }
      }
    });
  })();
</script>
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
2+
<style>
  /* Heatmap styling for 4.3.
     The grid has nine columns: a fixed 30px label column followed by eight equal value columns. */
  .heatmap-container {
    display: grid;
    grid-template-columns: 30px repeat(8, 1fr);
    gap: 2px;
    margin: 2rem 0;
    font-family: 'Inter', sans-serif;
    font-size: 0.65rem;
  }

  /* Labels and value cells both centre their text in the grid track. */
  .heatmap-label,
  .heatmap-cell {
    display: flex;
    align-items: center;
    justify-content: center;
  }

  .heatmap-label {
    font-weight: 600;
    color: #64748b;
  }

  /* Value cells are square; the transform transition drives the hover zoom below. */
  .heatmap-cell {
    aspect-ratio: 1;
    border-radius: 2px;
    font-weight: 500;
    transition: transform 0.1s;
  }

  /* Colour scale: one neutral class for negative values, then eleven rose steps
     (.grad-0 through .grad-100). Text colour flips to white from .grad-40 upward. */
  .grad-neg { background-color: #f1f5f9; color: #94a3b8; } /* Slate 100 */
  .grad-0   { background-color: #fff1f2; color: #be123c; } /* Rose 50 */
  .grad-10  { background-color: #ffe4e6; color: #be123c; } /* Rose 100 */
  .grad-20  { background-color: #fecdd3; color: #be123c; } /* Rose 200 */
  .grad-30  { background-color: #fda4af; color: #881337; } /* Rose 300 */
  .grad-40  { background-color: #fb7185; color: #fff; }    /* Rose 400 */
  .grad-50  { background-color: #f43f5e; color: #fff; }    /* Rose 500 */
  .grad-60  { background-color: #e11d48; color: #fff; }    /* Rose 600 */
  .grad-70  { background-color: #be123c; color: #fff; }    /* Rose 700 */
  .grad-80  { background-color: #9f1239; color: #fff; }    /* Rose 800 */
  .grad-90  { background-color: #881337; color: #fff; }    /* Rose 900 */
  .grad-100 { background-color: #4c0519; color: #fff; }    /* Rose 950 */

  /* Hover: enlarge the cell and lift it above its neighbours. */
  .heatmap-cell:hover {
    z-index: 10;
    transform: scale(1.2);
    border-radius: 4px;
    box-shadow: 0 4px 6px -1px rgba(0,0,0,0.2);
  }
</style>
50+
51+
<figure>
  <div class="bg-white p-6 border border-gray-200 rounded-lg shadow-sm w-full max-w-lg mx-auto">
    <!-- Children flow into the 9-column grid in source order:
         a header row of labels, then eight rows of one label followed by eight values. -->
    <div class="heatmap-container">
      <!-- Header row: empty corner cell, then the eight column labels -->
      <div class="heatmap-label"></div>
      <div class="heatmap-label">CR</div>
      <div class="heatmap-label">FA</div>
      <div class="heatmap-label">NE</div>
      <div class="heatmap-label">AA</div>
      <div class="heatmap-label">FR</div>
      <div class="heatmap-label">OA</div>
      <div class="heatmap-label">DA</div>
      <div class="heatmap-label">LC</div>

      <!-- CR -->
      <div class="heatmap-label">CR</div>
      <div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-60">.61</div><div class="heatmap-cell grad-30">.31</div><div class="heatmap-cell grad-30">.32</div>
      <div class="heatmap-cell grad-0">.02</div><div class="heatmap-cell grad-20">.21</div><div class="heatmap-cell grad-20">.19</div><div class="heatmap-cell grad-10">.12</div>

      <!-- FA -->
      <div class="heatmap-label">FA</div>
      <div class="heatmap-cell grad-60">.61</div><div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-60">.67</div><div class="heatmap-cell grad-40">.44</div>
      <div class="heatmap-cell grad-10">.07</div><div class="heatmap-cell grad-40">.47</div><div class="heatmap-cell grad-50">.48</div><div class="heatmap-cell grad-30">.35</div>

      <!-- NE -->
      <div class="heatmap-label">NE</div>
      <div class="heatmap-cell grad-30">.31</div><div class="heatmap-cell grad-60">.67</div><div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-30">.31</div>
      <div class="heatmap-cell grad-0">.02</div><div class="heatmap-cell grad-40">.39</div><div class="heatmap-cell grad-40">.44</div><div class="heatmap-cell grad-40">.44</div>

      <!-- AA -->
      <div class="heatmap-label">AA</div>
      <div class="heatmap-cell grad-30">.32</div><div class="heatmap-cell grad-40">.44</div><div class="heatmap-cell grad-30">.31</div><div class="heatmap-cell grad-100">1.0</div>
      <div class="heatmap-cell grad-0">.05</div><div class="heatmap-cell grad-50">.53</div><div class="heatmap-cell grad-50">.48</div><div class="heatmap-cell grad-20">.22</div>

      <!-- FR -->
      <div class="heatmap-label">FR</div>
      <div class="heatmap-cell grad-0">.02</div><div class="heatmap-cell grad-10">.07</div><div class="heatmap-cell grad-0">.02</div><div class="heatmap-cell grad-0">.05</div>
      <div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-10">.10</div><div class="heatmap-cell grad-0">.05</div><div class="heatmap-cell grad-neg">-.03</div>

      <!-- OA -->
      <div class="heatmap-label">OA</div>
      <div class="heatmap-cell grad-20">.21</div><div class="heatmap-cell grad-40">.47</div><div class="heatmap-cell grad-40">.39</div><div class="heatmap-cell grad-50">.53</div>
      <div class="heatmap-cell grad-10">.10</div><div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-70">.73</div><div class="heatmap-cell grad-30">.36</div>

      <!-- DA -->
      <div class="heatmap-label">DA</div>
      <div class="heatmap-cell grad-20">.19</div><div class="heatmap-cell grad-50">.48</div><div class="heatmap-cell grad-40">.44</div><div class="heatmap-cell grad-50">.48</div>
      <div class="heatmap-cell grad-0">.05</div><div class="heatmap-cell grad-70">.73</div><div class="heatmap-cell grad-100">1.0</div><div class="heatmap-cell grad-40">.39</div>

      <!-- LC -->
      <div class="heatmap-label">LC</div>
      <div class="heatmap-cell grad-10">.12</div><div class="heatmap-cell grad-30">.35</div><div class="heatmap-cell grad-40">.44</div><div class="heatmap-cell grad-20">.22</div>
      <div class="heatmap-cell grad-neg">-.03</div><div class="heatmap-cell grad-30">.36</div><div class="heatmap-cell grad-40">.39</div><div class="heatmap-cell grad-100">1.0</div>
    </div>
  </div>
  <figcaption class="caption">
    Figure 4: Full Correlation Matrix. Colors represent correlation strength in 0.1 intervals.
  </figcaption>
</figure>
153+
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<div class="chart-wrapper">
2+
<canvas id="inverseChart"></canvas>
3+
</div>
4+
<div class="caption">Figure 2: SourceBench Score (Green) vs. Google Overlap (Gray).</div>
5+
</div>
6+
7+
8+
<script>
  // Draws Figure 2: SourceBench score and Google overlap side by side for each system,
  // each series on its own y axis, in #inverseChart.
  //
  // Everything lives inside an IIFE: top-level `const` bindings are shared by every
  // classic <script> on the page, so any other script that also declares `inverseData`
  // at top level would make one of the two fail with a redeclaration SyntaxError.
  (() => {
    // Chart.js global defaults; these affect every chart created afterwards on the page.
    Chart.defaults.font.family = "'Inter', sans-serif";
    Chart.defaults.color = '#475569';

    // Parallel arrays: index i of `score` and `overlap` belongs to `labels[i]`.
    const inverseData = {
      labels: ['GPT-5', 'Grok-4.1', 'GPT-4o', 'Perplexity', 'Tavily'],
      score: [89.1, 83.4, 81.5, 78.5, 78.3],
      overlap: [16.0, 29.7, 27.5, 40.0, 55.5]
    };

    new Chart(document.getElementById('inverseChart'), {
      type: 'bar',
      data: {
        labels: inverseData.labels,
        datasets: [
          {
            label: 'SourceBench Score',
            data: inverseData.score,
            backgroundColor: '#10b981',
            yAxisID: 'y', // left axis
            borderRadius: 4,
            barPercentage: 0.6
          },
          {
            label: 'Google Overlap %',
            data: inverseData.overlap,
            backgroundColor: '#94a3b8',
            yAxisID: 'y1', // right axis
            borderRadius: 4,
            barPercentage: 0.6
          }
        ]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false, // height comes from the wrapping element
        // Hovering anywhere over a category shows both series for that category.
        interaction: { mode: 'index', intersect: false },
        plugins: { legend: { position: 'top', labels: { usePointStyle: true } } },
        scales: {
          x: { grid: { display: false } },
          // The two y axes use different, independently clipped ranges.
          y: { type: 'linear', display: true, position: 'left', min: 70, max: 95, title: { display: true, text: 'Weighted Score' } },
          // drawOnChartArea: false keeps the right axis from drawing a second set of grid lines.
          y1: { type: 'linear', display: true, position: 'right', min: 0, max: 60, grid: { drawOnChartArea: false }, title: { display: true, text: 'Overlap %' } }
        }
      }
    });
  })();
</script>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<!-- Leaderboard chart container: the script that follows draws a horizontal bar chart into #leaderboardChart. -->
<figure>
  <div class="relative h-[400px] w-full border border-gray-100 rounded-lg p-6 bg-white shadow-sm mt-8">
    <!-- The figure has no caption, so the aria-label is the chart's only text alternative. -->
    <canvas id="leaderboardChart" role="img" aria-label="Horizontal bar chart of SourceBench scores for ten systems"></canvas>
  </div>
</figure>
6+
7+
<script>
  // Draws the SourceBench leaderboard as a horizontal bar chart in #leaderboardChart.
  //
  // Everything lives inside an IIFE: top-level `const` bindings are shared by every
  // classic <script> on the page, so keeping `leaderBoardData` local means this
  // fragment cannot clash with another script that declares the same name.
  (() => {
    // Chart.js global defaults; these affect every chart created afterwards on the page.
    Chart.defaults.font.family = "'Inter', sans-serif";
    Chart.defaults.color = '#475569';

    // One entry per bar, already ordered by descending score.
    // Most entries share the same slate colour; the top three and the last have their own.
    const leaderBoardData = [
      { name: 'GPT-5', score: 89.1, color: '#10b981' }, // Emerald 500
      { name: 'Grok-4.1', score: 83.4, color: '#3b82f6' }, // Blue 500
      { name: 'Gensee', score: 81.8, color: '#06b6d4' }, // Cyan 500
      { name: 'GPT-4o', score: 81.5, color: '#94a3b8' }, // Slate 400
      { name: 'Claude 3.5', score: 81.3, color: '#94a3b8' },
      { name: 'Exa', score: 80.1, color: '#94a3b8' },
      { name: 'Google', score: 79.9, color: '#94a3b8' },
      { name: 'Gemini 3 Pro', score: 79.4, color: '#94a3b8' },
      { name: 'Perplexity', score: 78.5, color: '#94a3b8' },
      { name: 'Tavily', score: 78.3, color: '#ef4444' }, // Red 500
    ];

    new Chart(document.getElementById('leaderboardChart'), {
      type: 'bar',
      data: {
        labels: leaderBoardData.map(d => d.name),
        datasets: [{
          label: 'SourceBench Score',
          data: leaderBoardData.map(d => d.score),
          backgroundColor: leaderBoardData.map(d => d.color),
          borderRadius: 4,
          barThickness: 24
        }]
      },
      options: {
        indexAxis: 'y', // categories on the y axis, so the bars run horizontally
        responsive: true,
        maintainAspectRatio: false, // height comes from the wrapping element
        plugins: { legend: { display: false } },
        scales: {
          // The value axis is clipped to 70-95 rather than starting at zero.
          x: { min: 70, max: 95, grid: { color: '#f1f5f9' } },
          y: { grid: { display: false } }
        }
      }
    });
  })();
</script>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<!-- Table 1: SourceBench leaderboard. scope="col" ties each header to its column for assistive technology. -->
<table>
  <thead>
    <tr>
      <th class="rank-col" scope="col">Rank</th>
      <th scope="col">System</th>
      <th class="score-col" scope="col">Weighted Score</th>
      <th class="metric-col" scope="col">Content Metric</th>
      <th class="metric-col" scope="col">Meta Metric</th>
    </tr>
  </thead>
  <tbody>
    <!-- The first three systems get a bold, coloured name; the rest use the default cell style. -->
    <tr><td class="rank-col">1</td><td style="font-weight: 600; color: #047857;">GPT-5</td><td class="score-col">89.1</td><td class="metric-col">4.4</td><td class="metric-col">4.5</td></tr>
    <tr><td class="rank-col">2</td><td style="font-weight: 600; color: #1d4ed8;">Grok-4.1</td><td class="score-col">83.4</td><td class="metric-col">4.2</td><td class="metric-col">4.1</td></tr>
    <tr><td class="rank-col">3</td><td style="font-weight: 600; color: #0891b2;">Gensee</td><td class="score-col">81.8</td><td class="metric-col">4.3</td><td class="metric-col">3.9</td></tr>
    <tr><td class="rank-col">4</td><td>GPT-4o</td><td class="score-col">81.5</td><td class="metric-col">4.1</td><td class="metric-col">4.0</td></tr>
    <tr><td class="rank-col">5</td><td>Claude 3.5</td><td class="score-col">81.3</td><td class="metric-col">4.1</td><td class="metric-col">4.0</td></tr>
    <tr><td class="rank-col">6</td><td>Exa</td><td class="score-col">80.1</td><td class="metric-col">3.9</td><td class="metric-col">4.1</td></tr>
    <tr><td class="rank-col">7</td><td>Google</td><td class="score-col">79.9</td><td class="metric-col">4.0</td><td class="metric-col">4.0</td></tr>
    <tr><td class="rank-col">8</td><td>Gemini 3 Pro</td><td class="score-col">79.4</td><td class="metric-col">3.9</td><td class="metric-col">4.0</td></tr>
    <tr><td class="rank-col">9</td><td>Perplexity</td><td class="score-col">78.5</td><td class="metric-col">3.8</td><td class="metric-col">4.0</td></tr>
    <tr><td class="rank-col">10</td><td>Tavily</td><td class="score-col">78.3</td><td class="metric-col">3.8</td><td class="metric-col">3.9</td></tr>
  </tbody>
</table>
<div class="caption">Table 1: SourceBench Leaderboard. "Content Metric" averages Relevance, Factuality, and Objectivity.</div>
<br>

0 commit comments

Comments
 (0)