Skip to content

Commit ae7d496

Browse files
Added comparison 0.3.0 vs 0.3.2 to ground truth
1 parent dd35762 commit ae7d496

3 files changed

Lines changed: 4787 additions & 54 deletions

File tree

docs/ground_truth/index.html

Lines changed: 225 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@
3030
background-color: #fff;
3131
}
3232
th, td {
33-
padding: 12px 15px;
33+
padding: 6px 10px;
3434
text-align: left;
3535
border-bottom: 1px solid #e0e0e0;
36+
word-wrap: break-word;
37+
word-break: break-word;
38+
max-width: 350px;
3639
}
3740
th {
3841
background-color: #f8f9fa;
@@ -75,17 +78,35 @@
7578
color: #666;
7679
margin-top: 4px;
7780
}
81+
.cell-only-030 {
82+
background-color: #ffebee !important;
83+
}
84+
.cell-only-032 {
85+
background-color: #e8f5e9 !important;
86+
}
87+
.cell-missing {
88+
color: #bbb;
89+
font-style: italic;
90+
text-align: center;
91+
}
92+
.section-divider {
93+
border: none;
94+
border-top: 3px solid #e0e0e0;
95+
max-width: 95%;
96+
margin: 40px auto 20px auto;
97+
}
98+
.section-title {
99+
text-align: center;
100+
color: #2c3e50;
101+
margin: 0 auto 20px auto;
102+
}
78103
</style>
79104
</head>
80105
<body>
81106

82107
<h1>Ground Truth Report</h1>
83108

84-
<p style="text-align: center; color: #444; font-size: 1.05em; line-height: 1.4;">
85-
Total repositories studied: 200 extracted<br>
86-
Every repository being studied has codemeta.json, and at least one metadata file<br>
87-
Currently: Covered 150 out 200 repository
88-
</p>
109+
<p id="summary-text" style="text-align: center; color: #444; font-size: 1.05em; line-height: 1.4;"></p>
89110

90111
<div class="table-container">
91112
<table id="pitfallsTable">
@@ -101,58 +122,129 @@ <h1>Ground TruthReport</h1>
101122
</table>
102123
</div>
103124

125+
<hr class="section-divider">
126+
<h2 class="section-title">0.3.0 vs 0.3.2 &mdash; Comparison</h2>
127+
128+
<div class="table-container">
129+
<table id="comparisonTable">
130+
<thead>
131+
<tr>
132+
<th>Repository</th>
133+
<th>Code</th>
134+
<th>0.3.0 Source</th>
135+
<th>0.3.0 Description</th>
136+
<th>0.3.2 Source</th>
137+
<th>0.3.2 Description</th>
138+
</tr>
139+
</thead>
140+
</table>
141+
</div>
142+
104143
<script>
105144
document.addEventListener("DOMContentLoaded", () => {
106-
fetch('summary_pitfalls_warnings.json')
107-
.then(response => {
108-
if (!response.ok) {
109-
throw new Error('Network response was not ok');
110-
}
111-
return response.json();
112-
})
113-
.then(data => {
114-
const table = document.querySelector('#pitfallsTable');
115-
116-
for (const [repoId, repoData] of Object.entries(data)) {
117-
const url = repoData.url || 'Unknown URL';
118-
119-
const pEntries = repoData.pitfalls ? Object.entries(repoData.pitfalls) : [];
120-
const wEntries = repoData.warnings ? Object.entries(repoData.warnings) : [];
121-
const totalRows = pEntries.length + wEntries.length;
122-
123-
if (totalRows === 0) continue;
124-
125-
const tbody = document.createElement('tbody');
126-
let isFirstContext = true;
127-
128-
for (const [code, info] of pEntries) {
129-
addRow(tbody, url, code, info, 'p', isFirstContext, totalRows, repoData);
130-
isFirstContext = false;
145+
const gtTable = document.querySelector('#pitfallsTable');
146+
const cmpTable = document.querySelector('#comparisonTable');
147+
148+
Promise.all([
149+
fetch('summary_pitfalls_warnings.json').then(r => r.json()),
150+
fetch('summary_0_3_0.json').then(r => r.json()),
151+
fetch('summary_0_3_2.json').then(r => r.json())
152+
])
153+
.then(([gtData, data030, data032]) => {
154+
const gtNames = new Set();
155+
for (const entry of Object.values(gtData)) {
156+
const url = (entry.url || '').replace(/\/$/, "").split("/");
157+
if (url.length >= 2) gtNames.add(url.slice(-2).join("/"));
158+
}
159+
const allThree = new Set(
160+
[...gtNames].filter(r => data030[r] && data032[r])
161+
);
162+
163+
document.getElementById('summary-text').textContent =
164+
`${allThree.size} repositories present across all three datasets (ground truth, 0.3.0, 0.3.2)`;
165+
166+
buildGroundTruthTable(gtTable, gtData, data030, data032, allThree);
167+
buildComparisonTable(cmpTable, data030, data032, allThree);
168+
})
169+
.catch(err => {
170+
console.error('Failed to load data:', err);
171+
gtTable.innerHTML += '<tbody><tr><td colspan="5" style="text-align:center;color:red;">Error loading data.</td></tr></tbody>';
172+
cmpTable.innerHTML += '<tbody><tr><td colspan="6" style="text-align:center;color:red;">Error loading comparison data.</td></tr></tbody>';
173+
});
174+
});
175+
176+
function buildGroundTruthTable(table, gtData, data030, data032, allThree) {
177+
for (const [repoId, repoData] of Object.entries(gtData)) {
178+
const url = repoData.url || 'Unknown URL';
179+
const repoName = url.replace(/\/$/, "").split("/").slice(-2).join("/");
180+
181+
if (!allThree.has(repoName)) continue;
182+
183+
const r030 = data030[repoName] || {};
184+
const r032 = data032[repoName] || {};
185+
186+
const matchedPitfalls = {};
187+
for (const [code, info] of Object.entries(repoData.pitfalls || {})) {
188+
const r032Code = r032.pitfalls && r032.pitfalls[code];
189+
const r030Code = r030.pitfalls && r030.pitfalls[code];
190+
if (r032Code || r030Code) {
191+
const gtInfo = { source_file: info.source_file, description: info.description };
192+
if (gtInfo.source_file === 'Metadata files (codemeta.json, setup.py, pom.xml etc...)') {
193+
const resCode = r032Code || r030Code;
194+
if (resCode && resCode.source_file) {
195+
gtInfo.source_file = resCode.source_file;
196+
}
131197
}
132-
for (const [code, info] of wEntries) {
133-
addRow(tbody, url, code, info, 'w', isFirstContext, totalRows, repoData);
134-
isFirstContext = false;
198+
matchedPitfalls[code] = gtInfo;
199+
}
200+
}
201+
202+
const matchedWarnings = {};
203+
for (const [code, info] of Object.entries(repoData.warnings || {})) {
204+
if (code === 'W003') continue;
205+
const r032Code = r032.warnings && r032.warnings[code];
206+
const r030Code = r030.warnings && r030.warnings[code];
207+
if (r032Code || r030Code) {
208+
const gtInfo = { source_file: info.source_file, description: info.description };
209+
if (gtInfo.source_file === 'Metadata files (codemeta.json, setup.py, pom.xml etc...)') {
210+
const resCode = r032Code || r030Code;
211+
if (resCode && resCode.source_file) {
212+
gtInfo.source_file = resCode.source_file;
213+
}
135214
}
136-
137-
table.appendChild(tbody);
215+
matchedWarnings[code] = gtInfo;
138216
}
139-
})
140-
.catch(error => {
141-
console.error('Error loading the JSON data:', error);
142-
const table = document.querySelector('#pitfallsTable');
143-
table.innerHTML += `<tbody><tr><td colspan="5" style="text-align: center; color: red;">Error loading data. Please ensure you are viewing this file via a local server.</td></tr></tbody>`;
144-
});
145-
});
217+
}
218+
219+
const pEntries = Object.entries(matchedPitfalls);
220+
const wEntries = Object.entries(matchedWarnings);
221+
const totalRows = pEntries.length + wEntries.length;
222+
223+
if (totalRows === 0) continue;
224+
225+
const tbody = document.createElement('tbody');
226+
let isFirstContext = true;
227+
228+
for (const [code, info] of pEntries) {
229+
addGTRow(tbody, url, repoName, code, info, 'p', isFirstContext, totalRows, repoData, allThree);
230+
isFirstContext = false;
231+
}
232+
for (const [code, info] of wEntries) {
233+
addGTRow(tbody, url, repoName, code, info, 'w', isFirstContext, totalRows, repoData, allThree);
234+
isFirstContext = false;
235+
}
236+
237+
table.appendChild(tbody);
238+
}
239+
}
146240

147-
function addRow(tbody, url, code, info, type, isFirstContext, totalRows, repoData) {
241+
function addGTRow(tbody, url, repoName, code, info, type, isFirstContext, totalRows, repoData, allThree) {
148242
const tr = document.createElement('tr');
149-
243+
150244
if (isFirstContext) {
151245
const tdRepo = document.createElement('td');
152246
const aRepo = document.createElement('a');
153247
aRepo.href = url;
154-
// Extract "user/repo" from the URL
155-
const repoName = url.replace(/\/$/, "").split("/").slice(-2).join("/");
156248
aRepo.textContent = repoName;
157249
aRepo.className = 'repo-link';
158250
aRepo.target = '_blank';
@@ -179,33 +271,112 @@ <h1>Ground TruthReport</h1>
179271
tdCommit.rowSpan = totalRows;
180272
tr.appendChild(tdCommit);
181273
}
182-
183-
// Code Column
274+
184275
const tdCode = document.createElement('td');
185276
const spanCode = document.createElement('span');
186277
spanCode.textContent = code;
187278
spanCode.className = `code-badge code-${type}`;
188279
tdCode.appendChild(spanCode);
189-
190-
// Description Column
280+
191281
const tdDesc = document.createElement('td');
192282
if (info.description && info.description !== "No description available") {
193283
tdDesc.textContent = info.description;
194284
tdDesc.className = 'desc-text';
195285
} else {
196286
tdDesc.textContent = '';
197287
}
198-
199-
// Source File Column
288+
200289
const tdSource = document.createElement('td');
201290
tdSource.textContent = info.source_file || 'Unknown';
202-
291+
203292
tr.appendChild(tdCode);
204293
tr.appendChild(tdDesc);
205294
tr.appendChild(tdSource);
206295

207296
tbody.appendChild(tr);
208297
}
298+
299+
function buildComparisonTable(table, data030, data032, allThree) {
300+
for (const repoName of Object.keys(data030).sort()) {
301+
const r030 = data030[repoName];
302+
const r032 = data032[repoName];
303+
if (!r032) continue;
304+
if (!allThree.has(repoName)) continue;
305+
306+
const codes030 = Object.assign(
307+
{}, r030.pitfalls || {}, r030.warnings || {}
308+
);
309+
const codes032 = Object.assign(
310+
{}, r032.pitfalls || {}, r032.warnings || {}
311+
);
312+
const allCodes = Array.from(
313+
new Set([...Object.keys(codes030), ...Object.keys(codes032)])
314+
).sort((a, b) => {
315+
const ta = a.startsWith('P') ? 0 : 1;
316+
const tb = b.startsWith('P') ? 0 : 1;
317+
if (ta !== tb) return ta - tb;
318+
return a.localeCompare(b);
319+
});
320+
321+
if (allCodes.length === 0) continue;
322+
323+
const tbody = document.createElement('tbody');
324+
const url = r030.url || r032.url;
325+
326+
allCodes.forEach((code, i) => {
327+
const entry030 = codes030[code];
328+
const entry032 = codes032[code];
329+
const only030 = entry030 && !entry032;
330+
const only032 = entry032 && !entry030;
331+
const type = code.startsWith('P') ? 'p' : 'w';
332+
333+
const tr = document.createElement('tr');
334+
335+
if (i === 0) {
336+
const tdRepo = document.createElement('td');
337+
const aRepo = document.createElement('a');
338+
aRepo.href = url;
339+
aRepo.textContent = repoName;
340+
aRepo.className = 'repo-link';
341+
aRepo.target = '_blank';
342+
tdRepo.appendChild(aRepo);
343+
tdRepo.rowSpan = allCodes.length;
344+
tr.appendChild(tdRepo);
345+
}
346+
347+
const tdCode = document.createElement('td');
348+
const spanCode = document.createElement('span');
349+
spanCode.textContent = code;
350+
spanCode.className = `code-badge code-${type}`;
351+
tdCode.appendChild(spanCode);
352+
tr.appendChild(tdCode);
353+
354+
const cellDefs = [
355+
{ data: entry030, key: 'source_file', ver: only030 ? '030' : '' },
356+
{ data: entry030, key: 'description', ver: only030 ? '030' : '' },
357+
{ data: entry032, key: 'source_file', ver: only032 ? '032' : '' },
358+
{ data: entry032, key: 'description', ver: only032 ? '032' : '' }
359+
];
360+
361+
for (const { data, key, ver } of cellDefs) {
362+
const td = document.createElement('td');
363+
if (data) {
364+
td.textContent = data[key] || '';
365+
} else {
366+
td.textContent = '\u2014';
367+
td.classList.add('cell-missing');
368+
}
369+
if (ver === '030') td.classList.add('cell-only-030');
370+
if (ver === '032') td.classList.add('cell-only-032');
371+
tr.appendChild(td);
372+
}
373+
374+
tbody.appendChild(tr);
375+
});
376+
377+
table.appendChild(tbody);
378+
}
379+
}
209380
</script>
210381
</body>
211382
</html>

0 commit comments

Comments
 (0)