Skip to content

Commit d62e6d1

Browse files
committed
fix: increase token limit sent to LLM
1 parent 438b418 commit d62e6d1

File tree

3 files changed

+265
-54
lines changed

3 files changed

+265
-54
lines changed

src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx

Lines changed: 152 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
184184
});
185185

186186
setEvidenceBundle(bundle);
187+
setSubjectAnalysis(null); // reset the stored subject analysis whenever a new evidence bundle is generated
187188
downloadJSON(bundle, "evidence_bundle.json");
188189
setStatus("✓ Evidence bundle generated and downloaded!");
189190
} catch (err: any) {
@@ -380,6 +381,43 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
380381
setStatus("3/3 Generating participants.tsv...");
381382
const partsPrompt = getParticipantsPrompt(userText);
382383

384+
// Compute the subject analysis before the try block so that both the try body and the catch fallback can use it
385+
const currentSubjectAnalysis = extractSubjectAnalysis(
386+
evidenceBundle?.all_files || [],
387+
evidenceBundle?.user_hints?.n_subjects,
388+
evidenceBundle?.filename_analysis?.python_statistics
389+
?.dominant_prefixes
390+
);
391+
392+
console.log("=== PARTICIPANTS DEBUG ===");
393+
console.log("method:", currentSubjectAnalysis?.method);
394+
console.log("subject_count:", currentSubjectAnalysis?.subject_count);
395+
console.log(
396+
"id_mapping:",
397+
currentSubjectAnalysis?.id_mapping?.id_mapping
398+
);
399+
console.log(
400+
"reverse_mapping:",
401+
currentSubjectAnalysis?.id_mapping?.reverse_mapping
402+
);
403+
console.log(
404+
"subject_records sample:",
405+
currentSubjectAnalysis?.subject_records?.slice(0, 3)
406+
);
407+
const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
408+
const expectedCount = evidenceBundle?.user_hints?.n_subjects;
409+
const subjectLabels: string[] =
410+
idMap &&
411+
Object.keys(idMap).length > 0 &&
412+
(!expectedCount || Object.keys(idMap).length === expectedCount)
413+
? Object.values(idMap).map((id: string) => `sub-${id}`)
414+
: Array.from(
415+
{
416+
length: expectedCount || Object.keys(idMap || {}).length || 1,
417+
},
418+
(_, i) => `sub-${String(i + 1).padStart(2, "0")}`
419+
);
420+
383421
let partsResponse;
384422
if (currentProvider.isAnthropic) {
385423
partsResponse = await fetch(currentProvider.baseUrl, {
@@ -435,52 +473,130 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
435473
: partsData.choices[0].message.content;
436474

437475
// Build TSV from schema
476+
// try {
477+
// const schemaText = participantsRaw
478+
// .replace(/^```json\n?/g, "")
479+
// .replace(/\n?```$/g, "")
480+
// .trim();
481+
// const schema = JSON.parse(schemaText);
482+
// const columns: string[] = schema.columns.map((c: any) => c.name);
483+
484+
// // Get subject IDs from evidence bundle (extracted by Python-style analysis)
485+
// // const idMapping =
486+
// // evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
487+
// // const subjectLabels: string[] = idMapping
488+
// // ? Object.values(idMapping).map((id) => `sub-${id}`)
489+
// // : ["sub-01"]; // fallback if no subject analysis
490+
// // Get subject IDs from subjectAnalysis state (computed at plan stage)
491+
// // Fall back to computing fresh if plan hasn't been run yet
492+
// const currentSubjectAnalysis =
493+
// subjectAnalysis ||
494+
// extractSubjectAnalysis(
495+
// evidenceBundle?.all_files || [],
496+
// evidenceBundle?.user_hints?.n_subjects,
497+
// evidenceBundle?.filename_analysis?.python_statistics
498+
// ?.dominant_prefixes
499+
// );
500+
// const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
501+
// const subjectLabels: string[] =
502+
// idMap && Object.keys(idMap).length > 0
503+
// ? Object.values(idMap).map((id) => `sub-${id}`)
504+
// : Array.from(
505+
// { length: evidenceBundle?.user_hints?.n_subjects || 1 },
506+
// (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
507+
// );
508+
509+
// const header = columns.join("\t");
510+
// // ====origin====
511+
// // const rows = subjectLabels.map((subId) =>
512+
// // columns
513+
// // .map((col: string) => (col === "participant_id" ? subId : "n/a"))
514+
// // .join("\t")
515+
// // );
516+
// //====== end ======
517+
// // =====update start=====
518+
// const reverseMap =
519+
// currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
520+
// const subjectRecords = currentSubjectAnalysis?.subject_records || [];
521+
522+
// const rows = subjectLabels.map((subId) => {
523+
// const bareId = subId.replace(/^sub-/, "");
524+
// const originalId = reverseMap[bareId];
525+
// const record = subjectRecords.find(
526+
// (r: any) => r.original_id === originalId
527+
// );
528+
// return columns
529+
// .map((col: string) => {
530+
// if (col === "participant_id") return subId;
531+
// if (col === "original_id") return originalId || "n/a";
532+
// if (col === "group") return (record as any)?.group || "n/a";
533+
// return "n/a";
534+
// })
535+
// .join("\t");
536+
// });
537+
// //====update end======
538+
// participantsContent = [header, ...rows].join("\n");
539+
// } catch (e) {
540+
// // Fallback: LLM didn't return valid JSON schema, use raw content
541+
// participantsContent = participantsRaw
542+
// .replace(/^```\n?/g, "")
543+
// .replace(/\n?```$/g, "")
544+
// .trim();
545+
// }
546+
// Build TSV from schema + subject analysis
547+
// Mirrors _generate_participants_tsv_from_python() in planner.py
438548
try {
439549
const schemaText = participantsRaw
440550
.replace(/^```json\n?/g, "")
441551
.replace(/\n?```$/g, "")
442552
.trim();
443553
const schema = JSON.parse(schemaText);
444-
const columns: string[] = schema.columns.map((c: any) => c.name);
445-
446-
// Get subject IDs from evidence bundle (extracted by Python-style analysis)
447-
// const idMapping =
448-
// evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
449-
// const subjectLabels: string[] = idMapping
450-
// ? Object.values(idMapping).map((id) => `sub-${id}`)
451-
// : ["sub-01"]; // fallback if no subject analysis
452-
// Get subject IDs from subjectAnalysis state (computed at plan stage)
453-
// Fall back to computing fresh if plan hasn't been run yet
454-
const currentSubjectAnalysis =
455-
subjectAnalysis ||
456-
extractSubjectAnalysis(
457-
evidenceBundle?.all_files || [],
458-
evidenceBundle?.user_hints?.n_subjects,
459-
evidenceBundle?.filename_analysis?.python_statistics
460-
?.dominant_prefixes
554+
555+
// LLM decides extra demographic columns (sex, age, group etc.)
556+
// but we always add participant_id and original_id ourselves
557+
const extraColumns: string[] = schema.columns
558+
.map((c: any) => c.name)
559+
.filter(
560+
(name: string) =>
561+
name !== "participant_id" && name !== "original_id"
461562
);
462-
const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
463-
const subjectLabels: string[] =
464-
idMap && Object.keys(idMap).length > 0
465-
? Object.values(idMap).map((id) => `sub-${id}`)
466-
: Array.from(
467-
{ length: evidenceBundle?.user_hints?.n_subjects || 1 },
468-
(_, i) => `sub-${String(i + 1).padStart(2, "0")}`
469-
);
563+
564+
// Always start with participant_id and original_id
565+
const columns = ["participant_id", "original_id", ...extraColumns];
566+
567+
const reverseMap =
568+
currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
569+
const subjectRecords = currentSubjectAnalysis?.subject_records || [];
470570

471571
const header = columns.join("\t");
472-
const rows = subjectLabels.map((subId) =>
473-
columns
474-
.map((col: string) => (col === "participant_id" ? subId : "n/a"))
475-
.join("\t")
476-
);
572+
const rows = subjectLabels.map((subId) => {
573+
const bareId = subId.replace(/^sub-/, "");
574+
const originalId = reverseMap[bareId] || "n/a";
575+
const record = subjectRecords.find(
576+
(r: any) => r.original_id === originalId
577+
);
578+
return columns
579+
.map((col: string) => {
580+
if (col === "participant_id") return subId;
581+
if (col === "original_id") return originalId;
582+
if (col === "group") return (record as any)?.group || "n/a";
583+
return "n/a";
584+
})
585+
.join("\t");
586+
});
587+
477588
participantsContent = [header, ...rows].join("\n");
478589
} catch (e) {
479-
// Fallback: LLM didn't return valid JSON schema, use raw content
480-
participantsContent = participantsRaw
481-
.replace(/^```\n?/g, "")
482-
.replace(/\n?```$/g, "")
483-
.trim();
590+
// Fallback: generate minimal TSV directly from subject analysis
591+
const reverseMap =
592+
currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
593+
const header = "participant_id\toriginal_id";
594+
const rows = subjectLabels.map((subId) => {
595+
const bareId = subId.replace(/^sub-/, "");
596+
const originalId = reverseMap[bareId] || "n/a";
597+
return `${subId}\t${originalId}`;
598+
});
599+
participantsContent = [header, ...rows].join("\n");
484600
}
485601
}
486602
// ==========================================
@@ -632,11 +748,6 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
632748
const filePatterns = analyzeFilePatterns(files);
633749
const userContext = getUserContext(files);
634750
const annotations = getFileAnnotations(files);
635-
// console.log("=== PROMPT BEING SENT TO LLM ===");
636-
// console.log(fileSummary);
637-
// console.log(filePatterns);
638-
// console.log(userContext);
639-
// console.log("=================================");
640751

641752
// UPDATED: Improved prompt that uses trio files
642753
const prompt = getConversionScriptPrompt(
@@ -788,6 +899,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
788899
userNSubjects,
789900
dominantPrefixes
790901
);
902+
791903
setSubjectAnalysis(computedSubjectAnalysis);
792904

793905
const fileSummary = buildFileSummary(files);
@@ -801,11 +913,6 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
801913
.map((s: any) => ` - ${s.relpath}`)
802914
.join("\n") || "";
803915

804-
// console.log("=== SAMPLE FILES ===");
805-
// console.log(sampleFiles);
806-
// console.log("=== COUNTS BY EXT ===");
807-
// console.log(evidenceBundle?.counts_by_ext);
808-
809916
const prompt = getBIDSPlanPrompt(
810917
fileSummary,
811918
filePatterns,

0 commit comments

Comments
 (0)