Skip to content

Commit 17e6fb5

Browse files
authored
Merge pull request #39 from supermemoryai/vorflux/fix-question-id-pr26
refactor: deduplicate question ID validation, fix cross-benchmark pattern matching
2 parents 28a1861 + a7c46a8 commit 17e6fb5

8 files changed

Lines changed: 259 additions & 422 deletions

File tree

src/orchestrator/batch.ts

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type { BenchmarkResult } from "../types/unified"
55
import { orchestrator, CheckpointManager } from "./index"
66
import { createBenchmark } from "../benchmarks"
77
import { logger } from "../utils/logger"
8+
import { validateQuestionIds } from "../utils/question-ids"
89
import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync } from "fs"
910
import { join } from "path"
1011
import { startRun, endRun } from "../server/runState"
@@ -157,35 +158,8 @@ export class BatchManager {
157158

158159
let targetQuestionIds: string[]
159160
if (questionIds && questionIds.length > 0) {
160-
// Validate that all provided IDs exist in the benchmark
161-
const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
162-
const validIds: string[] = []
163-
const invalidIds: string[] = []
164-
165-
for (const id of questionIds) {
166-
if (allQuestionIdsSet.has(id)) {
167-
validIds.push(id)
168-
} else {
169-
invalidIds.push(id)
170-
}
171-
}
172-
173-
if (invalidIds.length > 0) {
174-
logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
175-
}
176-
177-
if (validIds.length === 0) {
178-
throw new Error(
179-
`All provided questionIds are invalid. No matching questions found in benchmark "${benchmark}". ` +
180-
`Invalid IDs: ${invalidIds.join(", ")}`
181-
)
182-
}
183-
161+
const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmark)
184162
targetQuestionIds = validIds
185-
logger.info(
186-
`Using explicit questionIds: ${validIds.length} valid questions` +
187-
(invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
188-
)
189163
} else if (sampling) {
190164
targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)
191165
} else {

src/orchestrator/index.ts

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { CheckpointManager } from "./checkpoint"
1010
import { getProviderConfig, getJudgeConfig } from "../utils/config"
1111
import { resolveModel } from "../utils/models"
1212
import { logger } from "../utils/logger"
13+
import { validateQuestionIds } from "../utils/question-ids"
1314
import { runIngestPhase } from "./phases/ingest"
1415
import { runIndexingPhase } from "./phases/indexing"
1516
import { runSearchPhase } from "./phases/search"
@@ -213,35 +214,8 @@ export class Orchestrator {
213214
effectiveLimit = limit
214215

215216
if (questionIds && questionIds.length > 0) {
216-
// Validate that all provided IDs exist in the benchmark
217-
const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
218-
const validIds: string[] = []
219-
const invalidIds: string[] = []
220-
221-
for (const id of questionIds) {
222-
if (allQuestionIdsSet.has(id)) {
223-
validIds.push(id)
224-
} else {
225-
invalidIds.push(id)
226-
}
227-
}
228-
229-
if (invalidIds.length > 0) {
230-
logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
231-
}
232-
233-
if (validIds.length === 0) {
234-
throw new Error(
235-
`All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
236-
`Invalid IDs: ${invalidIds.join(", ")}`
237-
)
238-
}
239-
217+
const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmarkName)
240218
targetQuestionIds = validIds
241-
logger.info(
242-
`Using explicit questionIds: ${validIds.length} valid questions` +
243-
(invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
244-
)
245219
} else if (sampling) {
246220
logger.info(`Using sampling mode: ${sampling.mode}`)
247221
targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)

src/server/routes/benchmarks.ts

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
128128
}
129129
}
130130

131-
// POST /api/benchmarks/:name/expand-ids - Expand conversation/session patterns to question IDs
131+
// POST /api/benchmarks/:name/expand-ids - Expand patterns to question IDs
132+
// Supports: exact question IDs, session IDs, and prefix matching (works across all benchmarks)
132133
const expandIdsMatch = pathname.match(/^\/api\/benchmarks\/([^/]+)\/expand-ids$/)
133134
if (method === "POST" && expandIdsMatch) {
134135
const benchmarkName = expandIdsMatch[1]
@@ -153,39 +154,27 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
153154
if (!trimmed) continue
154155

155156
const expanded: string[] = []
156-
157-
// Pattern 1: Conversation ID (e.g., "conv-26") - expand to all questions
158-
// Check if pattern ends with a number and doesn't have -q or -session suffix
159-
if (/^[a-zA-Z]+-\d+$/.test(trimmed)) {
160-
const matchingQuestions = allQuestions.filter((q) =>
161-
q.questionId.startsWith(trimmed + "-q")
162-
)
163-
matchingQuestions.forEach((q) => {
164-
expanded.push(q.questionId)
165-
expandedIds.add(q.questionId)
166-
})
167-
}
168-
// Pattern 2: Session ID (e.g., "conv-26-session_1" or "001be529-session-0")
169-
// Find all questions that reference this session
170-
else if (trimmed.includes("-session")) {
171-
const matchingQuestions = allQuestions.filter((q) =>
172-
q.haystackSessionIds.includes(trimmed)
173-
)
174-
matchingQuestions.forEach((q) => {
175-
expanded.push(q.questionId)
176-
expandedIds.add(q.questionId)
177-
})
178-
}
179-
// Pattern 3: Direct question ID - add as-is if it exists
180-
else {
181-
const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
182-
if (exactMatch) {
183-
expanded.push(trimmed)
184-
expandedIds.add(trimmed)
185-
}
157+
const addMatch = (id: string) => { expanded.push(id); expandedIds.add(id) }
158+
159+
// Priority: exact match > session lookup > prefix expansion
160+
const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
161+
if (exactMatch) {
162+
addMatch(trimmed)
163+
} else if (trimmed.includes("-session")) {
164+
// Session ID — find all questions that reference this session
165+
allQuestions
166+
.filter((q) => q.haystackSessionIds.includes(trimmed))
167+
.forEach((q) => addMatch(q.questionId))
168+
} else {
169+
// Prefix match — works across all benchmarks (e.g., "conv-26" matches
170+
// "conv-26-q0", "convomem-user_evidence" matches "convomem-user_evidence-0")
171+
const prefix = trimmed.endsWith("-") ? trimmed : trimmed + "-"
172+
allQuestions
173+
.filter((q) => q.questionId.startsWith(prefix))
174+
.forEach((q) => addMatch(q.questionId))
186175
}
187176

188-
patternResults[pattern] = expanded
177+
patternResults[trimmed] = expanded
189178
}
190179

191180
return json({

src/utils/question-ids.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import { logger } from "./logger"
2+
3+
export interface ValidateQuestionIdsResult {
4+
validIds: string[]
5+
invalidIds: string[]
6+
}
7+
8+
/**
9+
* Validates a list of question IDs against the full set of questions in a benchmark.
10+
* Returns valid and invalid IDs separately. Throws if all IDs are invalid.
11+
*/
12+
export function validateQuestionIds(
13+
questionIds: string[],
14+
allQuestions: { questionId: string }[],
15+
benchmarkName: string
16+
): ValidateQuestionIdsResult {
17+
const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
18+
const validIds: string[] = []
19+
const invalidIds: string[] = []
20+
21+
for (const id of questionIds) {
22+
if (allQuestionIdsSet.has(id)) {
23+
validIds.push(id)
24+
} else {
25+
invalidIds.push(id)
26+
}
27+
}
28+
29+
if (invalidIds.length > 0) {
30+
logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
31+
}
32+
33+
if (validIds.length === 0) {
34+
throw new Error(
35+
`All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
36+
`Invalid IDs: ${invalidIds.join(", ")}`
37+
)
38+
}
39+
40+
logger.info(
41+
`Using explicit questionIds: ${validIds.length} valid questions` +
42+
(invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
43+
)
44+
45+
return { validIds, invalidIds }
46+
}

0 commit comments

Comments
 (0)