Skip to content

Commit 7e4d519

Browse files
committed
refactor: add parseLlmJson utility for improved JSON handling across LLM queries
1 parent 0055c0d commit 7e4d519

5 files changed

Lines changed: 73 additions & 22 deletions

File tree

services/apps/organizations_enrichment_worker/src/activities/llm.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { parseLlmJson } from '@crowd/common'
12
import { LlmService } from '@crowd/common_services'
23
import {
34
OrganizationField,
@@ -132,7 +133,7 @@ export async function selectMostRelevantDomainWithLLM(
132133
organizationId,
133134
)
134135
if (!response) throw new Error('LLM returned no response')
135-
return JSON.parse(response.answer) as LlmDomainSelection
136+
return parseLlmJson<LlmDomainSelection>(response.answer)
136137
}
137138

138139
const MAX_RETRIES = 1

services/apps/profiles_worker/src/workflows/member/processMemberBotAnalysisWithLLM.ts

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import { ApplicationFailure, proxyActivities } from '@temporalio/workflow'
1+
import { proxyActivities } from '@temporalio/workflow'
22

3+
import { parseLlmJson } from '@crowd/common'
34
import { LlmQueryType } from '@crowd/types'
45

56
import * as activities from '../../activities'
@@ -72,18 +73,7 @@ export async function processMemberBotAnalysisWithLLM(
7273

7374
const llm = await getLLMResult(LlmQueryType.MEMBER_BOT_VALIDATION, PROMPT, memberId)
7475

75-
const start = llm.answer.indexOf('{')
76-
const end = llm.answer.lastIndexOf('}')
77-
78-
if (start === -1 || end === -1) {
79-
throw ApplicationFailure.retryable(
80-
`LLM returned no valid JSON object for member ${memberId}: ${llm.answer.substring(0, 200)}`,
81-
)
82-
}
83-
84-
const { isBot, signals } = JSON.parse(
85-
llm.answer.substring(start, end + 1),
86-
) as MemberBotSuggestionResult
76+
const { isBot, signals } = parseLlmJson<MemberBotSuggestionResult>(llm.answer)
8777

8878
if (!isBot) {
8979
await createMemberNoBot(memberId)

services/libs/common/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ export * from './displayName'
3636
export * from './jira'
3737
export * from './email'
3838
export * from './bot'
39+
export * from './llm'
3940

4041
export * from './i18n'
4142
export * from './member'

services/libs/common/src/llm.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
 * Extracts and parses the JSON payload from a raw LLM answer.
 *
 * Strategies are tried in order, and a failing strategy never aborts the
 * following ones:
 *  1. the trimmed answer is parsed as-is;
 *  2. the contents of the first Markdown code fence (``` or ```json) found
 *     anywhere in the answer are parsed;
 *  3. the answer is scanned left to right and every balanced `{...}` / `[...]`
 *     span is tried until one parses.
 *
 * The result is cast to `T` without any runtime shape validation; callers are
 * responsible for checking the fields they rely on.
 *
 * @param answer - Raw text returned by the LLM.
 * @returns The first value that parses successfully, typed as `T`.
 * @throws SyntaxError when no strategy yields valid JSON.
 */
export function parseLlmJson<T>(answer: string): T {
  const raw = answer.trim()

  // Fast path: the model returned nothing but JSON.
  try {
    return JSON.parse(raw) as T
  } catch {
    // continue with normalization
  }

  // Fence is intentionally not anchored: models often wrap it in prose.
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)\s*```/i)
  if (fenced?.[1]) {
    try {
      return JSON.parse(fenced[1].trim()) as T
    } catch {
      // Fence content was not JSON (or the lazy match was cut short by a
      // ``` inside a string value); fall back to bracket scanning.
    }
  }

  // Try every possible opening bracket, not just the first one, so that
  // brace-like prose ("the set {x, y}") before the payload does not abort.
  for (let start = 0; start < raw.length; start++) {
    const open = raw[start]
    if (open !== '{' && open !== '[') {
      continue
    }

    const end = findBalancedJsonEnd(raw, start, open, open === '{' ? '}' : ']')
    if (end === -1) {
      continue
    }

    try {
      return JSON.parse(raw.slice(start, end + 1)) as T
    } catch {
      // Balanced but not valid JSON; keep scanning for a later candidate.
    }
  }

  throw new SyntaxError('LLM response does not contain valid JSON content')
}

// Returns the index of the bracket that closes the one at `start`, ignoring
// brackets inside double-quoted strings, or -1 when it is never closed.
function findBalancedJsonEnd(text: string, start: number, open: string, close: string): number {
  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < text.length; i++) {
    const char = text[i]

    if (inString) {
      // Escapes only have meaning inside a string literal.
      if (escaped) {
        escaped = false
      } else if (char === '\\') {
        escaped = true
      } else if (char === '"') {
        inString = false
      }
      continue
    }

    if (char === '"') {
      inString = true
    } else if (char === open) {
      depth += 1
    } else if (char === close) {
      depth -= 1
      if (depth === 0) {
        return i
      }
    }
  }

  return -1
}

services/libs/common_services/src/services/llm.service.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
} from '@aws-sdk/client-bedrock-runtime'
66
import { performance } from 'perf_hooks'
77

8-
import { IS_LLM_ENABLED } from '@crowd/common'
8+
import { IS_LLM_ENABLED, parseLlmJson } from '@crowd/common'
99
import { insertPromptHistoryEntry } from '@crowd/data-access-layer'
1010
import { QueryExecutor } from '@crowd/data-access-layer'
1111
import { Logger, LoggerBase } from '@crowd/logging'
@@ -177,7 +177,7 @@ export class LlmService extends LoggerBase {
177177
} as ILlmResult<LlmMemberEnrichmentResult>
178178
}
179179

180-
const result = JSON.parse(response.answer)
180+
const result = parseLlmJson<LlmMemberEnrichmentResult>(response.answer)
181181

182182
return {
183183
result,
@@ -200,7 +200,7 @@ export class LlmService extends LoggerBase {
200200
} as ILlmResult<{ profileIndex: number }>
201201
}
202202

203-
const result = JSON.parse(response.answer)
203+
const result = parseLlmJson<{ profileIndex: number }>(response.answer)
204204

205205
return {
206206
result,
@@ -224,7 +224,7 @@ export class LlmService extends LoggerBase {
224224
} as ILlmResult<T>
225225
}
226226

227-
const result = JSON.parse(response.answer)
227+
const result = parseLlmJson<T>(response.answer)
228228

229229
return {
230230
result,
@@ -248,7 +248,7 @@ export class LlmService extends LoggerBase {
248248
} as ILlmResult<T>
249249
}
250250

251-
const result = JSON.parse(response.answer)
251+
const result = parseLlmJson<T>(response.answer)
252252

253253
return {
254254
result,
@@ -268,7 +268,7 @@ export class LlmService extends LoggerBase {
268268
} as ILlmResult<T>
269269
}
270270

271-
const result = JSON.parse(response.answer)
271+
const result = parseLlmJson<T>(response.answer)
272272

273273
return {
274274
result,
@@ -285,7 +285,7 @@ export class LlmService extends LoggerBase {
285285
} as ILlmResult<T>
286286
}
287287

288-
const result = JSON.parse(response.answer)
288+
const result = parseLlmJson<T>(response.answer)
289289

290290
return {
291291
result,
@@ -302,7 +302,7 @@ export class LlmService extends LoggerBase {
302302
} as ILlmResult<T>
303303
}
304304

305-
const result = JSON.parse(response.answer)
305+
const result = parseLlmJson<T>(response.answer)
306306

307307
return {
308308
result,

0 commit comments

Comments
 (0)