Skip to content

Commit 0b95e71

Browse files
aixier and claude committed
feat: 增强JSON修复机制,支持Pod2Post模板中文引号问题
- 安装并集成jsonrepair库,提供更强大的JSON修复能力
- 实现三层防御修复策略:jsonrepair库 → 自定义修复 → Claude API
- 优化Pod2Post模板的JSON修复,特别处理中文引号问题
- 动态识别模板类型,自动设置对应的必需字段验证
- 增加修复统计和日志记录,便于调试和监控

修复的问题:
- 解决JSON值中包含中文引号导致的解析失败
- 避免正则表达式匹配错误导致的JSON分割
- 提供更可靠的JSON修复保障机制

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 93a0e84 commit 0b95e71

4 files changed

Lines changed: 399 additions & 37 deletions

File tree

terminal-backend/package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

terminal-backend/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"express": "^5.1.0",
3434
"form-data": "^4.0.4",
3535
"jsdom": "^24.0.0",
36+
"jsonrepair": "^3.13.1",
3637
"jsonwebtoken": "^9.0.2",
3738
"multer": "^2.0.2",
3839
"node-fetch": "^3.3.2",

terminal-backend/src/routes/generate/utils/fileGenerator.js

Lines changed: 261 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,198 @@ import axios from 'axios'
1010
import { SessionMetadata } from './sessionMetadata.js'
1111
import { downloadAndSaveHtml } from './htmlProcessor.js'
1212
import { repairJsonContent } from '../../../utils/jsonRepair.js'
13+
import { jsonrepair } from 'jsonrepair'
1314

1415
/**
 * Heuristically cleans up a JSON string that failed to parse.
 *
 * Steps, in order:
 *   1. Typographic (Chinese-style) quotes: double quotes U+201C / U+201D become
 *      an escaped ASCII quote (\"), single quotes U+2018 / U+2019 become a plain
 *      ASCII apostrophe. (`\'` is NOT a legal JSON escape, so it must not be
 *      emitted.)
 *   2. Trailing commas before `}` / `]` are dropped and a comma is inserted
 *      between two string tokens separated only by a line break.
 *   3. Control characters other than tab, LF and CR are removed.
 *   4. Raw LF / CR / tab characters are escaped, but only while inside a string
 *      literal; whitespace between tokens is structural and is left untouched.
 *
 * The regexes use \uXXXX escapes on purpose so the intended code points cannot
 * be confused with ASCII quotes if the file is re-encoded or normalized.
 *
 * @param {string} jsonString - Raw (possibly malformed) JSON text.
 * @returns {string} Sanitized text; it is not guaranteed to be valid JSON.
 */
function sanitizeJsonString(jsonString) {
  console.log('[FileGenerator] Starting JSON sanitization')

  const stats = {
    leftQuotes: 0,
    rightQuotes: 0,
    leftSingleQuotes: 0,
    rightSingleQuotes: 0
  }

  // 1. Normalize typographic quotes before any structural fix-ups.
  let result = jsonString
    .replace(/\u201C/g, () => {
      stats.leftQuotes++
      return '\\"'
    })
    .replace(/\u201D/g, () => {
      stats.rightQuotes++
      return '\\"'
    })
    .replace(/\u2018/g, () => {
      stats.leftSingleQuotes++
      return "'"
    })
    .replace(/\u2019/g, () => {
      stats.rightSingleQuotes++
      return "'"
    })

  console.log('[FileGenerator] Quote replacement stats:', stats)

  // 2. Structural fix-ups: trailing commas in objects and arrays.
  result = result.replace(/,\s*}/g, '}')
  result = result.replace(/,\s*]/g, ']')

  // Simple heuristic: two string tokens separated only by a line break are
  // assumed to be missing a comma.
  result = result.replace(/"\s*\n\s*"/g, '",\n"')

  // 3. Strip control characters, keeping \t, \n and \r for step 4.
  result = result.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '')

  // 4. Escape raw line breaks / tabs that sit inside string literals only.
  result = escapeRawWhitespaceInJsonStrings(result)

  console.log('[FileGenerator] JSON sanitization completed')
  return result
}

/**
 * Escapes raw LF, CR and tab characters that occur inside double-quoted string
 * literals, leaving whitespace between JSON tokens unchanged.
 *
 * A character that directly follows a backslash is copied verbatim, so existing
 * escape sequences (including `\"`) never toggle the in-string state.
 *
 * @param {string} text - JSON-like text.
 * @returns {string} Text with in-string raw whitespace replaced by \n, \r, \t escapes.
 */
function escapeRawWhitespaceInJsonStrings(text) {
  const escapes = { '\n': '\\n', '\r': '\\r', '\t': '\\t' }
  let output = ''
  let inString = false
  let afterBackslash = false

  for (const ch of text) {
    if (!inString) {
      if (ch === '"') {
        inString = true
      }
      output += ch
      continue
    }

    if (afterBackslash) {
      afterBackslash = false
      output += ch
      continue
    }

    if (ch === '\\') {
      afterBackslash = true
      output += ch
    } else if (ch === '"') {
      inString = false
      output += ch
    } else {
      const escaped = escapes[ch]
      output += escaped === undefined ? ch : escaped
    }
  }

  return output
}
3872

73+
/**
 * Reports whether the text contains typographic (Chinese-style) quotation marks.
 *
 * The character class is spelled with \uXXXX escapes (U+201C, U+201D, U+2018,
 * U+2019) so it cannot be mistaken for, or silently normalized into, the ASCII
 * quotes that every JSON document contains.
 *
 * @param {string} jsonString - JSON text to inspect.
 * @returns {boolean} True when at least one typographic quote is present.
 */
function hasChineseQuotes(jsonString) {
  return /[\u201C\u201D\u2018\u2019]/.test(jsonString)
}
81+
82+
/**
 * Repairs typographic (Chinese-style) quotes inside the string values of a
 * Pod2Post JSON document while leaving the JSON structure alone.
 *
 * Two passes are made:
 *   1. string values that directly follow a colon (`"key": "value"`);
 *   2. strings that are followed by `,` or `]` (array items).
 *
 * Inside each matched string, U+201C / U+201D become an escaped ASCII quote
 * (\") and U+2018 / U+2019 become a plain apostrophe. `\'` is deliberately not
 * produced because it is not a valid JSON escape sequence.
 *
 * @param {string} jsonString - JSON text to repair.
 * @returns {string} Text with typographic quotes in string values normalized.
 */
function fixPod2PostChineseQuotes(jsonString) {
  let replaced = 0

  // Normalizes one string's content and counts every quote character rewritten.
  const normalizeQuotes = (content) =>
    content.replace(/[\u201C\u201D\u2018\u2019]/g, (quote) => {
      replaced++
      return quote === '\u2018' || quote === '\u2019' ? "'" : '\\"'
    })

  // 1. Object values: "key": "value"
  let result = jsonString.replace(
    /:\s*"([^"]*)"/g,
    (match, content) => `: "${normalizeQuotes(content)}"`
  )

  // 2. Array items: a string followed by a comma or a closing bracket.
  result = result.replace(
    /"([^"]*)"(?=\s*[,\]])/g,
    (match, content) => `"${normalizeQuotes(content)}"`
  )

  console.log(`[Pod2Post] Fixed ${replaced} Chinese quote occurrences`)
  return result
}
125+
126+
/**
 * Layered JSON repair: the jsonrepair library first, then the custom
 * quote-fixing fallbacks.
 *
 * Order of attempts:
 *   1. `jsonrepair(jsonString)` followed by `JSON.parse`.
 *   2. Only when the input contains Chinese quotes:
 *        a. the Pod2Post-specific fixer (Pod2Post templates only);
 *        b. the general sanitizer.
 *   3. Otherwise a failure result asking the caller to fall back to the
 *      Claude API.
 *
 * @param {string} jsonString - Raw JSON text.
 * @param {string} templateName - Template name, used to pick the Pod2Post fixer.
 * @returns {Object} On success `{ success: true, data, cleanedContent, method }`;
 *   on failure `{ success: false, error, needsClaude: true, method }`, where
 *   `error` is the message of the jsonrepair attempt's error.
 */
function smartJsonRepair(jsonString, templateName) {
  console.log('[FileGenerator] Starting smart JSON repair')

  // Detected up front so the same value is logged and later gates the fallbacks.
  const containsChineseQuotes = hasChineseQuotes(jsonString)
  console.log('[FileGenerator] Contains Chinese quotes:', containsChineseQuotes)

  // Layer 1: the jsonrepair library handles most malformed input.
  let libraryError
  try {
    console.log('[FileGenerator] Trying jsonrepair library first')
    const repaired = jsonrepair(jsonString)
    const parsed = JSON.parse(repaired)
    console.log('[FileGenerator] ✅ jsonrepair success')
    return {
      success: true,
      data: parsed,
      cleanedContent: repaired,
      method: 'jsonrepair-library'
    }
  } catch (error) {
    console.log('[FileGenerator] jsonrepair failed:', error.message)
    libraryError = error
  }

  // Layer 2: custom fixers, attempted only when Chinese quotes are present.
  const fallbacks = []
  if (containsChineseQuotes) {
    const isPod2Post = templateName === 'pod2post-template.md' || templateName === 'pod2post'
    if (isPod2Post) {
      fallbacks.push({
        announce: '[FileGenerator] Using Pod2Post specific quote fixer',
        fix: fixPod2PostChineseQuotes,
        onSuccess: '[FileGenerator] Pod2Post JSON successfully parsed',
        onFailure: '[FileGenerator] Pod2Post quote fixer failed:',
        method: 'pod2post-quote-fixer'
      })
    }
    fallbacks.push({
      announce: '[FileGenerator] Using general sanitizer for Chinese quotes',
      fix: sanitizeJsonString,
      onSuccess: '[FileGenerator] JSON successfully parsed with general sanitizer',
      onFailure: '[FileGenerator] General sanitizer failed:',
      method: 'general-sanitizer'
    })
  }

  for (const step of fallbacks) {
    console.log(step.announce)
    // The fixer runs outside the try so only parse failures are swallowed.
    const candidate = step.fix(jsonString)

    try {
      const parsed = JSON.parse(candidate)
      console.log(step.onSuccess)
      return {
        success: true,
        data: parsed,
        cleanedContent: candidate,
        method: step.method
      }
    } catch (parseFailure) {
      console.log(step.onFailure, parseFailure.message)
    }
  }

  // Layer 3: nothing worked locally; signal that the Claude API is needed.
  console.log('[FileGenerator] All methods failed, will use Claude API')
  return {
    success: false,
    error: libraryError.message,
    needsClaude: true,
    method: 'all-methods-failed'
  }
}
204+
39205
/**
40206
* 外部API配置
41207
*/
@@ -332,36 +498,69 @@ export async function generateFourFiles(params) {
332498
let jsonContent
333499
try {
334500
const jsonData = await fs.readFile(jsonFilePath, 'utf-8')
335-
336-
// 先尝试使用内置的清理函数
337-
const sanitizedJsonData = sanitizeJsonString(jsonData)
338-
339-
try {
340-
// 尝试直接解析
341-
jsonContent = JSON.parse(sanitizedJsonData)
342-
console.log('[FileGenerator] JSON parsed successfully with built-in sanitizer')
343-
} catch (parseError) {
344-
// 解析失败,使用jsonRepair模块修复
345-
console.log('[FileGenerator] JSON parse failed, attempting repair:', parseError.message)
346-
501+
console.log('[FileGenerator] Original JSON length:', jsonData.length)
502+
503+
// 使用智能修复策略
504+
const smartRepairResult = smartJsonRepair(jsonData, templateName)
505+
506+
if (smartRepairResult.success) {
507+
// 智能修复成功
508+
jsonContent = smartRepairResult.data
509+
console.log('[FileGenerator] JSON successfully repaired with method:', smartRepairResult.method)
510+
511+
// 如果进行了修复,保存回文件
512+
if (smartRepairResult.method !== 'original-sanitizer' && smartRepairResult.cleanedContent) {
513+
await fs.writeFile(jsonFilePath, smartRepairResult.cleanedContent, 'utf-8')
514+
console.log('[FileGenerator] Repaired JSON saved back to file')
515+
}
516+
} else {
517+
// 智能修复失败,需要使用Claude API
518+
console.log('[FileGenerator] Smart repair failed, using Claude API repair')
519+
520+
// 强制触发Claude修复,特别是对于包含中文引号的情况
347521
const repairResult = await repairJsonContent(jsonData, {
348-
templateName: 'daily-knowledge-card-template',
349-
description: 'Knowledge card JSON',
350-
requiredFields: ['theme', 'copy', 'cards'],
351-
timeout: 60000,
352-
retries: 1
522+
templateName: templateName, // 使用传入的模板名
523+
description: `${templateName} generated JSON`,
524+
requiredFields: getRequiredFieldsForTemplate(templateName), // 根据模板类型设置必需字段
525+
timeout: 90000, // 增加超时时间
526+
retries: 2, // 增加重试次数
527+
forceRepair: true, // 强制修复
528+
includeContext: true // 包含上下文信息
353529
})
354-
530+
355531
if (repairResult.success) {
356-
console.log('[FileGenerator] JSON repaired successfully')
532+
console.log('[FileGenerator] Claude API repair successful')
357533
jsonContent = repairResult.data
358-
534+
359535
// 保存修复后的JSON到文件
360536
const repairedJsonString = JSON.stringify(jsonContent, null, 2)
361537
await fs.writeFile(jsonFilePath, repairedJsonString, 'utf-8')
362-
console.log('[FileGenerator] Repaired JSON saved back to file')
538+
console.log('[FileGenerator] Claude-repaired JSON saved back to file')
539+
540+
// 记录修复统计
541+
console.log('[FileGenerator] Claude repair stats:', {
542+
originalLength: jsonData.length,
543+
fixedLength: repairedJsonString.length,
544+
attempts: repairResult.attempts,
545+
executionTime: repairResult.executionTime
546+
})
363547
} else {
364-
throw new Error(`JSON repair failed: ${repairResult.error}`)
548+
// Claude修复也失败了
549+
console.error('[FileGenerator] Claude API repair failed:', repairResult.error)
550+
551+
// 尝试最后的修复策略:基础清理
552+
console.log('[FileGenerator] Attempting basic cleanup as last resort')
553+
const basicCleaned = jsonData
554+
.replace(/[""]/g, '"') // 替换中文引号为英文引号
555+
.replace(/[\u0000-\u001F]/g, '') // 移除控制字符
556+
557+
try {
558+
jsonContent = JSON.parse(basicCleaned)
559+
console.log('[FileGenerator] Basic cleanup successful')
560+
await fs.writeFile(jsonFilePath, JSON.stringify(jsonContent, null, 2), 'utf-8')
561+
} catch (basicError) {
562+
throw new Error(`All repair methods failed. Original error: ${parseError.message}. Claude error: ${repairResult.error}. Basic cleanup error: ${basicError.message}`)
563+
}
365564
}
366565
}
367566

@@ -451,6 +650,31 @@ export function isDailyKnowledgeTemplate(templateName) {
451650
return templateName === 'daily-knowledge-card-template.md'
452651
}
453652

653+
/**
 * Returns the fields that generated JSON must contain for a given template.
 *
 * @param {string} templateName - Template name.
 * @returns {Array} Required field paths; an empty array when the template
 *   imposes no specific requirements.
 */
export function getRequiredFieldsForTemplate(templateName) {
  // Pod2Post is recognized under both its file name and its short name.
  const pod2PostNames = ['pod2post-template.md', 'pod2post']
  if (pod2PostNames.includes(templateName)) {
    return [
      'social_content.post_title',
      'social_content.post_content',
      'social_content.highlights',
      'social_content.hashtags'
    ]
  }

  // Daily knowledge card templates need these top-level fields; every other
  // template has no mandatory fields.
  return isDailyKnowledgeTemplate(templateName) ? ['theme', 'copy', 'cards'] : []
}
677+
454678
/**
455679
* 获取预期的文件列表
456680
* @param {string} templateName - 模板名称

0 commit comments

Comments
 (0)