@@ -10,32 +10,198 @@ import axios from 'axios'
1010import { SessionMetadata } from './sessionMetadata.js'
1111import { downloadAndSaveHtml } from './htmlProcessor.js'
1212import { repairJsonContent } from '../../../utils/jsonRepair.js'
13+ import { jsonrepair } from 'jsonrepair'
1314
/**
 * Best-effort cleanup of JSON text that failed to parse.
 *
 * The text is scanned once while tracking whether the cursor is inside a
 * double-quoted string literal, so that each fix is applied only where it is
 * valid:
 *   - inside strings: raw newline / carriage return / tab become `\n`, `\r`,
 *     `\t`; curly double quotes (U+201C/U+201D) become `\"`; curly single
 *     quotes (U+2018/U+2019) become a plain `'` (JSON has no `\'` escape);
 *   - everywhere: C0/C1 control characters other than tab, LF and CR are removed;
 *   - between tokens: trailing commas before `}` / `]` are removed, and a
 *     comma is inserted between two string literals separated only by
 *     whitespace containing a newline.
 *
 * Structural whitespace between tokens is left untouched; escaping it would
 * turn valid multi-line JSON into invalid JSON.
 *
 * Limitation: an unescaped ASCII `"` inside a string value cannot be told
 * apart from a string terminator, so such input is not repaired here.
 *
 * @param {string} jsonString - Raw JSON text.
 * @returns {string} Cleaned text; it is not guaranteed to parse.
 */
function sanitizeJsonString(jsonString) {
  console.log('[FileGenerator] Starting JSON sanitization')
  const stats = {
    leftQuotes: 0,
    rightQuotes: 0,
    leftSingleQuotes: 0,
    rightSingleQuotes: 0
  }

  // Non-global on purpose: `.test()` on a /g regex is stateful.
  const strippedControlChar = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/

  // 1. String-aware pass: escape or replace characters inside string literals only.
  let escaped = ''
  let inString = false
  let afterBackslash = false

  for (const ch of jsonString) {
    if (strippedControlChar.test(ch)) {
      continue
    }

    if (!inString) {
      if (ch === '"') {
        inString = true
      }
      escaped += ch
      continue
    }

    if (afterBackslash) {
      // The character following a backslash belongs to an existing escape sequence.
      afterBackslash = false
      escaped += ch
      continue
    }

    switch (ch) {
      case '\\':
        afterBackslash = true
        escaped += ch
        break
      case '"':
        inString = false
        escaped += ch
        break
      case '\u201C':
        stats.leftQuotes++
        escaped += '\\"'
        break
      case '\u201D':
        stats.rightQuotes++
        escaped += '\\"'
        break
      case '\u2018':
        stats.leftSingleQuotes++
        escaped += "'"
        break
      case '\u2019':
        stats.rightSingleQuotes++
        escaped += "'"
        break
      case '\n':
        escaped += '\\n'
        break
      case '\r':
        escaped += '\\r'
        break
      case '\t':
        escaped += '\\t'
        break
      default:
        escaped += ch
    }
  }

  console.log('[FileGenerator] Quote replacement stats:', stats)

  // 2. Structural pass. Every string literal is consumed as a single token so
  //    that the comma fixes can never fire on text inside a string value.
  const stringOrTrailingComma = /"(?:[^"\\]|\\[\s\S])*"(\s*\n\s*(?="))?|,\s*([}\]])/g
  const result = escaped.replace(stringOrTrailingComma, (match, gap, closer) => {
    if (closer !== undefined) {
      // Trailing comma before a closing brace/bracket.
      return closer
    }
    if (gap !== undefined) {
      // Two string literals separated only by a line break: insert the missing comma.
      return match.slice(0, match.length - gap.length) + ',\n'
    }
    return match
  })

  console.log('[FileGenerator] JSON sanitization completed')
  return result
}
3872
/**
 * Report whether the text contains any Chinese-style (curly) quotation mark:
 * U+201C, U+201D, U+2018 or U+2019.
 *
 * The code points are written as `\u` escapes rather than literal characters
 * so the check cannot silently degrade to matching ASCII quotes if the file
 * passes through a tool that normalises typographic quotes.
 *
 * @param {string} jsonString - JSON text to inspect.
 * @returns {boolean} True when at least one curly quote is present.
 */
function hasChineseQuotes(jsonString) {
  return /[\u201C\u201D\u2018\u2019]/.test(jsonString)
}
81+
/**
 * Replace Chinese-style (curly) quotes inside the string VALUES of a Pod2Post
 * template JSON document, leaving keys and JSON structure untouched.
 *
 * Every double-quoted literal is matched as one token (existing escape
 * sequences such as `\"` are honoured). A literal that is followed by `:` is
 * a key and is returned unchanged. Within values:
 *   - U+201C / U+201D become `\"`;
 *   - U+2018 / U+2019 become a plain `'`, because `\'` is not a valid JSON escape.
 *
 * @param {string} jsonString - JSON text.
 * @returns {string} JSON text with curly quotes in values replaced.
 */
function fixPod2PostChineseQuotes(jsonString) {
  const stats = { replaced: 0 }

  // Group 1: literal content. Group 2: present only when the literal is an object key.
  // The optional key colon is captured (not a lookahead) so the scanner always
  // consumes whole literals and never resynchronises on a closing quote.
  const stringLiteral = /"((?:[^"\\]|\\[\s\S])*)"(\s*:)?/g

  const result = jsonString.replace(stringLiteral, (match, content, keyColon) => {
    if (keyColon !== undefined) {
      return match
    }

    const fixedContent = content
      .replace(/[\u201C\u201D]/g, '\\"')
      .replace(/[\u2018\u2019]/g, "'")

    if (fixedContent === content) {
      return match
    }

    // Counted once per changed value.
    stats.replaced++
    return `"${fixedContent}"`
  })

  console.log(`[Pod2Post] Fixed ${stats.replaced} Chinese quote occurrences`)
  return result
}
125+
/**
 * Try to turn possibly malformed JSON text into a parsed value using a chain
 * of local strategies, in this order:
 *   1. the `jsonrepair` library;
 *   2. (only if the text contains Chinese quotes and the template is Pod2Post)
 *      `fixPod2PostChineseQuotes`;
 *   3. (only if the text contains Chinese quotes) `sanitizeJsonString`.
 *
 * @param {string} jsonString - Raw JSON text.
 * @param {string} templateName - Template name; selects the Pod2Post-specific fixer.
 * @returns {Object} On success `{ success: true, data, cleanedContent, method }`;
 *   otherwise `{ success: false, error, needsClaude: true, method: 'all-methods-failed' }`
 *   where `error` is the message of the jsonrepair-stage failure.
 */
function smartJsonRepair(jsonString, templateName) {
  console.log('[FileGenerator] Starting smart JSON repair')

  const containsChineseQuotes = hasChineseQuotes(jsonString)
  console.log('[FileGenerator] Contains Chinese quotes:', containsChineseQuotes)

  // Parse a candidate; yield a success result, or log the failure and yield null.
  const attemptParse = (candidate, method, successLog, failureLog) => {
    try {
      const data = JSON.parse(candidate)
      console.log(successLog)
      return { success: true, data, cleanedContent: candidate, method }
    } catch (parseFailure) {
      console.log(failureLog, parseFailure.message)
      return null
    }
  }

  // Stage 1: jsonrepair library (handles most problems).
  let libraryErrorMessage
  try {
    console.log('[FileGenerator] Trying jsonrepair library first')
    const repaired = jsonrepair(jsonString)
    const parsed = JSON.parse(repaired)
    console.log('[FileGenerator] ✅ jsonrepair success')
    return {
      success: true,
      data: parsed,
      cleanedContent: repaired,
      method: 'jsonrepair-library'
    }
  } catch (error) {
    console.log('[FileGenerator] jsonrepair failed:', error.message)
    libraryErrorMessage = error.message
  }

  // Stage 2: custom fixers, only worth trying when Chinese quotes are present.
  if (containsChineseQuotes) {
    const isPod2Post = templateName === 'pod2post-template.md' || templateName === 'pod2post'

    if (isPod2Post) {
      console.log('[FileGenerator] Using Pod2Post specific quote fixer')
      const pod2PostResult = attemptParse(
        fixPod2PostChineseQuotes(jsonString),
        'pod2post-quote-fixer',
        '[FileGenerator] Pod2Post JSON successfully parsed',
        '[FileGenerator] Pod2Post quote fixer failed:'
      )
      if (pod2PostResult !== null) {
        return pod2PostResult
      }
    }

    console.log('[FileGenerator] Using general sanitizer for Chinese quotes')
    const generalResult = attemptParse(
      sanitizeJsonString(jsonString),
      'general-sanitizer',
      '[FileGenerator] JSON successfully parsed with general sanitizer',
      '[FileGenerator] General sanitizer failed:'
    )
    if (generalResult !== null) {
      return generalResult
    }
  }

  // Stage 3: every local strategy failed; the caller should fall back to the Claude API.
  console.log('[FileGenerator] All methods failed, will use Claude API')
  return {
    success: false,
    error: libraryErrorMessage,
    needsClaude: true,
    method: 'all-methods-failed'
  }
}
204+
39205/**
40206 * 外部API配置
41207 */
@@ -332,36 +498,69 @@ export async function generateFourFiles(params) {
332498 let jsonContent
333499 try {
334500 const jsonData = await fs . readFile ( jsonFilePath , 'utf-8' )
335-
336- // 先尝试使用内置的清理函数
337- const sanitizedJsonData = sanitizeJsonString ( jsonData )
338-
339- try {
340- // 尝试直接解析
341- jsonContent = JSON . parse ( sanitizedJsonData )
342- console . log ( '[FileGenerator] JSON parsed successfully with built-in sanitizer' )
343- } catch ( parseError ) {
344- // 解析失败,使用jsonRepair模块修复
345- console . log ( '[FileGenerator] JSON parse failed, attempting repair:' , parseError . message )
346-
501+ console . log ( '[FileGenerator] Original JSON length:' , jsonData . length )
502+
503+ // 使用智能修复策略
504+ const smartRepairResult = smartJsonRepair ( jsonData , templateName )
505+
506+ if ( smartRepairResult . success ) {
507+ // 智能修复成功
508+ jsonContent = smartRepairResult . data
509+ console . log ( '[FileGenerator] JSON successfully repaired with method:' , smartRepairResult . method )
510+
511+ // 如果进行了修复,保存回文件
512+ if ( smartRepairResult . method !== 'original-sanitizer' && smartRepairResult . cleanedContent ) {
513+ await fs . writeFile ( jsonFilePath , smartRepairResult . cleanedContent , 'utf-8' )
514+ console . log ( '[FileGenerator] Repaired JSON saved back to file' )
515+ }
516+ } else {
517+ // 智能修复失败,需要使用Claude API
518+ console . log ( '[FileGenerator] Smart repair failed, using Claude API repair' )
519+
520+ // 强制触发Claude修复,特别是对于包含中文引号的情况
347521 const repairResult = await repairJsonContent ( jsonData , {
348- templateName : 'daily-knowledge-card-template' ,
349- description : 'Knowledge card JSON' ,
350- requiredFields : [ 'theme' , 'copy' , 'cards' ] ,
351- timeout : 60000 ,
352- retries : 1
522+ templateName : templateName , // 使用传入的模板名
523+ description : `${ templateName } generated JSON` ,
524+ requiredFields : getRequiredFieldsForTemplate ( templateName ) , // 根据模板类型设置必需字段
525+ timeout : 90000 , // 增加超时时间
526+ retries : 2 , // 增加重试次数
527+ forceRepair : true , // 强制修复
528+ includeContext : true // 包含上下文信息
353529 } )
354-
530+
355531 if ( repairResult . success ) {
356- console . log ( '[FileGenerator] JSON repaired successfully ' )
532+ console . log ( '[FileGenerator] Claude API repair successful ' )
357533 jsonContent = repairResult . data
358-
534+
359535 // 保存修复后的JSON到文件
360536 const repairedJsonString = JSON . stringify ( jsonContent , null , 2 )
361537 await fs . writeFile ( jsonFilePath , repairedJsonString , 'utf-8' )
362- console . log ( '[FileGenerator] Repaired JSON saved back to file' )
538+ console . log ( '[FileGenerator] Claude-repaired JSON saved back to file' )
539+
540+ // 记录修复统计
541+ console . log ( '[FileGenerator] Claude repair stats:' , {
542+ originalLength : jsonData . length ,
543+ fixedLength : repairedJsonString . length ,
544+ attempts : repairResult . attempts ,
545+ executionTime : repairResult . executionTime
546+ } )
363547 } else {
364- throw new Error ( `JSON repair failed: ${ repairResult . error } ` )
548+ // Claude修复也失败了
549+ console . error ( '[FileGenerator] Claude API repair failed:' , repairResult . error )
550+
551+ // 尝试最后的修复策略:基础清理
552+ console . log ( '[FileGenerator] Attempting basic cleanup as last resort' )
553+ const basicCleaned = jsonData
554+ . replace ( / [ " " ] / g, '"' ) // 替换中文引号为英文引号
555+ . replace ( / [ \u0000 - \u001F ] / g, '' ) // 移除控制字符
556+
557+ try {
558+ jsonContent = JSON . parse ( basicCleaned )
559+ console . log ( '[FileGenerator] Basic cleanup successful' )
560+ await fs . writeFile ( jsonFilePath , JSON . stringify ( jsonContent , null , 2 ) , 'utf-8' )
561+ } catch ( basicError ) {
562+ throw new Error ( `All repair methods failed. Original error: ${ parseError . message } . Claude error: ${ repairResult . error } . Basic cleanup error: ${ basicError . message } ` )
563+ }
365564 }
366565 }
367566
@@ -451,6 +650,31 @@ export function isDailyKnowledgeTemplate(templateName) {
451650 return templateName === 'daily-knowledge-card-template.md'
452651}
453652
/**
 * Look up the fields a template's generated JSON is required to contain.
 *
 * @param {string} templateName - Template name.
 * @returns {Array} Dotted field paths for Pod2Post templates, top-level field
 *   names for the daily knowledge card template, or an empty array for any
 *   other template (no fields are enforced).
 */
export function getRequiredFieldsForTemplate(templateName) {
  const pod2PostNames = ['pod2post-template.md', 'pod2post']

  // Pod2Post is checked first, before the daily-knowledge predicate is consulted.
  if (pod2PostNames.includes(templateName)) {
    return [
      'social_content.post_title',
      'social_content.post_content',
      'social_content.highlights',
      'social_content.hashtags'
    ]
  }

  return isDailyKnowledgeTemplate(templateName)
    ? ['theme', 'copy', 'cards']
    : []
}
677+
454678/**
455679 * 获取预期的文件列表
456680 * @param {string } templateName - 模板名称
0 commit comments