@@ -200,6 +200,84 @@ def normalize_quotes(s: str) -> str:
200200 # 常见的几种智能引号 U+201C U+201D U+2018 U+2019
201201 return s .replace ("“" , '"' ).replace ("”" , '"' ).replace ("‘" , "'" ).replace ("’" , "'" )
202202
def fallback_repair_json(input_str: str) -> str:
    """Rebuild a ``{"reasoning": str, "score": [float, float]}`` JSON string.

    Last-resort repair: instead of patching the malformed text, the two
    expected fields are pulled out with regular expressions and a fresh
    object is serialized from them.

    Args:
        input_str: Possibly malformed JSON text.

    Returns:
        A valid JSON string with exactly the keys ``reasoning`` and ``score``.
        ``reasoning`` is ``""`` when no ``reasoning ... , score`` span is
        found; ``score`` always has two floats, padded with ``0.0`` when fewer
        than two numbers are found.
    """
    # 1. Reasoning text is whatever sits between the "reasoning" key and the
    #    ", score" key. Quotes around keys/values are optional, and both the
    #    ASCII and the full-width colon (U+FF1A) are accepted as separators.
    reasoning_match = re.search(
        r'"?reasoning"?\s*[:\uFF1A]\s*["\']?(.*?)["\']?\s*,\s*"?score"?',
        input_str,
        re.DOTALL | re.IGNORECASE,
    )
    reasoning_text = reasoning_match.group(1).strip() if reasoning_match else ""

    # 2. Normalize the text to its *unescaped* form: drop existing \" escapes
    #    and map smart double quotes to a plain '"'.
    #    Quotes are deliberately NOT re-escaped here: json.dumps below does
    #    all required escaping, and escaping twice would leave literal
    #    backslashes in the decoded reasoning.
    reasoning_text = reasoning_text.replace('\\"', '"')
    reasoning_text = reasoning_text.replace("\u201c", '"').replace("\u201d", '"')
    reasoning_text = reasoning_text.strip()

    # 3. Scores: take the first two numbers that follow the "score" key
    #    (up to the closing bracket, or to the end of the text if it is missing).
    score_match = re.search(
        r'"?score"?\s*[:\uFF1A]\s*\[?([^\]]+)\]?',
        input_str,
        re.DOTALL | re.IGNORECASE,
    )
    scores = []
    if score_match:
        # The pattern only yields strings that float() accepts.
        numbers = re.findall(r"-?\d+(?:\.\d+)?", score_match.group(1))
        scores = [float(number) for number in numbers[:2]]

    # Pad with zeros so the result always carries exactly two scores.
    scores += [0.0] * (2 - len(scores))

    # 4. Serialize; ensure_ascii=False keeps non-ASCII reasoning text readable.
    return json.dumps({"reasoning": reasoning_text, "score": scores}, ensure_ascii=False)
259+
def robust_json_fix(s: str):
    """Parse ``s`` as JSON, applying progressively more aggressive repairs.

    Order of attempts:
      1. ``json.loads`` on the text as given.
      2. ``fix_json``, then ``repair_reasoning_field_robust``; each fixer
         receives the output of the previous one, and the result is parsed
         after every step.
      3. ``fallback_repair_json``, which rebuilds a
         ``{"reasoning": ..., "score": [...]}`` object from scratch.

    Args:
        s: Raw (possibly malformed) JSON text.

    Returns:
        The decoded JSON value, or ``False`` if every attempt fails.
    """
    try:
        return json.loads(s)
    except Exception:
        # Not valid as-is; fall through to the repair chain.
        pass

    for fixer in (fix_json, repair_reasoning_field_robust):
        candidate = None
        try:
            # The fixer call is inside the try: a fixer that raises must not
            # abort the whole chain, only skip this stage.
            candidate = fixer(s)
            return json.loads(candidate)
        except Exception as err:
            # Keep chaining on the fixer's output, but only if it produced
            # text; otherwise the next stage continues from the previous s.
            if isinstance(candidate, str):
                s = candidate
            print(f"Error: Cannot fix {fixer.__name__} {s=} {err=}")

    try:
        return json.loads(fallback_repair_json(s))
    except Exception as e:
        print(f"Error: Cannot fix fallback_repair_json {s=} {e=}")
        return False
280+
203281#+=========================================================================================
204282def mllm_output_to_dict (input_string , give_up_parsing = False , text_prompt = None , score_range : int = 10 ):
205283 """
@@ -271,17 +349,20 @@ def mllm_output_to_dict(input_string, give_up_parsing=False, text_prompt=None, s
271349 new_data ['score' ] = [new_data ['score' ]]
272350 except Exception as e1 :
273351 print (f"Now fixing: { e1 = } { json_str = } " )
274- try :
275- new_data = json .loads (fix_json (json_str ))
276- return new_data
277- except Exception as e2 :
278- try :
279- print (f"Now fixing: { e2 = } { fix_json (json_str )= } " )
280- new_data = json .loads (repair_reasoning_field_robust (fix_json (json_str )))
281- return new_data
282- except Exception as e3 :
283- print (f"Error: Cannot fix { e3 = } { repair_reasoning_field_robust (fix_json (json_str ))= } " )
284- return False
352+
353+ new_data = robust_json_fix (json_str )
354+
355+ # try:
356+ # new_data = json.loads(fix_json(json_str))
357+ # return new_data
358+ # except Exception as e2:
359+ # try:
360+ # print(f"Now fixing: {e2=} {fix_json(json_str)=}")
361+ # new_data = json.loads(repair_reasoning_field_robust(fix_json(json_str)))
362+ # return new_data
363+ # except Exception as e3:
364+ # print(f"Error: Cannot fix {e3=} {repair_reasoning_field_robust(fix_json(json_str))=}")
365+ # return False
285366 return new_data
286367 else :
287368 print ("The required delimiters were not found correctly in the string." )
0 commit comments