11import itertools
22import logging
3+ from contextlib import suppress
34
45from pymongo import UpdateOne , errors
6+ from pymongo .errors import InvalidDocument , BulkWriteError
7+ import bson
58
69from dev_utils .mongodb import (
710 mongo_bulk_write ,
@@ -61,13 +64,12 @@ def normalize_file(file_dict, task_id):
6164 )
6265 new_dict = {}
6366 for fld in static_fields :
64- try :
67+ with suppress ( KeyError ) :
6568 new_dict [fld ] = file_dict .pop (fld )
66- except KeyError :
67- pass
6869
6970 new_dict ["_id" ] = key
7071 file_dict [FILE_REF_KEY ] = key
72+
7173 return UpdateOne ({"_id" : key }, {"$set" : new_dict , "$addToSet" : {TASK_IDS_KEY : task_id }}, upsert = True , hint = [("_id" , 1 )])
7274
7375
@@ -87,8 +89,32 @@ def normalize_files(report):
8789 try :
8890 if requests :
8991 mongo_bulk_write (FILES_COLL , requests , ordered = False )
90- except errors .OperationFailure as exc :
91- log .error ("Mongo hook 'normalize_files' failed with code %d: %s" , exc .code , exc )
92+ except (errors .OperationFailure , InvalidDocument , BulkWriteError ) as exc :
93+ log .warning ("Mongo hook 'normalize_files' failed: %s. Attempting to sanitize strings and retry." , exc )
94+ for req in requests :
95+ # req._doc is the update document: {"$set": new_dict, ...}
96+ # Accessing private attribute _doc to modify in place for retry
97+ try :
98+ if hasattr (req , "_doc" ) and "$set" in req ._doc and "strings" in req ._doc ["$set" ]:
99+ strings_val = req ._doc ["$set" ]["strings" ]
100+ # Check if strings field alone is too large (buffer safe 15MB)
101+ if strings_val and len (bson .encode ({"strings" : strings_val })) > 15 * 1024 * 1024 :
102+ log .warning ("Truncating oversized strings field for retry." )
103+ if isinstance (strings_val , list ):
104+ req ._doc ["$set" ]["strings" ] = strings_val [:1000 ]
105+ else :
106+ req ._doc ["$set" ]["strings" ] = []
107+ # If still too large, clear it
108+ if len (bson .encode ({"strings" : req ._doc ["$set" ]["strings" ]})) > 15 * 1024 * 1024 :
109+ req ._doc ["$set" ]["strings" ] = []
110+ except Exception as e :
111+ log .error ("Failed to sanitize request during retry: %s" , e )
112+
113+ # Retry the bulk write
114+ try :
115+ mongo_bulk_write (FILES_COLL , requests , ordered = False )
116+ except Exception as retry_exc :
117+ log .error ("Retry of 'normalize_files' failed: %s" , retry_exc )
92118
93119 return report
94120
0 commit comments