Skip to content

Commit df66030

Browse files
authored
Handle strings field larger than 16 MB (kevoreilly#2635)
1 parent fd85db4 commit df66030

File tree

1 file changed

+31
-5
lines changed

1 file changed

+31
-5
lines changed

dev_utils/mongo_hooks.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import itertools
22
import logging
3+
from contextlib import suppress
34

45
from pymongo import UpdateOne, errors
6+
from pymongo.errors import InvalidDocument, BulkWriteError
7+
import bson
58

69
from dev_utils.mongodb import (
710
mongo_bulk_write,
@@ -61,13 +64,12 @@ def normalize_file(file_dict, task_id):
6164
)
6265
new_dict = {}
6366
for fld in static_fields:
64-
try:
67+
with suppress(KeyError):
6568
new_dict[fld] = file_dict.pop(fld)
66-
except KeyError:
67-
pass
6869

6970
new_dict["_id"] = key
7071
file_dict[FILE_REF_KEY] = key
72+
7173
return UpdateOne({"_id": key}, {"$set": new_dict, "$addToSet": {TASK_IDS_KEY: task_id}}, upsert=True, hint=[("_id", 1)])
7274

7375

@@ -87,8 +89,32 @@ def normalize_files(report):
8789
try:
8890
if requests:
8991
mongo_bulk_write(FILES_COLL, requests, ordered=False)
90-
except errors.OperationFailure as exc:
91-
log.error("Mongo hook 'normalize_files' failed with code %d: %s", exc.code, exc)
92+
except (errors.OperationFailure, InvalidDocument, BulkWriteError) as exc:
93+
log.warning("Mongo hook 'normalize_files' failed: %s. Attempting to sanitize strings and retry.", exc)
94+
for req in requests:
95+
# req._doc is the update document: {"$set": new_dict, ...}
96+
# Accessing private attribute _doc to modify in place for retry
97+
try:
98+
if hasattr(req, "_doc") and "$set" in req._doc and "strings" in req._doc["$set"]:
99+
strings_val = req._doc["$set"]["strings"]
100+
# Check whether the strings field alone exceeds 15 MB (a safety margin below the 16 MB document limit)
101+
if strings_val and len(bson.encode({"strings": strings_val})) > 15 * 1024 * 1024:
102+
log.warning("Truncating oversized strings field for retry.")
103+
if isinstance(strings_val, list):
104+
req._doc["$set"]["strings"] = strings_val[:1000]
105+
else:
106+
req._doc["$set"]["strings"] = []
107+
# If still too large, clear it
108+
if len(bson.encode({"strings": req._doc["$set"]["strings"]})) > 15 * 1024 * 1024:
109+
req._doc["$set"]["strings"] = []
110+
except Exception as e:
111+
log.error("Failed to sanitize request during retry: %s", e)
112+
113+
# Retry the bulk write
114+
try:
115+
mongo_bulk_write(FILES_COLL, requests, ordered=False)
116+
except Exception as retry_exc:
117+
log.error("Retry of 'normalize_files' failed: %s", retry_exc)
92118

93119
return report
94120

0 commit comments

Comments
 (0)