Skip to content

Commit 786d018

Browse files
committed
fix(sharepoint): use to_json() for permissions and pin collection across accesses
Address review feedback:
- Use perm.to_json() instead of perm.properties to get plain dicts
- Pin PermissionCollection into drive_item.properties so batch results survive across property getter accesses
- Extract _pin_permissions helper to deduplicate the pin pattern
- Add pin assertion to fallback test
1 parent 968e0c4 commit 786d018

2 files changed

Lines changed: 17 additions & 5 deletions

File tree

test/unit/connectors/test_sharepoint.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def test_fetch_file_handles_site_not_found_immediately(
140140

141141
def _make_permission(raw_props: dict) -> Mock:
142142
perm = Mock()
143-
perm.properties = raw_props
143+
perm.to_json.return_value = raw_props
144144
return perm
145145

146146

@@ -428,7 +428,7 @@ class TestDriveItemToFileDataSync:
428428
def test_permissions_attached_when_pre_hydrated(self):
429429
indexer = _make_indexer()
430430
perm = Mock()
431-
perm.properties = {
431+
perm.to_json.return_value = {
432432
"roles": ["read"],
433433
"grantedToV2": {"user": {"id": "user-1"}},
434434
}
@@ -473,6 +473,7 @@ def test_falls_back_to_per_item_on_batch_failure(self):
473473
assert client._queries == []
474474
for item in items:
475475
assert item.permissions.get.return_value.execute_query.called
476+
assert item.properties["permissions"] is item.permissions
476477

477478
def test_fallback_works_without_internal_query_list(self):
478479
indexer = _make_indexer()

unstructured_ingest/processes/connectors/sharepoint.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def extract_permissions(
185185
}
186186

187187
for perm in permissions:
188-
raw_props = perm.properties
188+
raw_props = perm.to_json()
189189
roles = raw_props.get("roles", [])
190190

191191
operations: set[str] = set()
@@ -212,6 +212,17 @@ def extract_permissions(
212212
logger.debug(f"normalized permissions generated: {result}")
213213
return [{k: v} for k, v in result.items()]
214214

215+
@staticmethod
216+
def _pin_permissions(drive_item: DriveItem) -> Any:
217+
"""Pin the permissions collection into properties so it survives across accesses.
218+
219+
The property getter creates a fresh empty PermissionCollection each time
220+
when the key is absent. Pinning ensures batch/query results are retained.
221+
"""
222+
perms = drive_item.permissions
223+
drive_item.properties["permissions"] = perms
224+
return perms
225+
215226
def _fetch_permissions_batched(
216227
self,
217228
client: Any,
@@ -226,7 +237,7 @@ def _fetch_permissions_batched(
226237
from office365.runtime.client_request_exception import ClientRequestException
227238

228239
for drive_item in drive_items:
229-
drive_item.permissions.get()
240+
self._pin_permissions(drive_item).get()
230241

231242
try:
232243
client.execute_batch(items_per_batch=PERMISSIONS_BATCH_SIZE)
@@ -241,7 +252,7 @@ def _fetch_permissions_batched(
241252
pending_queries.clear()
242253
for drive_item in drive_items:
243254
try:
244-
drive_item.permissions.get().execute_query()
255+
self._pin_permissions(drive_item).get().execute_query()
245256
except ClientRequestException as exc:
246257
logger.error(
247258
f"failed to fetch permissions for {drive_item.name}: {exc}",

0 commit comments

Comments
 (0)