Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/crawlee/storages/_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,22 @@ async def open_storage_instance(
instance = cls(client, metadata.id, metadata.name) # type: ignore[call-arg]
instance_name = getattr(instance, 'name', None)

# Cache the instance. Always cache by id and cache named or unnamed (alias).
# Cache the instance.
# Always cache by id.
self._cache_by_storage_client[storage_client_type].by_id[cls][instance.id][additional_cache_key] = instance

# Cache named storage.
if instance_name is not None:
self._cache_by_storage_client[storage_client_type].by_name[cls][instance_name][additional_cache_key] = (
instance
)
elif alias is not None:

# Cache unnamed storage.
if alias is not None:
Comment thread
Pijukatel marked this conversation as resolved.
self._cache_by_storage_client[storage_client_type].by_alias[cls][alias][additional_cache_key] = instance
else:
raise RuntimeError('Storage instance must have either a name or an alias.')

return instance

finally:
# Make sure the client opener is closed.
# If it was awaited, then closing is no operation, if it was not awaited, this is the cleanup.
Expand Down
13 changes: 13 additions & 0 deletions tests/unit/storages/test_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,16 @@ async def test_identical_storage_remove_from_cache(storage_type: type[Storage])
service_locator.storage_instance_manager.remove_from_cache(storage_1)
storage_2 = await storage_type.open()
assert storage_1 is not storage_2


async def test_preexisting_unnamed_storage_open_by_id(storage_type: type[Storage]) -> None:
    """Check that an unnamed (alias) storage that already exists can be reopened by its ID.

    The storage is first created through an alias, then the instance manager cache is
    cleared so the second `open` call cannot be served from the cache, and finally the
    storage is opened again using only its ID.
    """
    fs_client = FileSystemStorageClient()
    original = await storage_type.open(alias='custom_name', storage_client=fs_client)

    # Drop every cached instance so the service locator no longer knows about the storage.
    service_locator.storage_instance_manager.clear_cache()

    reopened = await storage_type.open(id=original.id, storage_client=fs_client)

    # Both handles must refer to the same underlying storage.
    assert original.id == reopened.id
Loading