33import asyncio
44import contextlib
55import os
6+ from logging import getLogger
67from pathlib import Path
78
89import aioshutil
1920from crawlee .memory_storage_client .request_queue_client import RequestQueueClient
2021from crawlee .memory_storage_client .request_queue_collection_client import RequestQueueCollectionClient
2122
23+ logger = getLogger (__name__ )
24+
2225
2326class MemoryStorageClient (BaseStorageClient ):
2427 """Represents an in-memory storage client for managing datasets, key-value stores, and request queues.
@@ -47,6 +50,7 @@ def __init__(self, configuration: Configuration | None = None) -> None:
4750 self .datasets_handled : list [DatasetClient ] = []
4851 self .key_value_stores_handled : list [KeyValueStoreClient ] = []
4952 self .request_queues_handled : list [RequestQueueClient ] = []
53+
5054 self ._purged_on_start = False # Indicates whether a purge was already performed on this instance.
5155 self ._purge_lock = asyncio .Lock ()
5256
@@ -134,6 +138,7 @@ def request_queues(self) -> RequestQueueCollectionClient:
134138 async def purge_on_start (self ) -> None :
135139 # Optimistic, non-blocking check
136140 if self ._purged_on_start is True :
141+ logger .debug ('Storage was already purged on start.' )
137142 return
138143
139144 async with self ._purge_lock :
@@ -142,10 +147,10 @@ async def purge_on_start(self) -> None:
142147 # Mypy doesn't understand that the _purged_on_start can change while we're getting the async lock
143148 return # type: ignore[unreachable]
144149
145- await self ._purge_inner ()
150+ await self ._purge_default_storages ()
146151 self ._purged_on_start = True
147152
148- async def _purge_inner (self ) -> None :
153+ async def _purge_default_storages (self ) -> None :
149154 """Cleans up the storage directories, preparing the environment for a new run.
150155
151156 It aims to remove residues from previous executions to avoid data contamination between runs.
@@ -163,21 +168,23 @@ async def _purge_inner(self) -> None:
163168 self ._TEMPORARY_DIR_NAME
164169 ) or key_value_store_folder .name .startswith ('__OLD' ):
165170 await self ._batch_remove_files (key_value_store_folder .path )
166- elif key_value_store_folder .name == 'default' :
171+ elif key_value_store_folder .name == self . default_storage_id :
167172 await self ._handle_default_key_value_store (key_value_store_folder .path )
168173
169174 # Datasets
170175 if await ospath .exists (self .datasets_directory ):
171176 dataset_folders = await scandir (self .datasets_directory )
172177 for dataset_folder in dataset_folders :
173- if dataset_folder .name == 'default' or dataset_folder .name .startswith (self ._TEMPORARY_DIR_NAME ):
178+ if dataset_folder .name == self .default_storage_id or dataset_folder .name .startswith (
179+ self ._TEMPORARY_DIR_NAME
180+ ):
174181 await self ._batch_remove_files (dataset_folder .path )
175182
176183 # Request queues
177184 if await ospath .exists (self .request_queues_directory ):
178185 request_queue_folders = await scandir (self .request_queues_directory )
179186 for request_queue_folder in request_queue_folders :
180- if request_queue_folder .name == 'default' or request_queue_folder .name .startswith (
187+ if request_queue_folder .name == self . default_storage_id or request_queue_folder .name .startswith (
181188 self ._TEMPORARY_DIR_NAME
182189 ):
183190 await self ._batch_remove_files (request_queue_folder .path )
0 commit comments