-
Notifications
You must be signed in to change notification settings - Fork 725
refactor!: Rename storages related methods from get_ to open_
#1418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
be33b0c
e80c55a
29f154e
0f90efc
ca092f8
7d87e3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -563,21 +563,33 @@ async def _get_proxy_info(self, request: Request, session: Session | None) -> Pr | |
| async def get_request_manager(self) -> RequestManager: | ||
| """Return the configured request manager. If none is configured, open and return the default request queue.""" | ||
| if not self._request_manager: | ||
| self._request_manager = await RequestQueue.open( | ||
| storage_client=self._service_locator.get_storage_client(), | ||
| configuration=self._service_locator.get_configuration(), | ||
| ) | ||
|
|
||
| self._request_manager = await self.open_request_queue() | ||
| return self._request_manager | ||
|
|
||
| async def get_dataset( | ||
| async def open_request_queue( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This has some funk to it - if the crawler uses a non-default request manager, this will still return the default request queue. If somebody does that, they will probably be surprised that adding requests to this queue does nothing 😁 Perhaps the method could throw if there is a non-default request manager in place? |
||
| self, | ||
| *, | ||
| id: str | None = None, | ||
| name: str | None = None, | ||
| alias: str | None = None, | ||
| ) -> RequestQueue: | ||
| """Return `RequestQueue` with the given ID or name or alias. If none is provided, return the default one.""" | ||
| return await RequestQueue.open( | ||
| id=id, | ||
| name=name, | ||
| alias=alias, | ||
| storage_client=self._service_locator.get_storage_client(), | ||
| configuration=self._service_locator.get_configuration(), | ||
| ) | ||
|
|
||
| async def open_dataset( | ||
| self, | ||
| *, | ||
| id: str | None = None, | ||
| name: str | None = None, | ||
| alias: str | None = None, | ||
| ) -> Dataset: | ||
| """Return the `Dataset` with the given ID or name. If none is provided, return the default one.""" | ||
| """Return `Dataset` with the given ID or name or alias. If none is provided, return the default one.""" | ||
| return await Dataset.open( | ||
| id=id, | ||
| name=name, | ||
|
|
@@ -586,14 +598,14 @@ async def get_dataset( | |
| configuration=self._service_locator.get_configuration(), | ||
| ) | ||
|
|
||
| async def get_key_value_store( | ||
| async def open_key_value_store( | ||
| self, | ||
| *, | ||
| id: str | None = None, | ||
| name: str | None = None, | ||
| alias: str | None = None, | ||
| ) -> KeyValueStore: | ||
| """Return the `KeyValueStore` with the given ID or name. If none is provided, return the default KVS.""" | ||
| """Return `KeyValueStore` with the given ID or name or alias. If none is provided, return the default KVS.""" | ||
| return await KeyValueStore.open( | ||
| id=id, | ||
| name=name, | ||
|
|
@@ -659,7 +671,7 @@ async def run( | |
| request_manager = await self.get_request_manager() | ||
| if purge_request_queue and isinstance(request_manager, RequestQueue): | ||
| await request_manager.drop() | ||
| self._request_manager = await RequestQueue.open() | ||
| self._request_manager = await self.open_request_queue() | ||
|
|
||
| if requests is not None: | ||
| await self.add_requests(requests) | ||
|
|
@@ -793,11 +805,11 @@ async def _use_state( | |
| self, | ||
| default_value: dict[str, JsonSerializable] | None = None, | ||
| ) -> dict[str, JsonSerializable]: | ||
| kvs = await self.get_key_value_store() | ||
| kvs = await self.open_key_value_store() | ||
| return await kvs.get_auto_saved_value(self._CRAWLEE_STATE_KEY, default_value) | ||
|
|
||
| async def _save_crawler_state(self) -> None: | ||
| store = await self.get_key_value_store() | ||
| store = await self.open_key_value_store() | ||
| await store.persist_autosaved_values() | ||
|
|
||
| async def get_data( | ||
|
|
@@ -887,7 +899,7 @@ async def _push_data( | |
| dataset_alias: The alias of the `Dataset` (run scope, unnamed storage). | ||
| kwargs: Keyword arguments to be passed to the `Dataset.push_data()` method. | ||
| """ | ||
| dataset = await self.get_dataset(id=dataset_id, name=dataset_name, alias=dataset_alias) | ||
| dataset = await self.open_dataset(id=dataset_id, name=dataset_name, alias=dataset_alias) | ||
| await dataset.push_data(data, **kwargs) | ||
|
|
||
| def _should_retry_request(self, context: BasicCrawlingContext, error: Exception) -> bool: | ||
|
|
@@ -1269,7 +1281,7 @@ async def _commit_request_handler_result(self, context: BasicCrawlingContext) -> | |
| for push_data_call in result.push_data_calls: | ||
| await self._push_data(**push_data_call) | ||
|
|
||
| await self._commit_key_value_store_changes(result, get_kvs=self.get_key_value_store) | ||
| await self._commit_key_value_store_changes(result, get_kvs=self.open_key_value_store) | ||
|
|
||
| @staticmethod | ||
| async def _commit_key_value_store_changes( | ||
|
|
@@ -1336,7 +1348,7 @@ async def __run_task_function(self) -> None: | |
| else: | ||
| session = await self._get_session() | ||
| proxy_info = await self._get_proxy_info(request, session) | ||
| result = RequestHandlerRunResult(key_value_store_getter=self.get_key_value_store) | ||
| result = RequestHandlerRunResult(key_value_store_getter=self.open_key_value_store) | ||
|
|
||
| context = BasicCrawlingContext( | ||
| request=request, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use words. This will be a page in the docs. It is not a changelog. Check out the other sections.