Skip to content

Commit b12e27e

Browse files
committed
Fix type issues, prepare for tests. Merge first
1 parent 19113e7 commit b12e27e

File tree

4 files changed

+49
-30
lines changed

4 files changed

+49
-30
lines changed

src/apify/_actor.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,7 @@ async def __aenter__(self) -> Self:
207207
await self.open_key_value_store()
208208

209209
# Load non-default aliased storages from configuration
210-
self.log.warning('\n'.join(f'{k}={v}' for k, v in os.environ.items()))
211-
await AliasResolver.register_aliases(configuration=self._configuration)
210+
#await AliasResolver.register_aliases(configuration=self.configuration)
212211
return self
213212

214213
async def __aexit__(

src/apify/_configuration.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
from __future__ import annotations
22

3+
import dataclasses
34
import json
45
from datetime import datetime, timedelta
56
from decimal import Decimal
67
from logging import getLogger
78
from pathlib import Path
8-
from typing import Annotated, Any, Required, TypedDict
9+
from typing import Annotated, Any
910

10-
from pydantic import AliasChoices, BaseModel, BeforeValidator, Field, field_validator, model_validator
11+
from pydantic import AliasChoices, BeforeValidator, Field, model_validator
1112
from typing_extensions import Self, deprecated
1213

1314
from crawlee import service_locator
@@ -34,11 +35,26 @@ def _transform_to_list(value: Any) -> list[str] | None:
3435
return value if isinstance(value, list) else str(value).split(',')
3536

3637

37-
class ActorStorageIds(TypedDict):
38+
@dataclasses.dataclass
class ActorStorages:
    """Container for the storage mappings of an Actor, grouped by storage type.

    Each field is a dictionary of strings keyed by strings, one dictionary per storage type.
    """

    # Mapping for key-value stores.
    key_value_stores: dict[str, str]
    # Mapping for datasets.
    datasets: dict[str, str]
    # Mapping for request queues.
    request_queues: dict[str, str]
45+
46+
47+
def _load_storage_keys(data: None | str | dict) -> ActorStorages | None:
    """Build `ActorStorages` from the raw storage mapping supplied through configuration.

    Args:
        data: `None`, a JSON document, or an already decoded dictionary. The mapping is read using
            the camelCase keys `keyValueStores`, `datasets` and `requestQueues`.

    Returns:
        `None` when no mapping was supplied (`None` or a blank string), otherwise an `ActorStorages`
        instance. Storage types that are missing from the mapping, or set to `null`, become empty
        dictionaries.

    Raises:
        ValueError: If `data` is not valid JSON, or if it does not describe a JSON object.
    """
    if data is None:
        return None

    # A value that is set but left blank means "no mapping"; `json.loads('')` would raise instead.
    if isinstance(data, str) and not data.strip():
        return None

    storage_mapping = data if isinstance(data, dict) else json.loads(data)
    if not isinstance(storage_mapping, dict):
        # Fail the same way malformed JSON does instead of with an AttributeError on `.get` below.
        raise ValueError(  # noqa: TRY004
            f'Actor storages must be a JSON object, got {type(storage_mapping).__name__}.'
        )

    # `or {}` also covers an explicit `null`, which `.get(key, {})` would pass through as None.
    return ActorStorages(
        key_value_stores=storage_mapping.get('keyValueStores') or {},
        datasets=storage_mapping.get('datasets') or {},
        request_queues=storage_mapping.get('requestQueues') or {},
    )
4258

4359

4460
@docs_group('Configuration')
@@ -454,12 +470,12 @@ class Configuration(CrawleeConfiguration):
454470
] = None
455471

456472
actor_storages: Annotated[
457-
ActorStorageIds | None,
473+
ActorStorages | None,
458474
Field(
459475
alias='actor_storages_json',
460476
description='Storage IDs for the actor',
461477
),
462-
BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None),
478+
BeforeValidator(_load_storage_keys),
463479
] = None
464480

465481
@model_validator(mode='after')

src/apify/storage_clients/_apify/_alias_resolving.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -263,38 +263,40 @@ async def _get_default_kvs_client(configuration: Configuration) -> KeyValueStore
263263

264264
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
265265

266-
267266
@classmethod
async def register_aliases(cls, configuration: Configuration) -> None:
    """Write the alias mapping found in the configuration to the default key-value store.

    Does nothing when `configuration.actor_storages` is `None`. Otherwise every alias from the
    key-value store, dataset and request queue mappings is converted to the storage key of the
    corresponding resolver and merged into the record stored under `cls._ALIAS_MAPPING_KEY`.
    Entries coming from the configuration override entries already present in that record.

    Args:
        configuration: Configuration holding the storage mappings and the default storage ids.
    """
    actor_storages = configuration.actor_storages
    if actor_storages is None:
        return

    # Only a mapping that explicitly names a different default dataset is a conflict. A mapping
    # without a 'default' entry says nothing about the default dataset, so it must not warn.
    mapped_default_dataset_id = actor_storages.datasets.get('default', configuration.default_dataset_id)
    if mapped_default_dataset_id != configuration.default_dataset_id:
        logger.warning(
            f'Conflicting default dataset ids: {configuration.default_dataset_id=},'
            f" {configuration.actor_storages.datasets.get('default')=}"
        )

    configuration_mapping = {}
    for mapping, storage_type in (
        (actor_storages.key_value_stores, 'KeyValueStore'),
        (actor_storages.datasets, 'Dataset'),
        (actor_storages.request_queues, 'RequestQueue'),
    ):
        for storage_alias, storage_id in mapping.items():
            resolver = cls(
                storage_type=storage_type,
                # The default storage is kept under the '__default__' alias in the default kvs.
                alias='__default__' if storage_alias == 'default' else storage_alias,
                configuration=configuration,
            )
            # Private access is fine here: this is the class's own classmethod.
            configuration_mapping[resolver._storage_key] = storage_id  # noqa: SLF001

    # NOTE: An aliased storage can also be the default storage. Whether such a second alias should
    # be stored or ignored is still an open question.
    # NOTE: Keys already present in the stored mapping are overridden by the configuration values.
    client = await cls._get_default_kvs_client(configuration=configuration)
    record = await client.get_record(cls._ALIAS_MAPPING_KEY)
    # Tolerate both a missing record and a record whose value is empty or None.
    existing_mapping = (record or {}).get('value') or {}
    await client.set_record(cls._ALIAS_MAPPING_KEY, {**existing_mapping, **configuration_mapping})

tests/unit/actor/test_configuration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from pathlib import Path
23

34
import pytest
@@ -7,6 +8,7 @@
78
from crawlee.configuration import Configuration as CrawleeConfiguration
89
from crawlee.crawlers import BasicCrawler
910
from crawlee.errors import ServiceConflictError
11+
from crawlee.storage_clients import MemoryStorageClient
1012

1113
from apify import Actor
1214
from apify import Configuration as ApifyConfiguration

0 commit comments

Comments
 (0)