Skip to content

Commit 53d31e5

Browse files
committed
Merge branch 'master' into fix-global-event-manager
2 parents ff4c976 + 69cc855 commit 53d31e5

12 files changed

Lines changed: 733 additions & 697 deletions

File tree

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
<!-- git-cliff-unreleased-start -->
6+
## 1.6.1 - **not yet released**
7+
8+
### 🐛 Bug Fixes
9+
10+
- Handle invalid URLs in `RequestList` ([#1803](https://github.com/apify/crawlee-python/pull/1803)) ([0b2e3fc](https://github.com/apify/crawlee-python/commit/0b2e3fc5cbca371131b54085e052a6cda6361b0f)) by [@Mantisus](https://github.com/Mantisus), closes [#1802](https://github.com/apify/crawlee-python/issues/1802)
11+
12+
13+
<!-- git-cliff-unreleased-end -->
514
## [1.6.0](https://github.com/apify/crawlee-python/releases/tag/v1.6.0) (2026-03-20)
615

716
### 🚀 Features

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "crawlee"
7-
version = "1.6.0"
7+
version = "1.6.1"
88
description = "Crawlee for Python"
99
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
1010
license = { file = "LICENSE" }

src/crawlee/otel/crawler_instrumentor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ def _init_wrapper(wrapped: Any, _: Any, args: Any, kwargs: Any) -> None:
6969

7070
async def middleware_wrapper(wrapped: Any, instance: _Middleware, args: Any, kwargs: Any) -> Any:
7171
with self._tracer.start_as_current_span(
72-
name=f'{instance.generator.__name__}, {wrapped.__name__}', # type:ignore[attr-defined] # valid in our context
72+
name=f'{instance.generator.__name__}, {wrapped.__name__}', # ty:ignore[unresolved-attribute] # valid in our context
7373
attributes={
7474
URL_FULL: instance.input_context.request.url,
75-
CODE_FUNCTION_NAME: instance.generator.__qualname__, # type:ignore[attr-defined] # valid in our context
75+
CODE_FUNCTION_NAME: instance.generator.__qualname__, # ty:ignore[unresolved-attribute] # valid in our context
7676
},
7777
):
7878
return await wrapped(*args, **kwargs)

src/crawlee/storage_clients/_base/_dataset_client.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ async def iterate_items(
8888
The backend method for the `Dataset.iterate_items` call.
8989
"""
9090
# This syntax makes the type checker work properly with an abstract AsyncIterator.
91-
# https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
9291
raise NotImplementedError
9392
if False:
94-
yield 0
93+
yield {}

src/crawlee/storage_clients/_base/_key_value_store_client.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
from abc import ABC, abstractmethod
44
from typing import TYPE_CHECKING, Any
55

6+
from crawlee.storage_clients.models import KeyValueStoreRecordMetadata
7+
68
if TYPE_CHECKING:
79
from collections.abc import AsyncIterator
810

9-
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
11+
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord
1012

1113

1214
class KeyValueStoreClient(ABC):
@@ -73,10 +75,9 @@ async def iterate_keys(
7375
The backend method for the `KeyValueStore.iterate_keys` call.
7476
"""
7577
# This syntax makes the type checker work properly with an abstract AsyncIterator.
76-
# https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
7778
raise NotImplementedError
7879
if False:
79-
yield 0
80+
yield KeyValueStoreRecordMetadata()
8081

8182
@abstractmethod
8283
async def get_public_url(self, *, key: str) -> str:

src/crawlee/storages/_storage_instance_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ async def open_storage_instance(
167167

168168
metadata = await client.get_metadata()
169169

170-
instance = cls(client, metadata.id, metadata.name) # type: ignore[call-arg]
170+
instance = cls(client, metadata.id, metadata.name) # ty: ignore[too-many-positional-arguments]
171171
instance_name = getattr(instance, 'name', None)
172172

173173
# Cache the instance.

tests/unit/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ def _prepare_test_env() -> None:
7474

7575
# Reset global class variables to ensure test isolation.
7676
KeyValueStore._autosaved_values = {}
77-
Statistics._Statistics__next_id = 0 # type:ignore[attr-defined] # Mangled attribute
78-
BasicCrawler._BasicCrawler__next_id = 0 # type:ignore[attr-defined] # Mangled attribute
77+
Statistics._Statistics__next_id = 0 # ty:ignore[unresolved-attribute] # Mangled attribute
78+
BasicCrawler._BasicCrawler__next_id = 0 # ty:ignore[unresolved-attribute] # Mangled attribute
7979

8080
return _prepare_test_env
8181

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,7 +1167,7 @@ async def test_crawler_multiple_stops_in_parallel() -> None:
11671167
# Set concurrency to 2 to ensure two urls are being visited in parallel.
11681168
crawler = BasicCrawler(concurrency_settings=ConcurrencySettings(desired_concurrency=2, max_concurrency=2))
11691169

1170-
both_handlers_started = asyncio.Barrier(2) # type:ignore[attr-defined] # Test is skipped in older Python versions.
1170+
both_handlers_started = asyncio.Barrier(2) # ty:ignore[unresolved-attribute] # Test is skipped in older Python versions.
11711171
only_one_handler_at_a_time = asyncio.Semaphore(1)
11721172

11731173
@crawler.router.default_handler
@@ -1351,7 +1351,7 @@ async def test_context_use_state_race_condition_in_handlers(key_value_store: Key
13511351
Result should be incremented by 2.
13521352
Method `use_state` must be implemented in a way that prevents race conditions in such a scenario."""
13531353
# Test is skipped in older Python versions.
1354-
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
1354+
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415
13551355

13561356
crawler = BasicCrawler()
13571357
store = await crawler.get_key_value_store()
@@ -1392,7 +1392,7 @@ async def test_timeout_in_handler(sleep_type: str) -> None:
13921392
Crawler should attempt to retry it.
13931393
This test creates a situation where the request handler times out twice; on the third retry it does not time out."""
13941394
# Test is skipped in older Python versions.
1395-
from asyncio import timeout # type:ignore[attr-defined] # noqa: PLC0415
1395+
from asyncio import timeout # ty:ignore[unresolved-import] # noqa: PLC0415
13961396

13971397
non_realtime_system_coefficient = 10
13981398
handler_timeout = timedelta(seconds=1)

tests/unit/storages/test_storage_instance_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ async def test_preexisting_unnamed_storage_open_by_id(storage_type: type[Storage
136136
@pytest.mark.skipif(sys.version_info[:3] < (3, 11), reason='asyncio.Barrier was introduced in Python 3.11.')
137137
async def test_concurrent_open_datasets() -> None:
138138
"""Test that concurrent open datasets with the same name return the same instance."""
139-
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
139+
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415
140140

141141
barrier = Barrier(2)
142142

@@ -161,7 +161,7 @@ async def push_data(data: dict) -> None:
161161
@pytest.mark.skipif(sys.version_info[:3] < (3, 11), reason='asyncio.Barrier was introduced in Python 3.11.')
162162
async def test_concurrent_open_datasets_with_same_name_and_alias() -> None:
163163
"""Test that concurrent open requests for the same storage return the same instance."""
164-
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
164+
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415
165165

166166
valid_kwargs: dict[str, str | None] = {}
167167

0 commit comments

Comments
 (0)