Skip to content

Commit c2a2290

Browse files
committed
launch global event_manager with local in crawler
1 parent 0ca0895 commit c2a2290

2 files changed

Lines changed: 40 additions & 4 deletions

File tree

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -768,14 +768,18 @@ def sigint_handler() -> None:
768768
return final_statistics
769769

770770
async def _run_crawler(self) -> None:
771-
event_manager = self._service_locator.get_event_manager()
771+
local_event_manager = self._service_locator.get_event_manager()
772+
global_event_manager = service_locator.get_event_manager()
773+
if local_event_manager is global_event_manager:
774+
local_event_manager = None # Avoid entering the same event manager context twice
772775

773776
# Collect the context managers to be entered. Context managers that are already active are excluded,
774777
# as they were likely entered by the caller, who will also be responsible for exiting them.
775778
contexts_to_enter = [
776779
cm
777780
for cm in (
778-
event_manager,
781+
global_event_manager,
782+
local_event_manager,
779783
self._snapshotter,
780784
self._statistics,
781785
self._session_pool if self._use_session_pool else None,

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@
2626
from crawlee.configuration import Configuration
2727
from crawlee.crawlers import BasicCrawler
2828
from crawlee.errors import RequestCollisionError, SessionError, UserDefinedErrorHandlerError
29-
from crawlee.events import Event, EventCrawlerStatusData
30-
from crawlee.events._local_event_manager import LocalEventManager
29+
from crawlee.events import Event, EventCrawlerStatusData, LocalEventManager
3130
from crawlee.request_loaders import RequestList, RequestManagerTandem
3231
from crawlee.sessions import Session, SessionPool
3332
from crawlee.statistics import FinalStatistics
@@ -2118,3 +2117,36 @@ async def handler_2(context: BasicCrawlingContext) -> None:
21182117

21192118
await rq1.drop()
21202119
await rq2.drop()
2120+
2121+
2122+
async def test_globa_and_local_event_manager_in_crawler_run() -> None:
2123+
"""Test that both the global and the local event manager are active during a crawler run and inactive afterwards."""
2124+
2125+
config = service_locator.get_configuration()
2126+
2127+
local_event_manager = LocalEventManager.from_config(config)
2128+
2129+
crawler = BasicCrawler(event_manager=local_event_manager)
2130+
2131+
handler_call = AsyncMock()
2132+
2133+
@crawler.router.default_handler
2134+
async def handler(context: BasicCrawlingContext) -> None:
2135+
global_event_manager = service_locator.get_event_manager()
2136+
handler_call(local_event_manager.active, global_event_manager.active)
2137+
2138+
await crawler.run(['https://a.placeholder.com'])
2139+
2140+
assert handler_call.call_count == 1
2141+
2142+
local_em_state, global_em_state = handler_call.call_args_list[0][0]
2143+
2144+
# Both event managers should be active.
2145+
assert local_em_state is True
2146+
assert global_em_state is True
2147+
2148+
global_event_manager = service_locator.get_event_manager()
2149+
2150+
# After the crawler has finished, both event managers should be inactive.
2151+
assert local_event_manager.active is False
2152+
assert global_event_manager.active is False

0 commit comments

Comments
 (0)