Skip to content

Commit f949c24

Browse files
authored
refactor!: polish logging across the codebase (#1005)
### Description Logging polish across the whole codebase: - Fix typos & grammar - Unify logger usage - Remove useless debug logs - Rebalance a few log levels ### Breaking changes The typed public API is unchanged, but log output changes in a few minor ways: - `error` -> `warning` for the local no-op `metamorph()` / `reboot()` / `add_webhook()` messages and the Scrapy scheduler skip. - `info` -> `debug` for the "unknown platform event" message. - Internal logger names changed from `apify` to `apify.<module>` for the charging, proxy, and events modules.
1 parent 838ceef commit f949c24

15 files changed

Lines changed: 45 additions & 54 deletions

File tree

docs/04_upgrading/upgrading_to_v4.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,17 @@ yield scrapy.Request(url, meta={'since': datetime(2024, 1, 1)})
244244

245245
# After (v4): store a JSON-serializable value.
246246
yield scrapy.Request(url, meta={'since': datetime(2024, 1, 1).isoformat()})
247-
```Pickle
247+
```
248+
249+
## Logging changes
250+
251+
The typed public API is unchanged, so your code keeps working without edits. Only log output changes. Adjust your setup if you parse logs, route them by logger name, or assert on log records, for example in tests.
252+
253+
### Rebalanced log levels
254+
255+
- The local no-op messages from `Actor.metamorph()`, `Actor.reboot()`, and `Actor.add_webhook()`, logged when the Actor runs outside the Apify platform, are now `warning` instead of `error`. The Scrapy scheduler message for a request that can't be converted to an Apify request changed the same way.
256+
- The "unknown platform event" message is now `debug` instead of `info`.
257+
258+
### Module-qualified logger names
259+
260+
The charging, proxy, and events modules now log under their own child loggers (`apify._charging`, `apify._proxy_configuration`, and `apify.events._apify_event_manager`) instead of the generic `apify` logger. These child loggers still propagate to `apify`, so a handler attached to `apify` keeps receiving their records. Update only configuration that targets the exact `apify` logger name for these messages.

src/apify/_actor.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,7 +1162,7 @@ async def metamorph(
11621162
custom_after_sleep: How long to sleep for after the metamorph, to wait for the container to be stopped.
11631163
"""
11641164
if not self.is_at_home():
1165-
self.log.error('Actor.metamorph() is only supported when running on the Apify platform.')
1165+
self.log.warning('Actor.metamorph() is only supported when running on the Apify platform.')
11661166
return
11671167

11681168
if custom_after_sleep is None:
@@ -1172,6 +1172,7 @@ async def metamorph(
11721172
if not self.configuration.actor_run_id:
11731173
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
11741174

1175+
self.log.debug(f'Metamorphing the Actor run into target Actor {target_actor_id!r}.')
11751176
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
11761177
target_actor_id=target_actor_id,
11771178
run_input=run_input,
@@ -1198,7 +1199,7 @@ async def reboot(
11981199
custom_after_sleep: How long to sleep for after the reboot, to wait for the container to be stopped.
11991200
"""
12001201
if not self.is_at_home():
1201-
self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
1202+
self.log.warning('Actor.reboot() is only supported when running on the Apify platform.')
12021203
return
12031204

12041205
if self._is_rebooting:
@@ -1243,6 +1244,7 @@ async def safe_dispatch(listener: Any, data: Any) -> None:
12431244
except TimeoutError:
12441245
self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot')
12451246

1247+
self.log.debug('Rebooting the Actor run.')
12461248
await self.apify_client.run(self.configuration.actor_run_id).reboot()
12471249
except BaseException:
12481250
# Reset the flag so that a failed or cancelled reboot can be retried.
@@ -1277,7 +1279,7 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N
12771279
)
12781280

12791281
if not self.is_at_home():
1280-
self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
1282+
self.log.warning('Actor.add_webhook() is only supported when running on the Apify platform.')
12811283
return
12821284

12831285
# If is_at_home() is True, config.actor_run_id is always set
@@ -1454,9 +1456,9 @@ def _get_remaining_time(self) -> timedelta | None:
14541456
return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0))
14551457

14561458
self.log.warning(
1457-
'Using `inherit` argument is only possible when the Actor'
1458-
' is running on the Apify platform and when the timeout for the Actor run is set. '
1459-
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
1459+
'Using the `inherit` argument is only possible when the Actor is running on the Apify platform and '
1460+
'the timeout for the Actor run is set.',
1461+
extra={'is_at_home': self.is_at_home(), 'timeout_at': self.configuration.timeout_at},
14601462
)
14611463
return None
14621464

src/apify/_charging.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from dataclasses import dataclass
66
from datetime import UTC, datetime
77
from decimal import Decimal
8+
from logging import getLogger
89
from typing import TYPE_CHECKING, Literal, Protocol, TypedDict
910

1011
from pydantic import ConfigDict
@@ -19,7 +20,6 @@
1920
from apify_client._models import PricingPerEvent as ClientPricingPerEvent
2021

2122
from apify._utils import ReentrantLock, docs_group, ensure_context
22-
from apify.log import logger
2323
from apify.storages import Dataset
2424

2525
if TYPE_CHECKING:
@@ -29,6 +29,8 @@
2929

3030
from apify._configuration import Configuration
3131

32+
logger = getLogger(__name__)
33+
3234
charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
3335
"""Holds the current `ChargingManager` instance, if any.
3436
@@ -450,6 +452,7 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult:
450452
pass
451453
elif event_name in self._pricing_info:
452454
await self._client.run(self._actor_run_id).charge(event_name, count=charged_count)
455+
logger.debug(f"Charged {charged_count} occurrence(s) of event '{event_name}'.")
453456
elif event_name in self._tier_priced_events:
454457
logger.warning(
455458
f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API."

src/apify/_proxy_configuration.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import re
55
from dataclasses import dataclass, field
6+
from logging import getLogger
67
from re import Pattern
78
from typing import TYPE_CHECKING, Any
89

@@ -22,13 +23,14 @@
2223
from apify._configuration import Configuration
2324
from apify._consts import ApifyEnvVars
2425
from apify._utils import docs_group
25-
from apify.log import logger
2626

2727
if TYPE_CHECKING:
2828
from apify_client import ApifyClientAsync
2929

3030
from apify import Request
3131

32+
logger = getLogger(__name__)
33+
3234
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
3335
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
3436
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)

src/apify/events/_apify_event_manager.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import contextlib
55
import time
6+
from logging import getLogger
67
from typing import TYPE_CHECKING, Annotated, Self
78

89
import websockets.asyncio.client
@@ -17,7 +18,6 @@
1718

1819
from apify._utils import docs_group
1920
from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
20-
from apify.log import logger
2121

2222
if TYPE_CHECKING:
2323
from collections.abc import Generator
@@ -28,6 +28,8 @@
2828
from apify._configuration import Configuration
2929

3030

31+
logger = getLogger(__name__)
32+
3133
event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
3234
Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
3335
)
@@ -195,7 +197,7 @@ async def _handle_platform_message(self, message: str | bytes) -> None:
195197
return
196198

197199
if isinstance(parsed_message, UnknownEvent):
198-
logger.info(
200+
logger.debug(
199201
f'Unknown message received: event_name={parsed_message.name}, event_data={parsed_message.data}'
200202
)
201203
return

src/apify/scrapy/middlewares/apify_proxy.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ async def process_request(self, request: Request) -> None:
7272
Raises:
7373
ValueError: If username and password are not provided in the proxy URL.
7474
"""
75-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}')
7675
url = await self._get_new_proxy_url()
7776

7877
if not (url.username and url.password):
@@ -82,8 +81,6 @@ async def process_request(self, request: Request) -> None:
8281
basic_auth_header = get_basic_auth_header(url.username, url.password)
8382
request.headers[b'Proxy-Authorization'] = basic_auth_header
8483

85-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')
86-
8784
def process_exception(
8885
self,
8986
request: Request,
@@ -100,10 +97,6 @@ def process_exception(
10097
process_exception() methods of installed middleware, until no middleware is left and the default
10198
exception handling kicks in.
10299
"""
103-
Actor.log.debug(
104-
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}',
105-
)
106-
107100
if isinstance(exception, TunnelError):
108101
Actor.log.warning(
109102
f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '

src/apify/scrapy/pipelines/actor_dataset_push.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from logging import getLogger
43
from typing import TYPE_CHECKING
54

65
from itemadapter.adapter import ItemAdapter
@@ -10,8 +9,6 @@
109
if TYPE_CHECKING:
1110
from scrapy import Item
1211

13-
logger = getLogger(__name__)
14-
1512

1613
class ActorDatasetPushPipeline:
1714
"""A Scrapy pipeline for pushing items to an Actor's default dataset.
@@ -25,6 +22,5 @@ async def process_item(
2522
) -> Item:
2623
"""Pushes the provided Scrapy item to the Actor's default dataset."""
2724
item_dict = ItemAdapter(item).asdict()
28-
logger.debug(f'Pushing item={item_dict} to the dataset.')
2925
await Actor.push_data(item_dict)
3026
return item

src/apify/scrapy/requests.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
6161
logger.warning('Failed to convert to Apify request: Scrapy request must be a ScrapyRequest instance.')
6262
return None
6363

64-
logger.debug(f'to_apify_request was called (scrapy_request={scrapy_request})...')
65-
6664
# Configuration to behave as similarly as possible to Scrapy's default RFPDupeFilter.
6765
#
6866
# The body is stored twice on purpose: as `payload` (used for the extended unique key) and inside the serialized
@@ -150,7 +148,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
150148
# a second base64 layer would only add ~33% overhead on the enqueue path.
151149
apify_request.user_data['scrapy_request'] = scrapy_request_json
152150

153-
logger.debug(f'scrapy_request was converted to the apify_request={apify_request}')
154151
return apify_request
155152

156153

@@ -171,14 +168,10 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
171168
if not isinstance(cast('Any', apify_request), ApifyRequest):
172169
raise TypeError('apify_request must be an apify.Request instance')
173170

174-
logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...')
175-
176171
# If the apify_request comes from the Scrapy
177172
if 'scrapy_request' in apify_request.user_data:
178173
# Deserialize the Scrapy ScrapyRequest from the apify_request by parsing the stored JSON and reconstructing
179174
# the Scrapy ScrapyRequest object from its dictionary representation.
180-
logger.debug('Restoring the Scrapy ScrapyRequest from the apify_request...')
181-
182175
scrapy_request_json = apify_request.user_data['scrapy_request']
183176
if not isinstance(scrapy_request_json, str):
184177
raise TypeError('the stored scrapy_request must be a string')
@@ -194,8 +187,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
194187
if not isinstance(scrapy_request, ScrapyRequest):
195188
raise TypeError('scrapy_request must be an instance of the ScrapyRequest class')
196189

197-
logger.debug(f'Scrapy ScrapyRequest successfully reconstructed (scrapy_request={scrapy_request})...')
198-
199190
# Update the meta field with the meta field from the apify_request
200191
meta = scrapy_request.meta or {}
201192
meta.update({'apify_request_unique_key': apify_request.unique_key})
@@ -204,8 +195,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
204195

205196
# If the apify_request comes directly from the Scrapy, typically start URLs.
206197
else:
207-
logger.debug('Gonna create a new Scrapy ScrapyRequest (cannot be restored)')
208-
209198
scrapy_request = ScrapyRequest(
210199
url=apify_request.url,
211200
method=apify_request.method,
@@ -222,5 +211,4 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
222211
if apify_request.user_data:
223212
scrapy_request.meta['userData'] = apify_request.user_data
224213

225-
logger.debug(f'an apify_request was converted to the scrapy_request={scrapy_request}')
226214
return scrapy_request

src/apify/scrapy/scheduler.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -134,17 +134,14 @@ def enqueue_request(self, request: Request) -> bool:
134134
Returns:
135135
True if the request was successfully enqueued, False otherwise.
136136
"""
137-
logger.debug(f'ApifyScheduler.enqueue_request was called (scrapy_request={request})...')
138-
139137
if not isinstance(self.spider, Spider):
140138
raise TypeError('self.spider must be an instance of the Spider class')
141139

142140
apify_request = to_apify_request(request, spider=self.spider)
143141
if apify_request is None:
144-
logger.error(f'Request {request} could not be converted to Apify request.')
142+
logger.warning(f'Request {request} could not be converted to Apify request; skipping it.')
145143
return False
146144

147-
logger.debug(f'Converted to apify_request: {apify_request}')
148145
if not isinstance(self._rq, RequestQueue):
149146
raise TypeError('self._rq must be an instance of the RequestQueue class')
150147

@@ -156,7 +153,6 @@ def enqueue_request(self, request: Request) -> bool:
156153
logger.exception('Failed to enqueue the request to the request queue.')
157154
raise
158155

159-
logger.debug(f'rq.add_request result: {result}')
160156
return not bool(result.was_already_present)
161157

162158
def next_request(self) -> Request | None:
@@ -165,7 +161,6 @@ def next_request(self) -> Request | None:
165161
Returns:
166162
The next request, or None if there are no more requests.
167163
"""
168-
logger.debug('next_request called...')
169164
if not isinstance(self._rq, RequestQueue):
170165
raise TypeError('self._rq must be an instance of the RequestQueue class')
171166

@@ -177,7 +172,6 @@ def next_request(self) -> Request | None:
177172
logger.exception('Failed to fetch the next request from the request queue.')
178173
raise
179174

180-
logger.debug(f'Fetched apify_request: {apify_request}')
181175
if apify_request is None:
182176
return None
183177

@@ -206,5 +200,4 @@ def next_request(self) -> Request | None:
206200
if scrapy_request is None:
207201
return None
208202

209-
logger.debug(f'Converted to scrapy_request: {scrapy_request}')
210203
return scrapy_request

src/apify/storage_clients/_apify/_alias_resolving.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import logging
43
from asyncio import Lock
54
from functools import cached_property
65
from logging import getLogger
@@ -225,7 +224,7 @@ async def store_mapping(self, storage_id: str) -> None:
225224
alias_map[self._storage_key] = storage_id
226225

227226
if not self._configuration.is_at_home:
228-
logging.getLogger(__name__).debug(
227+
logger.debug(
229228
'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
230229
)
231230
return

0 commit comments

Comments
 (0)