Skip to content

Commit c13aa95

Browse files
committed
refactor: polish logging across the codebase
Standardize internal modules on getLogger(__name__), rebalance log levels (no-op platform-only calls error->warning), and trim Scrapy trace-debug noise ahead of the next major release.
1 parent e42e8c7 commit c13aa95

14 files changed

Lines changed: 31 additions & 53 deletions

File tree

src/apify/_actor.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,7 +1162,7 @@ async def metamorph(
11621162
custom_after_sleep: How long to sleep for after the metamorph, to wait for the container to be stopped.
11631163
"""
11641164
if not self.is_at_home():
1165-
self.log.error('Actor.metamorph() is only supported when running on the Apify platform.')
1165+
self.log.warning('Actor.metamorph() is only supported when running on the Apify platform.')
11661166
return
11671167

11681168
if custom_after_sleep is None:
@@ -1172,6 +1172,7 @@ async def metamorph(
11721172
if not self.configuration.actor_run_id:
11731173
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
11741174

1175+
self.log.debug(f'Metamorphing the Actor run into target Actor {target_actor_id!r}.')
11751176
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
11761177
target_actor_id=target_actor_id,
11771178
run_input=run_input,
@@ -1198,7 +1199,7 @@ async def reboot(
11981199
custom_after_sleep: How long to sleep for after the reboot, to wait for the container to be stopped.
11991200
"""
12001201
if not self.is_at_home():
1201-
self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
1202+
self.log.warning('Actor.reboot() is only supported when running on the Apify platform.')
12021203
return
12031204

12041205
if self._is_rebooting:
@@ -1243,6 +1244,7 @@ async def safe_dispatch(listener: Any, data: Any) -> None:
12431244
except TimeoutError:
12441245
self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot')
12451246

1247+
self.log.debug('Rebooting the Actor run.')
12461248
await self.apify_client.run(self.configuration.actor_run_id).reboot()
12471249
except BaseException:
12481250
# Reset the flag so that a failed or cancelled reboot can be retried.
@@ -1277,7 +1279,7 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N
12771279
)
12781280

12791281
if not self.is_at_home():
1280-
self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
1282+
self.log.warning('Actor.add_webhook() is only supported when running on the Apify platform.')
12811283
return
12821284

12831285
# If is_at_home() is True, config.actor_run_id is always set
@@ -1454,9 +1456,9 @@ def _get_remaining_time(self) -> timedelta | None:
14541456
return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0))
14551457

14561458
self.log.warning(
1457-
'Using `inherit` argument is only possible when the Actor'
1458-
' is running on the Apify platform and when the timeout for the Actor run is set. '
1459-
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
1459+
'Using the `inherit` argument is only possible when the Actor is running on the Apify platform and '
1460+
'the timeout for the Actor run is set.',
1461+
extra={'is_at_home': self.is_at_home(), 'timeout_at': self.configuration.timeout_at},
14601462
)
14611463
return None
14621464

src/apify/_charging.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from dataclasses import dataclass
66
from datetime import UTC, datetime
77
from decimal import Decimal
8+
from logging import getLogger
89
from typing import TYPE_CHECKING, Literal, Protocol, TypedDict
910

1011
from pydantic import ConfigDict
@@ -19,7 +20,6 @@
1920
from apify_client._models import PricingPerEvent as ClientPricingPerEvent
2021

2122
from apify._utils import ReentrantLock, docs_group, ensure_context
22-
from apify.log import logger
2323
from apify.storages import Dataset
2424

2525
if TYPE_CHECKING:
@@ -29,6 +29,8 @@
2929

3030
from apify._configuration import Configuration
3131

32+
logger = getLogger(__name__)
33+
3234
charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
3335
"""Holds the current `ChargingManager` instance, if any.
3436
@@ -450,6 +452,7 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult:
450452
pass
451453
elif event_name in self._pricing_info:
452454
await self._client.run(self._actor_run_id).charge(event_name, count=charged_count)
455+
logger.debug(f"Charged {charged_count} occurrence(s) of event '{event_name}'.")
453456
elif event_name in self._tier_priced_events:
454457
logger.warning(
455458
f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API."

src/apify/_proxy_configuration.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import re
55
from dataclasses import dataclass, field
6+
from logging import getLogger
67
from re import Pattern
78
from typing import TYPE_CHECKING, Any
89

@@ -22,13 +23,14 @@
2223
from apify._configuration import Configuration
2324
from apify._consts import ApifyEnvVars
2425
from apify._utils import docs_group
25-
from apify.log import logger
2626

2727
if TYPE_CHECKING:
2828
from apify_client import ApifyClientAsync
2929

3030
from apify import Request
3131

32+
logger = getLogger(__name__)
33+
3234
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
3335
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
3436
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)

src/apify/events/_apify_event_manager.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import contextlib
55
import time
6+
from logging import getLogger
67
from typing import TYPE_CHECKING, Annotated, Self
78

89
import websockets.asyncio.client
@@ -17,7 +18,6 @@
1718

1819
from apify._utils import docs_group
1920
from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
20-
from apify.log import logger
2121

2222
if TYPE_CHECKING:
2323
from collections.abc import Generator
@@ -28,6 +28,8 @@
2828
from apify._configuration import Configuration
2929

3030

31+
logger = getLogger(__name__)
32+
3133
event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
3234
Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
3335
)
@@ -195,7 +197,7 @@ async def _handle_platform_message(self, message: str | bytes) -> None:
195197
return
196198

197199
if isinstance(parsed_message, UnknownEvent):
198-
logger.info(
200+
logger.debug(
199201
f'Unknown message received: event_name={parsed_message.name}, event_data={parsed_message.data}'
200202
)
201203
return

src/apify/scrapy/middlewares/apify_proxy.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ async def process_request(self, request: Request) -> None:
7272
Raises:
7373
ValueError: If username and password are not provided in the proxy URL.
7474
"""
75-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}')
7675
url = await self._get_new_proxy_url()
7776

7877
if not (url.username and url.password):
@@ -82,8 +81,6 @@ async def process_request(self, request: Request) -> None:
8281
basic_auth_header = get_basic_auth_header(url.username, url.password)
8382
request.headers[b'Proxy-Authorization'] = basic_auth_header
8483

85-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')
86-
8784
def process_exception(
8885
self,
8986
request: Request,
@@ -100,10 +97,6 @@ def process_exception(
10097
process_exception() methods of installed middleware, until no middleware is left and the default
10198
exception handling kicks in.
10299
"""
103-
Actor.log.debug(
104-
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}',
105-
)
106-
107100
if isinstance(exception, TunnelError):
108101
Actor.log.warning(
109102
f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '

src/apify/scrapy/pipelines/actor_dataset_push.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from logging import getLogger
43
from typing import TYPE_CHECKING
54

65
from itemadapter.adapter import ItemAdapter
@@ -10,8 +9,6 @@
109
if TYPE_CHECKING:
1110
from scrapy import Item
1211

13-
logger = getLogger(__name__)
14-
1512

1613
class ActorDatasetPushPipeline:
1714
"""A Scrapy pipeline for pushing items to an Actor's default dataset.
@@ -25,6 +22,5 @@ async def process_item(
2522
) -> Item:
2623
"""Pushes the provided Scrapy item to the Actor's default dataset."""
2724
item_dict = ItemAdapter(item).asdict()
28-
logger.debug(f'Pushing item={item_dict} to the dataset.')
2925
await Actor.push_data(item_dict)
3026
return item

src/apify/scrapy/requests.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
6161
logger.warning('Failed to convert to Apify request: Scrapy request must be a ScrapyRequest instance.')
6262
return None
6363

64-
logger.debug(f'to_apify_request was called (scrapy_request={scrapy_request})...')
65-
6664
# Configuration to behave as similarly as possible to Scrapy's default RFPDupeFilter.
6765
#
6866
# The body is stored twice on purpose: as `payload` (used for the extended unique key) and inside the serialized
@@ -150,7 +148,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
150148
# a second base64 layer would only add ~33% overhead on the enqueue path.
151149
apify_request.user_data['scrapy_request'] = scrapy_request_json
152150

153-
logger.debug(f'scrapy_request was converted to the apify_request={apify_request}')
154151
return apify_request
155152

156153

@@ -171,14 +168,10 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
171168
if not isinstance(cast('Any', apify_request), ApifyRequest):
172169
raise TypeError('apify_request must be an apify.Request instance')
173170

174-
logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...')
175-
176171
# If the apify_request comes from the Scrapy
177172
if 'scrapy_request' in apify_request.user_data:
178173
# Deserialize the Scrapy ScrapyRequest from the apify_request by parsing the stored JSON and reconstructing
179174
# the Scrapy ScrapyRequest object from its dictionary representation.
180-
logger.debug('Restoring the Scrapy ScrapyRequest from the apify_request...')
181-
182175
scrapy_request_json = apify_request.user_data['scrapy_request']
183176
if not isinstance(scrapy_request_json, str):
184177
raise TypeError('the stored scrapy_request must be a string')
@@ -194,8 +187,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
194187
if not isinstance(scrapy_request, ScrapyRequest):
195188
raise TypeError('scrapy_request must be an instance of the ScrapyRequest class')
196189

197-
logger.debug(f'Scrapy ScrapyRequest successfully reconstructed (scrapy_request={scrapy_request})...')
198-
199190
# Update the meta field with the meta field from the apify_request
200191
meta = scrapy_request.meta or {}
201192
meta.update({'apify_request_unique_key': apify_request.unique_key})
@@ -204,8 +195,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
204195

205196
# If the apify_request comes directly from the Scrapy, typically start URLs.
206197
else:
207-
logger.debug('Gonna create a new Scrapy ScrapyRequest (cannot be restored)')
208-
209198
scrapy_request = ScrapyRequest(
210199
url=apify_request.url,
211200
method=apify_request.method,
@@ -222,5 +211,4 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
222211
if apify_request.user_data:
223212
scrapy_request.meta['userData'] = apify_request.user_data
224213

225-
logger.debug(f'an apify_request was converted to the scrapy_request={scrapy_request}')
226214
return scrapy_request

src/apify/scrapy/scheduler.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -134,17 +134,14 @@ def enqueue_request(self, request: Request) -> bool:
134134
Returns:
135135
True if the request was successfully enqueued, False otherwise.
136136
"""
137-
logger.debug(f'ApifyScheduler.enqueue_request was called (scrapy_request={request})...')
138-
139137
if not isinstance(self.spider, Spider):
140138
raise TypeError('self.spider must be an instance of the Spider class')
141139

142140
apify_request = to_apify_request(request, spider=self.spider)
143141
if apify_request is None:
144-
logger.error(f'Request {request} could not be converted to Apify request.')
142+
logger.warning(f'Request {request} could not be converted to Apify request; skipping it.')
145143
return False
146144

147-
logger.debug(f'Converted to apify_request: {apify_request}')
148145
if not isinstance(self._rq, RequestQueue):
149146
raise TypeError('self._rq must be an instance of the RequestQueue class')
150147

@@ -156,7 +153,6 @@ def enqueue_request(self, request: Request) -> bool:
156153
logger.exception('Failed to enqueue the request to the request queue.')
157154
raise
158155

159-
logger.debug(f'rq.add_request result: {result}')
160156
return not bool(result.was_already_present)
161157

162158
def next_request(self) -> Request | None:
@@ -165,7 +161,6 @@ def next_request(self) -> Request | None:
165161
Returns:
166162
The next request, or None if there are no more requests.
167163
"""
168-
logger.debug('next_request called...')
169164
if not isinstance(self._rq, RequestQueue):
170165
raise TypeError('self._rq must be an instance of the RequestQueue class')
171166

@@ -177,7 +172,6 @@ def next_request(self) -> Request | None:
177172
logger.exception('Failed to fetch the next request from the request queue.')
178173
raise
179174

180-
logger.debug(f'Fetched apify_request: {apify_request}')
181175
if apify_request is None:
182176
return None
183177

@@ -206,5 +200,4 @@ def next_request(self) -> Request | None:
206200
if scrapy_request is None:
207201
return None
208202

209-
logger.debug(f'Converted to scrapy_request: {scrapy_request}')
210203
return scrapy_request

src/apify/storage_clients/_apify/_alias_resolving.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import logging
43
from asyncio import Lock
54
from functools import cached_property
65
from logging import getLogger
@@ -225,7 +224,7 @@ async def store_mapping(self, storage_id: str) -> None:
225224
alias_map[self._storage_key] = storage_id
226225

227226
if not self._configuration.is_at_home:
228-
logging.getLogger(__name__).debug(
227+
logger.debug(
229228
'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
230229
)
231230
return

src/apify/storage_clients/_file_system/_key_value_store_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ async def _create_missing_metadata_for_input_file(key: str, record_path: Path) -
122122
try:
123123
content = await asyncio.to_thread(record_path.read_bytes)
124124
except FileNotFoundError:
125-
logger.warning(f'Input file disparaged on path: "{record_path}"')
125+
logger.warning(f'Input file disappeared from path: "{record_path}"')
126126
return
127127

128128
# Figure out the metadata from the file content

0 commit comments

Comments
 (0)