Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/apify/scrapy/_logging_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted']
_ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS

# Mutable state shared with the Scrapy monkey-patch below. `initialize_logging` refreshes
# `level`/`handler` on each call; the patch (installed once) reads them so it always applies the
# latest configuration rather than values captured the first time it ran.
_state: dict[str, Any] = {'level': 'INFO', 'handler': None, 'patched': False}


def _configure_logger(name: str | None, logging_level: str, handler: logging.Handler) -> None:
"""Clear and reconfigure the logger."""
Expand All @@ -23,26 +28,39 @@ def _configure_logger(name: str | None, logging_level: str, handler: logging.Han
logger.propagate = False


def _configure_all_loggers() -> None:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I am missing some context, but can't we skip the module-level dict and do it like this?

@lru_cache
def _configure_all_loggers(handler, level):
    if handler is None:
        return
    for logger_name in [None, *_ALL_LOGGERS]:
        _configure_logger(logger_name, level, handler)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

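Unfortunately, we can't do that - `_configure_all_loggers` needs to run more than once with the same arguments, and `lru_cache` would turn every repeat call with the same handler and level into a no-op.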

The expected flow of the Scrapy Actors is something like this:

  1. We configure logging (our handler + levels).
  2. We do some setup, during which things may already log.
  3. Scrapy's own setup runs and overrides our logging configuration.
  4. We re-apply our configuration to restore our handler and levels.

So the current code, which re-applies the configuration on every call, is the correct approach.

"""Apply the Apify handler and level to the root logger and all defined loggers."""
handler = _state['handler']
if handler is None:
return
for logger_name in [None, *_ALL_LOGGERS]:
_configure_logger(logger_name, _state['level'], handler)


def initialize_logging() -> None:
"""Configure logging for Apify Actors and adjust Scrapy's logging settings."""
# Retrieve Scrapy project settings and determine the logging level.
settings = get_project_settings()
logging_level = settings.get('LOG_LEVEL', 'INFO') # Default to INFO.
_state['level'] = settings.get('LOG_LEVEL', 'INFO') # Default to INFO.

# Create a custom handler with the Apify log formatter.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter(include_logger_name=True))
_state['handler'] = handler

# Configure the root logger and all other defined loggers.
for logger_name in [None, *_ALL_LOGGERS]:
_configure_logger(logger_name, logging_level, handler)
_configure_all_loggers()

# Monkey-patch Scrapy's logging to re-apply our settings whenever it reconfigures logging.
# Install the wrapper at most once, otherwise repeated calls would nest wrappers.
if _state['patched']:
return

# Monkey-patch Scrapy's logging configuration to re-apply our settings.
original_configure_logging = scrapy_logging.configure_logging

def new_configure_logging(*args: Any, **kwargs: Any) -> None:
original_configure_logging(*args, **kwargs)
for logger_name in [None, *_ALL_LOGGERS]:
_configure_logger(logger_name, logging_level, handler)
_configure_all_loggers()

scrapy_logging.configure_logging = new_configure_logging # ty: ignore[invalid-assignment]
_state['patched'] = True
Loading