From b6e5610b4e4e645aa19b66815e7e19fcd0acca27 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 20:19:56 +0100 Subject: [PATCH 1/9] feat: configure logging in settings and migrate load_tree to logging --- catalog/management/commands/load_tree.py | 17 +++++++------ www/settings.py | 31 ++++++++++++++++++++---- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/catalog/management/commands/load_tree.py b/catalog/management/commands/load_tree.py index 23393b04..dd168923 100644 --- a/catalog/management/commands/load_tree.py +++ b/catalog/management/commands/load_tree.py @@ -1,6 +1,7 @@ from typing import Any import json +import logging from django.core.management.base import BaseCommand from django.db import transaction @@ -9,6 +10,8 @@ from catalog.models import Category +logger = logging.getLogger(__name__) + STOP = {"d'", "de", "du", "et", "l'", "la", "le", "les"} @@ -51,10 +54,10 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912 with transaction.atomic(): if Category.objects.filter(slug="archives").first(): - print("We already have archives, deleting the non-archives") + logger.info("We already have archives, deleting the non-archives") Category.objects.filter(is_archive=False).delete() else: - print("Archiving old categories") + logger.info("Archiving old categories") old_ulb = Category.objects.filter(slug="ULB").get() old_ulb.name = "Archives" old_ulb.slug = "archives" @@ -63,13 +66,13 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912 Category.objects.all().update(is_archive=True) - print("Fixing old faculties") + logger.info("Fixing old faculties") for child in old_ulb.children.all(): child.type = Category.CategoryType.FACULTY child.save() # Level 0 - print("Creating level 0") + logger.info("Creating level 0") ULB = Category.objects.create( name="Université Libre de Bruxelles", slug="ULB", @@ -84,7 +87,7 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912 ) # Level 1 - 
print("Creating level 1") + logger.info("Creating level 1") for name, _color in level1.items(): slug = ( name.removeprefix("Faculté de ") @@ -101,7 +104,7 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912 c.parents.add(ULB) # Programs - print("Adding all programs") + logger.info("Adding all programs") for program in programs: if "bachelier" in program["name"].lower() or program["slug"].startswith( "BA" @@ -132,4 +135,4 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912 parent = Category.objects.get(name=faculty["name"]) c.parents.add(parent) - print("Done") + logger.info("Done") diff --git a/www/settings.py b/www/settings.py index 23b3ec16..467a9e4a 100644 --- a/www/settings.py +++ b/www/settings.py @@ -91,7 +91,7 @@ ] DATABASES = { - "default": env.db_url("DB_URL", default=f'sqlite:///{BASE_DIR / "db.sqlite"}') + "default": env.db_url("DB_URL", default=f"sqlite:///{BASE_DIR / 'db.sqlite'}") } # Password validation @@ -219,9 +219,30 @@ except ImportError: pass +LOGGING = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "simple": { + "format": "{levelname} {message}", + "style": "{", + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "simple", + }, + }, + "root": { + "handlers": ["console"], + "level": "INFO", + }, +} if DEBUG: - print( - "Warning: you are running Dochub with DEBUG=True. This is dangerous if your server is publicly accessible." 
- ) - print("You should set DEBUG=False in production.\n\n") + import logging + + logger = logging.getLogger(__name__) + logger.warning("Warning: you are running Dochub with DEBUG=True...") + logger.warning("You should set DEBUG=False in production.") From ffeb8f6a98542931652271dfd28ea2a0d6ae2d5b Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 20:43:27 +0100 Subject: [PATCH 2/9] feat: migrate load_courses command to logging --- catalog/management/commands/load_courses.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/catalog/management/commands/load_courses.py b/catalog/management/commands/load_courses.py index e7ef80c9..730157f0 100644 --- a/catalog/management/commands/load_courses.py +++ b/catalog/management/commands/load_courses.py @@ -1,4 +1,5 @@ import json +import logging from django.core.management.base import BaseCommand from django.db import transaction @@ -6,6 +7,8 @@ from catalog.models import Category, Course from catalog.slug import normalize_slug +logger = logging.getLogger(__name__) + def get_category(slug, name=None, parent=None, type=None): cat, created = Category.objects.get_or_create( @@ -22,13 +25,13 @@ def handle(self, *args, **options): programs = json.load(f) with transaction.atomic(): - print("Temporarily set all courses as archived") + logger.info("Temporarily set all courses as archived") for course in Course.objects.all(): course.is_archive = True course.save() for program_slug, courses in programs.items(): - print(f"Inserting {len(courses)} courses from {program_slug}") + logger.info("Inserting %s courses from %s", len(courses), program_slug) category = get_category(program_slug) for course in courses.values(): bloc = course["bloc"] From 09d0aab4dd82a1218d9f5ccd502e17f6bfa879c3 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:07:58 +0100 Subject: [PATCH 3/9] feat : migrate clean_archives command to logging --- catalog/management/commands/clean_archives.py | 8 ++++++-- 1 file 
changed, 6 insertions(+), 2 deletions(-) diff --git a/catalog/management/commands/clean_archives.py b/catalog/management/commands/clean_archives.py index 4dedecc0..8993de96 100644 --- a/catalog/management/commands/clean_archives.py +++ b/catalog/management/commands/clean_archives.py @@ -1,18 +1,22 @@ from typing import Any +import logging + from django.core.management.base import BaseCommand from django.db.models import Count from catalog.models import Course +logger = logging.getLogger(__name__) + class Command(BaseCommand): def handle(self, *args: Any, **options: Any) -> None: - print("Cleaning archives") + logger.info("Cleaning archives") empty_archived_courses = ( Course.objects.filter(is_archive=True) .annotate(num_doc=Count("document")) .filter(num_doc=0) ) - print("Deleting %s empty courses" % len(empty_archived_courses)) + logger.info("Deleting %s empty courses", len(empty_archived_courses)) empty_archived_courses.delete() From ce60ea03049b8eac73b2a44078aea64121572235 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:10:31 +0100 Subject: [PATCH 4/9] feat : migrate find_orphans to logging --- catalog/management/commands/find_orphans.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/catalog/management/commands/find_orphans.py b/catalog/management/commands/find_orphans.py index 7dd22a0a..2e18b663 100644 --- a/catalog/management/commands/find_orphans.py +++ b/catalog/management/commands/find_orphans.py @@ -1,5 +1,6 @@ import csv import json +import logging from django.core.management.base import BaseCommand from django.db.models import Count @@ -7,6 +8,8 @@ from catalog.models import Course from catalog.slug import normalize_slug +logger = logging.getLogger(__name__) + class Command(BaseCommand): def handle(self, *args, **options): @@ -26,10 +29,11 @@ def handle(self, *args, **options): empty_orphans = orphans.filter(num_docs=0) orphans_to_fix = orphans.exclude(num_docs=0) - print( - f"{empty_orphans.count()} empty 
orphans and {orphans_to_fix.count()} orphans with documents" + logger.info( + "%s empty orphans and %s orphans with documents", + empty_orphans.count(), + orphans_to_fix.count(), ) - with open("csv/orphans.csv", "w") as fd: writer = csv.writer(fd) for course in orphans_to_fix: From 1cbf921d3398ff3b331dff6b95ffe3eac0e6b403 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:33:46 +0100 Subject: [PATCH 5/9] feat : migrate crawl_uv to logging --- catalog/management/commands/crawl_uv.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/catalog/management/commands/crawl_uv.py b/catalog/management/commands/crawl_uv.py index a8e7dad3..c9807163 100644 --- a/catalog/management/commands/crawl_uv.py +++ b/catalog/management/commands/crawl_uv.py @@ -1,10 +1,13 @@ import csv +import logging from django.core.management.base import BaseCommand import requests from bs4 import BeautifulSoup +logger = logging.getLogger(__name__) + class Command(BaseCommand): def handle(self, *args, **options): @@ -17,9 +20,9 @@ def handle(self, *args, **options): courses = [] fails = [] - print(f"Found {len(options)} options") + logger.info("Found %s options", len(options)) # debug car verbeux for option in options: - print(f"..{option.text}") + logger.debug("..%s", option.text) value = option["value"] response = requests.get( f"https://uv.ulb.ac.be/course/index.php?categoryid={value}&browse=courses&perpage=1000&page=0" @@ -27,7 +30,7 @@ def handle(self, *args, **options): soup = BeautifulSoup(response.content, "html.parser") course_divs = soup.find_all("div", {"class": "coursebox"}) - print(f"Found {len(courses)} in {option.text}") + logger.info("Found %s in %s", len(courses), option.text) for course in course_divs: try: @@ -37,7 +40,7 @@ def handle(self, *args, **options): except: # noqa fails.append(course.text) - print(f"Found {len(courses)} and failed to parse {len(fails)}") + logger.info("Found %s and failed to parse %s", len(courses), len(fails)) with 
open("csv/uv_courses.csv", "w") as fd: writer = csv.writer(fd) for course in courses: From c4aef32fa4ee87ca68ffc9fd60454d5669a80fb1 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:35:14 +0100 Subject: [PATCH 6/9] feat : migrate load_courses.py to use logging --- catalog/management/parser/load_courses.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/catalog/management/parser/load_courses.py b/catalog/management/parser/load_courses.py index c045f665..80e9844d 100644 --- a/catalog/management/parser/load_courses.py +++ b/catalog/management/parser/load_courses.py @@ -1,30 +1,33 @@ import json +import logging from django.utils.text import slugify from catalog.models import Category, Course +logger = logging.getLogger(__name__) + with open("catalog/management/parser/data/tree.json") as tree_file: tree = json.load(tree_file) a = 0 ulb, _created = Category.objects.get_or_create(name="ULB", slug="root") for fac_name, fac_info in tree["ULB"].items(): - print(fac_name) + logger.info("Processing faculty: %s", fac_name) fac_obj, _created = Category.objects.get_or_create( name=fac_name, slug=slugify(fac_name), description=fac_info["color"] ) fac_obj.parents.add(ulb) for program_slug, program_info in fac_info["programs"].items(): - print(" ", program_info["name"]) + logger.info(" Program: %s", program_info["name"]) program_obj, _created = Category.objects.get_or_create( name=program_info["name"], slug=program_slug ) program_obj.parents.add(fac_obj) for bloc_name, bloc_info in program_info["blocs"].items(): - print(" ", bloc_name) + logger.info(" Bloc: %s", bloc_name) bloc_obj, _created = Category.objects.get_or_create( name=f"Bloc {bloc_name}", slug=f"{program_slug}-bloc-{bloc_name}", @@ -33,7 +36,8 @@ for course_mnemo, course_info in bloc_info["courses"].items(): a += 1 - print(str(a).zfill(4), course_info["title"]) + logger.info(" [%s] %s", str(a).zfill(4), course_info["title"]) + course_obj, _created = 
Course.objects.get_or_create(slug=course_mnemo) if _created: course_obj.name = course_info["title"] From 54fa8746b042a7263914391422ed222b066b79b0 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:36:27 +0100 Subject: [PATCH 7/9] "feat: migrate remaining catalog commands to logging" --- .../commands/download_program_contents.py | 55 ++++++++++--------- .../management/commands/download_programs.py | 37 +++++++------ www/settings.py | 11 ++-- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/catalog/management/commands/download_program_contents.py b/catalog/management/commands/download_program_contents.py index e59533c9..70f7055c 100644 --- a/catalog/management/commands/download_program_contents.py +++ b/catalog/management/commands/download_program_contents.py @@ -1,76 +1,76 @@ from typing import Any import json +import logging from urllib.parse import quote from django.core.management import BaseCommand import requests -from rich import print from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn +logger = logging.getLogger(__name__) + class Command(BaseCommand): - help = "" + help = "Download course contents for all programs from ULB API" def handle(self, *args: Any, **options: Any) -> None: with open("csv/programs.json") as f: programs: list[dict] = json.load(f) - print("\n[bold blue]Listing the course content of all programs...[/]\n") + logger.info("Listing the course content of all programs...") failed: list = [] program_content: dict[str, dict[str, dict]] = {} - - # programs = [p for p in programs if p["slug"] in ["BA-GEOG"]] - with Progress( SpinnerColumn(), *Progress.get_default_columns(), MofNCompleteColumn(), ) as progress: - task1 = progress.add_task( - "Listing the course content of all programs...", total=len(programs) - ) + task1 = progress.add_task("Processing programs...", total=len(programs)) for progam in programs: + slug_upper = progam["slug"].upper() progress.update( task1, advance=1, - 
description=f"Listing the course content of {progam['slug'].upper()}...", + description=f"Listing content of {slug_upper}...", ) + if "parent" in progam: - qs = f"/ksup/programme?gen=prod&anet={progam['parent'].upper()}&option={progam['slug'].upper()}&lang=fr" + qs = f"/ksup/programme?gen=prod&anet={progam['parent'].upper()}&option={slug_upper}&lang=fr" else: - qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr" + qs = f"/ksup/programme?gen=prod&anet={slug_upper}&lang=fr" URL = f"https://www.ulb.be/api/formation?path={quote(qs)}" try: response = requests.get(URL) if not response.ok: if "parent" in progam: - print( - f"[yellow]Skip:[/] [magenta]{progam['slug'].upper()}[/] with bogus parent {progam['parent'].upper()}" + # Utilisation de logger.warning pour les sauts/retries + logger.warning( + "Skip: %s with bogus parent %s. Retrying...", + slug_upper, + progam["parent"].upper(), ) - print("Retry") - qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr" + qs = f"/ksup/programme?gen=prod&anet={slug_upper}&lang=fr" URL = f"https://www.ulb.be/api/formation?path={quote(qs)}" response = requests.get(URL) if not response.ok: - print("Retry failed") + logger.error("Retry failed for %s", slug_upper) continue - else: - print( - f"[red]Error:[/] [magenta]{progam['slug'].upper()}[/] failed with {response.status_code}" + logger.error( + "%s failed with %s. URL: %s", + slug_upper, + response.status_code, + URL, ) - print(" ", URL) continue except Exception: - print(f"[red]Error:[/] Failed to GET {progam['slug'].upper()}") - print(" URL", URL) - progress.console.print_exception() + logger.exception("Failed to GET %s. 
URL: %s", slug_upper, URL) continue try: @@ -92,8 +92,11 @@ def handle(self, *args: Any, **options: Any) -> None: } except Exception: failed.append(progam["slug"]) - print(f"Error while listing content of {progam['slug']}") - progress.console.print_exception() + logger.exception( + "Error while listing content of %s", progam["slug"] + ) with open("csv/courses.json", "w+") as all_courses_json: json.dump(program_content, all_courses_json, indent=2) + + logger.info("Course content download complete. Saved to csv/courses.json") diff --git a/catalog/management/commands/download_programs.py b/catalog/management/commands/download_programs.py index cae6fd65..af4b4da3 100644 --- a/catalog/management/commands/download_programs.py +++ b/catalog/management/commands/download_programs.py @@ -2,28 +2,29 @@ from typing import Any import json +import logging import re from django.core.management import BaseCommand import requests from bs4 import BeautifulSoup -from rich import print from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn +logger = logging.getLogger(__name__) + class Command(BaseCommand): - help = "" + help = "Download the list of available programs from ULB" PAGE_SIZE = 20 - URL = f"https://www.ulb.be/servlet/search?beanKey=beanKeyRechercheFormation&types=formation&natureFormation=ulb&s=FACULTE_ASC&limit={PAGE_SIZE}" def handle(self, *args: Any, **options: Any) -> None: programs: list[dict] = [] - parent_programs: set[str] = set() - print("[bold blue]Gathering the list of available programs...[/]\n") + + logger.info("Gathering the list of available programs...") with Progress( SpinnerColumn(), @@ -36,9 +37,11 @@ def handle(self, *args: Any, **options: Any) -> None: task1 = progress.add_task( "Listing available programs...", total=result_count ) - progress.console.print( + + logger.info( "Querying ULB a first time to count the number of programs available..." 
) + while page < last_page: response = requests.get(self.URL + f"&page={page}") soup = BeautifulSoup(response.content, "html.parser") @@ -53,14 +56,15 @@ def handle(self, *args: Any, **options: Any) -> None: r"a( +)donné( +)(?P<count>\d+)( +)résultats", result_count_text ): result_count = int(match.group("count")) - else: raise Exception( f"Could not parse result count ({result_count_text})" ) + last_page = int(result_count / self.PAGE_SIZE) + 1 - progress.console.print( - f"Found {result_count} programs on {last_page} pages..." + + logger.info( + "Found %s programs on %s pages...", result_count, last_page ) progress.update(task1, total=result_count) @@ -73,7 +77,6 @@ def handle(self, *args: Any, **options: Any) -> None: program_name = mnemonic_span.find_previous( "strong", {"class": "search-result__structure-intitule"} ).text - faculties: list = [] for elem in fac: children = elem.findChildren() @@ -89,7 +92,6 @@ def handle(self, *args: Any, **options: Any) -> None: "name": program_name, "faculty": faculties, } - if option_div := mnemonic_span.find_previous( "div", {"class": "search-result__resultat--fille"} ): @@ -104,18 +106,17 @@ def handle(self, *args: Any, **options: Any) -> None: programs.append(p) else: - progress.console.print( - f"Skipping already seen [magenta]{mnemonic_span.text}" - ) + logger.debug("Skipping already seen %s", mnemonic_span.text) progress.update(task1, completed=self.PAGE_SIZE * page) page += 1 - print( - f"Found {len(parent_programs)} programs containing options, ignoring those..." 
+ logger.info( + "Found %s programs containing options, ignoring those...", + len(parent_programs), ) - print(parent_programs) + logger.debug("Ignored programs: %s", parent_programs) programs = [p for p in programs if p["slug"] not in parent_programs] - print(f"Found {len(programs)} distinct programs, dumping to json...") + logger.info("Found %s distinct programs, dumping to json...", len(programs)) with open("csv/programs.json", "w") as f: json.dump(programs, f, indent=4) diff --git a/www/settings.py b/www/settings.py index 467a9e4a..32fcc569 100644 --- a/www/settings.py +++ b/www/settings.py @@ -223,15 +223,16 @@ "version": 1, "disable_existing_loggers": False, "formatters": { - "simple": { - "format": "{levelname} {message}", - "style": "{", + "rich": { + "format": "%(message)s", }, }, "handlers": { "console": { - "class": "logging.StreamHandler", - "formatter": "simple", + "class": "rich.logging.RichHandler", + "formatter": "rich", + "rich_tracebacks": True, + "show_path": False, }, }, "root": { From 12126967f2684a2e363902a8ceebb72341e26235 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:40:10 +0100 Subject: [PATCH 8/9] feat : migrate settings.py to use logging --- www/settings.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/www/settings.py b/www/settings.py index 32fcc569..7dfe3c13 100644 --- a/www/settings.py +++ b/www/settings.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import environ @@ -6,6 +7,8 @@ # https://django-environ.readthedocs.io/en/latest/tips.html#docker-style-file-based-variables from sentry_sdk.utils import get_default_release +logger = logging.getLogger(__name__) + env = environ.FileAwareEnv() # Set the project base directory @@ -163,8 +166,10 @@ AWS_S3_SECRET_ACCESS_KEY = env("STORAGE_SECRET_KEY") AWS_STORAGE_BUCKET_NAME = env("STORAGE_MEDIA_BUCKET_NAME") elif not DEBUG: - print("Warning: no storage configured but DEBUG=False, using local filesystem.") - print("You DO NOT want 
this in production!") + logger.warning( + "Warning: no storage configured but DEBUG=False, using local filesystem." + ) + logger.warning("You DO NOT want this in production!") STORAGES = { "default": { From 164927b667e4bdb5579c5853d7545111378b15f9 Mon Sep 17 00:00:00 2001 From: mnietona Date: Sun, 8 Mar 2026 21:42:13 +0100 Subject: [PATCH 9/9] feat : migrate tasks.py to use logging --- documents/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/documents/tasks.py b/documents/tasks.py index 197f8e66..4336fdcb 100644 --- a/documents/tasks.py +++ b/documents/tasks.py @@ -1,5 +1,6 @@ import contextlib import hashlib +import logging import os import re import subprocess @@ -24,13 +25,15 @@ ) from .thumbnail import get_thumbnail +logger = logging.getLogger(__name__) + def on_failure(self, exc, task_id, args, kwargs, einfo): if isinstance(exc, SkipException): return None doc_id = args[0] - print(f"Document {doc_id} failed.") + logger.error("Document %s failed to process.", doc_id) document = Document.objects.get(id=doc_id) document.state = Document.DocumentState.ERROR