Skip to content

Commit ff49125

Browse files
authored
Remove all calls to print() and use rich to pretty format logs (#363)
1 parent 4d47197 commit ff49125

10 files changed

Lines changed: 129 additions & 74 deletions

File tree

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
from typing import Any
22

3+
import logging
4+
35
from django.core.management.base import BaseCommand
46
from django.db.models import Count
57

68
from catalog.models import Course
79

10+
logger = logging.getLogger(__name__)
11+
812

913
class Command(BaseCommand):
1014
def handle(self, *args: Any, **options: Any) -> None:
11-
print("Cleaning archives")
15+
logger.info("Cleaning archives")
1216
empty_archived_courses = (
1317
Course.objects.filter(is_archive=True)
1418
.annotate(num_doc=Count("document"))
1519
.filter(num_doc=0)
1620
)
17-
print("Deleting %s empty courses" % len(empty_archived_courses))
21+
logger.info("Deleting %s empty courses", len(empty_archived_courses))
1822
empty_archived_courses.delete()

catalog/management/commands/crawl_uv.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import csv
2+
import logging
23

34
from django.core.management.base import BaseCommand
45

56
import requests
67
from bs4 import BeautifulSoup
78

9+
logger = logging.getLogger(__name__)
10+
811

912
class Command(BaseCommand):
1013
def handle(self, *args, **options):
@@ -17,17 +20,17 @@ def handle(self, *args, **options):
1720

1821
courses = []
1922
fails = []
20-
print(f"Found {len(options)} options")
23+
logger.info("Found %s options", len(options)) # debug because verbose
2124
for option in options:
22-
print(f"..{option.text}")
25+
logger.debug("..%s", option.text)
2326
value = option["value"]
2427
response = requests.get(
2528
f"https://uv.ulb.ac.be/course/index.php?categoryid={value}&browse=courses&perpage=1000&page=0"
2629
)
2730
soup = BeautifulSoup(response.content, "html.parser")
2831

2932
course_divs = soup.find_all("div", {"class": "coursebox"})
30-
print(f"Found {len(courses)} in {option.text}")
33+
logger.info("Found %s in %s", len(courses), option.text)
3134

3235
for course in course_divs:
3336
try:
@@ -37,7 +40,7 @@ def handle(self, *args, **options):
3740
except: # noqa
3841
fails.append(course.text)
3942

40-
print(f"Found {len(courses)} and failed to parse {len(fails)}")
43+
logger.info("Found %s and failed to parse %s", len(courses), len(fails))
4144
with open("csv/uv_courses.csv", "w") as fd:
4245
writer = csv.writer(fd)
4346
for course in courses:
Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,76 @@
11
from typing import Any
22

33
import json
4+
import logging
45
from urllib.parse import quote
56

67
from django.core.management import BaseCommand
78

89
import requests
9-
from rich import print
1010
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn
1111

12+
logger = logging.getLogger(__name__)
13+
1214

1315
class Command(BaseCommand):
14-
help = ""
16+
help = "Download course contents for all programs from ULB API"
1517

1618
def handle(self, *args: Any, **options: Any) -> None:
1719
with open("csv/programs.json") as f:
1820
programs: list[dict] = json.load(f)
19-
print("\n[bold blue]Listing the course content of all programs...[/]\n")
21+
logger.info("Listing the course content of all programs...")
2022

2123
failed: list = []
2224
program_content: dict[str, dict[str, dict]] = {}
23-
24-
# programs = [p for p in programs if p["slug"] in ["BA-GEOG"]]
25-
2625
with Progress(
2726
SpinnerColumn(),
2827
*Progress.get_default_columns(),
2928
MofNCompleteColumn(),
3029
) as progress:
31-
task1 = progress.add_task(
32-
"Listing the course content of all programs...", total=len(programs)
33-
)
30+
task1 = progress.add_task("Processing programs...", total=len(programs))
3431

3532
for progam in programs:
33+
slug_upper = progam["slug"].upper()
3634
progress.update(
3735
task1,
3836
advance=1,
39-
description=f"Listing the course content of {progam['slug'].upper()}...",
37+
description=f"Listing content of {slug_upper}...",
4038
)
39+
4140
if "parent" in progam:
42-
qs = f"/ksup/programme?gen=prod&anet={progam['parent'].upper()}&option={progam['slug'].upper()}&lang=fr"
41+
qs = f"/ksup/programme?gen=prod&anet={progam['parent'].upper()}&option={slug_upper}&lang=fr"
4342
else:
44-
qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr"
43+
qs = f"/ksup/programme?gen=prod&anet={slug_upper}&lang=fr"
4544

4645
URL = f"https://www.ulb.be/api/formation?path={quote(qs)}"
4746
try:
4847
response = requests.get(URL)
4948
if not response.ok:
5049
if "parent" in progam:
51-
print(
52-
f"[yellow]Skip:[/] [magenta]{progam['slug'].upper()}[/] with bogus parent {progam['parent'].upper()}"
50+
# Use logger.warning for skips/retries
51+
logger.warning(
52+
"Skip: %s with bogus parent %s. Retrying...",
53+
slug_upper,
54+
progam["parent"].upper(),
5355
)
5456

55-
print("Retry")
56-
qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr"
57+
qs = f"/ksup/programme?gen=prod&anet={slug_upper}&lang=fr"
5758
URL = f"https://www.ulb.be/api/formation?path={quote(qs)}"
5859
response = requests.get(URL)
5960
if not response.ok:
60-
print("Retry failed")
61+
logger.error("Retry failed for %s", slug_upper)
6162
continue
62-
6363
else:
64-
print(
65-
f"[red]Error:[/] [magenta]{progam['slug'].upper()}[/] failed with {response.status_code}"
64+
logger.error(
65+
"%s failed with %s. URL: %s",
66+
slug_upper,
67+
response.status_code,
68+
URL,
6669
)
67-
print(" ", URL)
6870
continue
6971

7072
except Exception:
71-
print(f"[red]Error:[/] Failed to GET {progam['slug'].upper()}")
72-
print(" URL", URL)
73-
progress.console.print_exception()
73+
logger.exception("Failed to GET %s. URL: %s", slug_upper, URL)
7474
continue
7575

7676
try:
@@ -92,8 +92,11 @@ def handle(self, *args: Any, **options: Any) -> None:
9292
}
9393
except Exception:
9494
failed.append(progam["slug"])
95-
print(f"Error while listing content of {progam['slug']}")
96-
progress.console.print_exception()
95+
logger.exception(
96+
"Error while listing content of %s", progam["slug"]
97+
)
9798

9899
with open("csv/courses.json", "w+") as all_courses_json:
99100
json.dump(program_content, all_courses_json, indent=2)
101+
102+
logger.info("Course content download complete. Saved to csv/courses.json")

catalog/management/commands/download_programs.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,29 @@
22
from typing import Any
33

44
import json
5+
import logging
56
import re
67

78
from django.core.management import BaseCommand
89

910
import requests
1011
from bs4 import BeautifulSoup
11-
from rich import print
1212
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn
1313

14+
logger = logging.getLogger(__name__)
15+
1416

1517
class Command(BaseCommand):
16-
help = ""
18+
help = "Download the list of available programs from ULB"
1719

1820
PAGE_SIZE = 20
19-
2021
URL = f"https://www.ulb.be/servlet/search?beanKey=beanKeyRechercheFormation&types=formation&natureFormation=ulb&s=FACULTE_ASC&limit={PAGE_SIZE}"
2122

2223
def handle(self, *args: Any, **options: Any) -> None:
2324
programs: list[dict] = []
24-
2525
parent_programs: set[str] = set()
26-
print("[bold blue]Gathering the list of available programs...[/]\n")
26+
27+
logger.info("Gathering the list of available programs...")
2728

2829
with Progress(
2930
SpinnerColumn(),
@@ -36,9 +37,11 @@ def handle(self, *args: Any, **options: Any) -> None:
3637
task1 = progress.add_task(
3738
"Listing available programs...", total=result_count
3839
)
39-
progress.console.print(
40+
41+
logger.info(
4042
"Querying ULB a first time to count the number of programs available..."
4143
)
44+
4245
while page < last_page:
4346
response = requests.get(self.URL + f"&page={page}")
4447
soup = BeautifulSoup(response.content, "html.parser")
@@ -53,14 +56,15 @@ def handle(self, *args: Any, **options: Any) -> None:
5356
r"a( +)donné( +)(?P<count>\d+)( +)résultats", result_count_text
5457
):
5558
result_count = int(match.group("count"))
56-
5759
else:
5860
raise Exception(
5961
f"Could not parse result count ({result_count_text})"
6062
)
63+
6164
last_page = int(result_count / self.PAGE_SIZE) + 1
62-
progress.console.print(
63-
f"Found {result_count} programs on {last_page} pages..."
65+
66+
logger.info(
67+
"Found %s programs on %s pages...", result_count, last_page
6468
)
6569
progress.update(task1, total=result_count)
6670

@@ -73,7 +77,6 @@ def handle(self, *args: Any, **options: Any) -> None:
7377
program_name = mnemonic_span.find_previous(
7478
"strong", {"class": "search-result__structure-intitule"}
7579
).text
76-
7780
faculties: list = []
7881
for elem in fac:
7982
children = elem.findChildren()
@@ -89,7 +92,6 @@ def handle(self, *args: Any, **options: Any) -> None:
8992
"name": program_name,
9093
"faculty": faculties,
9194
}
92-
9395
if option_div := mnemonic_span.find_previous(
9496
"div", {"class": "search-result__resultat--fille"}
9597
):
@@ -104,18 +106,17 @@ def handle(self, *args: Any, **options: Any) -> None:
104106

105107
programs.append(p)
106108
else:
107-
progress.console.print(
108-
f"Skipping already seen [magenta]{mnemonic_span.text}"
109-
)
109+
logger.debug("Skipping already seen %s", mnemonic_span.text)
110110
progress.update(task1, completed=self.PAGE_SIZE * page)
111111
page += 1
112112

113-
print(
114-
f"Found {len(parent_programs)} programs containing options, ignoring those..."
113+
logger.info(
114+
"Found %s programs containing options, ignoring those...",
115+
len(parent_programs),
115116
)
116-
print(parent_programs)
117+
logger.debug("Ignored programs: %s", parent_programs)
117118
programs = [p for p in programs if p["slug"] not in parent_programs]
118119

119-
print(f"Found {len(programs)} distinct programs, dumping to json...")
120+
logger.info("Found %s distinct programs, dumping to json...", len(programs))
120121
with open("csv/programs.json", "w") as f:
121122
json.dump(programs, f, indent=4)

catalog/management/commands/find_orphans.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import csv
22
import json
3+
import logging
34

45
from django.core.management.base import BaseCommand
56
from django.db.models import Count
67

78
from catalog.models import Course
89
from catalog.slug import normalize_slug
910

11+
logger = logging.getLogger(__name__)
12+
1013

1114
class Command(BaseCommand):
1215
def handle(self, *args, **options):
@@ -26,10 +29,11 @@ def handle(self, *args, **options):
2629
empty_orphans = orphans.filter(num_docs=0)
2730

2831
orphans_to_fix = orphans.exclude(num_docs=0)
29-
print(
30-
f"{empty_orphans.count()} empty orphans and {orphans_to_fix.count()} orphans with documents"
32+
logger.info(
33+
"%s empty orphans and %s orphans with documents",
34+
empty_orphans.count(),
35+
orphans_to_fix.count(),
3136
)
32-
3337
with open("csv/orphans.csv", "w") as fd:
3438
writer = csv.writer(fd)
3539
for course in orphans_to_fix:

catalog/management/commands/load_courses.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import json
2+
import logging
23

34
from django.core.management.base import BaseCommand
45
from django.db import transaction
56

67
from catalog.models import Category, Course
78
from catalog.slug import normalize_slug
89

10+
logger = logging.getLogger(__name__)
11+
912

1013
def get_category(slug, name=None, parent=None, type=None):
1114
cat, created = Category.objects.get_or_create(
@@ -22,13 +25,13 @@ def handle(self, *args, **options):
2225
programs = json.load(f)
2326

2427
with transaction.atomic():
25-
print("Temporarily set all courses as archived")
28+
logger.info("Temporarily set all courses as archived")
2629
for course in Course.objects.all():
2730
course.is_archive = True
2831
course.save()
2932

3033
for program_slug, courses in programs.items():
31-
print(f"Inserting {len(courses)} courses from {program_slug}")
34+
logger.info("Inserting %s courses from %s", len(courses), program_slug)
3235
category = get_category(program_slug)
3336
for course in courses.values():
3437
bloc = course["bloc"]

0 commit comments

Comments
 (0)