22from typing import Any
33
44import json
5+ import logging
56import re
67
78from django .core .management import BaseCommand
89
910import requests
1011from bs4 import BeautifulSoup
11- from rich import print
1212from rich .progress import MofNCompleteColumn , Progress , SpinnerColumn
1313
14+ logger = logging .getLogger (__name__ )
15+
1416
1517class Command (BaseCommand ):
16- help = ""
18+ help = "Download the list of available programs from ULB "
1719
1820 PAGE_SIZE = 20
19-
2021 URL = f"https://www.ulb.be/servlet/search?beanKey=beanKeyRechercheFormation&types=formation&natureFormation=ulb&s=FACULTE_ASC&limit={ PAGE_SIZE } "
2122
2223 def handle (self , * args : Any , ** options : Any ) -> None :
2324 programs : list [dict ] = []
24-
2525 parent_programs : set [str ] = set ()
26- print ("[bold blue]Gathering the list of available programs...[/]\n " )
26+
27+ logger .info ("Gathering the list of available programs..." )
2728
2829 with Progress (
2930 SpinnerColumn (),
@@ -36,9 +37,11 @@ def handle(self, *args: Any, **options: Any) -> None:
3637 task1 = progress .add_task (
3738 "Listing available programs..." , total = result_count
3839 )
39- progress .console .print (
40+
41+ logger .info (
4042 "Querying ULB a first time to count the number of programs available..."
4143 )
44+
4245 while page < last_page :
4346 response = requests .get (self .URL + f"&page={ page } " )
4447 soup = BeautifulSoup (response .content , "html.parser" )
@@ -53,14 +56,15 @@ def handle(self, *args: Any, **options: Any) -> None:
5356 r"a( +)donné( +)(?P<count>\d+)( +)résultats" , result_count_text
5457 ):
5558 result_count = int (match .group ("count" ))
56-
5759 else :
5860 raise Exception (
5961 f"Could not parse result count ({ result_count_text } )"
6062 )
63+
6164 last_page = int (result_count / self .PAGE_SIZE ) + 1
62- progress .console .print (
63- f"Found { result_count } programs on { last_page } pages..."
65+
66+ logger .info (
67+ "Found %s programs on %s pages..." , result_count , last_page
6468 )
6569 progress .update (task1 , total = result_count )
6670
@@ -73,7 +77,6 @@ def handle(self, *args: Any, **options: Any) -> None:
7377 program_name = mnemonic_span .find_previous (
7478 "strong" , {"class" : "search-result__structure-intitule" }
7579 ).text
76-
7780 faculties : list = []
7881 for elem in fac :
7982 children = elem .findChildren ()
@@ -89,7 +92,6 @@ def handle(self, *args: Any, **options: Any) -> None:
8992 "name" : program_name ,
9093 "faculty" : faculties ,
9194 }
92-
9395 if option_div := mnemonic_span .find_previous (
9496 "div" , {"class" : "search-result__resultat--fille" }
9597 ):
@@ -104,18 +106,17 @@ def handle(self, *args: Any, **options: Any) -> None:
104106
105107 programs .append (p )
106108 else :
107- progress .console .print (
108- f"Skipping already seen [magenta]{ mnemonic_span .text } "
109- )
109+ logger .debug ("Skipping already seen %s" , mnemonic_span .text )
110110 progress .update (task1 , completed = self .PAGE_SIZE * page )
111111 page += 1
112112
113- print (
114- f"Found { len (parent_programs )} programs containing options, ignoring those..."
113+ logger .info (
114+ "Found %s programs containing options, ignoring those..." ,
115+ len (parent_programs ),
115116 )
116- print ( parent_programs )
117+ logger . debug ( "Ignored programs: %s" , parent_programs )
117118 programs = [p for p in programs if p ["slug" ] not in parent_programs ]
118119
119- print ( f "Found { len ( programs ) } distinct programs, dumping to json..." )
120+ logger . info ( "Found %s distinct programs, dumping to json..." , len ( programs ) )
120121 with open ("csv/programs.json" , "w" ) as f :
121122 json .dump (programs , f , indent = 4 )
0 commit comments