@@ -43,6 +43,9 @@ def collect_catalog(self) -> None:
4343 self .backfill_from_year (DEFAULT_START_YEAR )
4444
4545 def backfill_from_year (self , start_year : int ) -> None :
46+ """
47+ Backfill EUVD catalog data from January 1 of ``start_year`` up to today. Callers such as ``collect_catalog`` pass ``DEFAULT_START_YEAR``. Data is collected month by month and stored in year/month directories as JSON files.
48+ """
4649 today = date .today ()
4750 backfill_start = date (start_year , 1 , 1 )
4851 backfill_end = today
@@ -98,7 +101,7 @@ def sync_yesterday(self) -> None:
98101 {
99102 "fromUpdatedDate" : target_date .isoformat (),
100103 "toUpdatedDate" : target_date .isoformat (),
101- "size" : PAGE_SIZE ,
104+ "size" : 1 ,
102105 "page" : 0 ,
103106 }
104107 )
@@ -109,17 +112,14 @@ def sync_yesterday(self) -> None:
109112 progress = LoopProgress (total_iterations = total_pages , logger = self .log )
110113
111114 for page in progress .iter (range (total_pages )):
112- if page == 0 :
113- data = first_page
114- else :
115- data = self .fetch_page (
116- {
117- "fromUpdatedDate" : target_date .isoformat (),
118- "toUpdatedDate" : target_date .isoformat (),
119- "size" : PAGE_SIZE ,
120- "page" : page ,
121- }
122- )
115+ data = self .fetch_page (
116+ {
117+ "fromUpdatedDate" : target_date .isoformat (),
118+ "toUpdatedDate" : target_date .isoformat (),
119+ "size" : PAGE_SIZE ,
120+ "page" : page ,
121+ }
122+ )
123123
124124 self .write_page_file (
125125 year = target_date .year ,
@@ -145,7 +145,7 @@ def _collect_paginated(
145145 {
146146 "fromUpdatedDate" : start .isoformat (),
147147 "toUpdatedDate" : end .isoformat (),
148- "size" : PAGE_SIZE ,
148+ "size" : 1 ,
149149 "page" : 0 ,
150150 }
151151 )
@@ -154,17 +154,14 @@ def _collect_paginated(
154154 total_pages = (total + PAGE_SIZE - 1 ) // PAGE_SIZE
155155
156156 for page in range (total_pages ):
157- if page == 0 :
158- data = first_page
159- else :
160- data = self .fetch_page (
161- {
162- "fromUpdatedDate" : start .isoformat (),
163- "toUpdatedDate" : end .isoformat (),
164- "size" : PAGE_SIZE ,
165- "page" : page ,
166- }
167- )
157+ data = self .fetch_page (
158+ {
159+ "fromUpdatedDate" : start .isoformat (),
160+ "toUpdatedDate" : end .isoformat (),
161+ "size" : PAGE_SIZE ,
162+ "page" : page ,
163+ }
164+ )
168165
169166 self .write_page_file (
170167 year = year ,
@@ -235,3 +232,4 @@ def log(self, message: str) -> None:
235232 if error_message :
236233 print (error_message )
237234 sys .exit (status_code )
235+
0 commit comments