@@ -80,58 +80,12 @@ def _fix_broken_symlink_based_on_local_index(
8080 )
8181
8282 def _fix_files (self , records : dict ) -> None :
83- # pylint: disable=too-many-branches
84- for record_dict in records .values ():
85- if Fields .FILE not in record_dict :
86- continue
87-
88- if not record_dict [Fields .FILE ].startswith ("data/pdfs/" ):
89- record_dict [Fields .FILE ] = f"data/pdfs/{ record_dict ['ID' ]} .pdf"
90-
91- full_path = self .review_manager .path / Path (record_dict [Fields .FILE ])
92-
93- if full_path .is_file ():
94- continue
95-
96- # Add .pdf extension if missing
97- if Path (str (full_path ) + ".pdf" ).is_file ():
98- shutil .move (str (full_path ) + ".pdf" , str (full_path ))
99-
100- # Check / replace multiple blanks in file and filename
101- try :
102- parent_dir = full_path .parent
103- same_dir_pdfs = [
104- x .relative_to (self .review_manager .path )
105- for x in parent_dir .glob ("*.pdf" )
106- ]
107- for same_dir_pdf in same_dir_pdfs :
108- if record_dict [Fields .FILE ].replace (" " , " " ) == str (
109- same_dir_pdf
110- ).replace (" " , " " ):
111- shutil .move (
112- str (same_dir_pdf ), str (same_dir_pdf ).replace (" " , " " )
113- )
114- record_dict [Fields .FILE ] = record_dict [Fields .FILE ].replace (
115- " " , " "
116- )
117- except ValueError :
118- pass
119-
120- full_path = self .review_manager .path / Path (record_dict [Fields .FILE ])
121-
122- if not full_path .is_file ():
123- record = colrev .record .record .Record (record_dict )
124- self ._fix_broken_symlink_based_on_local_index (
125- record = record , full_path = full_path
126- )
127-
128- if full_path .is_file ():
129- continue
130-
131- record_dict ["colrev_status_backup" ] = record_dict [Fields .STATUS ]
132- del record_dict [Fields .FILE ]
133- record = colrev .record .record .Record (record_dict )
134- record .set_status (RecordState .rev_prescreen_included )
83+ for record_id , record_dict in self ._records_with_file_references (
84+ records = records
85+ ):
86+ self ._fix_record_file_reference (
87+ record_id = record_id , record_dict = record_dict
88+ )
13589
13690 file_search_sources = [
13791 x
@@ -163,6 +117,76 @@ def _fix_files(self, records: dict) -> None:
163117
164118 files_dir_feed .save ()
165119
def _records_with_file_references(self, *, records: dict) -> list[tuple[str, dict]]:
    """Collect the ``(record_id, record_dict)`` pairs that carry a file field.

    Records whose dict has no ``Fields.FILE`` key are left out; the order of
    ``records`` is preserved.
    """
    selected: list[tuple[str, dict]] = []
    for key, entry in records.items():
        if Fields.FILE not in entry:
            continue
        selected.append((key, entry))
    return selected
126+
def _fix_record_file_reference(self, *, record_id: str, record_dict: dict) -> None:
    """Try to repair the file reference of a single record.

    The repair attempts run in a fixed order: normalize the expected path,
    stop if the file is there, otherwise try the ".pdf"-suffixed variant,
    the blank normalization and finally the local-index symlink repair.
    When the file is still missing after all attempts, the reference is
    passed to ``_handle_missing_file_reference``.
    """
    self._normalize_expected_pdf_path(record_id=record_id, record_dict=record_dict)
    pdf_path = self._full_file_path(record_dict=record_dict)
    if pdf_path.is_file():
        return

    self._move_missing_pdf_extension_variant_if_exists(full_path=pdf_path)
    self._normalize_double_blanks_in_file_path(
        full_path=pdf_path, record_dict=record_dict
    )
    # Recompute the path: the blank normalization may have rewritten the
    # file field in record_dict.
    pdf_path = self._full_file_path(record_dict=record_dict)

    if not pdf_path.is_file():
        self._fix_broken_symlink_based_on_local_index(
            record=colrev.record.record.Record(record_dict),
            full_path=pdf_path,
        )

    if not pdf_path.is_file():
        self._handle_missing_file_reference(record_dict=record_dict)
150+
def _normalize_expected_pdf_path(self, *, record_dict: dict, record_id: str) -> None:
    """Point the file field at ``data/pdfs/`` if it is not already there.

    A file field that does not start with ``data/pdfs/`` is overwritten
    in place with a path built from ``record_id``; otherwise nothing changes.
    """
    current_value = record_dict[Fields.FILE]
    if current_value.startswith("data/pdfs/"):
        return
    record_dict[Fields.FILE] = f"data/pdfs/{record_id}.pdf"
156+
def _full_file_path(self, *, record_dict: dict) -> Path:
    """Return the record's file field joined onto ``self.review_manager.path``."""
    project_root = self.review_manager.path
    file_field = Path(record_dict[Fields.FILE])
    return project_root / file_field
159+
160+ def _move_missing_pdf_extension_variant_if_exists (self , * , full_path : Path ) -> None :
161+ if Path (str (full_path ) + ".pdf" ).is_file ():
162+ shutil .move (str (full_path ) + ".pdf" , str (full_path ))
163+
def _normalize_double_blanks_in_file_path(
    self, *, record_dict: dict, full_path: Path
) -> None:
    """Reconcile double blanks between the file field and PDFs on disk.

    Every ``*.pdf`` in the directory of ``full_path`` is compared with the
    record's file field after collapsing double blanks to single blanks on
    both sides. On a match the PDF on disk is renamed to its collapsed name
    and the file field in ``record_dict`` is updated to the collapsed value.

    If a PDF in that directory cannot be expressed relative to
    ``self.review_manager.path``, the method returns without changes.
    """
    # Spelled as a product so the two-blank pattern cannot be silently
    # collapsed into a no-op single-blank replacement.
    double_blank = " " * 2
    project_root = self.review_manager.path

    try:
        # relative_to() is the only call here that raises ValueError.
        sibling_pdfs = [
            pdf.relative_to(project_root) for pdf in full_path.parent.glob("*.pdf")
        ]
    except ValueError:
        return

    for sibling in sibling_pdfs:
        on_disk = str(sibling)
        collapsed = on_disk.replace(double_blank, " ")
        if record_dict[Fields.FILE].replace(double_blank, " ") != collapsed:
            continue
        if collapsed != on_disk:
            # Anchor both ends at the project root so the rename does not
            # depend on the current working directory.
            shutil.move(str(project_root / sibling), str(project_root / collapsed))
        record_dict[Fields.FILE] = record_dict[Fields.FILE].replace(
            double_blank, " "
        )
183+
def _handle_missing_file_reference(self, *, record_dict: dict) -> None:
    """Drop a file reference that could not be repaired and reset the status.

    The current status is copied to ``colrev_status_backup``, the file field
    is deleted, and the record's status is set to
    ``RecordState.rev_prescreen_included``.
    """
    previous_status = record_dict[Fields.STATUS]
    record_dict["colrev_status_backup"] = previous_status
    del record_dict[Fields.FILE]
    colrev.record.record.Record(record_dict).set_status(
        RecordState.rev_prescreen_included
    )
189+
166190 def _get_source_feeds (self ) -> dict :
167191 source_feeds = {}
168192 for source in self .review_manager .settings .sources :
0 commit comments