Skip to content

Commit cbbd1a4

Browse files
committed
refactor: repare
1 parent 12f1599 commit cbbd1a4

1 file changed

Lines changed: 76 additions & 52 deletions

File tree

colrev/ops/repare.py

Lines changed: 76 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -80,58 +80,12 @@ def _fix_broken_symlink_based_on_local_index(
8080
)
8181

8282
def _fix_files(self, records: dict) -> None:
83-
# pylint: disable=too-many-branches
84-
for record_dict in records.values():
85-
if Fields.FILE not in record_dict:
86-
continue
87-
88-
if not record_dict[Fields.FILE].startswith("data/pdfs/"):
89-
record_dict[Fields.FILE] = f"data/pdfs/{record_dict['ID']}.pdf"
90-
91-
full_path = self.review_manager.path / Path(record_dict[Fields.FILE])
92-
93-
if full_path.is_file():
94-
continue
95-
96-
# Add .pdf extension if missing
97-
if Path(str(full_path) + ".pdf").is_file():
98-
shutil.move(str(full_path) + ".pdf", str(full_path))
99-
100-
# Check / replace multiple blanks in file and filename
101-
try:
102-
parent_dir = full_path.parent
103-
same_dir_pdfs = [
104-
x.relative_to(self.review_manager.path)
105-
for x in parent_dir.glob("*.pdf")
106-
]
107-
for same_dir_pdf in same_dir_pdfs:
108-
if record_dict[Fields.FILE].replace(" ", " ") == str(
109-
same_dir_pdf
110-
).replace(" ", " "):
111-
shutil.move(
112-
str(same_dir_pdf), str(same_dir_pdf).replace(" ", " ")
113-
)
114-
record_dict[Fields.FILE] = record_dict[Fields.FILE].replace(
115-
" ", " "
116-
)
117-
except ValueError:
118-
pass
119-
120-
full_path = self.review_manager.path / Path(record_dict[Fields.FILE])
121-
122-
if not full_path.is_file():
123-
record = colrev.record.record.Record(record_dict)
124-
self._fix_broken_symlink_based_on_local_index(
125-
record=record, full_path=full_path
126-
)
127-
128-
if full_path.is_file():
129-
continue
130-
131-
record_dict["colrev_status_backup"] = record_dict[Fields.STATUS]
132-
del record_dict[Fields.FILE]
133-
record = colrev.record.record.Record(record_dict)
134-
record.set_status(RecordState.rev_prescreen_included)
83+
for record_id, record_dict in self._records_with_file_references(
84+
records=records
85+
):
86+
self._fix_record_file_reference(
87+
record_id=record_id, record_dict=record_dict
88+
)
13589

13690
file_search_sources = [
13791
x
@@ -163,6 +117,76 @@ def _fix_files(self, records: dict) -> None:
163117

164118
files_dir_feed.save()
165119

120+
def _records_with_file_references(self, *, records: dict) -> list[tuple[str, dict]]:
    """Collect the records that carry a file field.

    Args:
        records: Mapping of record ID to record dictionary.

    Returns:
        A list of ``(record_id, record_dict)`` pairs, in the mapping's
        iteration order, for every record containing ``Fields.FILE``.
    """
    selected = []
    for record_id, record_dict in records.items():
        if Fields.FILE in record_dict:
            selected.append((record_id, record_dict))
    return selected
126+
127+
def _fix_record_file_reference(self, *, record_id: str, record_dict: dict) -> None:
    """Try to repair the file reference of a single record.

    The repair steps run in order and stop as soon as the referenced file
    exists on disk; if none of them succeeds, the record is handed to
    ``_handle_missing_file_reference``.

    Args:
        record_id: ID of the record, used to derive the expected PDF path.
        record_dict: Record dictionary; may be modified in place.
    """
    self._normalize_expected_pdf_path(record_dict=record_dict, record_id=record_id)
    expected_path = self._full_file_path(record_dict=record_dict)
    if expected_path.is_file():
        return

    # Cheap on-disk repairs first: stray ".pdf" suffix, then double blanks.
    self._move_missing_pdf_extension_variant_if_exists(full_path=expected_path)
    self._normalize_double_blanks_in_file_path(
        record_dict=record_dict, full_path=expected_path
    )

    # The blank normalization may have rewritten the file field, so the
    # path is resolved again before checking.
    repaired_path = self._full_file_path(record_dict=record_dict)
    if repaired_path.is_file():
        return

    self._fix_broken_symlink_based_on_local_index(
        record=colrev.record.record.Record(record_dict), full_path=repaired_path
    )
    if not repaired_path.is_file():
        self._handle_missing_file_reference(record_dict=record_dict)
150+
151+
def _normalize_expected_pdf_path(
    self, *, record_dict: dict, record_id: str
) -> None:
    """Point the file field at ``data/pdfs/<record_id>.pdf`` if it is elsewhere.

    A file field that already starts with ``data/pdfs/`` is left untouched.

    Args:
        record_dict: Record dictionary containing ``Fields.FILE``; modified
            in place.
        record_id: ID used to build the replacement file name.
    """
    if record_dict[Fields.FILE].startswith("data/pdfs/"):
        return
    record_dict[Fields.FILE] = f"data/pdfs/{record_id}.pdf"
156+
157+
def _full_file_path(self, *, record_dict: dict) -> Path:
    """Return the record's file field joined onto the review manager's path.

    Args:
        record_dict: Record dictionary containing ``Fields.FILE``.
    """
    relative_file = Path(record_dict[Fields.FILE])
    return self.review_manager.path / relative_file
159+
160+
def _move_missing_pdf_extension_variant_if_exists(self, *, full_path: Path) -> None:
    """Move ``<full_path>.pdf`` to ``full_path`` when that variant exists.

    Args:
        full_path: Location where the record expects its file.
    """
    suffixed_variant = str(full_path) + ".pdf"
    if not Path(suffixed_variant).is_file():
        return
    shutil.move(suffixed_variant, str(full_path))
163+
164+
def _normalize_double_blanks_in_file_path(
    self, *, record_dict: dict, full_path: Path
) -> None:
    """Reconcile double blanks between the record's file field and PDFs on disk.

    Every ``*.pdf`` in the directory of ``full_path`` is compared with the
    record's file field after collapsing double blanks ("  " -> " ") on both
    sides. On a match, the file on disk is renamed to the collapsed name (if
    it differs) and the record's file field is set to the collapsed value.

    Args:
        record_dict: Record dictionary containing ``Fields.FILE``; modified
            in place on a match.
        full_path: Location where the record expects its file; only its
            parent directory is searched.
    """
    project_path = self.review_manager.path
    expected = record_dict[Fields.FILE].replace("  ", " ")

    # Materialize the listing first: files may be renamed while looping.
    for pdf_path in list(full_path.parent.glob("*.pdf")):
        try:
            relative_pdf = str(pdf_path.relative_to(project_path))
        except ValueError:
            # The directory is not inside the project, so none of its
            # files can be compared with a project-relative file field.
            return

        collapsed = relative_pdf.replace("  ", " ")
        if collapsed != expected:
            continue

        if collapsed != relative_pdf:
            # Move via absolute paths so the rename does not depend on the
            # current working directory being the project root.
            shutil.move(str(pdf_path), str(project_path / collapsed))
        record_dict[Fields.FILE] = expected
183+
184+
def _handle_missing_file_reference(self, *, record_dict: dict) -> None:
    """Drop a file reference that could not be repaired and reset the status.

    The current status is saved under ``colrev_status_backup``, the file
    field is removed, and the record's status is set to
    ``RecordState.rev_prescreen_included``.

    Args:
        record_dict: Record dictionary containing ``Fields.STATUS`` and
            ``Fields.FILE``; modified in place.
    """
    previous_status = record_dict[Fields.STATUS]
    record_dict["colrev_status_backup"] = previous_status
    del record_dict[Fields.FILE]

    # The Record wraps the already-updated dictionary (backup stored, file
    # field removed) before the status is changed.
    colrev.record.record.Record(record_dict).set_status(
        RecordState.rev_prescreen_included
    )
189+
166190
def _get_source_feeds(self) -> dict:
167191
source_feeds = {}
168192
for source in self.review_manager.settings.sources:

0 commit comments

Comments
 (0)