Skip to content

Commit bc4a073

Browse files
committed
package: abi_inform refactor load_ris
1 parent dfff5c0 commit bc4a073

1 file changed

Lines changed: 134 additions & 113 deletions

File tree

colrev/packages/abi_inform_proquest/src/abi_inform_proquest.py

Lines changed: 134 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -133,125 +133,146 @@ def prep_link_md(
133133
return record
134134

135135
@classmethod
136-
def _load_ris(cls, filename: Path, logger: logging.Logger) -> dict:
136+
def _ris_id_labeler(cls, records: list) -> None:
    """Set each record's ``Fields.ID`` to the value of its RIS ``AN`` tag.

    The records are modified in place. A record without an ``AN`` key
    raises ``KeyError``.
    """
    for entry in records:
        accession_value = entry["AN"]
        entry[Fields.ID] = accession_value
137139

138-
def id_labeler(records: list) -> None:
139-
for record_dict in records:
140-
record_dict[Fields.ID] = record_dict["AN"]
141-
142-
def entrytype_setter(record_dict: dict) -> None:
143-
if record_dict["TY"] == "JOUR":
144-
record_dict[Fields.ENTRYTYPE] = ENTRYTYPES.ARTICLE
145-
elif record_dict["TY"] == "BOOK":
146-
record_dict[Fields.ENTRYTYPE] = ENTRYTYPES.BOOK
147-
elif record_dict["TY"] == "THES":
148-
record_dict[Fields.ENTRYTYPE] = ENTRYTYPES.PHDTHESIS
149-
else:
150-
record_dict[Fields.ENTRYTYPE] = ENTRYTYPES.MISC
151-
152-
def field_mapper(record_dict: dict) -> None:
153-
154-
key_maps = {
155-
ENTRYTYPES.ARTICLE: {
156-
"PY": Fields.YEAR,
157-
"AU": Fields.AUTHOR,
158-
"TI": Fields.TITLE,
159-
"JF": Fields.JOURNAL,
160-
"AB": Fields.ABSTRACT,
161-
"VL": Fields.VOLUME,
162-
"IS": Fields.NUMBER,
163-
"KW": Fields.KEYWORDS,
164-
"DO": Fields.DOI,
165-
"PB": Fields.PUBLISHER,
166-
"SP": Fields.PAGES,
167-
"PMID": Fields.PUBMED_ID,
168-
"SN": Fields.ISSN,
169-
"AN": f"{cls.endpoint}.accession_number",
170-
"LA": Fields.LANGUAGE,
171-
"L2": Fields.FULLTEXT,
172-
"UR": Fields.URL,
173-
},
174-
ENTRYTYPES.PHDTHESIS: {
175-
"PY": Fields.YEAR,
176-
"AU": Fields.AUTHOR,
177-
"T1": Fields.TITLE,
178-
"UR": Fields.URL,
179-
"PB": Fields.SCHOOL,
180-
"KW": Fields.KEYWORDS,
181-
"AN": f"{cls.endpoint}.accession_number",
182-
"AB": Fields.ABSTRACT,
183-
"LA": Fields.LANGUAGE,
184-
"CY": Fields.ADDRESS,
185-
"L2": Fields.FULLTEXT,
186-
"A3": f"{cls.endpoint}.supervisor",
187-
},
188-
}
189-
190-
if record_dict[Fields.ENTRYTYPE] == ENTRYTYPES.ARTICLE:
191-
if "T1" in record_dict and "TI" not in record_dict:
192-
record_dict["TI"] = record_dict.pop("T1")
193-
194-
key_map = key_maps[record_dict[Fields.ENTRYTYPE]]
195-
for ris_key in list(record_dict.keys()):
196-
if ris_key in key_map:
197-
standard_key = key_map[ris_key]
198-
record_dict[standard_key] = record_dict.pop(ris_key)
199-
200-
if "SP" in record_dict and "EP" in record_dict:
201-
record_dict[Fields.PAGES] = (
202-
f"{record_dict.pop('SP')}--{record_dict.pop('EP')}"
203-
)
140+
@classmethod
def _ris_entrytype_setter(cls, record_dict: dict) -> None:
    """Derive ``Fields.ENTRYTYPE`` from the record's RIS ``TY`` tag, in place.

    ``JOUR`` becomes ARTICLE, ``BOOK`` becomes BOOK and ``THES`` becomes
    PHDTHESIS; every other value falls back to MISC. A record without a
    ``TY`` key raises ``KeyError``.
    """
    known_types = (
        ("JOUR", ENTRYTYPES.ARTICLE),
        ("BOOK", ENTRYTYPES.BOOK),
        ("THES", ENTRYTYPES.PHDTHESIS),
    )
    ris_type = record_dict["TY"]
    # First matching RIS code wins; MISC is the catch-all.
    record_dict[Fields.ENTRYTYPE] = next(
        (entrytype for code, entrytype in known_types if ris_type == code),
        ENTRYTYPES.MISC,
    )
150+
151+
@classmethod
def _get_ris_key_maps(cls) -> dict:
    """Return the RIS-tag to record-field mapping, keyed by entry type.

    Only ARTICLE and PHDTHESIS have a mapping. Source-specific fields are
    namespaced with ``cls.endpoint``.
    """
    accession_field = f"{cls.endpoint}.accession_number"
    supervisor_field = f"{cls.endpoint}.supervisor"

    article_map = {
        "PY": Fields.YEAR,
        "AU": Fields.AUTHOR,
        "TI": Fields.TITLE,
        "JF": Fields.JOURNAL,
        "AB": Fields.ABSTRACT,
        "VL": Fields.VOLUME,
        "IS": Fields.NUMBER,
        "KW": Fields.KEYWORDS,
        "DO": Fields.DOI,
        "PB": Fields.PUBLISHER,
        "SP": Fields.PAGES,
        "PMID": Fields.PUBMED_ID,
        "SN": Fields.ISSN,
        "AN": accession_field,
        "LA": Fields.LANGUAGE,
        "L2": Fields.FULLTEXT,
        "UR": Fields.URL,
    }

    thesis_map = {
        "PY": Fields.YEAR,
        "AU": Fields.AUTHOR,
        "T1": Fields.TITLE,
        "UR": Fields.URL,
        "PB": Fields.SCHOOL,
        "KW": Fields.KEYWORDS,
        "AN": accession_field,
        "AB": Fields.ABSTRACT,
        "LA": Fields.LANGUAGE,
        "CY": Fields.ADDRESS,
        "L2": Fields.FULLTEXT,
        "A3": supervisor_field,
    }

    return {
        ENTRYTYPES.ARTICLE: article_map,
        ENTRYTYPES.PHDTHESIS: thesis_map,
    }
188+
189+
@classmethod
def _map_ris_fields(cls, record_dict: dict) -> None:
    """Rename RIS tags in the record to their mapped field names, in place.

    For articles, a ``T1`` title is first moved to ``TI`` when no ``TI`` is
    present, because the article mapping only knows ``TI``. Entry types
    without a mapping are left untouched.
    """
    entrytype = record_dict[Fields.ENTRYTYPE]

    needs_title_alias = (
        entrytype == ENTRYTYPES.ARTICLE
        and "T1" in record_dict
        and "TI" not in record_dict
    )
    if needs_title_alias:
        record_dict["TI"] = record_dict.pop("T1")

    tag_to_field = cls._get_ris_key_maps().get(entrytype, {})

    # Snapshot the keys first: the record is mutated while renaming.
    for tag in list(record_dict.keys()):
        if tag not in tag_to_field:
            continue
        record_dict[tag_to_field[tag]] = record_dict.pop(tag)
202+
203+
@classmethod
def _merge_ris_pages(cls, record_dict: dict) -> None:
    """Combine start and end page into ``Fields.PAGES`` as ``start--end``.

    The record is modified in place and the ``EP`` tag is consumed. The
    start page is taken from the raw ``SP`` tag when it is still present.
    Otherwise it is taken from ``Fields.PAGES``: the article key map renames
    ``SP`` to ``Fields.PAGES`` before this step runs, so for articles the
    raw ``SP`` tag is already gone and the end page would never be merged.

    Records without ``EP``, or without any start page, are left unchanged.
    """
    if "EP" not in record_dict:
        return

    if "SP" in record_dict:
        start_page = record_dict.pop("SP")
    elif Fields.PAGES in record_dict:
        # Start page was already renamed by the field mapping.
        start_page = record_dict[Fields.PAGES]
    else:
        return

    record_dict[Fields.PAGES] = f"{start_page}--{record_dict.pop('EP')}"
209+
210+
@classmethod
def _stringify_ris_list_fields(cls, record_dict: dict) -> None:
    """Join list-valued author, editor and keyword fields into strings.

    Authors and editors are joined with ``" and "``, keywords with ``", "``.
    Fields that are missing or not lists are left as they are. The record
    is modified in place.
    """
    separators = (
        (Fields.AUTHOR, " and "),
        (Fields.EDITOR, " and "),
        (Fields.KEYWORDS, ", "),
    )
    for field, separator in separators:
        value = record_dict.get(field)
        if isinstance(value, list):
            record_dict[field] = separator.join(value)
226+
227+
@classmethod
228+
def _remove_unmapped_ris_keys(cls, record_dict: dict) -> None:
229+
keys_to_remove = [
230+
"TY",
231+
"Y2",
232+
"DB",
233+
"C1",
234+
"T3",
235+
"DA",
236+
"JF",
237+
"L1",
238+
"SP",
239+
"Y1",
240+
"M1",
241+
"M3",
242+
"N1",
243+
"PP",
244+
"CY",
245+
"SN",
246+
"ER",
247+
"AN",
248+
]
249+
250+
for key in keys_to_remove:
251+
record_dict.pop(key, None)
204252

205-
if Fields.AUTHOR in record_dict and isinstance(
206-
record_dict[Fields.AUTHOR], list
207-
):
208-
record_dict[Fields.AUTHOR] = " and ".join(record_dict[Fields.AUTHOR])
209-
if Fields.EDITOR in record_dict and isinstance(
210-
record_dict[Fields.EDITOR], list
211-
):
212-
record_dict[Fields.EDITOR] = " and ".join(record_dict[Fields.EDITOR])
213-
if Fields.KEYWORDS in record_dict and isinstance(
214-
record_dict[Fields.KEYWORDS], list
215-
):
216-
record_dict[Fields.KEYWORDS] = ", ".join(record_dict[Fields.KEYWORDS])
217-
218-
keys_to_remove = [
219-
"TY",
220-
"Y2",
221-
"DB",
222-
"C1",
223-
"T3",
224-
"DA",
225-
"JF",
226-
"L1",
227-
"SP",
228-
"Y1",
229-
"M1",
230-
"M3",
231-
"N1",
232-
"PP",
233-
"CY",
234-
"SN",
235-
"ER",
236-
"AN",
237-
]
238-
239-
for key in keys_to_remove:
240-
record_dict.pop(key, None)
241-
242-
for key, value in record_dict.items():
243-
record_dict[key] = str(value)
244-
245-
records = colrev.loader.load_utils.load(
253+
@classmethod
254+
def _stringify_ris_values(cls, record_dict: dict) -> None:
255+
for key, value in record_dict.items():
256+
record_dict[key] = str(value)
257+
258+
@classmethod
259+
def _ris_field_mapper(cls, record_dict: dict) -> None:
260+
cls._map_ris_fields(record_dict)
261+
cls._merge_ris_pages(record_dict)
262+
cls._stringify_ris_list_fields(record_dict)
263+
cls._remove_unmapped_ris_keys(record_dict)
264+
cls._stringify_ris_values(record_dict)
265+
266+
@classmethod
267+
def _load_ris(cls, filename: Path, logger: logging.Logger) -> dict:
268+
return colrev.loader.load_utils.load(
246269
filename=filename,
247-
id_labeler=id_labeler,
248-
entrytype_setter=entrytype_setter,
249-
field_mapper=field_mapper,
270+
id_labeler=cls._ris_id_labeler,
271+
entrytype_setter=cls._ris_entrytype_setter,
272+
field_mapper=cls._ris_field_mapper,
250273
logger=logger,
251274
)
252275

253-
return records
254-
255276
def load(self) -> dict:
256277
"""Load the records from the SearchSource file."""
257278
if self.search_source.search_results_path.suffix == ".bib":

0 commit comments

Comments
 (0)