Skip to content

Commit 42d5290

Browse files
authored
Merge pull request #1106 from Parallel-in-Time/copilot/fix-ci-run
Handle transient DOI lookup failures in arXiv correction workflow
2 parents: 6b764f8 + aa5587d; commit: 42d5290

2 files changed

Lines changed: 38 additions & 12 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,10 @@ build/
4848
### Linux ###
4949
*~
5050

51+
### Python ###
52+
__pycache__/
53+
*.py[cod]
54+
5155
# KDE directory preferences
5256
.directory
5357

bin/arxiv_to_publications_correct.py

Lines changed: 34 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,17 @@
55

66
import bibtexparser
77
from bibtexparser.bwriter import BibTexWriter
8+
from requests.exceptions import RequestException
9+
10+
11+
def fetch_doi_content(url, accept_header, description):
12+
try:
13+
response = requests.get(url, headers={'Accept': accept_header}, timeout=30)
14+
response.raise_for_status()
15+
except RequestException as exc:
16+
print(f'Ignoring {url}, failed to fetch {description}: {exc}\n\n')
17+
return None
18+
return response
819

920

1021
if __name__ == '__main__':
@@ -21,25 +32,36 @@
2132

2233
for url, id_db in zip(doi_list, id_list):
2334
print(f'Working on {id_db} with URL {url}')
24-
req = requests.get(url, headers={'Accept': 'application/x-bibtex'})
25-
if not req.status_code == 200:
26-
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
35+
req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX')
36+
if req is None:
2737
continue
2838
bib = req.content.decode()
29-
req = requests.get(url, headers={'Accept': 'application/json'})
30-
if not req.status_code == 200:
31-
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
39+
req = fetch_doi_content(url, 'application/json', 'metadata')
40+
if req is None:
41+
continue
42+
try:
43+
data = req.json()
44+
except ValueError as exc:
45+
print(f'Ignoring {url}, invalid metadata response: {exc}\n\n')
3246
continue
33-
data = req.json()
3447

35-
if len(data['author']) > 1:
36-
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
37-
else:
38-
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
48+
try:
49+
if len(data['author']) > 1:
50+
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
51+
else:
52+
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
53+
except (KeyError, IndexError, TypeError) as exc:
54+
print(f'Ignoring {url}, incomplete metadata response: {exc}\n\n')
55+
continue
3956
id = id.replace(" ", "_")
4057

4158
entries = db.get_entry_dict()
42-
assert entries[id_db]["ENTRYTYPE"] == 'unpublished', "original entry in bib file was NOT unpublished !"
59+
if id_db not in entries:
60+
print(f'Ignoring {id_db}, entry not found in bibliography.\n\n')
61+
continue
62+
if entries[id_db]["ENTRYTYPE"] != 'unpublished':
63+
print(f'Ignoring {id_db}, original entry in bib file was not unpublished.\n\n')
64+
continue
4365
db.entries.remove(entries[id_db])
4466

4567
# Check for duplicate keys in the remaining database and add letter suffixes if needed

0 commit comments

Comments (0)