Skip to content

Commit db8736a

Browse files
committed
deploy: 42d5290
1 parent ca72a65 commit db8736a

2 files changed

Lines changed: 39 additions & 17 deletions

File tree

bin/arxiv_to_publications_correct.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@
55

66
import bibtexparser
77
from bibtexparser.bwriter import BibTexWriter
8+
from requests.exceptions import RequestException
9+
10+
11+
def fetch_doi_content(url, accept_header, description):
12+
try:
13+
response = requests.get(url, headers={'Accept': accept_header}, timeout=30)
14+
response.raise_for_status()
15+
except RequestException as exc:
16+
print(f'Ignoring {url}, failed to fetch {description}: {exc}\n\n')
17+
return None
18+
return response
819

920

1021
if __name__ == '__main__':
@@ -21,25 +32,36 @@
2132

2233
for url, id_db in zip(doi_list, id_list):
2334
print(f'Working on {id_db} with URL {url}')
24-
req = requests.get(url, headers={'Accept': 'application/x-bibtex'})
25-
if not req.status_code == 200:
26-
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
35+
req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX')
36+
if req is None:
2737
continue
2838
bib = req.content.decode()
29-
req = requests.get(url, headers={'Accept': 'application/json'})
30-
if not req.status_code == 200:
31-
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
39+
req = fetch_doi_content(url, 'application/json', 'metadata')
40+
if req is None:
41+
continue
42+
try:
43+
data = req.json()
44+
except ValueError as exc:
45+
print(f'Ignoring {url}, invalid metadata response: {exc}\n\n')
3246
continue
33-
data = req.json()
3447

35-
if len(data['author']) > 1:
36-
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
37-
else:
38-
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
48+
try:
49+
if len(data['author']) > 1:
50+
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
51+
else:
52+
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
53+
except (KeyError, IndexError, TypeError) as exc:
54+
print(f'Ignoring {url}, incomplete metadata response: {exc}\n\n')
55+
continue
3956
id = id.replace(" ", "_")
4057

4158
entries = db.get_entry_dict()
42-
assert entries[id_db]["ENTRYTYPE"] == 'unpublished', "original entry in bib file was NOT unpublished !"
59+
if id_db not in entries:
60+
print(f'Ignoring {id_db}, entry not found in bibliography.\n\n')
61+
continue
62+
if entries[id_db]["ENTRYTYPE"] != 'unpublished':
63+
print(f'Ignoring {id_db}, original entry in bib file was not unpublished.\n\n')
64+
continue
4365
db.entries.remove(entries[id_db])
4466

4567
# Check for duplicate keys in the remaining database and add letter suffixes if needed

sitemap.xml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,12 +370,15 @@
370370
<loc>http://parallel-in-time.org/about/imprint.html</loc>
371371
</url>
372372
<url>
373-
<loc>http://parallel-in-time.org/groups/</loc>
373+
<loc>http://parallel-in-time.org/codes/</loc>
374374
</url>
375375
<url>
376376
<loc>http://parallel-in-time.org/methods/</loc>
377377
</url>
378378
<url>
379+
<loc>http://parallel-in-time.org/groups/</loc>
380+
</url>
381+
<url>
379382
<loc>http://parallel-in-time.org/events/past/</loc>
380383
</url>
381384
<url>
@@ -385,15 +388,12 @@
385388
<loc>http://parallel-in-time.org/events/</loc>
386389
</url>
387390
<url>
388-
<loc>http://parallel-in-time.org/codes/</loc>
391+
<loc>http://parallel-in-time.org/projects/</loc>
389392
</url>
390393
<url>
391394
<loc>http://parallel-in-time.org/references/</loc>
392395
</url>
393396
<url>
394-
<loc>http://parallel-in-time.org/projects/</loc>
395-
</url>
396-
<url>
397397
<loc>http://parallel-in-time.org/about/</loc>
398398
</url>
399399
<url>

0 commit comments

Comments
 (0)