|
5 | 5 |
|
6 | 6 | import bibtexparser |
7 | 7 | from bibtexparser.bwriter import BibTexWriter |
| 8 | +from requests.exceptions import RequestException |
| 9 | + |
| 10 | + |
def fetch_doi_content(url, accept_header, description):
    """Request ``url`` with the given ``Accept`` header and return the response.

    Args:
        url: Address to fetch.
        accept_header: Value sent as the ``Accept`` request header.
        description: Short label for the content being fetched; only used
            in the message printed on failure.

    Returns:
        The response object on success, or ``None`` when the request raised
        a ``RequestException`` (including the error raised by
        ``raise_for_status()`` for an HTTP error status). In the failure
        case a message naming the URL and the error is printed.
    """
    request_headers = {'Accept': accept_header}
    try:
        reply = requests.get(url, headers=request_headers, timeout=30)
        # Turn HTTP error statuses into exceptions so they share the
        # same skip-and-report path as connection failures and timeouts.
        reply.raise_for_status()
    except RequestException as exc:
        print(f'Ignoring {url}, failed to fetch {description}: {exc}\n\n')
        return None
    else:
        return reply
8 | 19 |
|
9 | 20 |
|
10 | 21 | if __name__ == '__main__': |
|
21 | 32 |
|
22 | 33 | for url, id_db in zip(doi_list, id_list): |
23 | 34 | print(f'Working on {id_db} with URL {url}') |
24 | | - req = requests.get(url, headers={'Accept': 'application/x-bibtex'}) |
25 | | - if not req.status_code == 200: |
26 | | - print(f'Ignoring {url}, got status code {req.status_code}\n\n') |
| 35 | + req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX') |
| 36 | + if req is None: |
27 | 37 | continue |
28 | 38 | bib = req.content.decode() |
29 | | - req = requests.get(url, headers={'Accept': 'application/json'}) |
30 | | - if not req.status_code == 200: |
31 | | - print(f'Ignoring {url}, got status code {req.status_code}\n\n') |
| 39 | + req = fetch_doi_content(url, 'application/json', 'metadata') |
| 40 | + if req is None: |
| 41 | + continue |
| 42 | + try: |
| 43 | + data = req.json() |
| 44 | + except ValueError as exc: |
| 45 | + print(f'Ignoring {url}, invalid metadata response: {exc}\n\n') |
32 | 46 | continue |
33 | | - data = req.json() |
34 | 47 |
|
35 | | - if len(data['author']) > 1: |
36 | | - id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0]) |
37 | | - else: |
38 | | - id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0]) |
| 48 | + try: |
| 49 | + if len(data['author']) > 1: |
| 50 | + id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0]) |
| 51 | + else: |
| 52 | + id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0]) |
| 53 | + except (KeyError, IndexError, TypeError) as exc: |
| 54 | + print(f'Ignoring {url}, incomplete metadata response: {exc}\n\n') |
| 55 | + continue |
39 | 56 | id = id.replace(" ", "_") |
40 | 57 |
|
41 | 58 | entries = db.get_entry_dict() |
42 | | - assert entries[id_db]["ENTRYTYPE"] == 'unpublished', "original entry in bib file was NOT unpublished !" |
| 59 | + if id_db not in entries: |
| 60 | + print(f'Ignoring {id_db}, entry not found in bibliography.\n\n') |
| 61 | + continue |
| 62 | + if entries[id_db]["ENTRYTYPE"] != 'unpublished': |
| 63 | + print(f'Ignoring {id_db}, original entry in bib file was not unpublished.\n\n') |
| 64 | + continue |
43 | 65 | db.entries.remove(entries[id_db]) |
44 | 66 |
|
45 | 67 | # Check for duplicate keys in the remaining database and add letter suffixes if needed |
|
0 commit comments