|
2 | 2 | import requests |
3 | 3 | import argparse |
4 | 4 | import fileinput |
| 5 | +from html import unescape |
5 | 6 |
|
6 | 7 | import bibtexparser |
7 | 8 | from bibtexparser.bwriter import BibTexWriter |
8 | 9 | from requests.exceptions import RequestException |
9 | 10 |
|
10 | 11 |
|
def normalize_month_fields(bib):
    """Wrap unbraced ``month`` values of a BibTeX string in braces.

    Each ``month = value,`` field that starts a line (case-insensitive) is
    rewritten as ``month = {value},``.  HTML entities in the value are
    decoded first, surrounding quotes are dropped, and for textual months
    everything except ASCII letters is removed.  Values that are already
    brace-delimited are left untouched.

    Args:
        bib: BibTeX source text.

    Returns:
        The text with its ``month`` fields normalized; all other content is
        returned unchanged.
    """

    def replace_month(match):
        prefix, value = match.groups()
        value = unescape(value).strip()
        # Already a braced literal: keep the original text byte-for-byte.
        if value.startswith("{") and value.endswith("}"):
            return match.group(0)
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1].strip()
        value = value.strip("'\"{}")
        letters = re.sub(r"[^A-Za-z]", "", value)
        if letters:
            value = letters
        elif not re.fullmatch(r"[0-9]+", value):
            # Nothing usable left; mirror the letters-only result (empty).
            value = letters
        # else: purely numeric month (e.g. ``11``) -- keep the digits rather
        # than erasing them, which would emit an empty ``month = {}`` field.
        return f"{prefix}{{{value}}},"

    # ``[^,\n]+`` keeps the value on one line; the field must end with a comma.
    return re.sub(
        r"(^\s*month\s*=\s*)([^,\n]+)\s*,",
        replace_month,
        bib,
        flags=re.MULTILINE | re.IGNORECASE,
    )
| 30 | + |
| 31 | + |
11 | 32 | def fetch_doi_content(url, accept_header, description): |
12 | 33 | try: |
13 | 34 | response = requests.get(url, headers={'Accept': accept_header}, timeout=30) |
@@ -83,6 +104,7 @@ def fetch_doi_content(url, accept_header, description): |
83 | 104 | if id != id_db: |
84 | 105 | print(f'Note: ID updated from {id_db} to {id} to reflect the publication year.') |
85 | 106 | bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:]) |
| 107 | + bib = normalize_month_fields(bib) |
86 | 108 | bib_db = bibtexparser.loads(bib) |
87 | 109 | new_entries = bib_db.get_entry_list() |
88 | 110 | if not new_entries: |
|
0 commit comments