Skip to content

Commit 21d0cc7

Browse files
authored
Merge pull request #1109 from Parallel-in-Time/copilot/fix-date-format-issues
Fix bare month normalization in arxiv_to_publications_correct.py
2 parents bc96011 + 530c67c commit 21d0cc7

1 file changed

Lines changed: 22 additions & 0 deletions

File tree

bin/arxiv_to_publications_correct.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,33 @@
22
import requests
33
import argparse
44
import fileinput
5+
from html import unescape
56

67
import bibtexparser
78
from bibtexparser.bwriter import BibTexWriter
89
from requests.exceptions import RequestException
910

1011

12+
def normalize_month_fields(bib):
    """Brace-delimit bare or quoted ``month`` values in a BibTeX string.

    Every line of the form ``month = <value>,`` (case-insensitive, leading
    whitespace allowed) is inspected.  The value is HTML-unescaped, any
    surrounding quotes are dropped, all non-letter characters are removed,
    and the result is written back as ``month = {<letters>},``.

    A field is left exactly as it was when:

    * the value is already wrapped in braces;
    * the captured value opens a ``{`` or ``"`` delimiter that is not closed
      inside the captured span (the real value contains a comma, so only a
      fragment was captured and rewriting it would unbalance the entry);
    * the value contains no ASCII letters at all (e.g. a numeric month such
      as ``3``), because reducing it to letters would erase it.

    Args:
        bib: BibTeX source text.

    Returns:
        The text with the qualifying ``month`` fields rewritten.
    """

    def replace_month(match):
        prefix, raw_value = match.groups()
        value = unescape(raw_value).strip()

        if value.startswith("{"):
            # Either already brace-delimited, or a truncated fragment of a
            # braced value that contains a comma: do not touch in both cases.
            return match.group(0)

        if value.startswith('"'):
            if len(value) < 2 or not value.endswith('"'):
                # Opening quote without its closing partner in the captured
                # span: the quoted value contains a comma, leave it alone.
                return match.group(0)
            value = value[1:-1].strip()

        value = value.strip("'\"{}")
        letters = re.sub(r"[^A-Za-z]", "", value)
        if not letters:
            # Nothing alphabetic to keep (numeric month, empty value, ...);
            # emitting ``{}`` would silently discard the original data.
            return match.group(0)
        return f"{prefix}{{{letters}}},"

    return re.sub(
        r"(^\s*month\s*=\s*)([^,\n]+)\s*,",
        replace_month,
        bib,
        flags=re.MULTILINE | re.IGNORECASE,
    )
30+
31+
1132
def fetch_doi_content(url, accept_header, description):
1233
try:
1334
response = requests.get(url, headers={'Accept': accept_header}, timeout=30)
@@ -83,6 +104,7 @@ def fetch_doi_content(url, accept_header, description):
83104
if id != id_db:
84105
print(f'Note: ID updated from {id_db} to {id} to reflect the publication year.')
85106
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
107+
bib = normalize_month_fields(bib)
86108
bib_db = bibtexparser.loads(bib)
87109
new_entries = bib_db.get_entry_list()
88110
if not new_entries:

0 commit comments

Comments
 (0)