Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions bin/arxiv_to_publications_correct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,33 @@
import requests
import argparse
import fileinput
from html import unescape

import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from requests.exceptions import RequestException


def normalize_month_fields(bib):
    """Rewrite bare or quoted ``month`` values in a BibTeX string as braced values.

    Only ``month = value,`` fields that begin a line (optionally indented,
    matched case-insensitively) and are terminated by a comma are touched.
    Values already enclosed in braces are left exactly as they are.

    Args:
        bib: BibTeX source text.

    Returns:
        The text with every matching month value rewritten as ``{value}``.
        Alphabetic values (e.g. ``may``, ``"May"``) keep only their letters;
        purely numeric values (e.g. ``5``, ``"12"``) keep their digits.
    """

    def replace_month(match):
        prefix, value = match.groups()
        # Decode HTML entities (e.g. &quot;) before inspecting the delimiters.
        value = unescape(value).strip()
        if value.startswith("{") and value.endswith("}"):
            # Already braced: return the original text untouched.
            return match.group(0)
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1].strip()
        value = value.strip("'\"{}")
        cleaned = re.sub(r"[^A-Za-z]", "", value)
        if not cleaned:
            # No letters at all: keep the digits so that numeric months such
            # as ``5`` are not replaced by an empty ``{}``.
            cleaned = re.sub(r"[^0-9]", "", value)
        return f"{prefix}{{{cleaned}}},"

    return re.sub(
        r"(^\s*month\s*=\s*)([^,\n]+)\s*,",
        replace_month,
        bib,
        flags=re.MULTILINE | re.IGNORECASE,
    )


def fetch_doi_content(url, accept_header, description):
try:
response = requests.get(url, headers={'Accept': accept_header}, timeout=30)
Expand Down Expand Up @@ -83,6 +104,7 @@ def fetch_doi_content(url, accept_header, description):
if id != id_db:
print(f'Note: ID updated from {id_db} to {id} to reflect the publication year.')
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
bib = normalize_month_fields(bib)
bib_db = bibtexparser.loads(bib)
new_entries = bib_db.get_entry_list()
if not new_entries:
Expand Down