|
1 | 1 | import argparse |
2 | | -import re |
3 | | -import requests |
4 | 2 | import fileinput |
| 3 | +from html import unescape |
| 4 | +import re |
5 | 5 | import urllib |
| 6 | +import requests |
6 | 7 |
|
7 | 8 | from arxivcheck.arxiv import get_arxiv_info |
8 | 9 |
|
|
11 | 12 | from bibtexparser.bwriter import BibTexWriter |
12 | 13 |
|
13 | 14 |
|
def normalize_month_fields(bib: str) -> str:
    """Rewrite every ``month = <value>,`` line of *bib* as ``month = {<value>},``.

    Only lines that start (after optional whitespace) with ``month``, matched
    case-insensitively, and end the value with a comma are touched.

    For each such field:

    * HTML entities in the value are decoded first (``&quot;`` -> ``"``).
    * A value already wrapped in braces is returned exactly as it was.
    * Surrounding double/single quotes and stray braces are removed.
    * If the value contains ASCII letters, only those letters are kept
      (``"Sep."`` -> ``Sep``).
    * If it contains no letters, its digits are kept instead, so numeric
      months such as ``5`` or ``"10"`` survive rather than collapsing to
      an empty ``{}``.

    Args:
        bib: BibTeX source text.

    Returns:
        The text with the matching month fields rewritten.
    """

    def replace_month(match):
        prefix, raw_value = match.groups()
        value = unescape(raw_value).strip()

        # Already brace-delimited: hand back the original text untouched.
        if value.startswith("{") and value.endswith("}"):
            return match.group(0)

        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1].strip()
        value = value.strip("'\"{}")

        letters = re.sub(r"[^A-Za-z]", "", value)
        # A value without letters used to be erased entirely; fall back to its
        # digits so a numeric month is preserved.
        cleaned = letters or re.sub(r"[^0-9]", "", value)
        return f"{prefix}{{{cleaned}}},"

    return re.sub(
        r"(^\s*month\s*=\s*)([^,\n]+)\s*,",
        replace_month,
        bib,
        flags=re.MULTILINE | re.IGNORECASE,
    )
| 33 | + |
| 34 | + |
14 | 35 | try: |
15 | 36 |
|
16 | 37 | parser = argparse.ArgumentParser() |
|
149 | 170 | bib = re.sub(r'(@[a-z]*{)(.*?),', r'\1' + id + ',', bib) |
150 | 171 | url_bad = re.search(r'url\s*=\s*{(.*)}', bib).groups()[0] |
151 | 172 | bib = re.sub(r'(url\s*=\s*{)(.*)}', r'\1' + urllib.parse.unquote(url_bad) + '}', bib) |
152 | | - bib = re.sub( |
153 | | - r'(^\s*month\s*=\s*)(?:[\'"]?)([A-Za-z]+)(?:[\'"]?)\s*,', |
154 | | - r'\1{\2},', |
155 | | - bib, |
156 | | - flags=re.MULTILINE, |
157 | | - ) |
| 173 | + bib = normalize_month_fields(bib) |
158 | 174 | bib_db = bibtexparser.loads(bib) |
159 | 175 | print(bib) |
160 | 176 | else: |
|
0 commit comments