Skip to content

Commit 0a5cc09

Browse files
authored
Merge pull request #1107 from Parallel-in-Time/copilot/fix-ci-failure
Fix arxivbot workflow: guard against non-BibTeX DOI responses and shell injection
2 parents 42d5290 + b356f39 commit 0a5cc09

2 files changed

Lines changed: 41 additions & 26 deletions

File tree

.github/workflows/arxiv_to_publications_correct.yml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,14 @@ jobs:
99
- uses: actions/checkout@v4
1010
- name: Look for bibtex entries that now have a DOI
1111
if: github.event.label.name == 'food for arxivbot'
12+
env:
13+
ISSUE_BODY: ${{ github.event.issue.body }}
1214
run: |
1315
cd bin
1416
python3 -m pip install --user --upgrade pip
1517
python3 -m pip install --user setuptools
16-
python3 -m pip install --user bibtexparser
17-
python3 arxiv_to_publications_correct.py -b "${{ github.event.issue.body }}" > comment.out 2>&1
18+
python3 -m pip install --user requests bibtexparser
19+
python3 arxiv_to_publications_correct.py -b "$ISSUE_BODY" > comment.out 2>&1
1820
{
1921
echo 'COMMENT<<GITHUB_OUTPUT_DELIMITER'
2022
cat comment.out

bin/arxiv_to_publications_correct.py

Lines changed: 37 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -32,15 +32,14 @@ def fetch_doi_content(url, accept_header, description):
3232

3333
for url, id_db in zip(doi_list, id_list):
3434
print(f'Working on {id_db} with URL {url}')
35-
req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX')
36-
if req is None:
35+
bibtex_req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX')
36+
if bibtex_req is None:
3737
continue
38-
bib = req.content.decode()
39-
req = fetch_doi_content(url, 'application/json', 'metadata')
40-
if req is None:
38+
meta_req = fetch_doi_content(url, 'application/json', 'metadata')
39+
if meta_req is None:
4140
continue
4241
try:
43-
data = req.json()
42+
data = meta_req.json()
4443
except ValueError as exc:
4544
print(f'Ignoring {url}, invalid metadata response: {exc}\n\n')
4645
continue
@@ -62,26 +61,40 @@ def fetch_doi_content(url, accept_header, description):
6261
if entries[id_db]["ENTRYTYPE"] != 'unpublished':
6362
print(f'Ignoring {id_db}, original entry in bib file was not unpublished.\n\n')
6463
continue
65-
db.entries.remove(entries[id_db])
66-
67-
# Check for duplicate keys in the remaining database and add letter suffixes if needed
68-
remaining = db.get_entry_dict()
69-
id_orig = id
70-
letters = 'bcdefghijklmnopqrstuvwxyz'
71-
i = 0
72-
while id in remaining:
73-
print(f'Key {id} already exists, augmenting with letter suffix.')
74-
id = id_orig + letters[i]
75-
i += 1
7664

77-
if id != id_db:
78-
print(f'Note: ID updated from {id_db} to {id} to reflect the publication year.')
65+
# Parse the BibTeX and replace the key before modifying the database
66+
try:
67+
bib = bibtex_req.text
68+
bType, *rest1 = bib.split("{")
69+
if not rest1:
70+
print(f'Ignoring {id_db}, DOI did not return valid BibTeX (no opening brace found).\n\n')
71+
continue
72+
oldID, *rest2 = rest1[0].split(",")
73+
# Check for duplicate keys in the remaining database and add letter suffixes if needed
74+
remaining = db.get_entry_dict()
75+
del remaining[id_db] # exclude the entry being replaced from duplicate check
76+
id_orig = id
77+
letters = 'bcdefghijklmnopqrstuvwxyz'
78+
i = 0
79+
while id in remaining:
80+
print(f'Key {id} already exists, augmenting with letter suffix.')
81+
id = id_orig + letters[i]
82+
i += 1
83+
if id != id_db:
84+
print(f'Note: ID updated from {id_db} to {id} to reflect the publication year.')
85+
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
86+
bib_db = bibtexparser.loads(bib)
87+
new_entries = bib_db.get_entry_list()
88+
if not new_entries:
89+
print(f'Ignoring {id_db}, could not parse BibTeX returned by DOI.\n\n')
90+
continue
91+
except Exception as exc:
92+
print(f'Ignoring {id_db}, error processing BibTeX from DOI: {exc}\n\n')
93+
continue
7994

80-
bType, *rest1 = bib.split("{")
81-
oldID, *rest2 = rest1[0].split(",")
82-
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
83-
bib_db = bibtexparser.loads(bib)
84-
db.entries.extend(bib_db.get_entry_list())
95+
# Only mutate the database once we have a valid replacement entry
96+
db.entries.remove(entries[id_db])
97+
db.entries.extend(new_entries)
8598

8699
if id_list:
87100
writer = BibTexWriter()

0 commit comments

Comments
 (0)