Skip to content

Commit ce0dcc1

Browse files
erwardenaar and claude committed
Revert suffix stripping from arXiv search query
Searching the full model name (e.g. "Mistral 7B") is more precise — papers that use "Mistral" generically may refer to different variants. Also removes the now-unused `re` import and restores the original test.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0b5be1a commit ce0dcc1

2 files changed

Lines changed: 2 additions & 9 deletions

File tree

src/openllm_selector/database.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import json
44
import pathlib
5-
import re
65
import xml.etree.ElementTree as ET
76

87
import requests
@@ -509,14 +508,10 @@ def fetch_recent_papers(model_name: str, max_results: int = 3) -> list[dict]:
509508
>>> papers[0].keys()
510509
dict_keys(['title', 'authors', 'summary', 'published', 'arxiv_url'])
511510
"""
512-
# Strip trailing size/context suffixes (e.g. "7B", "32B", "1.4B", "4K") so
513-
# "OLMo 2 32B" searches as "OLMo 2". Mixture-of-experts names like "8x7B"
514-
# are kept because they are part of the canonical model identity.
515-
search_name = re.sub(r"\s+\d+(\.\d+)?[BbMmKk]$", "", model_name)
516511
response = requests.get(
517512
"https://export.arxiv.org/api/query",
518513
params={
519-
"search_query": f'all:"{search_name}"',
514+
"search_query": f'all:"{model_name}"',
520515
"sortBy": "submittedDate",
521516
"sortOrder": "descending",
522517
"max_results": max_results,

tests/test_database.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1617,9 +1617,7 @@ def test_model_name_used_in_query(self):
16171617
mock_get.return_value = _mock_response(_SAMPLE_ATOM)
16181618
fetch_recent_papers("Pythia 6.9B")
16191619
_, kwargs = mock_get.call_args
1620-
# Size suffix is stripped before searching; "Pythia 6.9B" -> "Pythia"
1621-
assert "Pythia" in kwargs["params"]["search_query"]
1622-
assert "6.9B" not in kwargs["params"]["search_query"]
1620+
assert "Pythia 6.9B" in kwargs["params"]["search_query"]
16231621

16241622
def test_empty_feed_returns_empty_list(self):
16251623
with patch("openllm_selector.database.requests.get") as mock_get:

0 commit comments

Comments
 (0)