Skip to content

Commit c557f76

Browse files
committed
update url handling
1 parent e6f25e4 commit c557f76

1 file changed

Lines changed: 13 additions & 6 deletions

File tree

colrev/packages/arxiv/src/arxiv.py

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
7 7
import typing
8 8
from multiprocessing import Lock
9 9
from pathlib import Path
10-
from urllib.parse import urlparse
10+
from urllib.parse import urlparse, parse_qs
11 11

12 12
from pydantic import Field
13 13

@@ -95,13 +95,20 @@ def add_endpoint(
95 95

96 96
# pylint: disable=colrev-missed-constant-usage
97 97
else:
98-
host = urlparse(params_dict["url"]).hostname
98+
parsed_url = urlparse(params_dict["url"])
99 99

100-
if not (host and host.endswith("arxiv.org")):
101-
raise AssertionError(f"Unexpected URL host: {host}")
100+
if (
101+
parsed_url.scheme != "https"
102+
or parsed_url.hostname != "arxiv.org"
103+
or parsed_url.path != "/search/"
104+
):
105+
raise AssertionError(f"Unexpected arXiv URL: {params_dict['url']}")
102 106

103-
query = params_dict["url"].replace("https://arxiv.org/search/?query=", "")
104-
query = query[: query.find("&searchtype")]
107+
query_values = parse_qs(parsed_url.query).get("query")
108+
if not query_values:
109+
raise AssertionError("Missing arXiv search query")
110+
111+
query = query_values[0]
105 112

106 113
filename = colrev.utils.get_unique_filename(
107 114
base_path=path,

0 commit comments

Comments
 (0)