Skip to content

Commit 4313ea5

Browse files
committed
DBLP: handle timeout gracefully
1 parent 649d806 commit 4313ea5

2 files changed

Lines changed: 33 additions & 14 deletions

File tree

colrev/packages/dblp/src/dblp.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from multiprocessing import Lock
1010
from pathlib import Path
1111

12-
import requests # pylint: disable=colrev-search-source-requests-import
1312
from pydantic import BaseModel
1413
from pydantic import Field
1514

@@ -22,7 +21,6 @@
2221
import colrev.record.record_similarity
2322
import colrev.search_file
2423
import colrev.utils
25-
from colrev.constants import Colors
2624
from colrev.constants import Fields
2725
from colrev.constants import FieldValues
2826
from colrev.constants import RecordState
@@ -364,9 +362,9 @@ def search(self, rerun: bool) -> None:
364362

365363
else:
366364
raise NotImplementedError
367-
except (requests.exceptions.ConnectTimeout, requests.exceptions.HTTPError):
365+
except colrev_exceptions.ServiceNotAvailableException:
368366
self.logger.warning(
369-
f"{Colors.RED}Skipping DBLP search (API currently not available){Colors.END}"
367+
"Skipping DBLP search: DBLP API is currently not available or returned an invalid response"
370368
)
371369

372370
def load(self) -> dict:

colrev/packages/dblp/src/dblp_api.py

Lines changed: 31 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import json
66
import re
77
import time
8+
from typing import Any
89
from datetime import datetime
910

1011
import requests
@@ -288,24 +289,44 @@ def set_total(self) -> None:
288289

289290
def retrieve_records(self) -> list:
290291
"""Retrieve records from DBLP."""
291-
# try:
292+
ret: Any
292293
while True:
293-
# print(self.url)
294-
ret = self.session.request(
295-
"GET", self.url, headers=self.headers, timeout=self._timeout # type: ignore
296-
)
294+
try:
295+
ret = self.session.request(
296+
"GET", self.url, headers=self.headers, timeout=self._timeout # type: ignore
297+
)
298+
except (
299+
requests.exceptions.ConnectionError,
300+
requests.exceptions.Timeout,
301+
requests.exceptions.HTTPError,
302+
requests.exceptions.RequestException,
303+
) as exc:
304+
raise colrev_exceptions.ServiceNotAvailableException(
305+
"DBLP API is currently not available or returned an invalid response"
306+
) from exc
297307

298308
if ret.status_code == 429:
299309
time.sleep(60)
300310
print("Waiting for 60 seconds (request limit reached)")
301311
continue
302-
ret.raise_for_status()
303-
# 429 - too many requests
304-
if ret.status_code == 500:
305-
return []
312+
if ret.status_code >= 500:
313+
raise colrev_exceptions.ServiceNotAvailableException(
314+
"DBLP API is currently not available or returned an invalid response"
315+
)
316+
try:
317+
ret.raise_for_status()
318+
except requests.exceptions.HTTPError as exc:
319+
raise colrev_exceptions.ServiceNotAvailableException(
320+
"DBLP API is currently not available or returned an invalid response"
321+
) from exc
306322
break
307323

308-
data = json.loads(ret.text)
324+
try:
325+
data = json.loads(ret.text)
326+
except (TypeError, ValueError, json.JSONDecodeError) as exc:
327+
raise colrev_exceptions.ServiceNotAvailableException(
328+
"DBLP API is currently not available or returned an invalid response"
329+
) from exc
309330
response_ms = float(data["result"]["time"]["text"])
310331
time.sleep(response_ms / 10)
311332

0 commit comments

Comments
 (0)