Skip to content

Commit 4ef1f52

Browse files
committed
Replace requests with httpx in fetcher modules
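Migrated HTTP calls from the requests library to httpx in chebi.py, pdb.py, pubchem.py, and rhea.py for improved async support and consistent timeout handling (the calls themselves still use the synchronous httpx.Client). Introduced a module-level DEFAULT_TIMEOUT constant (5.0 seconds) in each of the four modules, passed to every httpx.Client, and updated exception handling in chebi.py and pdb.py to catch httpx.HTTPStatusError in place of requests.exceptions.RequestException.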
1 parent 3bc7eb3 commit 4ef1f52

4 files changed

Lines changed: 54 additions & 29 deletions

File tree

pyenzyme/fetcher/chebi.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
ChEBI database by ID and map it to the PyEnzyme data model (v2).
66
"""
77

8-
import requests
98
import re
10-
from typing import Optional, Dict, List
9+
from typing import Dict, List, Optional
10+
11+
import httpx
1112
from pydantic import BaseModel, Field, RootModel
13+
1214
from pyenzyme.versions import v2
1315

16+
DEFAULT_TIMEOUT = 5.0
17+
1418

1519
class ChEBIError(Exception):
1620
"""Error class for ChEBI-specific errors."""
@@ -133,8 +137,10 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
133137
chebi_id = f"CHEBI:{chebi_id}"
134138

135139
try:
136-
response = requests.get(self.BASE_URL, params={"chebi_ids": chebi_id})
137-
response.raise_for_status()
140+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
141+
url = self.BASE_URL.format(chebi_id)
142+
response = client.get(url)
143+
response.raise_for_status()
138144

139145
if response.status_code == 200:
140146
try:
@@ -153,9 +159,9 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
153159
raise e
154160
raise ChEBIError(f"Failed to parse ChEBI response: {str(e)}", e)
155161
else:
156-
raise ChEBIError(f"HTTP {response.status_code}: {response.reason}")
162+
raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
157163

158-
except requests.exceptions.RequestException as e:
164+
except httpx.HTTPStatusError as e:
159165
raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
160166

161167
def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
@@ -183,10 +189,10 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
183189
formatted_ids.append(chebi_id)
184190

185191
try:
186-
response = requests.get(
187-
self.BASE_URL, params={"chebi_ids": ",".join(formatted_ids)}
188-
)
189-
response.raise_for_status()
192+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
193+
url = self.BASE_URL.format(chebi_id)
194+
response = client.get(url)
195+
response.raise_for_status()
190196

191197
if response.status_code == 200:
192198
try:
@@ -199,9 +205,9 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
199205
f"Failed to parse ChEBI batch response: {str(e)}", e
200206
)
201207
else:
202-
raise ChEBIError(f"HTTP {response.status_code}: {response.reason}")
208+
raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
203209

204-
except requests.exceptions.RequestException as e:
210+
except httpx.HTTPStatusError as e:
205211
raise ChEBIError(f"Failed to fetch ChEBI batch: {str(e)}", e)
206212

207213
def search_entries(
@@ -225,7 +231,9 @@ def search_entries(
225231
params["size"] = str(size)
226232

227233
try:
228-
response = requests.get(self.SEARCH_URL, params=params)
234+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
235+
url = self.SEARCH_URL
236+
response = client.get(url, params=params)
229237
response.raise_for_status()
230238

231239
if response.status_code == 200:
@@ -248,10 +256,10 @@ def search_entries(
248256
raise ChEBIError(f"Invalid search response format: {str(e)}", e)
249257
else:
250258
raise ChEBIError(
251-
f"Search failed: HTTP {response.status_code}: {response.reason}"
259+
f"Search failed: HTTP {response.status_code}: {response.text}"
252260
)
253261

254-
except requests.exceptions.RequestException as e:
262+
except httpx.HTTPStatusError as e:
255263
raise ChEBIError(f"Failed to search ChEBI: {str(e)}", e)
256264

257265

pyenzyme/fetcher/pdb.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
Protein Data Bank by ID and map it to the PyEnzyme data model (v2).
66
"""
77

8-
import requests
9-
from typing import List, Optional, Dict, Any
8+
from typing import Any, Dict, List, Optional
9+
10+
import httpx
1011
from pydantic import BaseModel, Field
12+
1113
from pyenzyme.fetcher.chebi import process_id
1214
from pyenzyme.versions import v2
1315

16+
DEFAULT_TIMEOUT = 5.0
17+
1418

1519
class Citation(BaseModel):
1620
"""Model for PDB citation data"""
@@ -154,7 +158,8 @@ def _fetch_json(self, url: str) -> dict:
154158
ConnectionError: If the connection to the server fails
155159
"""
156160
try:
157-
response = requests.get(url)
161+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
162+
response = client.get(url)
158163
response.raise_for_status()
159164

160165
if response.status_code == 200:
@@ -164,7 +169,7 @@ def _fetch_json(self, url: str) -> dict:
164169
f"Request failed with status code {response.status_code}"
165170
)
166171

167-
except requests.exceptions.RequestException as e:
172+
except httpx.HTTPStatusError as e:
168173
raise ConnectionError(f"Connection failed: {str(e)}")
169174
except ValueError as e:
170175
raise ValueError(f"Failed to parse response: {str(e)}")

pyenzyme/fetcher/pubchem.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from typing import ClassVar, Optional
2+
3+
import httpx
24
from pydantic import BaseModel, Field, field_validator
3-
import requests
45

56
from pyenzyme.fetcher.chebi import process_id
67
from pyenzyme.versions import v2
78

9+
DEFAULT_TIMEOUT = 5.0
10+
811

912
class PCUrn(BaseModel):
1013
"""
@@ -90,7 +93,10 @@ def from_cid(cid: int) -> PubChemQuery:
9093
ValueError: If the PubChem API request fails
9194
"""
9295
url = PubChemClient.BASE_CID_URL.format(cid)
93-
response = requests.get(url)
96+
97+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
98+
response = client.get(url)
99+
response.raise_for_status()
94100

95101
if response.status_code != 200:
96102
raise ValueError(f"Failed to fetch PubChem data for CID {cid}")

pyenzyme/fetcher/rhea.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,18 @@
55
Rhea database by ID and map it to the PyEnzyme data model (v2).
66
"""
77

8+
import re
89
from io import StringIO
10+
from typing import ClassVar, List, Optional, Tuple
11+
12+
import httpx
913
import pandas as pd
1014
from pydantic import BaseModel, ConfigDict
11-
from typing import List, ClassVar, Optional, Tuple
12-
13-
import requests
1415

1516
from pyenzyme.fetcher.chebi import fetch_chebi
1617
from pyenzyme.versions import v2
17-
import re
18+
19+
DEFAULT_TIMEOUT = 5.0
1820

1921

2022
class RheaResult(BaseModel):
@@ -114,8 +116,10 @@ def fetch_tsv(query: str) -> pd.DataFrame:
114116
Raises:
115117
HTTPError: If the request to the Rhea API fails
116118
"""
117-
response = requests.get(RheaClient.BASE_URL.format(query, "tsv"))
118-
response.raise_for_status()
119+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
120+
url = RheaClient.BASE_URL.format(query, "tsv")
121+
response = client.get(url)
122+
response.raise_for_status()
119123

120124
return pd.read_csv(StringIO(response.text), sep="\t")
121125

@@ -133,8 +137,10 @@ def fetch_json(query: str) -> RheaQuery:
133137
Raises:
134138
HTTPError: If the request to the Rhea API fails
135139
"""
136-
response = requests.get(RheaClient.BASE_URL.format(query, "json"))
137-
response.raise_for_status()
140+
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
141+
url = RheaClient.BASE_URL.format(query, "json")
142+
response = client.get(url)
143+
response.raise_for_status()
138144

139145
return RheaQuery.model_validate(response.json())
140146

0 commit comments

Comments
 (0)