Replace requests with httpx in fetcher modules

JR-1991 · JR-1991 · commit 4ef1f52cc58c · 2026-01-13T15:15:07.000+01:00
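Migrated HTTP requests from the requests library to httpx in chebi.py, pdb.py, pubchem.py, and rhea.py. All calls now go through a synchronous httpx.Client created with an explicit timeout, which gives consistent timeout handling and prepares the fetchers for later async support. Introduced a module-level DEFAULT_TIMEOUT constant (5.0 seconds) in each of the four modules, and in chebi.py and pdb.py replaced the requests.exceptions.RequestException handlers with httpx.HTTPStatusError.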
diff --git a/pyenzyme/fetcher/chebi.py b/pyenzyme/fetcher/chebi.py
@@ -5,12 +5,16 @@
 ChEBI database by ID and map it to the PyEnzyme data model (v2).
 """
 
-import requests
 import re
-from typing import Optional, Dict, List
+from typing import Dict, List, Optional
+
+import httpx
 from pydantic import BaseModel, Field, RootModel
+
 from pyenzyme.versions import v2
 
+DEFAULT_TIMEOUT = 5.0
+
 
 class ChEBIError(Exception):
     """Error class for ChEBI-specific errors."""
@@ -133,8 +137,10 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
             chebi_id = f"CHEBI:{chebi_id}"
 
         try:
-            response = requests.get(self.BASE_URL, params={"chebi_ids": chebi_id})
-            response.raise_for_status()
+            with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+                url = self.BASE_URL.format(chebi_id)
+                response = client.get(url)
+                response.raise_for_status()
 
             if response.status_code == 200:
                 try:
@@ -153,9 +159,9 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
                         raise e
                     raise ChEBIError(f"Failed to parse ChEBI response: {str(e)}", e)
             else:
-                raise ChEBIError(f"HTTP {response.status_code}: {response.reason}")
+                raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
 
-        except requests.exceptions.RequestException as e:
+        except httpx.HTTPStatusError as e:
             raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
 
     def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
@@ -183,10 +189,10 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
                 formatted_ids.append(chebi_id)
 
         try:
-            response = requests.get(
-                self.BASE_URL, params={"chebi_ids": ",".join(formatted_ids)}
-            )
-            response.raise_for_status()
+            with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+                url = self.BASE_URL.format(chebi_id)
+                response = client.get(url)
+                response.raise_for_status()
 
             if response.status_code == 200:
                 try:
@@ -199,9 +205,9 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
                         f"Failed to parse ChEBI batch response: {str(e)}", e
                     )
             else:
-                raise ChEBIError(f"HTTP {response.status_code}: {response.reason}")
+                raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
 
-        except requests.exceptions.RequestException as e:
+        except httpx.HTTPStatusError as e:
             raise ChEBIError(f"Failed to fetch ChEBI batch: {str(e)}", e)
 
     def search_entries(
@@ -225,7 +231,9 @@ def search_entries(
             params["size"] = str(size)
 
         try:
-            response = requests.get(self.SEARCH_URL, params=params)
+            with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+                url = self.SEARCH_URL
+                response = client.get(url, params=params)
             response.raise_for_status()
 
             if response.status_code == 200:
@@ -248,10 +256,10 @@ def search_entries(
                     raise ChEBIError(f"Invalid search response format: {str(e)}", e)
             else:
                 raise ChEBIError(
-                    f"Search failed: HTTP {response.status_code}: {response.reason}"
+                    f"Search failed: HTTP {response.status_code}: {response.text}"
                 )
 
-        except requests.exceptions.RequestException as e:
+        except httpx.HTTPStatusError as e:
             raise ChEBIError(f"Failed to search ChEBI: {str(e)}", e)
 
 
diff --git a/pyenzyme/fetcher/pdb.py b/pyenzyme/fetcher/pdb.py
@@ -5,12 +5,16 @@
 Protein Data Bank by ID and map it to the PyEnzyme data model (v2).
 """
 
-import requests
-from typing import List, Optional, Dict, Any
+from typing import Any, Dict, List, Optional
+
+import httpx
 from pydantic import BaseModel, Field
+
 from pyenzyme.fetcher.chebi import process_id
 from pyenzyme.versions import v2
 
+DEFAULT_TIMEOUT = 5.0
+
 
 class Citation(BaseModel):
     """Model for PDB citation data"""
@@ -154,7 +158,8 @@ def _fetch_json(self, url: str) -> dict:
             ConnectionError: If the connection to the server fails
         """
         try:
-            response = requests.get(url)
+            with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+                response = client.get(url)
             response.raise_for_status()
 
             if response.status_code == 200:
@@ -164,7 +169,7 @@ def _fetch_json(self, url: str) -> dict:
                     f"Request failed with status code {response.status_code}"
                 )
 
-        except requests.exceptions.RequestException as e:
+        except httpx.HTTPStatusError as e:
             raise ConnectionError(f"Connection failed: {str(e)}")
         except ValueError as e:
             raise ValueError(f"Failed to parse response: {str(e)}")
diff --git a/pyenzyme/fetcher/pubchem.py b/pyenzyme/fetcher/pubchem.py
@@ -1,10 +1,13 @@
 from typing import ClassVar, Optional
+
+import httpx
 from pydantic import BaseModel, Field, field_validator
-import requests
 
 from pyenzyme.fetcher.chebi import process_id
 from pyenzyme.versions import v2
 
+DEFAULT_TIMEOUT = 5.0
+
 
 class PCUrn(BaseModel):
     """
@@ -90,7 +93,10 @@ def from_cid(cid: int) -> PubChemQuery:
             ValueError: If the PubChem API request fails
         """
         url = PubChemClient.BASE_CID_URL.format(cid)
-        response = requests.get(url)
+
+        with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+            response = client.get(url)
+            response.raise_for_status()
 
         if response.status_code != 200:
             raise ValueError(f"Failed to fetch PubChem data for CID {cid}")
diff --git a/pyenzyme/fetcher/rhea.py b/pyenzyme/fetcher/rhea.py
@@ -5,16 +5,18 @@
 Rhea database by ID and map it to the PyEnzyme data model (v2).
 """
 
+import re
 from io import StringIO
+from typing import ClassVar, List, Optional, Tuple
+
+import httpx
 import pandas as pd
 from pydantic import BaseModel, ConfigDict
-from typing import List, ClassVar, Optional, Tuple
-
-import requests
 
 from pyenzyme.fetcher.chebi import fetch_chebi
 from pyenzyme.versions import v2
-import re
+
+DEFAULT_TIMEOUT = 5.0
 
 
 class RheaResult(BaseModel):
@@ -114,8 +116,10 @@ def fetch_tsv(query: str) -> pd.DataFrame:
         Raises:
             HTTPError: If the request to the Rhea API fails
         """
-        response = requests.get(RheaClient.BASE_URL.format(query, "tsv"))
-        response.raise_for_status()
+        with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+            url = RheaClient.BASE_URL.format(query, "tsv")
+            response = client.get(url)
+            response.raise_for_status()
 
         return pd.read_csv(StringIO(response.text), sep="\t")
 
@@ -133,8 +137,10 @@ def fetch_json(query: str) -> RheaQuery:
         Raises:
             HTTPError: If the request to the Rhea API fails
         """
-        response = requests.get(RheaClient.BASE_URL.format(query, "json"))
-        response.raise_for_status()
+        with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+            url = RheaClient.BASE_URL.format(query, "json")
+            response = client.get(url)
+            response.raise_for_status()
 
         return RheaQuery.model_validate(response.json())