Skip to content

Commit e78b520

Browse files
PauBadiaM and claude committed
Refactor ensmbl_to_symbol to reuse _download
Replaces raw requests.get calls with the centralized _download function, which handles retries, progress bars, and streaming. Also fixes the mirror fallback to actually switch between Ensembl mirrors (www, useast, uswest, asia) and adds robustness against empty/invalid responses.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6249825 commit e78b520

2 files changed

Lines changed: 26 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning][].
1313
### Added
1414
- `pl.volcano` now accepts a gene name (`str`) or list of gene names (`list[str]`) for the `top` parameter to annotate specific features on volcano plots
1515

16+
### Changes
17+
- Refactored `ds.ensmbl_to_symbol` to reuse `_download` and fixed mirror fallback to actually switch between Ensembl mirrors
18+
1619
## 2.1.4
1720

1821
### Changes

src/decoupler/ds/_utils.py

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
import io
22

33
import pandas as pd
4-
import requests
4+
5+
from decoupler._download import _download
56

67

78
def ensmbl_to_symbol(
@@ -29,7 +30,7 @@ def ensmbl_to_symbol(
2930
dc.ds.ensmbl_to_symbol(genes=["ENSG00000196092", "ENSG00000115415"], organism="hsapiens_gene_ensembl")
3031
"""
3132
url = (
32-
'http://www.ensembl.org/biomart/martservice?query=<?xml version="1.0" encoding="UTF-8"?>'
33+
'http://{mirror}.ensembl.org/biomart/martservice?query=<?xml version="1.0" encoding="UTF-8"?>'
3334
'<!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "0" un'
3435
'iqueRows = "0" count = "" ><Dataset name = "{organism}" '
3536
'interface = "default" ><Attribute name = "ensembl_gene_id" /><Attribute name ='
@@ -47,16 +48,24 @@ def ensmbl_to_symbol(
4748
assert isinstance(genes, list), "genes must be list"
4849
assert isinstance(organism, str), "organism must be str"
4950
# Try different mirrors
50-
response = requests.get(url.format(miror="www", organism=organism))
51-
if any(msg in response.text for msg in ["Service unavailable", "Gateway Time-out"]):
52-
response = requests.get(url.format(miror="useast", organism=organism))
53-
if any(msg in response.text for msg in ["Service unavailable", "Gateway Time-out"]):
54-
response = requests.get(url.format(miror="asia", organism=organism))
55-
if not any(msg in response.text for msg in ["Service unavailable", "Gateway Time-out"]):
56-
eids = pd.read_csv(io.StringIO(response.text), sep="\t", header=None, index_col=0)[1].to_dict()
57-
elif organism in ["hsapiens_gene_ensembl", "mmusculus_gene_ensembl"]:
51+
error_msgs = ["Service unavailable", "Gateway Time-out"]
52+
for mirror in ["www", "useast", "uswest", "asia"]:
53+
try:
54+
data = _download(url.format(mirror=mirror, organism=organism))
55+
text = data.read().decode()
56+
if any(msg in text for msg in error_msgs) or not text.strip():
57+
continue
58+
df = pd.read_csv(io.StringIO(text), sep="\t", header=None, index_col=0)
59+
if df.empty or 1 not in df.columns:
60+
continue
61+
eids = df[1].to_dict()
62+
return [eids[g] if g in eids else None for g in genes]
63+
except Exception:
64+
continue
65+
# Zenodo fallback for human and mouse
66+
if organism in ["hsapiens_gene_ensembl", "mmusculus_gene_ensembl"]:
5867
url = f"https://zenodo.org/records/15551885/files/{organism}.csv.gz?download=1"
59-
eids = pd.read_csv(url, index_col=0, compression="gzip")["symbol"].to_dict()
60-
else:
61-
assert AssertionError(), "ensembl servers are down, try again later"
62-
return [eids[g] if g in eids else None for g in genes]
68+
data = _download(url)
69+
eids = pd.read_csv(data, index_col=0, compression="gzip")["symbol"].to_dict()
70+
return [eids[g] if g in eids else None for g in genes]
71+
raise ValueError("ensembl servers are down, try again later")

0 commit comments

Comments
 (0)