11package org .vcell .util .bioregistry .ncbitaxon ;
22
3+ import java .io .IOException ;
4+ import java .time .Duration ;
35import java .util .Map ;
6+ import java .net .URI ;
7+ import java .net .http .HttpClient ;
8+ import java .net .http .HttpRequest ;
9+ import java .net .http .HttpResponse ;
10+ import java .util .Random ;
11+ import java .util .regex .Matcher ;
12+ import java .util .regex .Pattern ;
13+ import com .fasterxml .jackson .databind .JsonNode ;
14+ import com .fasterxml .jackson .databind .ObjectMapper ;
15+ import org .apache .logging .log4j .LogManager ;
16+ import org .apache .logging .log4j .Logger ;
17+
418
519public class OrganismLookup {
620
21+ private static final Logger lg = LogManager .getLogger (OrganismLookup .class );
22+
723 public enum NameType {
824 COMMON ,
925 SCIENTIFIC
@@ -12,26 +28,25 @@ public enum NameType {
1228 private static final String PREFIX = "http://bioregistry.io/ncbitaxon:" ;
1329
1430 private static final Map <String , String > COMMON_NAMES = Map .ofEntries (
15- Map .entry ("9606" , "Human " ),
16- Map .entry ("10090" , "Mouse " ),
17- Map .entry ("10116" , "Rat " ),
18- Map .entry ("7955" , "Zebrafish " ),
19- Map .entry ("7227" , "Fruit fly" ),
31+ Map .entry ("9606" , "human " ),
32+ Map .entry ("10090" , "house mouse " ),
33+ Map .entry ("10116" , "Norway rat " ),
34+ Map .entry ("7955" , "zebrafish " ),
35+ Map .entry ("7227" , "fruit fly" ),
2036 Map .entry ("6239" , "Nematode worm" ),
21- Map .entry ("10141" , "Guinea pig" ),
22- Map .entry ("9986" , "Rabbit" ),
23- Map .entry ("9615" , "Dog" ),
24- Map .entry ("9823" , "Pig" ),
37+ Map .entry ("10141" , "domestic guinea pig" ),
38+ Map .entry ("9986" , "rabbit" ),
39+ Map .entry ("9615" , "dog" ),
2540 Map .entry ("9544" , "Rhesus monkey" ),
26- Map .entry ("9685" , "Cat " ),
27- Map .entry ("9913" , "Cow " ),
28- Map .entry ("9031" , "Chicken " ),
29- Map .entry ("8364" , "Xenopus frog" ),
30- Map .entry ("28377 " , "Axolotl " ),
31- Map .entry ("9825" , "Mini pig" ),
32- Map .entry ("9796" , "Horse " ),
33- Map .entry ("9915 " , "Sheep " ),
34- Map .entry ("9940 " , "Goat" )
41+ Map .entry ("9685" , "domestic cat " ),
42+ Map .entry ("9913" , "domestic cattle " ),
43+ Map .entry ("9031" , "chicken " ),
44+ Map .entry ("8364" , "tropical clawed frog" ),
45+ Map .entry ("8296 " , "axolotl " ),
46+ Map .entry ("9825" , "domestic pig" ),
47+ Map .entry ("9796" , "horse " ),
48+ Map .entry ("9940 " , "sheep " ),
49+ Map .entry ("9925 " , "Goat" )
3550 );
3651
3752 private static final Map <String , String > SCIENTIFIC_NAMES = Map .ofEntries (
@@ -44,20 +59,19 @@ public enum NameType {
4459 Map .entry ("10141" , "Cavia porcellus" ),
4560 Map .entry ("9986" , "Oryctolagus cuniculus" ),
4661 Map .entry ("9615" , "Canis lupus familiaris" ),
47- Map .entry ("9823" , "Sus scrofa" ),
4862 Map .entry ("9544" , "Macaca mulatta" ),
4963 Map .entry ("9685" , "Felis catus" ),
5064 Map .entry ("9913" , "Bos taurus" ),
5165 Map .entry ("9031" , "Gallus gallus" ),
52- Map .entry ("8364" , "Xenopus laevis " ),
53- Map .entry ("28377 " , "Ambystoma mexicanum" ),
66+ Map .entry ("8364" , "Xenopus tropicalis " ),
67+ Map .entry ("8296 " , "Ambystoma mexicanum" ),
5468 Map .entry ("9825" , "Sus scrofa domesticus" ),
5569 Map .entry ("9796" , "Equus caballus" ),
56- Map .entry ("9915 " , "Ovis aries" ),
57- Map .entry ("9940 " , "Capra hircus" )
70+ Map .entry ("9940 " , "Ovis aries" ),
71+ Map .entry ("9925 " , "Capra hircus" )
5872 );
5973
60- // 🧪 Get name from full URI
74+ // get name from full URI
6175 public static String getName (String fullUri , NameType type ) {
6276 if (!fullUri .startsWith (PREFIX )) return "Unknown" ;
6377 String taxonId = fullUri .substring (PREFIX .length ());
@@ -68,7 +82,7 @@ public static String getName(String fullUri, NameType type) {
6882 };
6983 }
7084
71- // 🔁 Reverse lookup: get full URI from name
85+ // reverse lookup: get full URI from name
7286 public static String getUriFromName (String name , NameType type ) {
7387 Map <String , String > sourceMap = switch (type ) {
7488 case COMMON -> COMMON_NAMES ;
@@ -81,4 +95,77 @@ public static String getUriFromName(String name, NameType type) {
8195 .findFirst ()
8296 .orElse ("Unknown" );
8397 }
98+
99+ public static HttpResponse <String > fetchTaxonomyResponse (String taxonId ) throws IOException , InterruptedException {
100+ String url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
101+ + "?db=taxonomy&id=" + taxonId + "&retmode=json" ;
102+
103+ HttpClient client = HttpClient .newBuilder ()
104+ .connectTimeout (Duration .ofSeconds (5 ))
105+ .build ();
106+
107+ HttpRequest request = HttpRequest .newBuilder ()
108+ .uri (URI .create (url ))
109+ .timeout (Duration .ofSeconds (10 ))
110+ .header ("Accept" , "application/json" )
111+ .build ();
112+
113+ HttpResponse <String > response = client .send (request , HttpResponse .BodyHandlers .ofString ());
114+ return response ;
115+ }
116+
117+ public static String parseTaxonomyName (String taxonId , String jsonBody ) throws IOException {
118+ ObjectMapper mapper = new ObjectMapper ();
119+ JsonNode root = mapper .readTree (jsonBody );
120+ JsonNode resultNode = root .path ("result" ).path (taxonId );
121+
122+ if (resultNode .isMissingNode ()) {
123+ return "Error: Taxon ID not found" ;
124+ }
125+
126+ String scientificName = resultNode .path ("scientificname" ).asText ("Unknown" );
127+ String commonName = resultNode .path ("commonname" ).asText (null );
128+
129+ String name = (commonName != null && !commonName .isEmpty ())
130+ ? scientificName + " (" + commonName + ")"
131+ : scientificName ;
132+ return name ;
133+ }
134+
135+ public static void verifyAllTaxonMappings () {
136+ Random random = new Random ();
137+
138+ for (Map .Entry <String , String > entry : SCIENTIFIC_NAMES .entrySet ()) {
139+ String taxonId = entry .getKey ();
140+ String expectedScientific = SCIENTIFIC_NAMES .getOrDefault (taxonId , "(none)" );
141+ String expectedCommon = COMMON_NAMES .getOrDefault (taxonId , "(none)" );
142+
143+ try {
144+ HttpResponse <String > response = fetchTaxonomyResponse (taxonId );
145+ String result = parseTaxonomyName (taxonId , response .body ());
146+
147+ lg .debug ("Taxon ID: {}" , taxonId );
148+ lg .debug (" Returned: {}" , result );
149+ lg .debug (" Expected: {} ({})" , expectedScientific , expectedCommon );
150+ lg .debug ("" ); // optional, consider removing if it adds no value
151+
152+ } catch (java .net .http .HttpTimeoutException e ) {
153+ lg .warn ("Timeout for Taxon ID: {}" , taxonId , e );
154+ } catch (java .net .UnknownHostException e ) {
155+ lg .warn ("Host unreachable for Taxon ID: {}" , taxonId , e );
156+ } catch (Exception e ) {
157+ lg .warn ("Error for Taxon ID: {}" , taxonId , e );
158+ }
159+
160+ // wait 5 to 10 random seconds before calls so that we won't look like a denial of service attack
161+ int delaySeconds = 5 + random .nextInt (6 ); // 5–10 seconds
162+ try {
163+ Thread .sleep (delaySeconds * 1000L );
164+ } catch (InterruptedException e ) {
165+ Thread .currentThread ().interrupt ();
166+ lg .warn ("Interrupted during sleep" );
167+ }
168+ }
169+ }
170+
84171}
0 commit comments