Skip to content

Commit 14021a6

Browse files
rlyclaudeoruebel
authored
Use the NCBITaxon prefix instead of NCBI_TAXON in examples and tests (#1520)
NCBITaxon is the canonical CURIE prefix for the NCBI Taxonomy ontology. The back-compat test and its 1.8.0 reproduction script keep NCBI_TAXON because they read/regenerate historical data, and the hdmf-common-schema submodule is left untouched.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Oliver Ruebel <oruebel@users.noreply.github.com>
1 parent 9deb172 commit 14021a6

8 files changed

Lines changed: 39 additions & 39 deletions

docs/gallery/example_term_set.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ id: termset/species_example
22
name: Species
33
version: 0.0.1
44
prefixes:
5-
NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
5+
NCBITaxon: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
66
imports:
77
- linkml:types
88
default_range: string
@@ -12,13 +12,13 @@ enums:
1212
permissible_values:
1313
Homo sapiens:
1414
description: the species is human
15-
meaning: NCBI_TAXON:9606
15+
meaning: NCBITaxon:9606
1616
Mus musculus:
1717
description: the species is a house mouse
18-
meaning: NCBI_TAXON:10090
18+
meaning: NCBITaxon:10090
1919
Ursus arctos horribilis:
2020
description: the species is a grizzly bear
21-
meaning: NCBI_TAXON:116960
21+
meaning: NCBITaxon:116960
2222
Myrmecophaga tridactyla:
2323
description: the species is an anteater
24-
meaning: NCBI_TAXON:71006
24+
meaning: NCBITaxon:71006

docs/gallery/plot_external_resources.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,14 @@ def __init__(self, **kwargs):
139139
herd.add_ref(
140140
container=data,
141141
key='Homo sapiens',
142-
entity_id='NCBI_TAXON:9606',
142+
entity_id='NCBITaxon:9606',
143143
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'
144144
)
145145

146146
herd.add_ref(
147147
container=data,
148148
key='Mus musculus',
149-
entity_id='NCBI_TAXON:10090',
149+
entity_id='NCBITaxon:10090',
150150
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
151151
)
152152

@@ -208,7 +208,7 @@ def __init__(self, **kwargs):
208208
container=species,
209209
attribute='Species_Data',
210210
key='Ursus arctos horribilis',
211-
entity_id='NCBI_TAXON:116960',
211+
entity_id='NCBITaxon:116960',
212212
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id'
213213
)
214214

@@ -316,7 +316,7 @@ def __init__(self, **kwargs):
316316
container=data,
317317
field='species',
318318
key='Mus musculus',
319-
entity_id='NCBI_TAXON:txid10090',
319+
entity_id='NCBITaxon:txid10090',
320320
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
321321
)
322322

docs/gallery/plot_term_set.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
1. The name of the schema is up to the user, e.g., the name could be "Species" if the term set will
4343
contain species terms.
4444
2. The prefixes will be the standardized prefix of your source, followed by the URI to the terms.
45-
For example, the NCBI Taxonomy is abbreviated as NCBI_TAXON, and Ensemble is simply Ensemble.
45+
For example, the NCBI Taxonomy is abbreviated as NCBITaxon, and Ensembl is simply Ensembl.
4646
As mentioned prior, the URI needs to be to the terms; this is to allow the URI to later be coupled
4747
with the source id for the term to create a valid link to the term source page.
4848
3. The schema uses LinkML enumerations to list all the possible terms. To define all the permissible

tests/unit/common/test_resources.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,13 @@ def test_repr_populated(self):
145145
container.parent = file
146146
er.add_ref(container=container,
147147
key='Mus musculus',
148-
entity_id='NCBI_TAXON:10090',
148+
entity_id='NCBITaxon:10090',
149149
entity_uri='http://x')
150150
text = repr(er)
151151
self.assertIn('1 key(s), 1 entity(ies), 1 object(s), 1 file(s)', text)
152152
# the HTML repr surfaces the flattened table content
153153
html = er._repr_html_()
154-
self.assertIn('NCBI_TAXON:10090', html)
154+
self.assertIn('NCBITaxon:10090', html)
155155
self.assertIn('http://x', html)
156156

157157
def test_assert_external_resources_equal(self):
@@ -390,7 +390,7 @@ def test_add_ref_container_data(self):
390390

391391
er.add_ref_container(root_container=em)
392392
self.assertEqual(er.keys.data, [('Homo sapiens',)])
393-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
393+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
394394
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
395395
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')])
396396

@@ -413,7 +413,7 @@ def test_add_ref_container_attr(self):
413413

414414
er.add_ref_container(root_container=em)
415415
self.assertEqual(er.keys.data, [('Homo sapiens',)])
416-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
416+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
417417
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
418418
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', 'description', '')])
419419

@@ -437,7 +437,7 @@ def test_add_ref_termset(self):
437437
termset=terms
438438
)
439439
self.assertEqual(er.keys.data, [('Homo sapiens',)])
440-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
440+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
441441
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
442442
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')])
443443

@@ -475,7 +475,7 @@ def test_add_ref_termset_attribute_none(self):
475475
termset=terms
476476
)
477477
self.assertEqual(er.keys.data, [('Homo sapiens',)])
478-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
478+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
479479
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
480480
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')])
481481

@@ -498,7 +498,7 @@ def test_add_ref_termset_data_object_list(self):
498498
termset=terms
499499
)
500500
self.assertEqual(er.keys.data, [('Homo sapiens',)])
501-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
501+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
502502
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
503503
self.assertEqual(er.objects.data, [(0, species.object_id, 'DynamicTable', 'colnames', '')])
504504

@@ -521,9 +521,9 @@ def test_add_ref_termset_bulk(self):
521521
termset=terms
522522
)
523523
self.assertEqual(er.keys.data, [('Homo sapiens',), ('Mus musculus',)])
524-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
524+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
525525
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606'),
526-
('NCBI_TAXON:10090',
526+
('NCBITaxon:10090',
527527
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=10090')])
528528
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')])
529529

@@ -546,7 +546,7 @@ def test_add_ref_termset_missing_terms(self):
546546
termset=terms
547547
)
548548
self.assertEqual(er.keys.data, [('Homo sapiens',)])
549-
self.assertEqual(er.entities.data, [('NCBI_TAXON:9606',
549+
self.assertEqual(er.entities.data, [('NCBITaxon:9606',
550550
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')])
551551
self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')])
552552
self.assertEqual(missing_terms, {'missing_terms': ['missing_term']})
@@ -792,7 +792,7 @@ def test_get_obj_entities_hdf5_roundtrip(self):
792792
data.parent = file
793793
er.add_ref(container=data,
794794
key='Homo sapiens',
795-
entity_id='NCBI_TAXON:9606',
795+
entity_id='NCBITaxon:9606',
796796
entity_uri='http://x')
797797

798798
path = 'test_HERD_hdf5_roundtrip.h5'
@@ -809,7 +809,7 @@ def test_get_obj_entities_hdf5_roundtrip(self):
809809
remove_test_file(path)
810810

811811
expected_df = pd.DataFrame.from_dict(
812-
{'entity_id': {0: 'NCBI_TAXON:9606'},
812+
{'entity_id': {0: 'NCBITaxon:9606'},
813813
'entity_uri': {0: 'http://x'}}
814814
)
815815
pd.testing.assert_frame_equal(df, expected_df)

tests/unit/example_test_term_set.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ id: termset/species_example
22
name: Species
33
version: 0.0.1
44
prefixes:
5-
NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
5+
NCBITaxon: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
66
imports:
77
- linkml:types
88
default_range: string
@@ -12,16 +12,16 @@ enums:
1212
permissible_values:
1313
Homo sapiens:
1414
description: the species is human
15-
meaning: NCBI_TAXON:9606
15+
meaning: NCBITaxon:9606
1616
Mus musculus:
1717
description: the species is a house mouse
18-
meaning: NCBI_TAXON:10090
18+
meaning: NCBITaxon:10090
1919
Ursus arctos horribilis:
2020
description: the species is a grizzly bear
21-
meaning: NCBI_TAXON:116960
21+
meaning: NCBITaxon:116960
2222
Myrmecophaga tridactyla:
2323
description: the species is an anteater
24-
meaning: NCBI_TAXON:71006
24+
meaning: NCBITaxon:71006
2525
Ailuropoda melanoleuca:
2626
description: the species is a panda
27-
meaning: NCBI_TAXON:9646
27+
meaning: NCBITaxon:9646

tests/unit/example_test_term_set2.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ id: termset/species_example2
22
name: Species
33
version: 0.0.1
44
prefixes:
5-
NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
5+
NCBITaxon: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
66
imports:
77
- linkml:types
88
default_range: string
@@ -12,10 +12,10 @@ enums:
1212
permissible_values:
1313
Homo sapiens:
1414
description: the species is human
15-
meaning: NCBI_TAXON:9606
15+
meaning: NCBITaxon:9606
1616
Mus musculus:
1717
description: the species is a house mouse
18-
meaning: NCBI_TAXON:10090
18+
meaning: NCBITaxon:10090
1919
Ursus arctos horribilis:
2020
description: the species is a grizzly bear
21-
meaning: NCBI_TAXON:116960
21+
meaning: NCBITaxon:116960

tests/unit/test_io_hdf5_h5tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1237,7 +1237,7 @@ def test_io_write_extend_herd(self):
12371237

12381238
self.assertEqual(read_herd.keys.data, [('special',), ('Homo sapiens',)])
12391239
self.assertEqual(read_herd.entities.data[0], ('id11', 'url11'))
1240-
self.assertEqual(read_herd.entities.data[1], ('NCBI_TAXON:9606',
1240+
self.assertEqual(read_herd.entities.data[1], ('NCBITaxon:9606',
12411241
'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606'))
12421242
self.assertEqual(read_herd.objects.data[0],
12431243
(0, read_foofile.object_id, 'FooFile', '', ''))

tests/unit/test_term_set.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,32 +36,32 @@ def tearDown(self):
3636
def test_termset_setup(self):
3737
termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
3838
self.assertEqual(termset.name, 'Species')
39-
self.assertEqual(list(termset.sources), ['NCBI_TAXON'])
39+
self.assertEqual(list(termset.sources), ['NCBITaxon'])
4040

4141
def test_repr_short(self):
4242
termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml')
43-
output = ('Schema Path: tests/unit/example_test_term_set2.yaml\nSources: NCBI_TAXON\nTerms: \n'
43+
output = ('Schema Path: tests/unit/example_test_term_set2.yaml\nSources: NCBITaxon\nTerms: \n'
4444
' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\nNumber of terms: 3')
4545
self.assertEqual(repr(termset), output)
4646

4747
def test_repr_html_short(self):
4848
termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml')
4949
output = ('<b>Schema Path: </b>tests/unit/example_test_term_set2.yaml<br><b>Sources:'
50-
' </b>NCBI_TAXON<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
50+
' </b>NCBITaxon<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
5151
' </li><li> Ursus arctos horribilis </li><i> Number of terms:</i> 3')
5252
self.assertEqual(termset._repr_html_(), output)
5353

5454
def test_repr_long(self):
5555
termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
56-
output = ('Schema Path: tests/unit/example_test_term_set.yaml\nSources: NCBI_TAXON\nTerms: \n'
56+
output = ('Schema Path: tests/unit/example_test_term_set.yaml\nSources: NCBITaxon\nTerms: \n'
5757
' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\n ... ... \n'
5858
' - Ailuropoda melanoleuca\nNumber of terms: 5')
5959
self.assertEqual(repr(termset), output)
6060

6161
def test_repr_html_long(self):
6262
termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
6363
output = ('<b>Schema Path: </b>tests/unit/example_test_term_set.yaml<br><b>Sources:'
64-
' </b>NCBI_TAXON<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
64+
' </b>NCBITaxon<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
6565
' </li><li> Ursus arctos horribilis </li>... ...<li> Ailuropoda melanoleuca'
6666
' </li><i> Number of terms:</i> 5')
6767
self.assertEqual(termset._repr_html_(), output)
@@ -83,7 +83,7 @@ def test_termset_validate_false(self):
8383

8484
def test_get_item(self):
8585
termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
86-
self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606')
86+
self.assertEqual(termset['Homo sapiens'].id, 'NCBITaxon:9606')
8787
self.assertEqual(termset['Homo sapiens'].description, 'the species is human')
8888
self.assertEqual(
8989
termset['Homo sapiens'].meaning,

0 commit comments

Comments
 (0)