Skip to content

Commit 5558917

Browse files
committed
2.6 drafts 3 and 4
* Repair spelling of Chaetothyriomycetidae * Suppress false equation of the two Morganellas, similarly for 7 other SILVA/IF conflicts * Fix Myospalax (again) #62 * Various repairs in fungi #42 * Fix Norops (again) #31 * Remove a few erroneous extinct flags (Conus, etc.) * New output file hidden.tsv lists all hidden taxa * New flag "hidden_inherited" * Fix bug in notSame
1 parent 308b800 commit 5558917

4 files changed

Lines changed: 105 additions & 30 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Get it from http://files.opentreeoflife.org/ott/
66
# and if there's a file "taxonomy" change that to "taxonomy.tsv".
77

8-
WHICH=2.6draft2
8+
WHICH=2.6draft4
99
PREV_WHICH=2.5
1010

1111
# $^ = all prerequisites

make-ott.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@
99

1010
# Hibbett 2007 updated upper fungal taxonomy
1111
h2007 = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007')
12+
13+
# 2014-04-08 Misspelling
14+
h2007.taxon('Chaetothryriomycetidae').rename('Chaetothyriomycetidae')
15+
1216
ott.absorb(h2007)
1317

18+
# h2007/if synonym https://github.com/OpenTreeOfLife/reference-taxonomy/issues/40
19+
ott.taxon('Urocystales').synonym('Urocystidales')
20+
1421
# SILVA microbial taxonomy
1522
silva = Taxonomy.getTaxonomy('tax/silva/', 'silva')
1623

@@ -60,14 +67,28 @@
6067
# 2014-03-07 Prevent a false match
6168
ott.notSame(silva.taxon('Phaeosphaeria'), fung.taxon('Phaeosphaeria'))
6269

70+
# 2014-04-08 This was causing Agaricaceae to become 'tattered'
71+
ott.notSame(silva.taxon('Morganella'), fung.taxon('Morganella'))
72+
73+
# 2014-04-08 More IF/SILVA bad matches (probably sample contamination)
74+
for name in ["Trichoderma harzianum",
75+
"Acantharia",
76+
"Bogoriella",
77+
"Steinia",
78+
"Sclerotinia homoeocarpa",
79+
"Epiphloea",
80+
"Campanella",
81+
"Lacrymaria"]:
82+
ott.notSame(silva.taxon(name), fung.taxon(name))
83+
6384
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
6485
# intermediate ranks are missing (e.g. a family that's a child of a
6586
# class)
6687
fung.analyzeMajorRankConflicts()
6788

6889
ott.absorb(fung)
6990

70-
# Problem: Chamydotomus is an incertae sedis child of Fungi.
91+
# Problem: Chlamydotomus is an incertae sedis child of Fungi.
7192
# http://www.mycobank.org/BioloMICS.aspx?Link=T&TableKey=14682616000000067&Rec=35058&Fields=All
7293
# https://github.com/OpenTreeOfLife/reference-taxonomy/issues/20
7394
Cb = ott.taxon('Chlamydotomus beigelii')
@@ -142,7 +163,8 @@
142163
ott.notSame(silva.taxon('GN013951'), gbif.taxon('Gorkadinium')) #Tetrasphaera
143164

144165
# Joseph 2013-07-23 https://github.com/OpenTreeOfLife/opentree/issues/62
145-
gbif.taxon('Myospalax','Muridae').absorb(gbif.taxon('2439119'))
166+
# GBIF has two copies of Myospalax
167+
gbif.taxon('6006429').absorb(gbif.taxon('2439119'))
146168

147169
# Rick Ree 2014-03-28 https://github.com/OpenTreeOfLife/reference-taxonomy/issues/37
148170
ott.same(ncbi.taxon('Calothrix', 'Rivulariaceae'), gbif.taxon('Calothrix', 'Rivulariaceae'))
@@ -377,14 +399,26 @@
377399
ott.taxon('Cyphellopsis','Cyphellaceae').unhide()
378400
ott.taxon('Cyphellopsis','Cyphellaceae').absorb(ott.taxon('Cyphellopsis','Niaceae'))
379401
ott.taxon('Diaporthaceae').take(ott.taxon('Phomopsis'))
380-
ott.taxon('Valsaceae').take(ott.taxon('Valsa'))
381-
ott.taxon('Agaricaceae').take(ott.taxon('Cystoderma'))
382-
ott.taxon('Hypocreaceae').take(ott.taxon('Hypocrea'))
402+
ott.taxon('Valsaceae').take(ott.taxon('Valsa', 'Fungi'))
403+
ott.taxon('Agaricaceae').take(ott.taxon('Cystoderma','Fungi'))
404+
ott.taxon('Hypocrea').absorb(ott.taxonThatContains('Trichoderma', 'Trichoderma raseum'))
405+
# Invert the synonym relationship
406+
ott.taxon('Trichoderma deliquescens').rename('Hypocrea lutea')
383407

384408
# Fold Norops into Anolis
385409
# https://github.com/OpenTreeOfLife/reference-taxonomy/issues/31
386410
# TBD: Change species names from Norops X to Anolis X for all X
387-
ott.taxon('Anolis').take(ott.taxon('Norops'))
411+
ott.taxon('Anolis').absorb(ott.taxon('Norops', 'Iguanidae'))
412+
413+
# JAR 2014-04-08 - these occur in study OTUs, so clear their erroneous extinct flags - see IRMNG
414+
ott.taxon('Inseliellum').extant()
415+
ott.taxon('Conus', 'Gastropoda').extant()
416+
ott.taxon('Patelloida').extant()
417+
ott.taxon('Phyllanthus', 'Phyllanthaceae').extant()
418+
ott.taxon('Stelis','Orchidaceae').extant()
419+
ott.taxon('Chloris', 'Poaceae').extant()
420+
ott.taxon('Acropora', 'Acroporidae').extant()
421+
388422

389423
# -----------------------------------------------------------------------------
390424
# Finish up

org/opentreeoflife/smasher/Taxon.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public class Taxon {
3232
Taxon mapped = null; // source node -> union node
3333
Taxon comapped = null; // union node -> source node
3434
boolean novelp = false; // added to union in last round?
35-
private String division = null;
35+
String division = null;
3636

3737
static boolean windyp = true;
3838

@@ -251,11 +251,14 @@ void unifyWith(Taxon unode) {
251251
// A few are combined using |
252252
unode.properFlags |=
253253
((before | this.properFlags) &
254-
(Taxonomy.FORCED_VISIBLE | Taxonomy.TATTERED |
255-
Taxonomy.EDITED | Taxonomy.EXTINCT));
256-
// This one is anomalous
254+
(Taxonomy.FORCED_VISIBLE |
255+
Taxonomy.EDITED |
256+
Taxonomy.EXTINCT));
257+
// These two are anomalous. Propagate these flags from original setting
258+
// ignoring how they're set in the merged taxon.
257259
unode.properFlags |=
258-
(before & Taxonomy.MAJOR_RANK_CONFLICT);
260+
(before & (Taxonomy.MAJOR_RANK_CONFLICT |
261+
Taxonomy.TATTERED));
259262
}
260263

261264
void reallyUnifyWith(Taxon unode) {
@@ -405,6 +408,15 @@ Taxon augment(UnionTaxonomy union) {
405408
for (Taxon augChild: newChildren)
406409
newnode.addChild(augChild);
407410

411+
if (this.taxonomy.tag.equals("if")) {
412+
boolean javasux = false;
413+
for (Taxon p = this.parent; p != null; p = p.parent)
414+
if (p.name.equals("Fungi")) { javasux = true; break; }
415+
if (javasux)
416+
for (Taxon o : oldChildren)
417+
System.err.format("** %s losing %s to %s\n", this, o, o.parent);
418+
}
419+
408420
newflags |= Taxonomy.TATTERED;
409421
union.logAndMark(Answer.yes(this, null, "new/tattered", null));
410422
// fall through
@@ -1121,6 +1133,7 @@ else if (newchild == this)
11211133
System.err.format("** A taxon cannot be its own parent: %s\n", newchild, this);
11221134
else {
11231135
newchild.properFlags |= Taxonomy.EDITED;
1136+
newchild.properFlags &= ~(Taxonomy.MAJOR_RANK_CONFLICT | Taxonomy.INCERTAE_SEDIS);
11241137
newchild.changeParent(this);
11251138
}
11261139
}

org/opentreeoflife/smasher/Taxonomy.java

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ public void dump(String outprefix, String sep) throws IOException {
281281
this.dumpNodes(this.roots, outprefix, sep);
282282
this.dumpSynonyms(outprefix + "synonyms.tsv", sep);
283283
this.dumpMetadata(outprefix + "about.json");
284+
this.dumpHidden(outprefix + "hidden.tsv");
284285
}
285286

286287
public void dump(String outprefix) throws IOException {
@@ -721,6 +722,28 @@ void dumpSynonyms(String filename, String sep) throws IOException {
721722
out.close();
722723
}
723724

725+
/**
 * Writes a tab-separated listing of flagged taxa to the given file.
 *
 * A taxon is listed when the union of its proper and inherited flags
 * contains at least one of the flags tested below. Each output line holds
 * the taxon's id, name and division, followed by whatever printFlags emits
 * for that taxon.
 *
 * NOTE(review): the stream is closed only on the normal path; if a write
 * throws, out.close() below is never reached.
 *
 * @param filename path of the file to write, opened via Taxonomy.openw
 * @throws IOException declared by this method; presumably raised by
 *         Taxonomy.openw - confirm against its definition
 */
void dumpHidden(String filename) throws IOException {
726+
PrintStream out = Taxonomy.openw(filename);
727+
// Iterate over every taxon this taxonomy yields.
for (Taxon node : this) {
728+
// Consider flags set directly on the node as well as inherited ones.
if (((node.properFlags | node.inheritedFlags) &
729+
(Taxonomy.HIDDEN |
730+
Taxonomy.EXTINCT |
731+
Taxonomy.MAJOR_RANK_CONFLICT |
732+
Taxonomy.TATTERED |
733+
Taxonomy.NOT_OTU |
734+
Taxonomy.HYBRID |
735+
Taxonomy.VIRAL |
736+
Taxonomy.UNCLASSIFIED |
737+
Taxonomy.ENVIRONMENTAL |
738+
Taxonomy.INCERTAE_SEDIS)) != 0) {
739+
// Three tab-terminated columns; the flags column follows on the same line.
out.format("%s\t%s\t%s\t", node.id, node.name, node.division);
740+
this.printFlags(node, out);
741+
out.println();
742+
}
743+
}
744+
out.close();
745+
}
746+
724747
/*
725748
flags are:
726749
@@ -788,7 +811,7 @@ flags are inherited (conservative approach), except for "incertaesedis", which i
788811
// NCBI only (not SILVA)
789812
public void analyzeOTUs() {
790813
for (Taxon root : this.roots)
791-
analyzeOTUs(root, 0); // mutates the tree
814+
analyzeOTUs(root); // mutates the tree
792815
}
793816

794817
// GBIF and IF only
@@ -920,10 +943,7 @@ static int analyzeRankConflicts(Taxon node, boolean majorp) {
920943
// taxa being hidden.
921944
// We use this for NCBI but not for SILVA.
922945

923-
static void analyzeOTUs(Taxon node, int inheritedFlags) {
924-
// Before
925-
node.inheritedFlags |= inheritedFlags;
926-
946+
static void analyzeOTUs(Taxon node) {
927947
// Prepare for recursive descent
928948
if (notOtuRegex.matcher(node.name).find())
929949
node.properFlags |= NOT_OTU;
@@ -932,12 +952,10 @@ static void analyzeOTUs(Taxon node, int inheritedFlags) {
932952
if (viralRegex.matcher(node.name).find())
933953
node.properFlags |= VIRAL;
934954

935-
int bequest = inheritedFlags | node.properFlags; // What the children inherit
936-
937955
// Recursive descent
938956
if (node.children != null)
939957
for (Taxon child : node.children)
940-
analyzeOTUs(child, bequest);
958+
analyzeOTUs(child);
941959
}
942960

943961
// Flags to set for all taxonomies. Also elide container pseudo-taxa
@@ -1046,6 +1064,10 @@ static void printFlags(Taxon node, PrintStream out) {
10461064
if (needComma) out.print(","); else needComma = true;
10471065
out.print("hidden");
10481066
}
1067+
else if ((node.inheritedFlags & HIDDEN) != 0) {
1068+
if (needComma) out.print(","); else needComma = true;
1069+
out.print("hidden_inherited");
1070+
}
10491071

10501072
if ((node.properFlags & MAJOR_RANK_CONFLICT) != 0) {
10511073
if (needComma) out.print(","); else needComma = true;
@@ -1844,24 +1866,25 @@ public void sameness(Taxon node1, Taxon node2, boolean polarity) {
18441866
node1, node2);
18451867
return;
18461868
}
1847-
if (snode.mapped == unode) return; // Already equated
18481869
if (!(snode.taxonomy instanceof SourceTaxonomy)) {
18491870
System.err.format("** One of the two nodes must come from a source taxonomy: %s %s\n", unode, snode);
18501871
return;
18511872
}
1852-
if (polarity) {
1873+
if (polarity) { // same
18531874
if (snode.mapped != null) {
18541875
if (snode.mapped != unode)
18551876
System.err.format("** The taxa have already been determined to be different: %s\n", snode);
18561877
return;
18571878
}
18581879
snode.unifyWith(unode);
1859-
} else {
1880+
} else { // notSame
18601881
if (snode.mapped != null) {
18611882
if (snode.mapped == unode)
18621883
System.err.format("** The taxa have already been determined to be the same: %s\n", snode);
18631884
return;
18641885
}
1886+
// Give the source node its own new node in the union, distinct
1887+
// from the union node it has been declared not to be the same as
18651888
Taxon evader = new Taxon(unode.taxonomy);
18661889
snode.unifyWithNew(evader);
18671890
evader.addSource(snode);
@@ -2200,6 +2223,17 @@ public void assignIds(SourceTaxonomy idsource) {
22002223
// idsource.tag = "ids";
22012224
idsource.mapInto(this, Criterion.idCriteria);
22022225

2226+
this.transferIds(idsource);
2227+
2228+
// Phase 2: give new ids to union nodes that didn't get them above.
2229+
long sourcemax = idsource.maxid();
2230+
this.assignNewIds(sourcemax);
2231+
// remember, this = union, idsource = previous version of ott
2232+
2233+
Taxon.printStats(); // Taxon id clash
2234+
}
2235+
2236+
public void transferIds(SourceTaxonomy idsource) {
22032237
Taxon.resetStats();
22042238
System.out.println("--- Assigning ids to union starting with " + idsource.getTag() + " ---");
22052239

@@ -2217,13 +2251,6 @@ public void assignIds(SourceTaxonomy idsource) {
22172251
unode.setId(node.id);
22182252
}
22192253
}
2220-
2221-
// Phase 2: give new ids to union nodes that didn't get them above.
2222-
long sourcemax = idsource.maxid();
2223-
this.assignNewIds(sourcemax);
2224-
// remember, this = union, idsource = previous version of ott
2225-
2226-
Taxon.printStats(); // Taxon id clash
22272254
}
22282255

22292256
// Cf. assignIds()
@@ -2347,6 +2374,7 @@ public void dump(String outprefix, String sep) throws IOException {
23472374

23482375
this.dumpNodes(this.roots, outprefix, sep);
23492376
this.dumpSynonyms(outprefix + "synonyms.tsv", sep);
2377+
this.dumpHidden(outprefix + "hidden.tsv");
23502378
}
23512379

23522380
// Overrides method in Taxonomy class

0 commit comments

Comments
 (0)