Skip to content

Commit dfeae3b

Browse files
committed
Completed updates to output for categorical data.
1 parent 8b19655 commit dfeae3b

2 files changed

Lines changed: 102 additions & 31 deletions

File tree

src/mesquite/nexml/InterpretNEXML/NexmlWriters/NexmlCharactersBlockWriter.java

Lines changed: 99 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
import java.util.ArrayList;
44
import java.util.HashMap;
5+
import java.util.HashSet;
56
import java.util.List;
67
import java.util.Map;
8+
import java.util.Set;
79

810
import mesquite.categ.lib.CategoricalData;
11+
import mesquite.categ.lib.CategoricalState;
912
import mesquite.categ.lib.DNAData;
1013
import mesquite.categ.lib.ProteinData;
1114
import mesquite.categ.lib.RNAData;
@@ -22,16 +25,24 @@
2225
import org.nexml.model.CategoricalMatrix;
2326
import org.nexml.model.Character;
2427
import org.nexml.model.CharacterStateSet;
28+
import org.nexml.model.CompoundCharacterState;
2529
import org.nexml.model.Document;
2630
import org.nexml.model.Matrix;
2731
import org.nexml.model.MatrixCell;
2832
import org.nexml.model.MolecularMatrix;
2933
import org.nexml.model.NexmlWritable;
3034
import org.nexml.model.OTU;
3135
import org.nexml.model.OTUs;
36+
import org.nexml.model.PolymorphicCharacterState;
37+
import org.nexml.model.UncertainCharacterState;
3238

3339
public class NexmlCharactersBlockWriter extends NexmlBlockWriter {
3440

41+
/**
42+
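* Counter supplying the symbol for the next generated uncertain or polymorphic state; it starts one past CategoricalState.getMaxPossibleStateStatic() so generated symbols don't conflict with existing state symbols.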
43+
*/
44+
private int nextMultipleStateSymbol = CategoricalState.getMaxPossibleStateStatic() + 1;
45+
3546
@SuppressWarnings("serial")
3647
private static final Map<String , String> xmlMolecularDataTypeFor = new HashMap<String, String>() {{
3748
put(DNAData.DATATYPENAME, MolecularMatrix.DNA);
@@ -83,10 +94,9 @@ else if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
8394
@SuppressWarnings("unchecked")
8495
private void writeCharacterStates(CharacterData mesData, org.nexml.model.Matrix<?> xmlMatrix) {
8596
String mesDataType = mesData.getDataTypeName();
86-
8797
int mesNchar = mesData.getNumChars();
8898
List<Character> xmlCharacters = new ArrayList<Character>(mesNchar);
89-
for ( int j = 0; j < mesNchar; j++ ) {
99+
for ( int characterIndex = 0; characterIndex < mesNchar; characterIndex++ ) {
90100
CharacterStateSet xmlCharacterStateSet = null;
91101
if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
92102
xmlCharacterStateSet = ((MolecularMatrix)xmlMatrix).getCharacterStateSet();
@@ -95,47 +105,64 @@ else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
95105
xmlCharacterStateSet = ((CategoricalMatrix)xmlMatrix).createCharacterStateSet();
96106
}
97107
Character xmlChar = xmlMatrix.createCharacter(xmlCharacterStateSet);
98-
String mesCharacterName = mesData.getCharacterName(j);
108+
String mesCharacterName = mesData.getCharacterName(characterIndex);
99109
if ( null != mesCharacterName && ! mesCharacterName.equals("") ) {
100110
xmlChar.setLabel(mesCharacterName);
101111
}
102112
if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
103113
CategoricalData data = ((CategoricalData)mesData);
104-
int maxStateIndex = data.maxStateWithName(j);
114+
int maxStateIndex = data.maxStateWithName(characterIndex);
105115
for (int stateIndex = 0; stateIndex <= maxStateIndex; stateIndex++) {
106116
String symbol = String.valueOf(data.getSymbol(stateIndex));
107117
org.nexml.model.CharacterState state = xmlChar.getCharacterStateSet().createCharacterState(symbol);
108118
state.setSymbol(symbol);
109-
if (data.hasStateName(j, stateIndex)) {
110-
String stateLabel = data.getStateName(j, stateIndex);
119+
if (data.hasStateName(characterIndex, stateIndex)) {
120+
String stateLabel = data.getStateName(characterIndex, stateIndex);
111121
state.setLabel(stateLabel);
112122
}
113123
}
114124
}
115125
xmlCharacters.add(xmlChar);
116126
}
117-
for ( int j = 0; j < mesData.getNumTaxa(); j++ ) {
118-
CharacterState[] mesChars = mesData.getCharacterStateArray(j, 0, mesNchar);
119-
String unassignedSymbol = String.valueOf(mesData.getUnassignedSymbol());
120-
Taxon mesTaxon = mesData.getTaxa().getTaxon(j);
127+
for (int taxonIndex = 0; taxonIndex < mesData.getNumTaxa(); taxonIndex++) {
128+
CharacterState[] mesCharStates = mesData.getCharacterStateArray(taxonIndex, 0, mesNchar);
129+
Taxon mesTaxon = mesData.getTaxa().getTaxon(taxonIndex);
121130
OTU xmlTaxon = findEquivalentTaxon(mesTaxon,xmlMatrix.getOTUs());
122-
for ( int k = 0; k < mesNchar; k++ ) {
123-
Character xmlChar = xmlCharacters.get(k);
124-
String mesCharString = mesChars[k].toDisplayString();
125-
if ( mesCharString != null && !mesCharString.equals("-") && !mesCharString.equals(unassignedSymbol)) {
126-
if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
127-
MatrixCell<Double> xmlCell = (MatrixCell<Double>) xmlMatrix.getCell(xmlTaxon,xmlChar);
128-
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesCharString, xmlChar));
131+
for ( int characterIndex = 0; characterIndex < mesNchar; characterIndex++ ) {
132+
Character xmlChar = xmlCharacters.get(characterIndex);
133+
CharacterState mesState = mesCharStates[characterIndex];
134+
if (mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME)) {
135+
CharacterStateSet xmlStateSet = xmlChar.getCharacterStateSet();
136+
CategoricalData categoricalData = (CategoricalData)mesData;
137+
long stateAssignment = categoricalData.getState(characterIndex, taxonIndex);
138+
org.nexml.model.CharacterState xmlCharacterState = null;
139+
if (CategoricalState.hasMultipleStates(stateAssignment)) {
140+
Set<String> symbols = new HashSet<String>();
141+
for (int mesStateCode : CategoricalState.expand(stateAssignment)) {
142+
symbols.add(String.valueOf(categoricalData.getSymbol(mesStateCode)));
143+
}
144+
if (CategoricalState.isUncertain(stateAssignment)) {
145+
xmlCharacterState = findOrCreateUncertainStateSet(xmlStateSet, symbols);
146+
} else { //polymorphic
147+
xmlCharacterState = findOrCreatePolymorphicStateSet(xmlStateSet, symbols);
148+
}
149+
} else { // single state
150+
if ((!CategoricalState.isUnassigned(stateAssignment)) && (!CategoricalState.isInapplicable(stateAssignment))) {
151+
String symbol = String.valueOf(categoricalData.getSymbol(CategoricalState.getOnlyElement(stateAssignment)));
152+
xmlCharacterState = xmlStateSet.lookupCharacterStateBySymbol(symbol);
153+
}
129154
}
130-
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
131-
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
132-
xmlCell.setValue((org.nexml.model.CharacterState)xmlMatrix.parseSymbol(mesCharString, xmlChar));
155+
if (xmlCharacterState != null) {
156+
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon, xmlChar);
157+
xmlCell.setValue(xmlCharacterState);
133158
}
134-
else if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
135-
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
136-
xmlCell.setValue((org.nexml.model.CharacterState)((MolecularMatrix)xmlMatrix).parseSymbol(mesCharString,xmlMolecularDataTypeFor.get(mesDataType)));
137-
}
138-
}
159+
} else if (mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME)) {
160+
MatrixCell<Double> xmlCell = (MatrixCell<Double>) xmlMatrix.getCell(xmlTaxon,xmlChar);
161+
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesState.toDisplayString(), xmlChar));
162+
} else if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
163+
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
164+
xmlCell.setValue((org.nexml.model.CharacterState)((MolecularMatrix)xmlMatrix).parseSymbol(mesState.toDisplayString(), xmlMolecularDataTypeFor.get(mesDataType)));
165+
}
139166
}
140167
}
141168
}
@@ -150,4 +177,51 @@ protected Annotatable getThingInXmlBlock(NexmlWritable xmlBlock, int index) {
150177
return xmlMatrix.getCharacters().get(index);
151178
}
152179

180+
private UncertainCharacterState findOrCreateUncertainStateSet(CharacterStateSet containingStateSet, Set<String> symbols) {
181+
for (org.nexml.model.CharacterState state : containingStateSet.getCharacterStates()) {
182+
if (state instanceof UncertainCharacterState) {
183+
UncertainCharacterState uncertainState = (UncertainCharacterState)state;
184+
if (containsMatchingStates(uncertainState, symbols)) {
185+
return uncertainState;
186+
}
187+
}
188+
}
189+
Set<org.nexml.model.CharacterState> memberStates = collectMatchingStates(containingStateSet, symbols);
190+
return containingStateSet.createUncertainCharacterState(this.nextMultipleStateSymbol++, memberStates);
191+
}
192+
193+
private PolymorphicCharacterState findOrCreatePolymorphicStateSet(CharacterStateSet containingStateSet, Set<String> symbols) {
194+
for (org.nexml.model.CharacterState state : containingStateSet.getCharacterStates()) {
195+
if (state instanceof PolymorphicCharacterState) {
196+
PolymorphicCharacterState polymorphicState = (PolymorphicCharacterState)state;
197+
if (containsMatchingStates(polymorphicState, symbols)) {
198+
return polymorphicState;
199+
}
200+
}
201+
}
202+
Set<org.nexml.model.CharacterState> memberStates = collectMatchingStates(containingStateSet, symbols);
203+
return containingStateSet.createPolymorphicCharacterState(this.nextMultipleStateSymbol++, memberStates);
204+
}
205+
206+
private boolean containsMatchingStates(CompoundCharacterState state, Set<String> symbols) {
207+
Set<String> containedSymbols = new HashSet<String>();
208+
for (org.nexml.model.CharacterState containedState : state.getStates()) {
209+
containedSymbols.add(containedState.getSymbol().toString());
210+
}
211+
return containedSymbols.equals(symbols);
212+
}
213+
214+
private Set<org.nexml.model.CharacterState> collectMatchingStates(CharacterStateSet containingStateSet, Set<String> symbols) {
215+
Set<org.nexml.model.CharacterState> memberStates = new HashSet<org.nexml.model.CharacterState>();
216+
for (String symbol : symbols) {
217+
org.nexml.model.CharacterState member = containingStateSet.lookupCharacterStateBySymbol(symbol);
218+
if ( null != member ) {
219+
memberStates.add(member);
220+
} else {
221+
memberStates.add(containingStateSet.createCharacterState(symbol));
222+
}
223+
}
224+
return memberStates;
225+
}
226+
153227
}

src/org/nexml/model/impl/CharacterStateSetImpl.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,9 @@ protected CharacterState createCharacterState(Element stateElement) {
137137
* perhaps that needs to change.
138138
* @author rvosa
139139
*/
140-
public PolymorphicCharacterState createPolymorphicCharacterState(
141-
Object symbol,
142-
Set<CharacterState> members) {
140+
public PolymorphicCharacterState createPolymorphicCharacterState(Object symbol,Set<CharacterState> members) {
143141
PolymorphicCharacterStateImpl polymorphicCharacterStateImpl = new PolymorphicCharacterStateImpl(getDocument());
142+
getElement().appendChild(polymorphicCharacterStateImpl.getElement());
144143
polymorphicCharacterStateImpl.setSymbol(symbol);
145144
polymorphicCharacterStateImpl.setStates(members);
146145
getCharacterStates().add(polymorphicCharacterStateImpl);
@@ -173,9 +172,7 @@ private void populateCompoundCharacterState(CompoundCharacterState state, Elemen
173172
/**
174173
* XXX see discussion for createPolymorphicCharacterState()
175174
*/
176-
public UncertainCharacterState createUncertainCharacterState(
177-
Object symbol,
178-
Set<CharacterState> members) {
175+
public UncertainCharacterState createUncertainCharacterState(Object symbol, Set<CharacterState> members) {
179176
UncertainCharacterStateImpl uncertainCharacterStateImpl = new UncertainCharacterStateImpl(getDocument());
180177
getElement().appendChild(uncertainCharacterStateImpl.getElement());
181178
uncertainCharacterStateImpl.setSymbol(symbol);

0 commit comments

Comments
 (0)