22
33import java .util .ArrayList ;
44import java .util .HashMap ;
5+ import java .util .HashSet ;
56import java .util .List ;
67import java .util .Map ;
8+ import java .util .Set ;
79
810import mesquite .categ .lib .CategoricalData ;
11+ import mesquite .categ .lib .CategoricalState ;
912import mesquite .categ .lib .DNAData ;
1013import mesquite .categ .lib .ProteinData ;
1114import mesquite .categ .lib .RNAData ;
2225import org .nexml .model .CategoricalMatrix ;
2326import org .nexml .model .Character ;
2427import org .nexml .model .CharacterStateSet ;
28+ import org .nexml .model .CompoundCharacterState ;
2529import org .nexml .model .Document ;
2630import org .nexml .model .Matrix ;
2731import org .nexml .model .MatrixCell ;
2832import org .nexml .model .MolecularMatrix ;
2933import org .nexml .model .NexmlWritable ;
3034import org .nexml .model .OTU ;
3135import org .nexml .model .OTUs ;
36+ import org .nexml .model .PolymorphicCharacterState ;
37+ import org .nexml .model .UncertainCharacterState ;
3238
3339public class NexmlCharactersBlockWriter extends NexmlBlockWriter {
3440
41+ /**
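42+ * Next symbol to assign to a newly created uncertain or polymorphic state. Starts just above CategoricalState.getMaxPossibleStateStatic() so that generated symbols don't conflict with existing state symbols.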
43+ */
44+ private int nextMultipleStateSymbol = CategoricalState .getMaxPossibleStateStatic () + 1 ;
45+
3546 @ SuppressWarnings ("serial" )
3647 private static final Map <String , String > xmlMolecularDataTypeFor = new HashMap <String , String >() {{
3748 put (DNAData .DATATYPENAME , MolecularMatrix .DNA );
@@ -83,10 +94,9 @@ else if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
8394 @ SuppressWarnings ("unchecked" )
8495 private void writeCharacterStates (CharacterData mesData , org .nexml .model .Matrix <?> xmlMatrix ) {
8596 String mesDataType = mesData .getDataTypeName ();
86-
8797 int mesNchar = mesData .getNumChars ();
8898 List <Character > xmlCharacters = new ArrayList <Character >(mesNchar );
89- for ( int j = 0 ; j < mesNchar ; j ++ ) {
99+ for ( int characterIndex = 0 ; characterIndex < mesNchar ; characterIndex ++ ) {
90100 CharacterStateSet xmlCharacterStateSet = null ;
91101 if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
92102 xmlCharacterStateSet = ((MolecularMatrix )xmlMatrix ).getCharacterStateSet ();
@@ -95,47 +105,64 @@ else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
95105 xmlCharacterStateSet = ((CategoricalMatrix )xmlMatrix ).createCharacterStateSet ();
96106 }
97107 Character xmlChar = xmlMatrix .createCharacter (xmlCharacterStateSet );
98- String mesCharacterName = mesData .getCharacterName (j );
108+ String mesCharacterName = mesData .getCharacterName (characterIndex );
99109 if ( null != mesCharacterName && ! mesCharacterName .equals ("" ) ) {
100110 xmlChar .setLabel (mesCharacterName );
101111 }
102112 if ( mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME ) ) {
103113 CategoricalData data = ((CategoricalData )mesData );
104- int maxStateIndex = data .maxStateWithName (j );
114+ int maxStateIndex = data .maxStateWithName (characterIndex );
105115 for (int stateIndex = 0 ; stateIndex <= maxStateIndex ; stateIndex ++) {
106116 String symbol = String .valueOf (data .getSymbol (stateIndex ));
107117 org .nexml .model .CharacterState state = xmlChar .getCharacterStateSet ().createCharacterState (symbol );
108118 state .setSymbol (symbol );
109- if (data .hasStateName (j , stateIndex )) {
110- String stateLabel = data .getStateName (j , stateIndex );
119+ if (data .hasStateName (characterIndex , stateIndex )) {
120+ String stateLabel = data .getStateName (characterIndex , stateIndex );
111121 state .setLabel (stateLabel );
112122 }
113123 }
114124 }
115125 xmlCharacters .add (xmlChar );
116126 }
117- for ( int j = 0 ; j < mesData .getNumTaxa (); j ++ ) {
118- CharacterState [] mesChars = mesData .getCharacterStateArray (j , 0 , mesNchar );
119- String unassignedSymbol = String .valueOf (mesData .getUnassignedSymbol ());
120- Taxon mesTaxon = mesData .getTaxa ().getTaxon (j );
127+ for (int taxonIndex = 0 ; taxonIndex < mesData .getNumTaxa (); taxonIndex ++) {
128+ CharacterState [] mesCharStates = mesData .getCharacterStateArray (taxonIndex , 0 , mesNchar );
129+ Taxon mesTaxon = mesData .getTaxa ().getTaxon (taxonIndex );
121130 OTU xmlTaxon = findEquivalentTaxon (mesTaxon ,xmlMatrix .getOTUs ());
122- for ( int k = 0 ; k < mesNchar ; k ++ ) {
123- Character xmlChar = xmlCharacters .get (k );
124- String mesCharString = mesChars [k ].toDisplayString ();
125- if ( mesCharString != null && !mesCharString .equals ("-" ) && !mesCharString .equals (unassignedSymbol )) {
126- if ( mesDataType .equalsIgnoreCase (ContinuousData .DATATYPENAME ) ) {
127- MatrixCell <Double > xmlCell = (MatrixCell <Double >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
128- xmlCell .setValue ((Double )xmlMatrix .parseSymbol (mesCharString , xmlChar ));
131+ for ( int characterIndex = 0 ; characterIndex < mesNchar ; characterIndex ++ ) {
132+ Character xmlChar = xmlCharacters .get (characterIndex );
133+ CharacterState mesState = mesCharStates [characterIndex ];
134+ if (mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME )) {
135+ CharacterStateSet xmlStateSet = xmlChar .getCharacterStateSet ();
136+ CategoricalData categoricalData = (CategoricalData )mesData ;
137+ long stateAssignment = categoricalData .getState (characterIndex , taxonIndex );
138+ org .nexml .model .CharacterState xmlCharacterState = null ;
139+ if (CategoricalState .hasMultipleStates (stateAssignment )) {
140+ Set <String > symbols = new HashSet <String >();
141+ for (int mesStateCode : CategoricalState .expand (stateAssignment )) {
142+ symbols .add (String .valueOf (categoricalData .getSymbol (mesStateCode )));
143+ }
144+ if (CategoricalState .isUncertain (stateAssignment )) {
145+ xmlCharacterState = findOrCreateUncertainStateSet (xmlStateSet , symbols );
146+ } else { //polymorphic
147+ xmlCharacterState = findOrCreatePolymorphicStateSet (xmlStateSet , symbols );
148+ }
149+ } else { // single state
150+ if ((!CategoricalState .isUnassigned (stateAssignment )) && (!CategoricalState .isInapplicable (stateAssignment ))) {
151+ String symbol = String .valueOf (categoricalData .getSymbol (CategoricalState .getOnlyElement (stateAssignment )));
152+ xmlCharacterState = xmlStateSet .lookupCharacterStateBySymbol (symbol );
153+ }
129154 }
130- else if ( mesDataType . equalsIgnoreCase ( CategoricalData . DATATYPENAME ) ) {
131- MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
132- xmlCell .setValue (( org . nexml . model . CharacterState ) xmlMatrix . parseSymbol ( mesCharString , xmlChar ) );
155+ if (xmlCharacterState != null ) {
156+ MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon , xmlChar );
157+ xmlCell .setValue (xmlCharacterState );
133158 }
134- else if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
135- MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
136- xmlCell .setValue ((org .nexml .model .CharacterState )((MolecularMatrix )xmlMatrix ).parseSymbol (mesCharString ,xmlMolecularDataTypeFor .get (mesDataType )));
137- }
138- }
159+ } else if (mesDataType .equalsIgnoreCase (ContinuousData .DATATYPENAME )) {
160+ MatrixCell <Double > xmlCell = (MatrixCell <Double >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
161+ xmlCell .setValue ((Double )xmlMatrix .parseSymbol (mesState .toDisplayString (), xmlChar ));
162+ } else if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
163+ MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
164+ xmlCell .setValue ((org .nexml .model .CharacterState )((MolecularMatrix )xmlMatrix ).parseSymbol (mesState .toDisplayString (), xmlMolecularDataTypeFor .get (mesDataType )));
165+ }
139166 }
140167 }
141168 }
@@ -150,4 +177,51 @@ protected Annotatable getThingInXmlBlock(NexmlWritable xmlBlock, int index) {
150177 return xmlMatrix .getCharacters ().get (index );
151178 }
152179
180+ private UncertainCharacterState findOrCreateUncertainStateSet (CharacterStateSet containingStateSet , Set <String > symbols ) {
181+ for (org .nexml .model .CharacterState state : containingStateSet .getCharacterStates ()) {
182+ if (state instanceof UncertainCharacterState ) {
183+ UncertainCharacterState uncertainState = (UncertainCharacterState )state ;
184+ if (containsMatchingStates (uncertainState , symbols )) {
185+ return uncertainState ;
186+ }
187+ }
188+ }
189+ Set <org .nexml .model .CharacterState > memberStates = collectMatchingStates (containingStateSet , symbols );
190+ return containingStateSet .createUncertainCharacterState (this .nextMultipleStateSymbol ++, memberStates );
191+ }
192+
193+ private PolymorphicCharacterState findOrCreatePolymorphicStateSet (CharacterStateSet containingStateSet , Set <String > symbols ) {
194+ for (org .nexml .model .CharacterState state : containingStateSet .getCharacterStates ()) {
195+ if (state instanceof PolymorphicCharacterState ) {
196+ PolymorphicCharacterState polymorphicState = (PolymorphicCharacterState )state ;
197+ if (containsMatchingStates (polymorphicState , symbols )) {
198+ return polymorphicState ;
199+ }
200+ }
201+ }
202+ Set <org .nexml .model .CharacterState > memberStates = collectMatchingStates (containingStateSet , symbols );
203+ return containingStateSet .createPolymorphicCharacterState (this .nextMultipleStateSymbol ++, memberStates );
204+ }
205+
206+ private boolean containsMatchingStates (CompoundCharacterState state , Set <String > symbols ) {
207+ Set <String > containedSymbols = new HashSet <String >();
208+ for (org .nexml .model .CharacterState containedState : state .getStates ()) {
209+ containedSymbols .add (containedState .getSymbol ().toString ());
210+ }
211+ return containedSymbols .equals (symbols );
212+ }
213+
214+ private Set <org .nexml .model .CharacterState > collectMatchingStates (CharacterStateSet containingStateSet , Set <String > symbols ) {
215+ Set <org .nexml .model .CharacterState > memberStates = new HashSet <org .nexml .model .CharacterState >();
216+ for (String symbol : symbols ) {
217+ org .nexml .model .CharacterState member = containingStateSet .lookupCharacterStateBySymbol (symbol );
218+ if ( null != member ) {
219+ memberStates .add (member );
220+ } else {
221+ memberStates .add (containingStateSet .createCharacterState (symbol ));
222+ }
223+ }
224+ return memberStates ;
225+ }
226+
153227}
0 commit comments