Skip to content

Commit 8b19655

Browse files
committed
Added support for separate state sets for each character. Not yet handling polymorphism/uncertainty correctly.
1 parent b3050b5 commit 8b19655

6 files changed

Lines changed: 135 additions & 112 deletions

File tree

src/mesquite/nexml/InterpretNEXML/NexmlWriters/NexmlCharactersBlockWriter.java

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@
3131
import org.nexml.model.OTUs;
3232

3333
public class NexmlCharactersBlockWriter extends NexmlBlockWriter {
34-
34+
3535
@SuppressWarnings("serial")
3636
private static final Map<String , String> xmlMolecularDataTypeFor = new HashMap<String, String>() {{
3737
put(DNAData.DATATYPENAME, MolecularMatrix.DNA);
3838
put(RNAData.DATATYPENAME, MolecularMatrix.RNA);
3939
put(ProteinData.DATATYPENAME, MolecularMatrix.Protein);
4040
}};
41-
41+
4242
/**
4343
*
4444
* @param employerEmployee
@@ -57,60 +57,79 @@ protected Annotatable writeBlock(Document xmlProject, FileElement mesBlock) {
5757
Taxa mesTaxa = mesData.getTaxa();
5858
OTUs xmlTaxa = findEquivalentTaxa(mesTaxa,xmlProject);
5959
org.nexml.model.Matrix<?> xmlMatrix = null;
60-
CharacterStateSet xmlCharacterStateSet = null;
6160
String mesDataType = mesData.getDataTypeName();
6261
if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
6362
xmlMatrix = xmlProject.createMolecularMatrix(xmlTaxa,xmlMolecularDataTypeFor.get(mesDataType));
64-
xmlCharacterStateSet = ((MolecularMatrix)xmlMatrix).getCharacterStateSet();
6563
}
6664
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
6765
xmlMatrix = xmlProject.createCategoricalMatrix(xmlTaxa);
68-
xmlCharacterStateSet = ((CategoricalMatrix)xmlMatrix).createCharacterStateSet();
6966
}
7067
else if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
7168
xmlMatrix = xmlProject.createContinuousMatrix(xmlTaxa);
7269
}
7370
else {
7471
MesquiteMessage.warnProgrammer("Can't write data type "+mesDataType);
75-
}
76-
writeCharacterStates(mesData, xmlMatrix, xmlCharacterStateSet);
72+
}
73+
writeCharacterStates(mesData, xmlMatrix);
7774
return xmlMatrix;
7875
}
79-
76+
8077
/**
8178
*
8279
* @param mesData
8380
* @param xmlMatrix
8481
* @param xmlCharacterStateSet
8582
*/
8683
@SuppressWarnings("unchecked")
87-
private void writeCharacterStates(CharacterData mesData, org.nexml.model.Matrix<?> xmlMatrix, CharacterStateSet xmlCharacterStateSet) {
84+
private void writeCharacterStates(CharacterData mesData, org.nexml.model.Matrix<?> xmlMatrix) {
8885
String mesDataType = mesData.getDataTypeName();
86+
8987
int mesNchar = mesData.getNumChars();
9088
List<Character> xmlCharacters = new ArrayList<Character>(mesNchar);
9189
for ( int j = 0; j < mesNchar; j++ ) {
90+
CharacterStateSet xmlCharacterStateSet = null;
91+
if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
92+
xmlCharacterStateSet = ((MolecularMatrix)xmlMatrix).getCharacterStateSet();
93+
}
94+
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
95+
xmlCharacterStateSet = ((CategoricalMatrix)xmlMatrix).createCharacterStateSet();
96+
}
9297
Character xmlChar = xmlMatrix.createCharacter(xmlCharacterStateSet);
9398
String mesCharacterName = mesData.getCharacterName(j);
9499
if ( null != mesCharacterName && ! mesCharacterName.equals("") ) {
95100
xmlChar.setLabel(mesCharacterName);
96101
}
102+
if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
103+
CategoricalData data = ((CategoricalData)mesData);
104+
int maxStateIndex = data.maxStateWithName(j);
105+
for (int stateIndex = 0; stateIndex <= maxStateIndex; stateIndex++) {
106+
String symbol = String.valueOf(data.getSymbol(stateIndex));
107+
org.nexml.model.CharacterState state = xmlChar.getCharacterStateSet().createCharacterState(symbol);
108+
state.setSymbol(symbol);
109+
if (data.hasStateName(j, stateIndex)) {
110+
String stateLabel = data.getStateName(j, stateIndex);
111+
state.setLabel(stateLabel);
112+
}
113+
}
114+
}
97115
xmlCharacters.add(xmlChar);
98116
}
99117
for ( int j = 0; j < mesData.getNumTaxa(); j++ ) {
100118
CharacterState[] mesChars = mesData.getCharacterStateArray(j, 0, mesNchar);
119+
String unassignedSymbol = String.valueOf(mesData.getUnassignedSymbol());
101120
Taxon mesTaxon = mesData.getTaxa().getTaxon(j);
102121
OTU xmlTaxon = findEquivalentTaxon(mesTaxon,xmlMatrix.getOTUs());
103122
for ( int k = 0; k < mesNchar; k++ ) {
104123
Character xmlChar = xmlCharacters.get(k);
105124
String mesCharString = mesChars[k].toDisplayString();
106-
if ( mesCharString != null && !mesCharString.equals("-") ) {
125+
if ( mesCharString != null && !mesCharString.equals("-") && !mesCharString.equals(unassignedSymbol)) {
107126
if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
108127
MatrixCell<Double> xmlCell = (MatrixCell<Double>) xmlMatrix.getCell(xmlTaxon,xmlChar);
109-
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesCharString));
128+
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesCharString, xmlChar));
110129
}
111130
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
112131
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
113-
xmlCell.setValue((org.nexml.model.CharacterState)xmlMatrix.parseSymbol(mesCharString));
132+
xmlCell.setValue((org.nexml.model.CharacterState)xmlMatrix.parseSymbol(mesCharString, xmlChar));
114133
}
115134
else if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
116135
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);

src/org/nexml/model/Matrix.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public interface Matrix<T> extends OTUsLinkable, Annotatable, Segmented<Characte
6969
* @param symbol
7070
* @return
7171
*/
72-
T parseSymbol(String symbol);
72+
T parseSymbol(String symbol, Character character);
7373

7474
/**
7575
* Creates a row element for OTU otu, and populates

src/org/nexml/model/impl/CategoricalMatrixImpl.java

Lines changed: 93 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -14,53 +14,53 @@
1414
import org.w3c.dom.Element;
1515

1616
class CategoricalMatrixImpl extends
17-
MatrixImpl<CharacterState> implements CategoricalMatrix {
18-
17+
MatrixImpl<CharacterState> implements CategoricalMatrix {
18+
1919
private Set<CharacterStateSet> mCharacterStateSets = new HashSet<CharacterStateSet>();
2020
private MolecularCharacterStateSetImpl mMolecularCharacterStates = null;
21-
22-
/**
23-
* Protected constructors that take a DOM document object but not
24-
* an element object are used for generating new element nodes in
25-
* a NeXML document. On calling such constructors, a new element
26-
* is created, which can be retrieved using getElement(). After this
27-
* step, the Impl class that called this constructor would still
28-
* need to attach the element in the proper location (typically
29-
* as a child element of the class that called the constructor).
30-
* @param document a DOM document object
31-
* @author rvosa
32-
*/
21+
22+
/**
23+
* Protected constructors that take a DOM document object but not
24+
* an element object are used for generating new element nodes in
25+
* a NeXML document. On calling such constructors, a new element
26+
* is created, which can be retrieved using getElement(). After this
27+
* step, the Impl class that called this constructor would still
28+
* need to attach the element in the proper location (typically
29+
* as a child element of the class that called the constructor).
30+
* @param document a DOM document object
31+
* @author rvosa
32+
*/
3333
protected CategoricalMatrixImpl(Document document) {
3434
super(document,"Standard");
3535
}
36-
37-
/**
38-
* Protected constructors are intended for recursive parsing, i.e.
39-
* starting from the root element (which maps onto DocumentImpl) we
40-
* traverse the element tree such that for every child element that maps
41-
* onto an Impl class the containing class calls that child's protected
42-
* constructor, passes in the element of the child. From there the
43-
* child takes over, populates itself and calls the protected
44-
* constructors of its children. These should probably be protected
45-
* because there is all sorts of opportunity for outsiders to call
46-
* these in the wrong context, passing in the wrong elements etc.
47-
* @param document the containing DOM document object. Every Impl
48-
* class needs a reference to this so that it can create DOM element
49-
* objects
50-
* @param element the equivalent NeXML element (e.g. for OTUsImpl, it's
51-
* the <otus/> element)
52-
* @author rvosa
53-
*/
36+
37+
/**
38+
* Protected constructors are intended for recursive parsing, i.e.
39+
* starting from the root element (which maps onto DocumentImpl) we
40+
* traverse the element tree such that for every child element that maps
41+
* onto an Impl class the containing class calls that child's protected
42+
* constructor, passes in the element of the child. From there the
43+
* child takes over, populates itself and calls the protected
44+
* constructors of its children. These should probably be protected
45+
* because there is all sorts of opportunity for outsiders to call
46+
* these in the wrong context, passing in the wrong elements etc.
47+
* @param document the containing DOM document object. Every Impl
48+
* class needs a reference to this so that it can create DOM element
49+
* objects
50+
* @param element the equivalent NeXML element (e.g. for OTUsImpl, it's
51+
* the <otus/> element)
52+
* @author rvosa
53+
*/
5454
protected CategoricalMatrixImpl(Document document, Element element, OTUsImpl otus) {
5555
super(document, element);
5656
for ( Element stateSetElement : getChildrenByTagName( getFormatElement(), CharacterStateSetImpl.getTagNameClass() ) ) {
5757
createCharacterStateSet(stateSetElement);
5858
}
59-
59+
6060
for ( Element characterElement : getChildrenByTagName( getFormatElement(), CharacterImpl.getTagNameClass() ) ) {
6161
createCharacter(characterElement);
6262
}
63-
63+
6464
for ( Element row : getChildrenByTagName( getMatrixElement(), "row") ) {
6565
OTU otu = otus.getThingById(row.getAttribute("otu"));
6666
MatrixRowImpl<CharacterState> matrixRow = new MatrixRowImpl<CharacterState>(getDocument(),row, this, false);
@@ -87,7 +87,7 @@ protected CharacterStateSet createCharacterStateSet(Element statesElement) {
8787
mCharacterStateSets.add(charStateSet); // XXX Make this into a setter?
8888
return charStateSet;
8989
}
90-
90+
9191
/**
9292
* This is equivalent to creating a <states> element, i.e.
9393
* a container for state elements, polymorphic_state_set elements
@@ -96,6 +96,7 @@ protected CharacterStateSet createCharacterStateSet(Element statesElement) {
9696
* If the format element object doesn't exist yet it's created here
9797
* @author rvosa
9898
*/
99+
@Override
99100
public CharacterStateSet createCharacterStateSet() {
100101
CharacterStateSetImpl characterStateSet = new CharacterStateSetImpl(getDocument());
101102
List<Element> currentCharElements = getChildrenByTagName(getFormatElement(), "char");
@@ -119,10 +120,11 @@ else if ( ! currentSetElements.isEmpty() ) {
119120
* (non-Javadoc)
120121
* @see org.nexml.model.CategoricalMatrix#getCharacterStateSets()
121122
*/
123+
@Override
122124
public Set<CharacterStateSet> getCharacterStateSets() {
123125
return Collections.unmodifiableSet(mCharacterStateSets);
124126
}
125-
127+
126128
/**
127129
* This method creates a char element, i.e. a column definition.
128130
* Because NeXML requires for categorical matrices that these
@@ -131,14 +133,15 @@ public Set<CharacterStateSet> getCharacterStateSets() {
131133
* in here, from which the attribute's value is set.
132134
* @author rvosa
133135
*/
136+
@Override
134137
public Character createCharacter(CharacterStateSet characterStateSet) {
135138
CharacterImpl character = new CharacterImpl(getDocument());
136139
addThing(character);
137140
character.setCharacterStateSet(characterStateSet);
138141
attachFundamentalDataElement(getFormatElement(), character.getElement());
139142
return character;
140143
}
141-
144+
142145
protected Character createCharacter(Element element) {
143146
CharacterImpl character = new CharacterImpl(getDocument(),element);
144147
addThing(character);
@@ -147,7 +150,7 @@ protected Character createCharacter(Element element) {
147150
character.setCharacterStateSet(stateSet);
148151
return character;
149152
}
150-
153+
151154
protected CharacterStateSet lookupCharacterStateSetById(String stateSetId) {
152155
if ( null == stateSetId ) {
153156
return null;
@@ -161,68 +164,67 @@ protected CharacterStateSet lookupCharacterStateSetById(String stateSetId) {
161164
}
162165

163166
public CharacterStateSet getDNACharacterStateSet() {
164-
if (mMolecularCharacterStates == null){
165-
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
166-
}
167-
CharacterStateSet result = mMolecularCharacterStates.getDNAStateSet();
168-
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
169-
if (mCharacterStateSets.add(characterStateSet)){
170-
if ( null == getFormatElement() ) {
171-
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
172-
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
173-
}
174-
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
175-
}
176-
return result;
167+
if (mMolecularCharacterStates == null){
168+
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
169+
}
170+
CharacterStateSet result = mMolecularCharacterStates.getDNAStateSet();
171+
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
172+
if (mCharacterStateSets.add(characterStateSet)){
173+
if ( null == getFormatElement() ) {
174+
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
175+
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
176+
}
177+
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
178+
}
179+
return result;
177180
}
178181

179182
public CharacterStateSet getRNACharacterStateSet() {
180-
if (mMolecularCharacterStates == null){
181-
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
182-
}
183-
CharacterStateSet result = mMolecularCharacterStates.getRNAStateSet();
184-
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
185-
if (mCharacterStateSets.add(characterStateSet)){
186-
if ( null == getFormatElement() ) {
187-
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
188-
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
189-
}
190-
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
191-
}
192-
return result;
193-
}
194-
195-
public CharacterStateSet getProteinCharacterStateSet(){
196-
if (mMolecularCharacterStates == null){
197-
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
198-
}
199-
CharacterStateSet result = mMolecularCharacterStates.getProteinStateSet();
200-
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
201-
if (mCharacterStateSets.add(characterStateSet)){
202-
if ( null == getFormatElement() ) {
203-
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
204-
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
205-
}
206-
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
207-
}
208-
return result;
209-
}
210-
211-
public CharacterState parseSymbol(String symbol) {
212-
CharacterStateSet lastSet = null;
213-
for ( CharacterStateSet stateSet : getCharacterStateSets() ) {
214-
CharacterState state = stateSet.lookupCharacterStateBySymbol(symbol);
215-
if ( null != state ) {
216-
return state;
183+
if (mMolecularCharacterStates == null){
184+
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
185+
}
186+
CharacterStateSet result = mMolecularCharacterStates.getRNAStateSet();
187+
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
188+
if (mCharacterStateSets.add(characterStateSet)){
189+
if ( null == getFormatElement() ) {
190+
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
191+
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
217192
}
218-
lastSet = stateSet;
193+
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
194+
}
195+
return result;
196+
}
197+
198+
public CharacterStateSet getProteinCharacterStateSet(){
199+
if (mMolecularCharacterStates == null){
200+
mMolecularCharacterStates = new MolecularCharacterStateSetImpl(getDocument());
201+
}
202+
CharacterStateSet result = mMolecularCharacterStates.getProteinStateSet();
203+
CharacterStateSetImpl characterStateSet = (CharacterStateSetImpl)result;
204+
if (mCharacterStateSets.add(characterStateSet)){
205+
if ( null == getFormatElement() ) {
206+
setFormatElement( getDocument().createElementNS(DEFAULT_NAMESPACE,"format") );
207+
getElement().insertBefore( getFormatElement(), getElement().getFirstChild() );
208+
}
209+
getFormatElement().insertBefore( characterStateSet.getElement(), getFormatElement().getFirstChild() );
210+
}
211+
return result;
212+
}
213+
214+
@Override
215+
public CharacterState parseSymbol(String symbol, Character character) {
216+
CharacterStateSet stateSet = character.getCharacterStateSet();
217+
CharacterState state = stateSet.lookupCharacterStateBySymbol(symbol);
218+
if ( null != state ) {
219+
return state;
220+
} else {
221+
return stateSet.createCharacterState(symbol);
219222
}
220-
return lastSet.createCharacterState(symbol);
221223
}
222224

223225
@Override
224226
String getSplitString() {
225227
return "\\s+";
226228
}
227229

228-
}
230+
}

src/org/nexml/model/impl/ContinuousMatrixImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ public Character createCharacter(CharacterStateSet stateSet) {
9999
return createCharacter();
100100
}
101101

102-
public Double parseSymbol(String symbol) {
102+
@Override
103+
public Double parseSymbol(String symbol, Character character) {
103104
return Double.parseDouble(symbol);
104105
}
105106

0 commit comments

Comments
 (0)