Skip to content

Commit a1b6e34

Browse files
jcschaff and claude committed
Test SBML import preserves UTF-8 non-ASCII chars in names
Imports a minimal SBML L2V4 document containing U+2013 EN DASH and U+03BC GREEK SMALL LETTER MU in reaction and species name attributes, and asserts the resulting BioModel preserves the characters byte-for-byte in getSbmlName(). Tagged Fast.

Exercises both the File path (readSbmlDocument(File), which used to read with Charset.defaultCharset()) and the InputStream path. On a UTF-8-default JVM both paths look equivalent, but the test documents the expected behavior and catches regressions if either path is changed back to platform-default decoding.

A complementary CI job that forks a Cp1252-default JVM is the follow-up that demonstrates the fix matters cross-platform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 22bd1f0 commit a1b6e34

1 file changed

Lines changed: 110 additions & 0 deletions

File tree

Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,110 @@
1+
package org.vcell.sbml;
2+
3+
import cbit.util.xml.VCLogger;
4+
import cbit.util.xml.VCLoggerException;
5+
import cbit.vcell.biomodel.BioModel;
6+
import cbit.vcell.model.ReactionStep;
7+
import cbit.vcell.model.SpeciesContext;
8+
import org.junit.jupiter.api.Tag;
9+
import org.junit.jupiter.api.Test;
10+
import org.vcell.sbml.vcell.SBMLImporter;
11+
12+
import java.io.File;
13+
import java.nio.charset.StandardCharsets;
14+
import java.nio.file.Files;
15+
import java.nio.file.Path;
16+
17+
import static org.junit.jupiter.api.Assertions.assertEquals;
18+
import static org.junit.jupiter.api.Assertions.assertNotNull;
19+
20+
/**
21+
* Verifies SBML import reads non-ASCII attribute values byte-for-byte from a
22+
* UTF-8 source. The two reaction-name patterns chosen here (en-dash U+2013 and
23+
* Greek mu U+03BC) are common in scientific notation and would mojibake under
24+
* the previous {@code Charset.defaultCharset()} read on a non-UTF-8 JVM.
25+
*/
26+
@Tag("Fast")
27+
public class SBMLImportCharsetTest {
28+
29+
private static class CapturingVCLogger extends VCLogger {
30+
@Override public boolean hasMessages() { return false; }
31+
@Override public void sendAllMessages() { }
32+
@Override public void sendMessage(Priority p, ErrorType et, String message) throws VCLoggerException {
33+
if (p == Priority.HighPriority) {
34+
throw new VCLoggerException(p + " " + et + ": " + message);
35+
}
36+
}
37+
}
38+
39+
private static final String SBML_WITH_NON_ASCII =
40+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
41+
"<sbml xmlns=\"http://www.sbml.org/sbml/level2/version4\" level=\"2\" version=\"4\">\n" +
42+
" <model id=\"charsetTestModel\">\n" +
43+
" <listOfCompartments>\n" +
44+
" <compartment id=\"c1\" size=\"1.0\"/>\n" +
45+
" </listOfCompartments>\n" +
46+
" <listOfSpecies>\n" +
47+
" <species id=\"s1\" name=\"μ-prot\" compartment=\"c1\" initialConcentration=\"1.0\"/>\n" +
48+
" </listOfSpecies>\n" +
49+
" <listOfReactions>\n" +
50+
" <reaction id=\"r1\" name=\"k_14–3–3\">\n" +
51+
" <listOfProducts>\n" +
52+
" <speciesReference species=\"s1\"/>\n" +
53+
" </listOfProducts>\n" +
54+
" <kineticLaw>\n" +
55+
" <math xmlns=\"http://www.w3.org/1998/Math/MathML\">\n" +
56+
" <cn>1.0</cn>\n" +
57+
" </math>\n" +
58+
" </kineticLaw>\n" +
59+
" </reaction>\n" +
60+
" </listOfReactions>\n" +
61+
" </model>\n" +
62+
"</sbml>\n";
63+
64+
@Test
65+
public void importsUtf8ReactionAndSpeciesNames() throws Exception {
66+
Path tmp = Files.createTempFile("vcell-charset-test-", ".xml");
67+
try {
68+
Files.write(tmp, SBML_WITH_NON_ASCII.getBytes(StandardCharsets.UTF_8));
69+
70+
SBMLImporter importer = new SBMLImporter(tmp.toAbsolutePath().toString(), new CapturingVCLogger(), false);
71+
BioModel bioModel = importer.getBioModel();
72+
assertNotNull(bioModel);
73+
74+
ReactionStep r1 = null;
75+
for (ReactionStep rs : bioModel.getModel().getReactionSteps()) {
76+
if ("r1".equals(rs.getName())) { r1 = rs; break; }
77+
}
78+
assertNotNull(r1, "expected reaction with id 'r1' in imported model");
79+
assertEquals("k_14–3–3", r1.getSbmlName(),
80+
"reaction sbmlName must preserve U+2013 EN DASH characters");
81+
82+
SpeciesContext s1 = null;
83+
for (SpeciesContext sc : bioModel.getModel().getSpeciesContexts()) {
84+
if ("s1".equals(sc.getName())) { s1 = sc; break; }
85+
}
86+
assertNotNull(s1, "expected species with id 's1' in imported model");
87+
assertEquals("μ-prot", s1.getSbmlName(),
88+
"species sbmlName must preserve U+03BC GREEK SMALL LETTER MU");
89+
} finally {
90+
Files.deleteIfExists(tmp);
91+
}
92+
}
93+
94+
@Test
95+
public void inputStreamPathPreservesUtf8() throws Exception {
96+
try (java.io.ByteArrayInputStream in =
97+
new java.io.ByteArrayInputStream(SBML_WITH_NON_ASCII.getBytes(StandardCharsets.UTF_8))) {
98+
SBMLImporter importer = new SBMLImporter(in, new CapturingVCLogger(), false);
99+
BioModel bioModel = importer.getBioModel();
100+
assertNotNull(bioModel);
101+
102+
ReactionStep r1 = null;
103+
for (ReactionStep rs : bioModel.getModel().getReactionSteps()) {
104+
if ("r1".equals(rs.getName())) { r1 = rs; break; }
105+
}
106+
assertNotNull(r1);
107+
assertEquals("k_14–3–3", r1.getSbmlName());
108+
}
109+
}
110+
}

0 commit comments

Comments
 (0)