Skip to content

Commit e41d17e

Browse files
authored
Merge pull request #4023 from headius/byte_ary_identifiers
2 parents 5a04c18 + e182867 commit e41d17e

7 files changed

Lines changed: 70 additions & 63 deletions

File tree

.github/workflows/java-wasm-bindings.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ on:
1515
pull_request:
1616

1717
jobs:
18-
build:
18+
build-wasm:
1919
runs-on: ubuntu-latest
2020
steps:
2121
- uses: actions/checkout@v6

.github/workflows/main.yml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,18 @@ jobs:
188188
- name: Run Java Loader test
189189
run: PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 JRUBY_OPTS="-J-ea" bundle exec rake test:java_loader
190190

191+
build-java-truffleruby:
192+
runs-on: ubuntu-latest
193+
steps:
194+
- uses: actions/checkout@v6
195+
- name: Set up JRuby
196+
uses: ruby/setup-ruby@v1
197+
with:
198+
ruby-version: jruby
199+
bundler-cache: true
200+
- name: Run Java Loader test
201+
run: PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 PRISM_JAVA_BACKEND=truffleruby bundle exec rake compile
202+
191203
lex-ruby:
192204
runs-on: ubuntu-latest
193205
steps:

java-wasm/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ This dir contains the chicory-prism artifact, a version of prism compiled to WAS
55
Generate the templated sources:
66

77
```
8-
PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 PRISM_JAVA_BACKEND=jruby bundle exec rake templates
8+
PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates
99
```
1010

1111
Compile to WASM using WASI SDK version 25:

java-wasm/src/test/java/org/jruby/parser/prism/WASMTest.java

Lines changed: 5 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import org.ruby_lang.prism.ParsingOptions;
66
import org.ruby_lang.prism.wasm.Prism;
77

8+
import java.nio.charset.StandardCharsets;
89
import java.util.EnumSet;
910

1011
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -41,22 +42,6 @@ public void test1() {
4142
assertTrue(pr.value.childNodes()[0].toString().contains("IntegerNode"));
4243
}
4344

44-
@Test
45-
public void test1Aot() {
46-
// The Ruby source code to be processed
47-
var source = "1 + 1";
48-
49-
ParseResult pr = null;
50-
try (Prism prism = new Prism()) {
51-
pr = prism.serializeParse(packedOptions, source);
52-
}
53-
54-
assertEquals(1, pr.value.childNodes().length);
55-
System.out.println("Nodes:");
56-
System.out.println(pr.value.childNodes()[0]);
57-
assertTrue(pr.value.childNodes()[0].toString().contains("IntegerNode"));
58-
}
59-
6045
@Test
6146
public void test2() {
6247
// The Ruby source code to be processed
@@ -74,19 +59,19 @@ public void test2() {
7459
}
7560

7661
@Test
77-
public void test2Aot() {
62+
public void testMBCIdentifier() {
7863
// The Ruby source code to be processed
79-
var source = "puts \"h\ne\nl\nl\no\n\"";
64+
var source = new String("hellø = \"hello\"".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
8065

8166
ParseResult pr = null;
8267
try (Prism prism = new Prism()) {
8368
pr = prism.serializeParse(packedOptions, source);
8469
}
8570

86-
assertEquals(1, pr.value.childNodes().length);
8771
System.out.println("Nodes:");
72+
System.out.println(pr);
8873
System.out.println(pr.value.childNodes()[0]);
89-
assertTrue(pr.value.childNodes()[0].toString().contains("CallNode"));
74+
assertTrue(pr.value.childNodes()[0].toString().contains("hell\\xc3\\xb8"));
9075
}
9176

9277
@Test

templates/java/org/ruby_lang/prism/Loader.java.erb

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
<%- string_type = Prism::Template::JAVA_STRING_TYPE -%>
1+
<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%>
22
package org.ruby_lang.prism;
33

44
import java.lang.Short;
@@ -19,37 +19,33 @@ public class Loader {
1919

2020
// Overridable methods
2121

22-
public Charset getEncodingCharset(String encodingName) {
23-
encodingName = encodingName.toLowerCase(Locale.ROOT);
24-
if (encodingName.equals("ascii-8bit")) {
25-
return StandardCharsets.US_ASCII;
26-
}
27-
return Charset.forName(encodingName);
28-
}
29-
30-
public <%= string_type %> bytesToName(byte[] bytes) {
31-
<%- if string_type == "String" -%>
32-
return new String(bytes, encodingCharset).intern();
22+
public <%= id_type %> bytesToName(byte[] bytes) {
23+
<%- if id_type == "byte[]" -%>
24+
return bytes;
3325
<%- else -%>
34-
return null; // Must be implemented by subclassing Loader
26+
throw new AbstractMethodError("Loader.bytesToName(<%= id_type %>) is not implemented");
3527
<%- end -%>
3628
}
3729

3830
private static final class ConstantPool {
3931

4032
private final Loader loader;
4133
private final int bufferOffset;
42-
private final <%= string_type %>[] cache;
34+
private final <%= id_type %>[] cache;
4335

4436
ConstantPool(Loader loader, int bufferOffset, int length) {
4537
this.loader = loader;
4638
this.bufferOffset = bufferOffset;
47-
cache = new <%= string_type %>[length];
39+
<%- if id_type == "String" -%>
40+
cache = new <%= id_type %>[length];
41+
<%- else -%>
42+
cache = new byte[length][];
43+
<%- end -%>
4844
}
4945

50-
<%= string_type %> get(ByteBuffer buffer, int oneBasedIndex) {
46+
<%= id_type %> get(ByteBuffer buffer, int oneBasedIndex) {
5147
int index = oneBasedIndex - 1;
52-
<%= string_type %> constant = cache[index];
48+
<%= id_type %> constant = cache[index];
5349

5450
if (constant == null) {
5551
int offset = bufferOffset + index * 8;
@@ -70,9 +66,6 @@ public class Loader {
7066

7167
private final ByteBuffer buffer;
7268
protected String encodingName;
73-
<%- if string_type == "String" -%>
74-
private Charset encodingCharset;
75-
<%- end -%>
7669
private ConstantPool constantPool;
7770
private Nodes.Source source = null;
7871

@@ -100,9 +93,6 @@ public class Loader {
10093
byte[] encodingNameBytes = new byte[encodingLength];
10194
buffer.get(encodingNameBytes);
10295
this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII);
103-
<%- if string_type == "String" -%>
104-
this.encodingCharset = getEncodingCharset(this.encodingName);
105-
<%- end -%>
10696

10797
source.setStartLine(loadVarSInt());
10898
source.setLineOffsets(loadLineOffsets());
@@ -213,11 +203,11 @@ public class Loader {
213203
}
214204
}
215205

216-
private <%= string_type %> loadConstant() {
206+
private <%= id_type %> loadConstant() {
217207
return constantPool.get(buffer, loadVarUInt());
218208
}
219209

220-
private <%= string_type %> loadOptionalConstant() {
210+
private <%= id_type %> loadOptionalConstant() {
221211
if (buffer.get(buffer.position()) != 0) {
222212
return loadConstant();
223213
} else {
@@ -226,12 +216,16 @@ public class Loader {
226216
}
227217
}
228218

229-
private <%= string_type %>[] loadConstants() {
219+
private <%= id_type %>[] loadConstants() {
230220
int length = loadVarUInt();
231221
if (length == 0) {
232-
return Nodes.EMPTY_STRING_ARRAY;
222+
return Nodes.EMPTY_IDENTIFIER_ARRAY;
233223
}
234-
<%= string_type %>[] constants = new <%= string_type %>[length];
224+
<%- if id_type == "String" -%>
225+
<%= id_type %>[] constants = new <%= id_type %>[length];
226+
<%- else -%>
227+
<%= id_type %>[] constants = new byte[length][];
228+
<%- end -%>
235229
for (int i = 0; i < length; i++) {
236230
constants[i] = constantPool.get(buffer, loadVarUInt());
237231
}
@@ -395,7 +389,7 @@ public class Loader {
395389
int bufferPosition = buffer.position();
396390
int serializedLength = buffer.getInt();
397391
// Load everything except the body and locals, because the name, receiver, and parameters are still needed for lazily defining the method
398-
Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_STRING_ARRAY);
392+
Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_IDENTIFIER_ARRAY);
399393
buffer.position(bufferPosition + serializedLength); // skip past the serialized DefNode
400394
return lazyDefNode;
401395
}

templates/java/org/ruby_lang/prism/Nodes.java.erb

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
<%- string_type = Prism::Template::JAVA_STRING_TYPE -%>
1+
<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%>
22
package org.ruby_lang.prism;
33

44
import java.lang.Override;
@@ -16,7 +16,7 @@ import java.util.Arrays;
1616
// @formatter:off
1717
public abstract class Nodes {
1818

19-
public static final <%= string_type %>[] EMPTY_STRING_ARRAY = {};
19+
public static final <%= id_type %>[] EMPTY_IDENTIFIER_ARRAY = {};
2020

2121
@Target(ElementType.FIELD)
2222
@Retention(RetentionPolicy.SOURCE)
@@ -139,6 +139,22 @@ public abstract class Nodes {
139139

140140
protected abstract String toString(String indent);
141141
}
142+
143+
protected static String asString(Object value) {
144+
return value.toString();
145+
}
146+
147+
protected static String asString(byte[] value) {
148+
StringBuilder buf = new StringBuilder(value.length);
149+
for (byte b : value) {
150+
if (b >= 0x20 && b <= 0x7e) {
151+
buf.append((char) b);
152+
} else {
153+
buf.append(String.format("\\x%02x", Byte.toUnsignedInt(b)));
154+
}
155+
}
156+
return buf.toString();
157+
}
142158
<%-# FLAGS -%>
143159
<%- flags.each do |flag| -%>
144160
@@ -373,18 +389,18 @@ public abstract class Nodes {
373389
builder.append(nextNextIndent).append(child.toString(nextNextIndent));
374390
}
375391
<%- when Prism::Template::StringField -%>
376-
builder.append('"' + new String(this.<%= field.name %>, StandardCharsets.UTF_8) + '"');
392+
builder.append('"' + asString(this.<%= field.name %>) + '"');
377393
builder.append('\n');
378394
<%- when Prism::Template::ConstantField -%>
379-
builder.append('"').append(this.<%= field.name %>).append('"');
395+
builder.append('"').append(asString(this.<%= field.name %>)).append('"');
380396
builder.append('\n');
381397
<%- when Prism::Template::OptionalConstantField -%>
382-
builder.append(this.<%= field.name %> == null ? "null" : "\"" + this.<%= field.name %> + "\"");
398+
builder.append(this.<%= field.name %> == null ? "null" : "\"" + asString(this.<%= field.name %>) + "\"");
383399
builder.append('\n');
384400
<%- when Prism::Template::ConstantListField -%>
385401
builder.append('\n');
386-
for (<%= string_type %> constant : this.<%= field.name %>) {
387-
builder.append(nextNextIndent).append('"').append(constant).append('"').append('\n');
402+
for (<%= id_type %> constant : this.<%= field.name %>) {
403+
builder.append(nextNextIndent).append('"').append(asString(constant)).append('"').append('\n');
388404
}
389405
<%- when Prism::Template::Flags -%>
390406
builder.append(flags);

templates/template.rb

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@ module Template # :nodoc: all
1111
REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS
1212
CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
1313

14-
JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
15-
JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
14+
JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default"
15+
JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]"
1616
INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
1717

1818
COMMON_FLAGS_COUNT = 2
@@ -272,7 +272,7 @@ def call_seq_type
272272
end
273273

274274
def java_type
275-
JAVA_STRING_TYPE
275+
JAVA_IDENTIFIER_TYPE
276276
end
277277
end
278278

@@ -292,7 +292,7 @@ def call_seq_type
292292
end
293293

294294
def java_type
295-
JAVA_STRING_TYPE
295+
JAVA_IDENTIFIER_TYPE
296296
end
297297
end
298298

@@ -312,7 +312,7 @@ def call_seq_type
312312
end
313313

314314
def java_type
315-
"#{JAVA_STRING_TYPE}[]"
315+
"#{JAVA_IDENTIFIER_TYPE}[]"
316316
end
317317
end
318318

0 commit comments

Comments
 (0)