Skip to content

Commit 608f1f7

Browse files
committed
Fix unicode characters in strings between Lua -> Java and Java -> Lua
This is such a major hack in my opinion, but it works. The problem seems to be that Java uses Modified UTF-8, which limits characters to three bytes (instead of four), while Lua uses normal UTF-8, so there's a small mismatch. This change simply checks whether the string needs fixing and, instead of passing the string directly through Java/JNI, passes the raw bytes and constructs a string from them.
1 parent 192daaa commit 608f1f7

File tree

1 file changed

+53
-1
lines changed

1 file changed

+53
-1
lines changed

luajava/src/main/java/party/iroiro/luajava/AbstractLua.java

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.lang.reflect.Proxy;
3737
import java.nio.Buffer;
3838
import java.nio.ByteBuffer;
39+
import java.nio.charset.StandardCharsets;
3940
import java.util.*;
4041
import java.util.concurrent.ConcurrentHashMap;
4142

@@ -200,6 +201,32 @@ public void push(long integer) {
200201
@Override
201202
public void push(@NotNull String string) {
202203
checkStack(1);
204+
205+
if (needsUTF8Fix(string)) {
206+
try {
207+
// Get UTF-8 bytes
208+
byte[] utf8Bytes = string.getBytes(StandardCharsets.UTF_8);
209+
210+
// Build string using Lua's string.char
211+
getGlobal("string");
212+
getField(-1, "char");
213+
remove(-2); // Remove string table, keep char function
214+
215+
// Push all bytes as integers
216+
for (byte b : utf8Bytes) {
217+
push((long)(b & 0xFF)); // Convert to unsigned
218+
}
219+
220+
// Call string.char(byte1, byte2, byte3, ...)
221+
pCall(utf8Bytes.length, 1);
222+
return;
223+
224+
} catch (Exception e) {
225+
// Fall back to native method
226+
}
227+
}
228+
229+
// Fast path for ASCII
203230
C.luaJ_pushstring(L, string);
204231
}
205232

@@ -375,9 +402,34 @@ public boolean toBoolean(int index) {
375402
return null;
376403
}
377404

405+
private boolean needsUTF8Fix(String str) {
406+
// Quick check: if any character is > 0x7F, might need UTF-8 fix
407+
for (int i = 0; i < str.length(); i++) {
408+
if (str.charAt(i) > 0x7F) {
409+
return true;
410+
}
411+
}
412+
return false;
413+
}
414+
378415
@Override
379416
public @Nullable String toString(int index) {
380-
return C.lua_tostring(L, index);
417+
String result = C.lua_tostring(L, index);
418+
419+
// Only do UTF-8 fix if string contains high Unicode characters
420+
if (result != null && needsUTF8Fix(result)) {
421+
ByteBuffer buffer = toBuffer(index);
422+
if (buffer != null) {
423+
try {
424+
byte[] bytes = new byte[buffer.remaining()];
425+
buffer.get(bytes);
426+
return new String(bytes, StandardCharsets.UTF_8);
427+
} catch (Exception e) {
428+
// Fall back to original result
429+
}
430+
}
431+
}
432+
return result;
381433
}
382434

383435
@Override

0 commit comments

Comments
 (0)