Skip to content

Commit b527b4f

Browse files
committed
fix(marshalling): Expose C strings as references to uint8 in JS
* [Marshalling] Change the underlying type of returned references from `int8` to `uint8`
* [TestFixtures] Use `stringWithUTF8String` instead of formatting with `%s` (which uses the system default encoding)
* [TestRunner] Add the `utf8` node module to the TestRunner project
* [TestRunner] Add a test with Cyrillic characters which validates that references are indeed unsigned
1 parent cdce9f8 commit b527b4f

5 files changed

Lines changed: 230 additions & 9 deletions

File tree

src/NativeScript/Marshalling/Fundamentals/FFIPrimitiveTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ static JSValue cStringType_read(ExecState* execState, const void* buffer, JSCell
126126
}
127127

128128
GlobalObject* globalObject = jsCast<GlobalObject*>(execState->lexicalGlobalObject());
129-
JSCell* type = globalObject->typeFactory()->int8Type();
129+
JSCell* type = globalObject->typeFactory()->uint8Type();
130130
PointerInstance* pointer = jsCast<PointerInstance*>(globalObject->interop()->pointerInstanceForPointer(execState, const_cast<char*>(string)));
131131
return ReferenceInstance::create(execState->vm(), globalObject, globalObject->interop()->referenceInstanceStructure(), type, pointer).get();
132132
}

tests/TestFixtures/Marshalling/TNSPrimitivePointers.m

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
}
2020

2121
// Logs the C string pointed to by `x` through TNSLog and returns `x` unchanged.
// The `22-` / `22+` pair below is the before/after of this commit: `%s`
// formatting (which, per the commit message, uses the system default encoding)
// is replaced by an explicit UTF-8 decode.
// NOTE(review): `stringWithUTF8String:` is presumably not NULL-safe, unlike the
// old `%s` path — confirm no test passes NULL here.
unsigned char* functionWithUCharPtr(unsigned char* x) {
22-
TNSLog([NSString stringWithFormat:@"%s", x]);
22+
TNSLog([NSString stringWithUTF8String:(char*)x]);
2323
return x;
2424
}
2525

@@ -44,7 +44,7 @@
4444
}
4545

4646
// Logs the C string pointed to by `x` through TNSLog and returns `x` unchanged.
// The `47-` / `47+` pair below is the before/after of this commit: the `%s`
// format is replaced by `stringWithUTF8String:` so the bytes are always
// interpreted as UTF-8.
// NOTE(review): `stringWithUTF8String:` is presumably not NULL-safe — confirm
// callers never pass NULL.
char* functionWithCharPtr(char* x) {
47-
TNSLog([NSString stringWithFormat:@"%s", x]);
47+
TNSLog([NSString stringWithUTF8String:x]);
4848

4949
return x;
5050
}
@@ -108,8 +108,8 @@ void functionWithIntConstantArray2(int x[2][2]) {
108108
}
109109

110110
char** functionWithDoubleCharPtr(char** x) {
111-
TNSLog([NSString stringWithFormat:@"%s", x[0]]);
112-
TNSLog([NSString stringWithFormat:@"%s", x[1]]);
111+
TNSLog([NSString stringWithUTF8String:x[0]]);
112+
TNSLog([NSString stringWithUTF8String:x[1]]);
113113

114114
free(x[0]);
115115
x[0] = calloc(4, sizeof(char));
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
// Copied from https://github.com/mathiasbynens/utf8.js/blob/v3.0.0/utf8.js
2+
3+
/*! https://mths.be/utf8js v3.0.0 by @mathias */
4+
;(function(root) {
5+
6+
// Local alias for String.fromCharCode; the encoding helpers in this file call it
// to turn numeric code units / byte values into one-character strings.
var stringFromCharCode = String.fromCharCode;
7+
8+
// Taken from https://mths.be/punycode
9+
/**
 * Splits a JavaScript string into an array of numeric code points.
 *
 * A high surrogate immediately followed by a low surrogate is merged into a
 * single supplementary code point. A high surrogate that is not followed by a
 * low surrogate (or that ends the string) is emitted on its own, and the
 * following code unit is examined again as the start of a new symbol.
 *
 * @param {string} string Input text.
 * @returns {number[]} One entry per code point (or lone surrogate).
 */
function ucs2decode(string) {
    var codePoints = [];
    var total = string.length;
    var pos = 0;
    while (pos < total) {
        var unit = string.charCodeAt(pos);
        pos += 1;

        var isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
        if (!isHighSurrogate || pos >= total) {
            // Ordinary code unit, or a high surrogate with nothing after it.
            codePoints.push(unit);
            continue;
        }

        var next = string.charCodeAt(pos);
        if ((next & 0xFC00) === 0xDC00) {
            // Proper surrogate pair: consume the low surrogate as well.
            codePoints.push(((unit & 0x3FF) << 10) + (next & 0x3FF) + 0x10000);
            pos += 1;
        } else {
            // Unmatched high surrogate: emit it alone and leave `next` to be
            // re-read, since it may itself start a surrogate pair.
            codePoints.push(unit);
        }
    }
    return codePoints;
}
34+
35+
// Taken from https://mths.be/punycode
36+
/**
 * Builds a JavaScript string from an array of numeric code points.
 *
 * Values above 0xFFFF are written as a high/low surrogate pair; every other
 * value is written as a single code unit.
 *
 * @param {number[]} array Code points to encode.
 * @returns {string} The resulting string.
 */
function ucs2encode(array) {
    var text = '';
    for (var i = 0, count = array.length; i < count; i++) {
        var codePoint = array[i];
        if (codePoint > 0xFFFF) {
            var offset = codePoint - 0x10000;
            // High surrogate carries the top 10 bits of the offset...
            text += stringFromCharCode(((offset >>> 10) & 0x3FF) | 0xD800);
            // ...and the low surrogate (appended below) the bottom 10 bits.
            codePoint = 0xDC00 | (offset & 0x3FF);
        }
        text += stringFromCharCode(codePoint);
    }
    return text;
}
52+
53+
/**
 * Rejects code points in the surrogate range 0xD800..0xDFFF.
 *
 * @param {number} codePoint Value to validate.
 * @throws {Error} When `codePoint` is a lone surrogate; the message contains
 *     its upper-case hexadecimal value.
 */
function checkScalarValue(codePoint) {
    var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    if (!isSurrogate) {
        return;
    }
    var hex = codePoint.toString(16).toUpperCase();
    throw Error('Lone surrogate U+' + hex + ' is not a scalar value');
}
61+
/*--------------------------------------------------------------------------*/
62+
63+
/**
 * Produces one UTF-8 continuation byte (10xxxxxx) as a one-character string.
 *
 * @param {number} codePoint Code point being encoded.
 * @param {number} shift Right shift selecting which 6-bit group to emit.
 * @returns {string} Single character whose code is the continuation byte.
 */
function createByte(codePoint, shift) {
    var payload = (codePoint >> shift) & 0x3F;
    return stringFromCharCode(0x80 | payload);
}
66+
67+
/**
 * Encodes one code point as a UTF-8 "byte string": a string in which each
 * character's code is one byte of the encoding.
 *
 * @param {number} codePoint Code point to encode.
 * @returns {string} 1 to 4 characters for values that fit in 21 bits; for
 *     larger values only the trailing continuation byte is produced.
 * @throws {Error} Via checkScalarValue when a 3-byte value is a lone surrogate.
 */
function encodeCodePoint(codePoint) {
    // 1-byte sequence: the value is the byte.
    if ((codePoint & 0xFFFFFF80) === 0) {
        return stringFromCharCode(codePoint);
    }

    // Everything before the final continuation byte.
    var leading;
    if ((codePoint & 0xFFFFF800) === 0) {
        // 2-byte sequence
        leading = stringFromCharCode(0xC0 | ((codePoint >> 6) & 0x1F));
    } else if ((codePoint & 0xFFFF0000) === 0) {
        // 3-byte sequence; surrogates are not encodable.
        checkScalarValue(codePoint);
        leading = stringFromCharCode(0xE0 | ((codePoint >> 12) & 0x0F)) +
            createByte(codePoint, 6);
    } else if ((codePoint & 0xFFE00000) === 0) {
        // 4-byte sequence
        leading = stringFromCharCode(0xF0 | ((codePoint >> 18) & 0x07)) +
            createByte(codePoint, 12) +
            createByte(codePoint, 6);
    } else {
        leading = '';
    }

    // Every multi-byte form ends with the lowest 6 bits as a continuation byte.
    return leading + stringFromCharCode(0x80 | (codePoint & 0x3F));
}
88+
89+
/**
 * Encodes a JavaScript string as a UTF-8 byte string (one character per byte).
 *
 * @param {string} string Text to encode.
 * @returns {string} Concatenation of the encoded form of each code point.
 * @throws {Error} If the input contains a lone surrogate.
 */
function utf8encode(string) {
    return ucs2decode(string)
        .map(function (codePoint) {
            return encodeCodePoint(codePoint);
        })
        .join('');
}
101+
102+
/*--------------------------------------------------------------------------*/
103+
104+
/**
 * Reads the next byte from the shared decoder state (byteArray / byteIndex /
 * byteCount), advances byteIndex, and returns the byte's 6 payload bits.
 *
 * @returns {number} Low 6 bits of the continuation byte.
 * @throws {Error} 'Invalid byte index' when no byte is left;
 *     'Invalid continuation byte' when the byte is not of the form 10xxxxxx.
 */
function readContinuationByte() {
    if (byteIndex >= byteCount) {
        throw Error('Invalid byte index');
    }

    var octet = byteArray[byteIndex++] & 0xFF;

    var isContinuation = (octet & 0xC0) === 0x80;
    if (!isContinuation) {
        // The index has already moved past the offending byte at this point.
        throw Error('Invalid continuation byte');
    }
    return octet & 0x3F;
}
119+
120+
/**
 * Decodes the next UTF-8 sequence from the shared decoder state and advances
 * byteIndex past it.
 *
 * @returns {number|false} The decoded code point, or `false` when byteIndex
 *     has reached byteCount (end of input).
 * @throws {Error} On an out-of-range index, a bad continuation byte, a value
 *     too small for its 2- or 3-byte form, a surrogate in a 3-byte sequence,
 *     or any other malformed lead byte / out-of-range 4-byte value.
 */
function decodeSymbol() {
    if (byteIndex > byteCount) {
        throw Error('Invalid byte index');
    }
    if (byteIndex === byteCount) {
        return false;
    }

    var lead = byteArray[byteIndex++] & 0xFF;
    var second;
    var third;
    var fourth;
    var decoded;

    // 1-byte sequence (no continuation bytes)
    if ((lead & 0x80) === 0) {
        return lead;
    }

    // 2-byte sequence
    if ((lead & 0xE0) === 0xC0) {
        second = readContinuationByte();
        decoded = ((lead & 0x1F) << 6) | second;
        if (decoded < 0x80) {
            // Would have fit in a single byte.
            throw Error('Invalid continuation byte');
        }
        return decoded;
    }

    // 3-byte sequence (may include unpaired surrogates)
    if ((lead & 0xF0) === 0xE0) {
        second = readContinuationByte();
        third = readContinuationByte();
        decoded = ((lead & 0x0F) << 12) | (second << 6) | third;
        if (decoded < 0x0800) {
            // Would have fit in two bytes.
            throw Error('Invalid continuation byte');
        }
        checkScalarValue(decoded);
        return decoded;
    }

    // 4-byte sequence
    if ((lead & 0xF8) === 0xF0) {
        second = readContinuationByte();
        third = readContinuationByte();
        fourth = readContinuationByte();
        decoded = ((lead & 0x07) << 0x12) | (second << 0x0C) |
            (third << 0x06) | fourth;
        var inRange = decoded >= 0x010000 && decoded <= 0x10FFFF;
        if (inRange) {
            return decoded;
        }
        // Out-of-range values fall through to the generic error below.
    }

    throw Error('Invalid UTF-8 detected');
}
182+
183+
// Decoder state shared by readContinuationByte, decodeSymbol and utf8decode.
// It is reset at the start of every utf8decode call.
var byteArray, byteCount, byteIndex;

/**
 * Decodes a UTF-8 byte string (one character per byte) into a JavaScript
 * string.
 *
 * @param {string} byteString Encoded input.
 * @returns {string} The decoded text.
 * @throws {Error} Propagated from decodeSymbol on malformed input.
 */
function utf8decode(byteString) {
    byteArray = ucs2decode(byteString);
    byteCount = byteArray.length;
    byteIndex = 0;

    var decoded = [];
    // decodeSymbol returns `false` (not 0) once the input is exhausted.
    for (var symbol = decodeSymbol(); symbol !== false; symbol = decodeSymbol()) {
        decoded.push(symbol);
    }
    return ucs2encode(decoded);
}
197+
198+
/*--------------------------------------------------------------------------*/
199+
200+
// Public API. `root` is `exports` when that is defined, otherwise a fresh
// object assigned to `this.utf8` (see the IIFE invocation at the end of the file).
root.version = '3.0.0';
201+
root.encode = utf8encode;
202+
root.decode = utf8decode;
203+
204+
}(typeof exports === 'undefined' ? this.utf8 = {} : exports));

tests/TestRunner/app/Marshalling/ReferenceTests.js

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,16 @@ describe(module.id, function () {
128128
// Passing the number 1 to functionWith_VoidPtr must throw. The expected message
// quotes the call expression text verbatim ("evaluating 'functionWith_VoidPtr(1)'"),
// so the call inside the arrow function must stay spelled exactly as it is.
it("Wrong type of argument passed as pointer", function () {
129129
expect(() => functionWith_VoidPtr(1)).toThrowError(/1 is not a pointer. \(evaluating 'functionWith_VoidPtr\(1\)'\)/);
130130
});
131+
131132
// A plain JS string handed to functionWithUCharPtr must reach the native side
// intact: the fixture logs what it received and the log is compared to the input.
it("CString marshalling from JS string", function () {
    const input = 'test';
    functionWithUCharPtr(input);

    const logged = TNSGetOutput();
    expect(logged).toBe(input);
});
135136

136137
it("CString as arg/return value", function () {
137-
const ptr = interop.alloc(5 * interop.sizeof(interop.types.uint8));
138-
var reference = new interop.Reference(interop.types.uint8, ptr);
139138
const str = "test";
139+
const ptr = interop.alloc((str.length + 1) * interop.sizeof(interop.types.uint8));
140+
var reference = new interop.Reference(interop.types.uint8, ptr);
140141
for (ii in str) {
141142
const i = parseInt(ii);
142143
reference[i] = str.charCodeAt(i);
@@ -145,9 +146,23 @@ describe(module.id, function () {
145146

146147
const result = functionWithCharPtr(ptr);
147148

148-
expect(TNSGetOutput()).toBe('test');
149+
expect(TNSGetOutput()).toBe(str);
149150
expect(interop.handleof(result).toNumber() == interop.handleof(ptr).toNumber());
150-
expect(NSString.stringWithUTF8String(result).toString()).toBe('test');
151+
expect(NSString.stringWithUTF8String(result).toString()).toBe(str);
152+
});
153+
154+
// Verifies two things for a string containing non-ASCII (Cyrillic) characters:
//  1. the native side receives it as UTF-8 (the fixture logs it back unchanged);
//  2. the returned reference yields unsigned byte values, i.e. each element
//     equals the corresponding byte of the UTF-8 encoding (bytes >= 0x80 must
//     not come back negative).
it("CString should be passed as its UTF8 encoding and returned as a reference to unsigned characters", function () {
    const str = "test АБВГ";
    const result = functionWithUCharPtr(str);

    expect(TNSGetOutput()).toBe(str);

    // utf8.encode returns a byte string: one character per encoded byte.
    const strUtf8 = utf8.encode(str);
    // Numeric, block-scoped index: avoids leaking an implicit global `i` and
    // iterating string keys with for-in.
    for (let i = 0; i < strUtf8.length; i++) {
        // `actual` is the value under test (what the runtime returned);
        // `expected` is the reference UTF-8 byte.
        const expected = strUtf8.charCodeAt(i);
        const actual = result[i];
        expect(actual).toBe(expected, `Char code difference at index ${i} ("${actual}" vs "${expected}")`);
    }
});
152167

153168
// TODO: Create array type and constructor

tests/TestRunner/app/index.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import "./Infrastructure/timers";
55
import "./Infrastructure/simulator";
66
import "./Infrastructure/utils";
77

8+
// Expose the utf8 module from ./Infrastructure/utf8 to all test files as a global.
// (Terminated with a semicolon like every other statement in this file.)
global.utf8 = require("./Infrastructure/utf8");

// No-op function taking a single argument and doing nothing with it.
global.UNUSED = function (param) {
};
1012

0 commit comments

Comments
 (0)