Merge pull request #1208 from NativeScript/bektchiev/cstring-as-ref-to-uint8

mbektchiev · web-flow · commit 775598a43fb7 · 2019-09-17T14:30:14.000+03:00
fix(marshalling): Expose C strings as references to uint8 in JS
diff --git a/src/NativeScript/Marshalling/Fundamentals/FFIPrimitiveTypes.cpp b/src/NativeScript/Marshalling/Fundamentals/FFIPrimitiveTypes.cpp
@@ -126,7 +126,7 @@ static JSValue cStringType_read(ExecState* execState, const void* buffer, JSCell
     }
 
     GlobalObject* globalObject = jsCast<GlobalObject*>(execState->lexicalGlobalObject());
-    JSCell* type = globalObject->typeFactory()->int8Type();
+    JSCell* type = globalObject->typeFactory()->uint8Type();
     PointerInstance* pointer = jsCast<PointerInstance*>(globalObject->interop()->pointerInstanceForPointer(execState, const_cast<char*>(string)));
     return ReferenceInstance::create(execState->vm(), globalObject, globalObject->interop()->referenceInstanceStructure(), type, pointer).get();
 }
diff --git a/tests/TestFixtures/Marshalling/TNSPrimitivePointers.m b/tests/TestFixtures/Marshalling/TNSPrimitivePointers.m
@@ -19,7 +19,7 @@
 }
 
 unsigned char* functionWithUCharPtr(unsigned char* x) {
-    TNSLog([NSString stringWithFormat:@"%s", x]);
+    TNSLog([NSString stringWithUTF8String:(char*)x]);
     return x;
 }
 
@@ -44,7 +44,7 @@
 }
 
 char* functionWithCharPtr(char* x) {
-    TNSLog([NSString stringWithFormat:@"%s", x]);
+    TNSLog([NSString stringWithUTF8String:x]);
 
     return x;
 }
@@ -108,8 +108,8 @@ void functionWithIntConstantArray2(int x[2][2]) {
 }
 
 char** functionWithDoubleCharPtr(char** x) {
-    TNSLog([NSString stringWithFormat:@"%s", x[0]]);
-    TNSLog([NSString stringWithFormat:@"%s", x[1]]);
+    TNSLog([NSString stringWithUTF8String:x[0]]);
+    TNSLog([NSString stringWithUTF8String:x[1]]);
 
     free(x[0]);
     x[0] = calloc(4, sizeof(char));
diff --git a/tests/TestRunner/app/Infrastructure/utf8.js b/tests/TestRunner/app/Infrastructure/utf8.js
@@ -0,0 +1,204 @@
+// Copied from https://github.com/mathiasbynens/utf8.js/blob/v3.0.0/utf8.js
+
+/*! https://mths.be/utf8js v3.0.0 by @mathias */
+;(function(root) {
+
+    var stringFromCharCode = String.fromCharCode;
+
+    // Taken from https://mths.be/punycode
+    function ucs2decode(string) {
+        var output = [];
+        var counter = 0;
+        var length = string.length;
+        var value;
+        var extra;
+        while (counter < length) {
+            value = string.charCodeAt(counter++);
+            if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
+                // high surrogate, and there is a next character
+                extra = string.charCodeAt(counter++);
+                if ((extra & 0xFC00) == 0xDC00) { // low surrogate
+                    output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
+                } else {
+                    // unmatched surrogate; only append this code unit, in case the next
+                    // code unit is the high surrogate of a surrogate pair
+                    output.push(value);
+                    counter--;
+                }
+            } else {
+                output.push(value);
+            }
+        }
+        return output;
+    }
+
+    // Taken from https://mths.be/punycode
+    function ucs2encode(array) {
+        var length = array.length;
+        var index = -1;
+        var value;
+        var output = '';
+        while (++index < length) {
+            value = array[index];
+            if (value > 0xFFFF) {
+                value -= 0x10000;
+                output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
+                value = 0xDC00 | value & 0x3FF;
+            }
+            output += stringFromCharCode(value);
+        }
+        return output;
+    }
+
+    function checkScalarValue(codePoint) {
+        if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
+            throw Error(
+                'Lone surrogate U+' + codePoint.toString(16).toUpperCase() +
+                ' is not a scalar value'
+            );
+        }
+    }
+    /*--------------------------------------------------------------------------*/
+
+    function createByte(codePoint, shift) {
+        return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80);
+    }
+
+    function encodeCodePoint(codePoint) {
+        if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
+            return stringFromCharCode(codePoint);
+        }
+        var symbol = '';
+        if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
+            symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
+        }
+        else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
+            checkScalarValue(codePoint);
+            symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
+            symbol += createByte(codePoint, 6);
+        }
+        else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
+            symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
+            symbol += createByte(codePoint, 12);
+            symbol += createByte(codePoint, 6);
+        }
+        symbol += stringFromCharCode((codePoint & 0x3F) | 0x80);
+        return symbol;
+    }
+
+    function utf8encode(string) {
+        var codePoints = ucs2decode(string);
+        var length = codePoints.length;
+        var index = -1;
+        var codePoint;
+        var byteString = '';
+        while (++index < length) {
+            codePoint = codePoints[index];
+            byteString += encodeCodePoint(codePoint);
+        }
+        return byteString;
+    }
+
+    /*--------------------------------------------------------------------------*/
+
+    function readContinuationByte() {
+        if (byteIndex >= byteCount) {
+            throw Error('Invalid byte index');
+        }
+
+        var continuationByte = byteArray[byteIndex] & 0xFF;
+        byteIndex++;
+
+        if ((continuationByte & 0xC0) == 0x80) {
+            return continuationByte & 0x3F;
+        }
+
+        // If we end up here, it’s not a continuation byte
+        throw Error('Invalid continuation byte');
+    }
+
+    function decodeSymbol() {
+        var byte1;
+        var byte2;
+        var byte3;
+        var byte4;
+        var codePoint;
+
+        if (byteIndex > byteCount) {
+            throw Error('Invalid byte index');
+        }
+
+        if (byteIndex == byteCount) {
+            return false;
+        }
+
+        // Read first byte
+        byte1 = byteArray[byteIndex] & 0xFF;
+        byteIndex++;
+
+        // 1-byte sequence (no continuation bytes)
+        if ((byte1 & 0x80) == 0) {
+            return byte1;
+        }
+
+        // 2-byte sequence
+        if ((byte1 & 0xE0) == 0xC0) {
+            byte2 = readContinuationByte();
+            codePoint = ((byte1 & 0x1F) << 6) | byte2;
+            if (codePoint >= 0x80) {
+                return codePoint;
+            } else {
+                throw Error('Invalid continuation byte');
+            }
+        }
+
+        // 3-byte sequence (may include unpaired surrogates)
+        if ((byte1 & 0xF0) == 0xE0) {
+            byte2 = readContinuationByte();
+            byte3 = readContinuationByte();
+            codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
+            if (codePoint >= 0x0800) {
+                checkScalarValue(codePoint);
+                return codePoint;
+            } else {
+                throw Error('Invalid continuation byte');
+            }
+        }
+
+        // 4-byte sequence
+        if ((byte1 & 0xF8) == 0xF0) {
+            byte2 = readContinuationByte();
+            byte3 = readContinuationByte();
+            byte4 = readContinuationByte();
+            codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
+                (byte3 << 0x06) | byte4;
+            if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
+                return codePoint;
+            }
+        }
+
+        throw Error('Invalid UTF-8 detected');
+    }
+
+    var byteArray;
+    var byteCount;
+    var byteIndex;
+    function utf8decode(byteString) {
+        byteArray = ucs2decode(byteString);
+        byteCount = byteArray.length;
+        byteIndex = 0;
+        var codePoints = [];
+        var tmp;
+        while ((tmp = decodeSymbol()) !== false) {
+            codePoints.push(tmp);
+        }
+        return ucs2encode(codePoints);
+    }
+
+    /*--------------------------------------------------------------------------*/
+
+    root.version = '3.0.0';
+    root.encode = utf8encode;
+    root.decode = utf8decode;
+
+}(typeof exports === 'undefined' ? this.utf8 = {} : exports));
diff --git a/tests/TestRunner/app/Marshalling/ReferenceTests.js b/tests/TestRunner/app/Marshalling/ReferenceTests.js
@@ -128,15 +128,16 @@ describe(module.id, function () {
     it("Wrong type of argument passed as pointer", function () {
         expect(() => functionWith_VoidPtr(1)).toThrowError(/1 is not a pointer. \(evaluating 'functionWith_VoidPtr\(1\)'\)/);
     });
+    
     it("CString marshalling from JS string", function () {
         functionWithUCharPtr('test');
         expect(TNSGetOutput()).toBe('test');
     });
 
     it("CString as arg/return value", function () {
-        const ptr = interop.alloc(5 * interop.sizeof(interop.types.uint8));
-        var reference = new interop.Reference(interop.types.uint8, ptr);
         const str = "test";
+        const ptr = interop.alloc((str.length + 1) * interop.sizeof(interop.types.uint8));
+        var reference = new interop.Reference(interop.types.uint8, ptr);
         for (ii in str) {
             const i = parseInt(ii);
             reference[i] = str.charCodeAt(i);
@@ -145,9 +146,23 @@ describe(module.id, function () {
 
         const result = functionWithCharPtr(ptr);
 
-        expect(TNSGetOutput()).toBe('test');
+        expect(TNSGetOutput()).toBe(str);
         expect(interop.handleof(result).toNumber() == interop.handleof(ptr).toNumber());
-        expect(NSString.stringWithUTF8String(result).toString()).toBe('test');
+        expect(NSString.stringWithUTF8String(result).toString()).toBe(str);
+    });
+
+    it("CString should be passed as its UTF8 encoding and returned as a reference to unsigned characters", function () {
+        const str = "test АБВГ";
+        const result = functionWithUCharPtr(str);
+
+        expect(TNSGetOutput()).toBe(str);
+
+        const strUtf8 = utf8.encode(str);
+        for (i in strUtf8) {
+            const actual = strUtf8.charCodeAt(i);
+            const expected = result[i];
+            expect(actual).toBe(expected, `Char code difference at index ${i} ("${actual}" vs "${expected}")`);
+        }
     });
 
     // TODO: Create array type and constructor
diff --git a/tests/TestRunner/app/index.js b/tests/TestRunner/app/index.js
@@ -5,6 +5,8 @@ import "./Infrastructure/timers";
 import "./Infrastructure/simulator";
 import "./Infrastructure/utils";
 
+global.utf8 = require("./Infrastructure/utf8")
+
 global.UNUSED = function (param) {
 };
 

Original file line number	Diff line number	Diff line change
`@@ -126,7 +126,7 @@ static JSValue cStringType_read(ExecState* execState, const void* buffer, JSCell`
`126`	`126`	`}`
`127`	`127`
`128`	`128`	`GlobalObject* globalObject = jsCast<GlobalObject*>(execState->lexicalGlobalObject());`
`129`		`- JSCell* type = globalObject->typeFactory()->int8Type();`
	`129`	`+ JSCell* type = globalObject->typeFactory()->uint8Type();`
`130`	`130`	`PointerInstance* pointer = jsCast<PointerInstance*>(globalObject->interop()->pointerInstanceForPointer(execState, const_cast<char*>(string)));`
`131`	`131`	`return ReferenceInstance::create(execState->vm(), globalObject, globalObject->interop()->referenceInstanceStructure(), type, pointer).get();`
`132`	`132`	`}`
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,7 @@`
`19`	`19`	`}`
`20`	`20`
`21`	`21`	`unsigned char* functionWithUCharPtr(unsigned char* x) {`
`22`		`- TNSLog([NSString stringWithFormat:@"%s", x]);`
	`22`	`+ TNSLog([NSString stringWithUTF8String:(char*)x]);`
`23`	`23`	`return x;`
`24`	`24`	`}`
`25`	`25`
`@@ -44,7 +44,7 @@`
`44`	`44`	`}`
`45`	`45`
`46`	`46`	`char* functionWithCharPtr(char* x) {`
`47`		`- TNSLog([NSString stringWithFormat:@"%s", x]);`
	`47`	`+ TNSLog([NSString stringWithUTF8String:x]);`
`48`	`48`
`49`	`49`	`return x;`
`50`	`50`	`}`
`@@ -108,8 +108,8 @@ void functionWithIntConstantArray2(int x[2][2]) {`
`108`	`108`	`}`
`109`	`109`
`110`	`110`	`char** functionWithDoubleCharPtr(char** x) {`
`111`		`- TNSLog([NSString stringWithFormat:@"%s", x[0]]);`
`112`		`- TNSLog([NSString stringWithFormat:@"%s", x[1]]);`
	`111`	`+ TNSLog([NSString stringWithUTF8String:x[0]]);`
	`112`	`+ TNSLog([NSString stringWithUTF8String:x[1]]);`
`113`	`113`
`114`	`114`	`free(x[0]);`
`115`	`115`	`x[0] = calloc(4, sizeof(char));`