Trim PR #289 to ASAN/Valgrind CI scope

fdcastel · fdcastel · commit 09b934a49eac · 2026-04-20T12:50:55.000-03:00
Revert the MainUnicode.cpp changes (832d8e7, 0f8f90f, bea6403) per irodushka's plan in #289 comment 4279111183 — the Unicode rewrite is moving to its own PR, preceded by a widechar-tests PR. Restore continue-on-error on the ASAN matrix job temporarily; Valgrind stays strict. ASAN will flag the heap-buffer-overflow in SQLGetDiagRecW again; this is the known issue the follow-up PRs will fix and is tracked as issue #287 Tier 1b.
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
@@ -65,6 +65,12 @@ jobs:
 
     runs-on: ${{ matrix.os }}
 
+    # Temporary: the ASAN job is expected to fail on the heap-buffer-overflow
+    # inside SQLGetDiagRecW until the Linux widechar conversion paths in
+    # MainUnicode.cpp are rewritten. Tracked in issue #287 Tier 1b. Revert
+    # this back to strict once the Unicode fix PR lands.
+    continue-on-error: ${{ matrix.sanitizer == 'Asan' }}
+
     steps:
       - uses: actions/checkout@v6
         with:
diff --git a/MainUnicode.cpp b/MainUnicode.cpp
@@ -38,21 +38,6 @@
 extern FILE	*logFile;
 using namespace OdbcJdbcLibrary;
 
-#ifndef _WINDOWS
-// SQLWCHAR-aware length (in SQLWCHAR units), safe on Linux where
-// sizeof(wchar_t) != sizeof(SQLWCHAR). Do NOT use wcslen() on SQLWCHAR
-// data on Linux — it reads two SQLWCHARs per wchar_t and runs off the end.
-static size_t sqlwcharLen( const SQLWCHAR *s )
-{
-	size_t n = 0;
-	if ( !s )
-		return 0;
-	while ( s[n] )
-		++n;
-	return n;
-}
-#endif
-
 #ifdef _WINDOWS
 extern UINT codePage; // from Main.cpp
 #endif
@@ -100,7 +85,7 @@ class ConvertingString
 			if ( length == SQL_NTS )
 				lengthString = 0;
 			else if ( retCountOfBytes )
-				lengthString = length / sizeof(SQLWCHAR);
+				lengthString = length / sizeof(wchar_t);
 			else
 				lengthString = length;
 		}
@@ -150,33 +135,13 @@ class ConvertingString
 					if ( len > 0 )
 						len--;
 #else
-					// SQLWCHAR is 2 bytes on Linux (unixODBC defines it as unsigned short),
-					// but wchar_t is 4 bytes, so mbstowcs((wchar_t*)unicodeString, ...)
-					// both corrupts the output and risks overflowing the caller's buffer.
-					// Widen byte-by-byte into SQLWCHAR units, matching what unixODBC's
-					// ansi_to_unicode_copy() does internally. This is correct for the
-					// ASCII-only error/state strings that reach this code path; non-ASCII
-					// input will be handled by the broader ConvertingString rewrite tracked
-					// in issue #287 (Tier 9.1).
-					{
-						const SQLCHAR *src = byteString;
-						size_t i = 0;
-						while ( i < (size_t)lengthString && src[i] != 0 )
-						{
-							unicodeString[i] = (SQLWCHAR)( src[i] & 0xFF );
-							++i;
-						}
-						len = i;
-					}
+					len = mbstowcs( (wchar_t*)unicodeString, (const char*)byteString, lengthString );
 #endif
 				}
 
 				if ( len > 0 )
 				{
-					// NUL-terminate in SQLWCHAR units. LPWSTR assignment of L'\0' writes
-					// sizeof(wchar_t) bytes, which overruns the output buffer by 2 bytes
-					// on Linux.
-					unicodeString[len] = 0;
+					*(LPWSTR)(unicodeString + len) = L'\0';
 
 					if ( realLength )
 					{
@@ -205,18 +170,12 @@ class ConvertingString
 		wchar_t saveWC;
 
 		if ( length == SQL_NTS )
-#ifdef _WINDOWS
 			length = (int)wcslen( (const wchar_t*)wcString );
-#else
-			length = (int)sqlwcharLen( wcString );
-#endif
-		else if ( wcString[length] != 0 )
+		else if ( wcString[length] != L'\0' )
 		{
 			ptEndWC = (wchar_t*)&wcString[length];
 			saveWC = *ptEndWC;
-			// Write a SQLWCHAR-sized NUL so we don't overrun the input by 2 bytes
-			// on Linux (wchar_t is 4 bytes there).
-			wcString[length] = 0;
+			*ptEndWC = L'\0';
 		}
 
 		if ( connection )
@@ -226,10 +185,7 @@ class ConvertingString
 #ifdef _WINDOWS
 			bytesNeeded = WideCharToMultiByte( codePage, (DWORD)0, wcString, length, NULL, (int)0, NULL, NULL );
 #else
-			// See the symmetric comment in the destructor above: wcstombs assumes
-			// wchar_t-sized input, which corrupts SQLWCHAR data on Linux. The
-			// byte-narrowing loop below produces exactly `length` output bytes.
-			bytesNeeded = (size_t)length;
+			bytesNeeded = wcstombs( NULL, (const wchar_t*)wcString, length );
 #endif
 		}
 
@@ -242,15 +198,7 @@ class ConvertingString
 #ifdef _WINDOWS
 			bytesNeeded = WideCharToMultiByte( codePage, 0, wcString, length, (LPSTR)byteString, (int)bytesNeeded, NULL, NULL );
 #else
-			{
-				size_t i = 0;
-				while ( i < (size_t)length && wcString[i] != 0 )
-				{
-					byteString[i] = (SQLCHAR)( wcString[i] & 0xFF );
-					++i;
-				}
-				bytesNeeded = i;
-			}
+			bytesNeeded = wcstombs( (char *)byteString, (const wchar_t*)wcString, bytesNeeded );
 #endif
 		}
 
@@ -272,7 +220,7 @@ class ConvertingString
 			if ( lengthString )
 			{
 				byteString = new SQLCHAR[ lengthString + 2 ];
-				memset( byteString, 0, lengthString + 2 );
+				memset(byteString, 0, lengthString + 2); 
 			}
 			else
 				byteString = NULL;