Skip to content

Commit bea6403

Browse files
committed
Use byte-level widening/narrowing on Linux SQLWCHAR paths
Follow-up to the ConvertingString discussion in #289.

The 8-byte floor in Alloc() introduced by the previous commit was a crutch: it avoided the strcpy heap overflow for the SQL-state case but left the destructor's mbstowcs((wchar_t*)unicodeString, ..., lengthString) writing 4-byte wchar_t units into a caller buffer that is SQLWCHAR-sized (2 bytes per unit). Even when that did not overflow, the data was wrong — 'HY000' became 'H' after the client reinterpreted the bytes as UCS-2.

Replace the Linux non-connection paths in both directions with the same byte-widening / byte-narrowing loop that unixODBC itself uses internally (ansi_to_unicode_copy / unicode_to_ansi_copy). This is correct for the ASCII-only error/state strings that reach this code; non-ASCII handling remains the subject of the broader rewrite tracked as Tier 9.1 in #287.

Changes in MainUnicode.cpp:
- lengthString now uses sizeof(SQLWCHAR) again (correct SQLWCHAR count).
- Destructor Linux branch widens bytes into SQLWCHAR units.
- convUnicodeToString Linux branch narrows SQLWCHAR to low bytes.
- Temporary NUL is written as a SQLWCHAR-sized zero (not wchar_t).
- Remove the Alloc() floor — no longer needed.
- Add sqlwcharLen() helper; wcslen() on SQLWCHAR data is unsafe on Linux.
1 parent 0f8f90f commit bea6403

1 file changed

Lines changed: 61 additions & 17 deletions

File tree

MainUnicode.cpp

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,21 @@
3838
extern FILE *logFile;
3939
using namespace OdbcJdbcLibrary;
4040

41+
#ifndef _WINDOWS
42+
// SQLWCHAR-aware length (in SQLWCHAR units), safe on Linux where
43+
// sizeof(wchar_t) != sizeof(SQLWCHAR). Do NOT use wcslen() on SQLWCHAR
44+
// data on Linux — it treats each pair of SQLWCHARs as one wchar_t, so it
// can step over the 2-byte terminator and run off the end.
45+
static size_t sqlwcharLen( const SQLWCHAR *s )
46+
{
47+
size_t n = 0;
48+
if ( !s )
49+
return 0;
50+
while ( s[n] )
51+
++n;
52+
return n;
53+
}
54+
#endif
55+
4156
#ifdef _WINDOWS
4257
extern UINT codePage; // from Main.cpp
4358
#endif
@@ -85,7 +100,7 @@ class ConvertingString
85100
if ( length == SQL_NTS )
86101
lengthString = 0;
87102
else if ( retCountOfBytes )
88-
lengthString = length / sizeof(wchar_t);
103+
lengthString = length / sizeof(SQLWCHAR);
89104
else
90105
lengthString = length;
91106
}
@@ -135,13 +150,33 @@ class ConvertingString
135150
if ( len > 0 )
136151
len--;
137152
#else
138-
len = mbstowcs( (wchar_t*)unicodeString, (const char*)byteString, lengthString );
153+
// SQLWCHAR is 2 bytes on Linux (unixODBC defines it as unsigned short),
154+
// but wchar_t is 4 bytes, so mbstowcs((wchar_t*)unicodeString, ...)
155+
// both corrupts the output and risks overflowing the caller's buffer.
156+
// Widen byte-by-byte into SQLWCHAR units, matching what unixODBC's
157+
// ansi_to_unicode_copy() does internally. This is correct for the
158+
// ASCII-only error/state strings that reach this code path; non-ASCII
159+
// input will be handled by the broader ConvertingString rewrite tracked
160+
// in issue #287 (Tier 9.1).
161+
{
162+
const SQLCHAR *src = byteString;
163+
size_t i = 0;
164+
while ( i < (size_t)lengthString && src[i] != 0 )
165+
{
166+
unicodeString[i] = (SQLWCHAR)( src[i] & 0xFF );
167+
++i;
168+
}
169+
len = i;
170+
}
139171
#endif
140172
}
141173

142174
if ( len > 0 )
143175
{
144-
*(LPWSTR)(unicodeString + len) = L'\0';
176+
// NUL-terminate in SQLWCHAR units. LPWSTR assignment of L'\0' writes
177+
// sizeof(wchar_t) bytes, which overruns the output buffer by 2 bytes
178+
// on Linux.
179+
unicodeString[len] = 0;
145180

146181
if ( realLength )
147182
{
@@ -170,12 +205,18 @@ class ConvertingString
170205
wchar_t saveWC;
171206

172207
if ( length == SQL_NTS )
208+
#ifdef _WINDOWS
173209
length = (int)wcslen( (const wchar_t*)wcString );
174-
else if ( wcString[length] != L'\0' )
210+
#else
211+
length = (int)sqlwcharLen( wcString );
212+
#endif
213+
else if ( wcString[length] != 0 )
175214
{
176215
ptEndWC = (wchar_t*)&wcString[length];
177216
saveWC = *ptEndWC;
178-
*ptEndWC = L'\0';
217+
// Write a SQLWCHAR-sized NUL so we don't overrun the input by 2 bytes
218+
// on Linux (wchar_t is 4 bytes there).
219+
wcString[length] = 0;
179220
}
180221

181222
if ( connection )
@@ -185,7 +226,10 @@ class ConvertingString
185226
#ifdef _WINDOWS
186227
bytesNeeded = WideCharToMultiByte( codePage, (DWORD)0, wcString, length, NULL, (int)0, NULL, NULL );
187228
#else
188-
bytesNeeded = wcstombs( NULL, (const wchar_t*)wcString, length );
229+
// See the symmetric comment in the destructor above: wcstombs assumes
230+
// wchar_t-sized input, which corrupts SQLWCHAR data on Linux. The
231+
// byte-narrowing loop below produces at most `length` output bytes.
232+
bytesNeeded = (size_t)length;
189233
#endif
190234
}
191235

@@ -198,7 +242,15 @@ class ConvertingString
198242
#ifdef _WINDOWS
199243
bytesNeeded = WideCharToMultiByte( codePage, 0, wcString, length, (LPSTR)byteString, (int)bytesNeeded, NULL, NULL );
200244
#else
201-
bytesNeeded = wcstombs( (char *)byteString, (const wchar_t*)wcString, bytesNeeded );
245+
{
246+
size_t i = 0;
247+
while ( i < (size_t)length && wcString[i] != 0 )
248+
{
249+
byteString[i] = (SQLCHAR)( wcString[i] & 0xFF );
250+
++i;
251+
}
252+
bytesNeeded = i;
253+
}
202254
#endif
203255
}
204256

@@ -219,16 +271,8 @@ class ConvertingString
219271
case BYTESCHARS:
220272
if ( lengthString )
221273
{
222-
// Floor the internal buffer at 8 bytes so that callers which pass a
223-
// small SQLWCHAR output buffer (e.g. SQLGetDiagRecW with a 12-byte
224-
// SQL state, yielding lengthString=3 on Linux where sizeof(wchar_t)=4)
225-
// still have room for the 6-byte SQL state ("HY000\0") that
226-
// OdbcError::sqlGetDiagRec strcpy's into this buffer. Keeping
227-
// lengthString itself unchanged preserves the mbstowcs writeback
228-
// bound and avoids smashing the caller's stack buffer.
229-
const size_t bufSize = (lengthString + 2 < 8) ? 8 : (size_t)lengthString + 2;
230-
byteString = new SQLCHAR[ bufSize ];
231-
memset(byteString, 0, bufSize);
274+
byteString = new SQLCHAR[ lengthString + 2 ];
275+
memset( byteString, 0, lengthString + 2 );
232276
}
233277
else
234278
byteString = NULL;

0 commit comments

Comments
 (0)