@@ -10,6 +10,7 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
1516#include <limits.h>
@@ -259,6 +260,292 @@ size_t strnlen(const char* str, size_t maxLen) {
259260}
260261#endif
261262
263+ #ifdef HAVE_LIBNCURSESW
264+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char tempBuf [MB_LEN_MAX ];
268+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
269+
270+ // It is unnecessarily expensive to fix the output string if the caller
271+ // gives an incorrect buffer size. This function would not support any
272+ // truncation of the output string.
273+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
274+ assert (len > 0 );
275+ if (len == (size_t )-1 ) {
276+ assert (len != (size_t )-1 );
277+ fail ();
278+ }
279+ if (ps -> buf && len > ps -> size - ps -> pos ) {
280+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
281+ fail ();
282+ }
283+
284+ ps -> pos += len ;
285+ }
286+ #else
287+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
288+ assert (!ps -> buf || ps -> pos < ps -> size );
289+
290+ char * buf = ps -> buf ;
291+ if (buf )
292+ buf [ps -> pos ] = (char )c ;
293+
294+ ps -> pos += 1 ;
295+ }
296+ #endif
297+
298+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
299+ assert (src || maxLen == 0 );
300+
301+ size_t pos = 0 ;
302+ bool wasReplaced = false;
303+
304+ #ifdef HAVE_LIBNCURSESW
305+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
306+ wchar_t ch ;
307+
308+ mbstate_t decState ;
309+ memset (& decState , 0 , sizeof (decState ));
310+ #else
311+ const char replacementChar = '?' ;
312+ char ch ;
313+ #endif
314+
315+ do {
316+ size_t len = 0 ;
317+ bool shouldReplace = false;
318+ ch = 0 ;
319+
320+ if (pos < maxLen ) {
321+ // Read the next character from the byte sequence
322+ #ifdef HAVE_LIBNCURSESW
323+ mbstate_t newState ;
324+ memcpy (& newState , & decState , sizeof (newState ));
325+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
326+
327+ assert (len != 0 || ch == 0 );
328+ switch (len ) {
329+ case (size_t )-2 :
330+ errno = EILSEQ ;
331+ shouldReplace = true;
332+ len = maxLen - pos ;
333+ break ;
334+
335+ case (size_t )-1 :
336+ shouldReplace = true;
337+ len = 1 ;
338+ break ;
339+
340+ default :
341+ memcpy (& decState , & newState , sizeof (decState ));
342+ }
343+ #else
344+ len = 1 ;
345+ ch = src [pos ];
346+ #endif
347+ }
348+
349+ pos += len ;
350+
351+ // Filter unprintable characters
352+ if (!shouldReplace && ch != 0 ) {
353+ #ifdef HAVE_LIBNCURSESW
354+ shouldReplace = !iswprint (ch );
355+ #else
356+ shouldReplace = !isprint ((unsigned char )ch );
357+ #endif
358+ }
359+
360+ if (shouldReplace ) {
361+ ch = replacementChar ;
362+ if (wasReplaced )
363+ continue ;
364+ }
365+ wasReplaced = shouldReplace ;
366+
367+ encodeWChar (ps , ch );
368+ } while (ch != 0 );
369+ }
370+
371+ char * String_makePrintable (const char * str , size_t maxLen ) {
372+ WCharEncoderState encState ;
373+
374+ memset (& encState , 0 , sizeof (encState ));
375+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
376+ size_t size = encState .pos ;
377+ assert (size > 0 );
378+
379+ memset (& encState , 0 , sizeof (encState ));
380+ char * buf = xMalloc (size );
381+ encState .size = size ;
382+ encState .buf = buf ;
383+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
384+ assert (encState .pos == size );
385+
386+ return buf ;
387+ }
388+
389+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
390+ if (!ps -> str || ps -> maxLen == 0 )
391+ return false;
392+
393+ // If the previous call of this function encounters an invalid sequence,
394+ // do not continue (because the "mbState" object for mbrtowc() is
395+ // undefined). The caller is supposed to reset the state.
396+ #ifdef HAVE_LIBNCURSESW
397+ bool isStateDefined = ps -> ch != WEOF ;
398+ #else
399+ bool isStateDefined = ps -> ch != EOF ;
400+ #endif
401+ if (!isStateDefined )
402+ return false;
403+
404+ #ifdef HAVE_LIBNCURSESW
405+ wchar_t wc ;
406+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
407+ switch (len ) {
408+ case (size_t )-1 :
409+ // Invalid sequence
410+ ps -> ch = WEOF ;
411+ return false;
412+
413+ case (size_t )-2 :
414+ // Incomplete sequence
415+ ps -> str += ps -> maxLen ;
416+ ps -> maxLen = 0 ;
417+ return false;
418+
419+ case 0 :
420+ assert (wc == 0 );
421+
422+ ps -> str = NULL ;
423+ ps -> maxLen = 0 ;
424+ ps -> ch = wc ;
425+ return true;
426+
427+ default :
428+ ps -> str += len ;
429+ ps -> maxLen -= len ;
430+ ps -> ch = wc ;
431+ }
432+ return true;
433+ #else
434+ const size_t len = 1 ;
435+ ps -> ch = * ps -> str ;
436+ if (ps -> ch == 0 ) {
437+ ps -> str = NULL ;
438+ ps -> maxLen = 0 ;
439+ } else {
440+ ps -> str += len ;
441+ ps -> maxLen -= len ;
442+ }
443+ return true;
444+ #endif
445+ }
446+
447+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
448+ assert (* str || maxLen == 0 );
449+
450+ // The caller should ensure (maxWidth >= 0).
451+ // It's possible for a Unicode string to occupy 0 terminal columns, so this
452+ // function allows (maxWidth == 0).
453+ if (maxWidth < 0 )
454+ maxWidth = INT_MAX ;
455+
456+ MBStringDecoderState state ;
457+ memset (& state , 0 , sizeof (state ));
458+ state .str = * str ;
459+ state .maxLen = maxLen ;
460+
461+ int totalWidth = 0 ;
462+ int breakWidth = 0 ;
463+
464+ const char * breakPos = NULL ;
465+ bool inSpaces = true;
466+
467+ while (String_decodeNextWChar (& state )) {
468+ if (state .ch == 0 )
469+ break ;
470+
471+ if (state .ch == ' ' && separator == ' ' && !inSpaces ) {
472+ breakWidth = totalWidth ;
473+ breakPos = * str ;
474+ inSpaces = true;
475+ }
476+
477+ #ifdef HAVE_LIBNCURSESW
478+ int cw = wcwidth ((wchar_t )state .ch );
479+ if (cw < 0 ) {
480+ // This function should not be used with string containing unprintable
481+ // characters. Tolerate them on release build, however.
482+ assert (cw >= 0 );
483+ break ;
484+ }
485+ #else
486+ assert (isprint (state .ch ));
487+ const int cw = 1 ;
488+ #endif
489+
490+ if (cw > maxWidth - totalWidth ) {
491+ // This character cannot fit the line with the given maxWidth.
492+ if (breakPos ) {
493+ // Rewind the scanning state to the last found separator.
494+ totalWidth = breakWidth ;
495+ * str = breakPos ;
496+ }
497+ break ;
498+ }
499+
500+ #ifdef HAVE_LIBNCURSESW
501+ // If the character takes zero columns, include the character in the
502+ // substring if the working encoding is UTF-8, and ignore it otherwise.
503+ // In Unicode, combining characters are always placed after the base
504+ // character, but some legacy 8-bit encodings instead place combining
505+ // characters before the base character.
506+ if (cw <= 0 && !CRT_utf8 )
507+ continue ;
508+ #endif
509+
510+ totalWidth += cw ;
511+
512+ // (*str - start) will represent the length of the substring bounded
513+ // by the width limit.
514+ * str = state .str ;
515+
516+ if (state .ch != ' ' )
517+ inSpaces = false;
518+
519+ #ifdef HAVE_LIBNCURSESW
520+ bool isSeparator = state .ch == (wint_t )separator ;
521+ #else
522+ bool isSeparator = state .ch == (int )separator ;
523+ #endif
524+ if (isSeparator && separator != ' ' ) {
525+ breakWidth = totalWidth ;
526+ breakPos = * str ;
527+ }
528+ }
529+
530+ return totalWidth ;
531+ }
532+
/*
 * Measures how much of "*str" (at most "maxLen" bytes) fits in "maxWidth"
 * terminal columns. A negative "maxWidth" means "no limit".
 *
 * On return "*str" points just past the measured part and the return value
 * is its width in columns. In the single-byte build every byte counts as
 * one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   if (maxWidth < 0)
      maxWidth = INT_MAX;

   maxLen = MINIMUM((size_t)maxWidth, maxLen);

   // Nothing may be consumed. Returning here also keeps a NULL "*str"
   // (permitted by the assertion when maxLen is 0) away from strnlen() and
   // from pointer arithmetic.
   if (maxLen == 0)
      return 0;

   size_t len = strnlen(*str, maxLen);
   *str += len;
   return (int)len;
#endif
}
548+
262549int xAsprintf (char * * strp , const char * fmt , ...) {
263550 * strp = NULL ;
264551
0 commit comments