@@ -10,6 +10,7 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
1516#include <limits.h>
@@ -259,6 +260,299 @@ size_t strnlen(const char* str, size_t maxLen) {
259260}
260261#endif
261262
263+ #ifdef HAVE_LIBNCURSESW
264+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char tempBuf [MB_LEN_MAX ];
268+
269+ // It is unnecessarily expensive to fix the output string if the
270+ // caller gives an incorrect buffer size. While this function will
271+ // not attempt to write out of bounds, it does not guarantee that
272+ // the string will be null terminated if the given buffer size is
273+ // too small.
274+ size_t len = wcrtomb (tempBuf , wc , & ps -> mbState );
275+ assert (len != 0 );
276+ if (len == (size_t )-1 ) {
277+ assert (len != (size_t )-1 );
278+ fail ();
279+ }
280+ if (ps -> buf ) {
281+ if (len > ps -> size - ps -> pos ) {
282+ fail ();
283+ }
284+ memcpy ((char * )ps -> buf + ps -> pos , tempBuf , len );
285+ }
286+ ps -> pos += len ;
287+ }
288+ #else
289+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
290+ assert (!ps -> buf || ps -> pos < ps -> size );
291+
292+ char * buf = ps -> buf ;
293+ if (buf )
294+ buf [ps -> pos ] = (char )c ;
295+
296+ ps -> pos += 1 ;
297+ }
298+ #endif
299+
300+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
301+ assert (src || maxLen == 0 );
302+
303+ size_t pos = 0 ;
304+ bool wasReplaced = false;
305+
306+ #ifdef HAVE_LIBNCURSESW
307+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
308+ wchar_t ch ;
309+
310+ mbstate_t decState = {0 };
311+ #else
312+ const char replacementChar = '?' ;
313+ char ch ;
314+ #endif
315+
316+ do {
317+ size_t len = 0 ;
318+ bool shouldReplace = false;
319+ ch = 0 ;
320+
321+ if (pos < maxLen ) {
322+ // Read the next character from the byte sequence
323+ #ifdef HAVE_LIBNCURSESW
324+ mbstate_t newState ;
325+ memcpy (& newState , & decState , sizeof (newState ));
326+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
327+
328+ assert (len != 0 || ch == 0 );
329+ switch (len ) {
330+ case (size_t )-2 :
331+ errno = EILSEQ ;
332+ shouldReplace = true;
333+ len = maxLen - pos ;
334+ break ;
335+
336+ case (size_t )-1 :
337+ shouldReplace = true;
338+ len = 1 ;
339+ break ;
340+
341+ default :
342+ memcpy (& decState , & newState , sizeof (decState ));
343+ }
344+ #else
345+ len = 1 ;
346+ ch = src [pos ];
347+ #endif
348+ }
349+
350+ pos += len ;
351+
352+ // Filter unprintable characters
353+ if (!shouldReplace && ch != 0 ) {
354+ #ifdef HAVE_LIBNCURSESW
355+ shouldReplace = !iswprint (ch );
356+ #else
357+ shouldReplace = !isprint ((unsigned char )ch );
358+ #endif
359+ }
360+
361+ if (shouldReplace ) {
362+ ch = replacementChar ;
363+ if (wasReplaced )
364+ continue ;
365+ }
366+ wasReplaced = shouldReplace ;
367+
368+ encodeWChar (ps , ch );
369+ } while (ch != 0 );
370+ }
371+
372+ char * String_makePrintable (const char * str , size_t maxLen ) {
373+ WCharEncoderState encState = {0 };
374+
375+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
376+ size_t size = encState .pos ;
377+ assert (size > 0 );
378+
379+ memset (& encState , 0 , sizeof (encState ));
380+ char * buf = xMalloc (size );
381+ encState .size = size ;
382+ encState .buf = buf ;
383+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
384+ assert (encState .pos == size );
385+
386+ return buf ;
387+ }
388+
389+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
390+ if (!ps -> str || ps -> maxLen == 0 )
391+ return false;
392+
393+ // If the previous call of this function encounters an invalid sequence,
394+ // do not continue (because the "mbState" object for mbrtowc() is
395+ // undefined). The caller is supposed to reset the state.
396+ #ifdef HAVE_LIBNCURSESW
397+ bool isStateDefined = ps -> ch != WEOF ;
398+ #else
399+ bool isStateDefined = ps -> ch != EOF ;
400+ #endif
401+ if (!isStateDefined )
402+ return false;
403+
404+ #ifdef HAVE_LIBNCURSESW
405+ wchar_t wc ;
406+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
407+ switch (len ) {
408+ case (size_t )-1 :
409+ // Invalid sequence
410+ ps -> ch = WEOF ;
411+ return false;
412+
413+ case (size_t )-2 :
414+ // Incomplete sequence
415+ ps -> str += ps -> maxLen ;
416+ ps -> maxLen = 0 ;
417+ return false;
418+
419+ case 0 :
420+ assert (wc == 0 );
421+
422+ ps -> str = NULL ;
423+ ps -> maxLen = 0 ;
424+ ps -> ch = wc ;
425+ return true;
426+
427+ default :
428+ ps -> str += len ;
429+ ps -> maxLen -= len ;
430+ ps -> ch = wc ;
431+ }
432+ return true;
433+ #else
434+ const size_t len = 1 ;
435+ ps -> ch = * ps -> str ;
436+ if (ps -> ch == 0 ) {
437+ ps -> str = NULL ;
438+ ps -> maxLen = 0 ;
439+ } else {
440+ ps -> str += len ;
441+ ps -> maxLen -= len ;
442+ }
443+ return true;
444+ #endif
445+ }
446+
447+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
448+ assert (* str || maxLen == 0 );
449+
450+ // The caller should ensure (maxWidth >= 0).
451+ // It's possible for a Unicode string to occupy 0 terminal columns, so this
452+ // function allows (maxWidth == 0).
453+ if (maxWidth < 0 )
454+ maxWidth = INT_MAX ;
455+
456+ #ifdef HAVE_LIBNCURSESW
457+ // If the character takes zero columns, include the character in the
458+ // substring if the working encoding is UTF-8, and ignore it otherwise.
459+ // In Unicode, combining characters are always placed after the base
460+ // character, but some legacy 8-bit encodings instead place combining
461+ // characters before the base character.
462+ const bool isUnicode = CRT_utf8 ;
463+ #else
464+ const bool isUnicode = false;
465+ #endif
466+
467+ int totalWidth = 0 ;
468+
469+ MBStringDecoderState state = {0 };
470+ state .str = * str ;
471+ state .maxLen = maxLen ;
472+
473+ bool inSpaces = true;
474+ const char * breakPos = NULL ;
475+ int breakWidth = 0 ;
476+
477+ while (totalWidth < maxWidth || isUnicode ) {
478+ assert (totalWidth <= maxWidth );
479+
480+ if (!String_decodeNextWChar (& state ))
481+ break ;
482+ if (state .ch == 0 )
483+ break ;
484+
485+ if (state .ch == ' ' && separator == ' ' && !inSpaces ) {
486+ inSpaces = true;
487+ breakPos = * str ;
488+ breakWidth = totalWidth ;
489+ }
490+
491+ #ifdef HAVE_LIBNCURSESW
492+ int cw = wcwidth ((wchar_t )state .ch );
493+ if (cw < 0 ) {
494+ // This function should not be used with string containing unprintable
495+ // characters. Tolerate them on release build, however.
496+ assert (cw >= 0 );
497+ break ;
498+ }
499+ #else
500+ assert (isprint (state .ch ));
501+ const int cw = 1 ;
502+ #endif
503+
504+ if (cw > maxWidth - totalWidth ) {
505+ // This character cannot fit the line with the given maxWidth.
506+ if (breakPos ) {
507+ // Rewind the scanning state to the last found separator.
508+ totalWidth = breakWidth ;
509+ * str = breakPos ;
510+ }
511+ break ;
512+ }
513+
514+ if (cw <= 0 && !isUnicode )
515+ continue ;
516+
517+ totalWidth += cw ;
518+
519+ // (*str - start) will represent the length of the substring bounded
520+ // by the width limit.
521+ * str = state .str ;
522+
523+ if (state .ch != ' ' )
524+ inSpaces = false;
525+
526+ #ifdef HAVE_LIBNCURSESW
527+ bool isSeparator = state .ch == (wint_t )separator ;
528+ #else
529+ bool isSeparator = state .ch == (int )separator ;
530+ #endif
531+ if (isSeparator && separator != ' ' ) {
532+ breakPos = * str ;
533+ breakWidth = totalWidth ;
534+ }
535+ }
536+
537+ return totalWidth ;
538+ }
539+
// Measures the width of the string at "*str" (at most "maxLen" bytes),
// bounded by "maxWidth" columns, and advances "*str" past the measured
// part. A negative "maxWidth" is treated as INT_MAX.
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   // A null separator means no break position is ever recorded.
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   // Without wide character support every byte counts as one column, so
   // the width limit also caps the number of bytes scanned.
   const size_t widthCap = (maxWidth < 0) ? (size_t)INT_MAX : (size_t)maxWidth;
   const size_t scanLimit = MINIMUM(widthCap, maxLen);

   const size_t taken = strnlen(*str, scanLimit);
   *str += taken;
   return (int)taken;
#endif
}
555+
262556int xAsprintf (char * * strp , const char * fmt , ...) {
263557 * strp = NULL ;
264558
0 commit comments