@@ -10,6 +10,7 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
1516#include <limits.h>
@@ -259,6 +260,300 @@ size_t strnlen(const char* str, size_t maxLen) {
259260}
260261#endif
261262
263+ #ifdef HAVE_LIBNCURSESW
264+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char tempBuf [MB_LEN_MAX ];
268+
269+ // This function will null terminate the string only upon a call
270+ // with (wc == 0). It might take more than a single NUL byte to
271+ // terminate a string when using the C multibyte functions and a
272+ // non-Unicode encoding, thus this function won't support truncation
273+ // of a string. The caller must provide the right size in ps->size
274+ // if ps->buf is not NULL.
275+ size_t len = wcrtomb (tempBuf , wc , & ps -> mbState );
276+ assert (len != 0 );
277+ if (len == (size_t )-1 ) {
278+ assert (len != (size_t )-1 );
279+ fail ();
280+ }
281+ if (ps -> buf ) {
282+ if (len > ps -> size - ps -> pos ) {
283+ fail ();
284+ }
285+ memcpy ((char * )ps -> buf + ps -> pos , tempBuf , len );
286+ }
287+ ps -> pos += len ;
288+ }
289+ #else
290+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
291+ assert (!ps -> buf || ps -> pos < ps -> size );
292+
293+ char * buf = ps -> buf ;
294+ if (buf )
295+ buf [ps -> pos ] = (char )c ;
296+
297+ ps -> pos += 1 ;
298+ }
299+ #endif
300+
301+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
302+ assert (src || maxLen == 0 );
303+
304+ size_t pos = 0 ;
305+ bool wasReplaced = false;
306+
307+ #ifdef HAVE_LIBNCURSESW
308+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
309+ wchar_t ch ;
310+
311+ mbstate_t decState = {0 };
312+ #else
313+ const char replacementChar = '?' ;
314+ char ch ;
315+ #endif
316+
317+ do {
318+ size_t len = 0 ;
319+ bool shouldReplace = false;
320+ ch = 0 ;
321+
322+ if (pos < maxLen ) {
323+ // Read the next character from the byte sequence
324+ #ifdef HAVE_LIBNCURSESW
325+ mbstate_t newState ;
326+ memcpy (& newState , & decState , sizeof (newState ));
327+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
328+
329+ assert (len != 0 || ch == 0 );
330+ switch (len ) {
331+ case (size_t )-2 :
332+ errno = EILSEQ ;
333+ shouldReplace = true;
334+ len = maxLen - pos ;
335+ break ;
336+
337+ case (size_t )-1 :
338+ shouldReplace = true;
339+ len = 1 ;
340+ break ;
341+
342+ default :
343+ memcpy (& decState , & newState , sizeof (decState ));
344+ }
345+ #else
346+ len = 1 ;
347+ ch = src [pos ];
348+ #endif
349+ }
350+
351+ pos += len ;
352+
353+ // Filter unprintable characters
354+ if (!shouldReplace && ch != 0 ) {
355+ #ifdef HAVE_LIBNCURSESW
356+ shouldReplace = !iswprint (ch );
357+ #else
358+ shouldReplace = !isprint ((unsigned char )ch );
359+ #endif
360+ }
361+
362+ if (shouldReplace ) {
363+ ch = replacementChar ;
364+ if (wasReplaced )
365+ continue ;
366+ }
367+ wasReplaced = shouldReplace ;
368+
369+ encodeWChar (ps , ch );
370+ } while (ch != 0 );
371+ }
372+
373+ char * String_makePrintable (const char * str , size_t maxLen ) {
374+ WCharEncoderState encState = {0 };
375+
376+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
377+ size_t size = encState .pos ;
378+ assert (size > 0 );
379+
380+ memset (& encState , 0 , sizeof (encState ));
381+ char * buf = xMalloc (size );
382+ encState .size = size ;
383+ encState .buf = buf ;
384+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
385+ assert (encState .pos == size );
386+
387+ return buf ;
388+ }
389+
390+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
391+ if (!ps -> str || ps -> maxLen == 0 )
392+ return false;
393+
394+ // If the previous call of this function encounters an invalid sequence,
395+ // do not continue (because the "mbState" object for mbrtowc() is
396+ // undefined). The caller is supposed to reset the state.
397+ #ifdef HAVE_LIBNCURSESW
398+ bool isStateDefined = ps -> ch != WEOF ;
399+ #else
400+ bool isStateDefined = ps -> ch != EOF ;
401+ #endif
402+ if (!isStateDefined )
403+ return false;
404+
405+ #ifdef HAVE_LIBNCURSESW
406+ wchar_t wc ;
407+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
408+ switch (len ) {
409+ case (size_t )-1 :
410+ // Invalid sequence
411+ ps -> ch = WEOF ;
412+ return false;
413+
414+ case (size_t )-2 :
415+ // Incomplete sequence
416+ ps -> str += ps -> maxLen ;
417+ ps -> maxLen = 0 ;
418+ return false;
419+
420+ case 0 :
421+ assert (wc == 0 );
422+
423+ ps -> str = NULL ;
424+ ps -> maxLen = 0 ;
425+ ps -> ch = wc ;
426+ return true;
427+
428+ default :
429+ ps -> str += len ;
430+ ps -> maxLen -= len ;
431+ ps -> ch = wc ;
432+ }
433+ return true;
434+ #else
435+ const size_t len = 1 ;
436+ ps -> ch = * ps -> str ;
437+ if (ps -> ch == 0 ) {
438+ ps -> str = NULL ;
439+ ps -> maxLen = 0 ;
440+ } else {
441+ ps -> str += len ;
442+ ps -> maxLen -= len ;
443+ }
444+ return true;
445+ #endif
446+ }
447+
448+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
449+ assert (* str || maxLen == 0 );
450+
451+ // The caller should ensure (maxWidth >= 0).
452+ // It's possible for a Unicode string to occupy 0 terminal columns, so this
453+ // function allows (maxWidth == 0).
454+ if (maxWidth < 0 )
455+ maxWidth = INT_MAX ;
456+
457+ #ifdef HAVE_LIBNCURSESW
458+ // If the character takes zero columns, include the character in the
459+ // substring if the working encoding is UTF-8, and ignore it otherwise.
460+ // In Unicode, combining characters are always placed after the base
461+ // character, but some legacy 8-bit encodings instead place combining
462+ // characters before the base character.
463+ const bool isUnicode = CRT_utf8 ;
464+ #else
465+ const bool isUnicode = false;
466+ #endif
467+
468+ int totalWidth = 0 ;
469+
470+ MBStringDecoderState state = {0 };
471+ state .str = * str ;
472+ state .maxLen = maxLen ;
473+
474+ bool inSpaces = true;
475+ const char * breakPos = NULL ;
476+ int breakWidth = 0 ;
477+
478+ while (totalWidth < maxWidth || isUnicode ) {
479+ assert (totalWidth <= maxWidth );
480+
481+ if (!String_decodeNextWChar (& state ))
482+ break ;
483+ if (state .ch == 0 )
484+ break ;
485+
486+ if (state .ch == ' ' && separator == ' ' && !inSpaces ) {
487+ inSpaces = true;
488+ breakPos = * str ;
489+ breakWidth = totalWidth ;
490+ }
491+
492+ #ifdef HAVE_LIBNCURSESW
493+ int cw = wcwidth ((wchar_t )state .ch );
494+ if (cw < 0 ) {
495+ // This function should not be used with string containing unprintable
496+ // characters. Tolerate them on release build, however.
497+ assert (cw >= 0 );
498+ break ;
499+ }
500+ #else
501+ assert (isprint (state .ch ));
502+ const int cw = 1 ;
503+ #endif
504+
505+ if (cw > maxWidth - totalWidth ) {
506+ // This character cannot fit the line with the given maxWidth.
507+ if (breakPos ) {
508+ // Rewind the scanning state to the last found separator.
509+ totalWidth = breakWidth ;
510+ * str = breakPos ;
511+ }
512+ break ;
513+ }
514+
515+ if (cw <= 0 && !isUnicode )
516+ continue ;
517+
518+ totalWidth += cw ;
519+
520+ // (*str - start) will represent the length of the substring bounded
521+ // by the width limit.
522+ * str = state .str ;
523+
524+ if (state .ch != ' ' )
525+ inSpaces = false;
526+
527+ #ifdef HAVE_LIBNCURSESW
528+ bool isSeparator = state .ch == (wint_t )separator ;
529+ #else
530+ bool isSeparator = state .ch == (int )separator ;
531+ #endif
532+ if (isSeparator && separator != ' ' ) {
533+ breakPos = * str ;
534+ breakWidth = totalWidth ;
535+ }
536+ }
537+
538+ return totalWidth ;
539+ }
540+
// Returns the width in columns of the leading part of "*str", reading at
// most "maxLen" bytes and stopping at "maxWidth" columns, and advances
// "*str" past that part. A negative "maxWidth" is treated as INT_MAX.
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   // A '\0' separator is passed, so this is a plain width measurement.
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   // Without wide character support every byte counts as one column, so
   // the width limit doubles as a limit on the number of bytes to scan.
   const size_t widthLimit = (size_t)(maxWidth < 0 ? INT_MAX : maxWidth);
   const size_t scanLimit = MINIMUM(widthLimit, maxLen);

   const size_t count = strnlen(*str, scanLimit);
   *str += count;
   return (int)count;
#endif
}
556+
262557int xAsprintf (char * * strp , const char * fmt , ...) {
263558 * strp = NULL ;
264559
0 commit comments