@@ -10,6 +10,7 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
1516#include <limits.h>
@@ -259,6 +260,297 @@ size_t strnlen(const char* str, size_t maxLen) {
259260}
260261#endif
261262
263+ #ifdef HAVE_LIBNCURSESW
264+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char tempBuf [MB_LEN_MAX ];
268+
269+ // This function will null terminate the string only upon a call
270+ // with (wc == 0). It might take more than a single NUL byte to
271+ // terminate a string when using the C multibyte functions and a
272+ // non-Unicode encoding, thus this function won't support truncation
273+ // of a string. The caller must provide the right size in ps->size
274+ // if ps->buf is not NULL.
275+ size_t len = wcrtomb (tempBuf , wc , & ps -> mbState );
276+ assert (len != 0 );
277+ if (len == (size_t )-1 ) {
278+ assert (len != (size_t )-1 );
279+ fail ();
280+ }
281+ if (ps -> buf ) {
282+ if (len > ps -> size - ps -> pos ) {
283+ fail ();
284+ }
285+ memcpy ((char * )ps -> buf + ps -> pos , tempBuf , len );
286+ }
287+ ps -> pos += len ;
288+ }
289+ #else
290+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
291+ assert (!ps -> buf || ps -> pos < ps -> size );
292+
293+ char * buf = ps -> buf ;
294+ if (buf )
295+ buf [ps -> pos ] = (char )c ;
296+
297+ ps -> pos += 1 ;
298+ }
299+ #endif
300+
301+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
302+ assert (src || maxLen == 0 );
303+
304+ size_t pos = 0 ;
305+ bool wasReplaced = false;
306+
307+ #ifdef HAVE_LIBNCURSESW
308+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
309+ wchar_t ch ;
310+
311+ mbstate_t decState = {0 };
312+ #else
313+ const char replacementChar = '?' ;
314+ char ch ;
315+ #endif
316+
317+ do {
318+ size_t len = 0 ;
319+ bool shouldReplace = false;
320+ ch = 0 ;
321+
322+ if (pos < maxLen ) {
323+ // Read the next character from the byte sequence
324+ #ifdef HAVE_LIBNCURSESW
325+ mbstate_t newState ;
326+ memcpy (& newState , & decState , sizeof (newState ));
327+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
328+
329+ assert (len != 0 || ch == 0 );
330+ switch (len ) {
331+ case (size_t )-2 :
332+ errno = EILSEQ ;
333+ shouldReplace = true;
334+ len = maxLen - pos ;
335+ break ;
336+
337+ case (size_t )-1 :
338+ shouldReplace = true;
339+ len = 1 ;
340+ break ;
341+
342+ default :
343+ memcpy (& decState , & newState , sizeof (decState ));
344+ }
345+ #else
346+ len = 1 ;
347+ ch = src [pos ];
348+ #endif
349+ }
350+
351+ pos += len ;
352+
353+ // Filter unprintable characters
354+ if (!shouldReplace && ch != 0 ) {
355+ #ifdef HAVE_LIBNCURSESW
356+ shouldReplace = !iswprint (ch );
357+ #else
358+ shouldReplace = !isprint ((unsigned char )ch );
359+ #endif
360+ }
361+
362+ if (shouldReplace ) {
363+ ch = replacementChar ;
364+ if (wasReplaced )
365+ continue ;
366+ }
367+ wasReplaced = shouldReplace ;
368+
369+ encodeWChar (ps , ch );
370+ } while (ch != 0 );
371+ }
372+
373+ char * String_makePrintable (const char * str , size_t maxLen ) {
374+ WCharEncoderState encState = {0 };
375+
376+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
377+ size_t size = encState .pos ;
378+ assert (size > 0 );
379+
380+ memset (& encState , 0 , sizeof (encState ));
381+ char * buf = xMalloc (size );
382+ encState .size = size ;
383+ encState .buf = buf ;
384+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
385+ assert (encState .pos == size );
386+
387+ return buf ;
388+ }
389+
390+ bool MBStringDecoder_nextWChar (MBStringDecoder * decoder ) {
391+ if (!decoder -> str || decoder -> maxLen == 0 )
392+ return false;
393+
394+ // If the previous call of this function encounters an invalid sequence,
395+ // do not continue (because the "mbState" object for mbrtowc() is
396+ // undefined). The caller is supposed to reset the state.
397+ #ifdef HAVE_LIBNCURSESW
398+ if (decoder -> ch == WEOF )
399+ return false;
400+ #endif
401+
402+ #ifdef HAVE_LIBNCURSESW
403+ wchar_t ch ;
404+ size_t len = mbrtowc (& ch , decoder -> str , decoder -> maxLen , & decoder -> mbState );
405+
406+ // These assertions ensure the mbrtowc() implementation is correct
407+ assert (len == 0 || len >= (size_t )-2 || ch != 0 );
408+ assert (len != 0 || ch == 0 );
409+
410+ switch (len ) {
411+ case (size_t )-1 :
412+ // Invalid sequence. decoder->str remains at the position where
413+ // the first byte of the invalid sequence is found.
414+ decoder -> ch = WEOF ;
415+ return false;
416+
417+ case (size_t )-2 :
418+ // Incomplete sequence
419+ decoder -> str += decoder -> maxLen ;
420+ decoder -> maxLen = 0 ;
421+ return false;
422+
423+ case 0 :
424+ // End of string. This assignment is an optimization hint.
425+ ch = 0 ;
426+ }
427+ #else
428+ char ch = * decoder -> str ;
429+ const size_t len = 1 ;
430+ #endif
431+
432+ if (ch == 0 ) {
433+ // Setting "str" to NULL prevents subsequent calls from reading
434+ // out of bounds.
435+ decoder -> str = NULL ;
436+ decoder -> maxLen = 0 ;
437+ } else {
438+ decoder -> str += len ;
439+ decoder -> maxLen -= len ;
440+ }
441+ decoder -> ch = ch ;
442+ return true;
443+ }
444+
445+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
446+ assert (* str || maxLen == 0 );
447+
448+ // The caller should ensure (maxWidth >= 0).
449+ // It's possible for a Unicode string to occupy 0 terminal columns, so this
450+ // function allows (maxWidth == 0).
451+ if (maxWidth < 0 )
452+ maxWidth = INT_MAX ;
453+
454+ #ifdef HAVE_LIBNCURSESW
455+ // If the character takes zero columns, include the character in the
456+ // substring if the working encoding is UTF-8, and ignore it otherwise.
457+ // In Unicode, combining characters are always placed after the base
458+ // character, but some legacy 8-bit encodings instead place combining
459+ // characters before the base character.
460+ const bool isUnicode = CRT_utf8 ;
461+ #else
462+ const bool isUnicode = false;
463+ #endif
464+
465+ int totalWidth = 0 ;
466+
467+ MBStringDecoder decoder = {0 };
468+ decoder .str = * str ;
469+ decoder .maxLen = maxLen ;
470+
471+ bool inSpaces = true;
472+ const char * breakPos = NULL ;
473+ int breakWidth = 0 ;
474+
475+ while (totalWidth < maxWidth || isUnicode ) {
476+ assert (totalWidth <= maxWidth );
477+
478+ if (!MBStringDecoder_nextWChar (& decoder ))
479+ break ;
480+ if (decoder .ch == 0 )
481+ break ;
482+
483+ if (decoder .ch == ' ' && separator == ' ' && !inSpaces ) {
484+ inSpaces = true;
485+ breakPos = * str ;
486+ breakWidth = totalWidth ;
487+ }
488+
489+ #ifdef HAVE_LIBNCURSESW
490+ int cw = wcwidth ((wchar_t )decoder .ch );
491+ if (cw < 0 ) {
492+ // This function should not be used with string containing unprintable
493+ // characters. Tolerate them on release build, however.
494+ assert (cw >= 0 );
495+ break ;
496+ }
497+ #else
498+ assert (isprint (decoder .ch ));
499+ const int cw = 1 ;
500+ #endif
501+
502+ if (cw > maxWidth - totalWidth ) {
503+ // This character cannot fit the line with the given maxWidth.
504+ if (breakPos ) {
505+ // Rewind the scanning state to the last found separator.
506+ totalWidth = breakWidth ;
507+ * str = breakPos ;
508+ }
509+ break ;
510+ }
511+
512+ if (cw <= 0 && !isUnicode )
513+ continue ;
514+
515+ totalWidth += cw ;
516+
517+ // (*str - start) will represent the length of the substring bounded
518+ // by the width limit.
519+ * str = decoder .str ;
520+
521+ if (decoder .ch != ' ' )
522+ inSpaces = false;
523+
524+ #ifdef HAVE_LIBNCURSESW
525+ bool isSeparator = decoder .ch == (wint_t )separator ;
526+ #else
527+ bool isSeparator = decoder .ch == (int )separator ;
528+ #endif
529+ if (isSeparator && separator != ' ' ) {
530+ breakPos = * str ;
531+ breakWidth = totalWidth ;
532+ }
533+ }
534+
535+ return totalWidth ;
536+ }
537+
/*
 * Measures how much of "*str" (at most "maxLen" bytes) fits in "maxWidth"
 * columns and advances "*str" past that part. A negative "maxWidth" is
 * treated as INT_MAX. Returns the measured width.
 *
 * In the wide-character build this is String_lineBreakWidth() with the
 * separator set to '\0'; otherwise every byte counts as one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   const size_t widthLimit = (size_t)(maxWidth < 0 ? INT_MAX : maxWidth);
   const size_t scanLimit = MINIMUM(widthLimit, maxLen);

   const size_t len = strnlen(*str, scanLimit);
   *str += len;
   return (int)len;
#endif
}
553+
262554int xAsprintf (char * * strp , const char * fmt , ...) {
263555 * strp = NULL ;
264556
0 commit comments