htop-dev · Explorer09 · May 20, 2026 · May 20, 2026 · BenBE · May 8, 2026
diff --git a/Action.c b/Action.c
@@ -432,7 +432,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
    int rem = x - SCREEN_TAB_MARGIN_LEFT;
    for (unsigned int i = 0; i < settings->nScreens; i++) {
       const char* tab = settings->screens[i]->heading;
-      int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0;
+      const char* ptr = tab;
+      int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0;
       if (width >= rem - bracketWidth + 1) {
          settings->ssIndex = i;
          Htop_Reaction reaction = HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR;

diff --git a/ScreenManager.c b/ScreenManager.c
@@ -177,9 +177,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
    (*x)++;
    if (*x >= l)
       return false;
-   int nameWidth = (int)strnlen(name, l - *x);
+   const char* ptr = name;
+   int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x);
    attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
-   mvaddnstr(*y, *x, name, nameWidth);
+   mvaddnstr(*y, *x, name, (int)(ptr - name));
    *x += nameWidth;
    if (*x >= l)
       return false;

diff --git a/XUtils.c b/XUtils.c
@@ -10,6 +10,7 @@ in the source distribution for its full text.
 #include "XUtils.h"
 
 #include <assert.h>
+#include <ctype.h> // IWYU pragma: keep
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -259,6 +260,297 @@ size_t strnlen(const char* str, size_t maxLen) {
 }
 #endif
 
+#ifdef HAVE_LIBNCURSESW
+static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   char tempBuf[MB_LEN_MAX];
+
+   // This function will null terminate the string only upon a call
+   // with (wc == 0). It might take more than a single NUL byte to
+   // terminate a string when using the C multibyte functions and a
+   // non-Unicode encoding, thus this function won't support truncation
+   // of a string. The caller must provide the right size in ps->size
+   // if ps->buf is not NULL.
+   size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
+   assert(len != 0);
+   if (len == (size_t)-1) {
+      assert(len != (size_t)-1);
+      fail();
+   }
+   if (ps->buf) {
+      if (len > ps->size - ps->pos) {
+         fail();
+      }
+      memcpy((char*)ps->buf + ps->pos, tempBuf, len);
+   }
+   ps->pos += len;
+}
+#else
+static void String_encodeWChar(WCharEncoderState* ps, int c) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   char* buf = ps->buf;
+   if (buf)
+      buf[ps->pos] = (char)c;
+
+   ps->pos += 1;
+}
+#endif
+
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
+   assert(src || maxLen == 0);
+
+   size_t pos = 0;
+   bool wasReplaced = false;
+
+#ifdef HAVE_LIBNCURSESW
+   const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
+   wchar_t ch;
+
+   mbstate_t decState = {0};
+#else
+   const char replacementChar = '?';
+   char ch;
+#endif
+
+   do {
+      size_t len = 0;
+      bool shouldReplace = false;
+      ch = 0;
+
+      if (pos < maxLen) {
+         // Read the next character from the byte sequence
+#ifdef HAVE_LIBNCURSESW
+         mbstate_t newState;
+         memcpy(&newState, &decState, sizeof(newState));
+         len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);
+
+         assert(len != 0 || ch == 0);
+         switch (len) {
+         case (size_t)-2:
+            errno = EILSEQ;
+            shouldReplace = true;
+            len = maxLen - pos;
+            break;
+
+         case (size_t)-1:
+            shouldReplace = true;
+            len = 1;
+            break;
+
+         default:
+            memcpy(&decState, &newState, sizeof(decState));
+         }
+#else
+         len = 1;
+         ch = src[pos];
+#endif
+      }
+
+      pos += len;
+
+      // Filter unprintable characters
+      if (!shouldReplace && ch != 0) {
+#ifdef HAVE_LIBNCURSESW
+         shouldReplace = !iswprint(ch);
+#else
+         shouldReplace = !isprint((unsigned char)ch);
+#endif
+      }
+
+      if (shouldReplace) {
+         ch = replacementChar;
+         if (wasReplaced)
+            continue;
+      }
+      wasReplaced = shouldReplace;
+
+      encodeWChar(ps, ch);
+   } while (ch != 0);
+}
+
+char* String_makePrintable(const char* str, size_t maxLen) {
+   WCharEncoderState encState = {0};
+
+   EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
+   size_t size = encState.pos;
+   assert(size > 0);
+
+   memset(&encState, 0, sizeof(encState));
+   char* buf = xMalloc(size);
+   encState.size = size;
+   encState.buf = buf;
+   EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
+   assert(encState.pos == size);
+
+   return buf;
+}
+
+bool MBStringDecoder_nextWChar(MBStringDecoder* decoder) {
+   if (!decoder->str || decoder->maxLen == 0)
+      return false;
+
+   // If the previous call of this function encounters an invalid sequence,
+   // do not continue (because the "mbState" object for mbrtowc() is
+   // undefined). The caller is supposed to reset the state.
+#ifdef HAVE_LIBNCURSESW
+   if (decoder->ch == WEOF)
+      return false;
+#endif
+
+#ifdef HAVE_LIBNCURSESW
+   wchar_t ch;
+   size_t len = mbrtowc(&ch, decoder->str, decoder->maxLen, &decoder->mbState);
+
+   // These assertions ensure the mbrtowc() implementation is correct
+   assert(len == 0 || len >= (size_t)-2 || ch != 0);
+   assert(len != 0 || ch == 0);
+
+   switch (len) {
+   case (size_t)-1:
+      // Invalid sequence. decoder->str remains at the position where
+      // the first byte of the invalid sequence is found.
+      decoder->ch = WEOF;
+      return false;
+
+   case (size_t)-2:
+      // Incomplete sequence
+      decoder->str += decoder->maxLen;
+      decoder->maxLen = 0;
+      return false;
+
+   case 0:
+      // End of string. This assignment is an optimization hint.
+      ch = 0;
+   }
+#else
+   char ch = *decoder->str;
+   const size_t len = 1;
+#endif
+
+   if (ch == 0) {
+      // Setting "str" to NULL prevents subsequent calls from reading
+      // out of bounds.
+      decoder->str = NULL;
+      decoder->maxLen = 0;
+   } else {
+      decoder->str += len;
+      decoder->maxLen -= len;
+   }
+   decoder->ch = ch;
+   return true;
+}
+
+int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator) {
+   assert(*str || maxLen == 0);
+
+   // The caller should ensure (maxWidth >= 0).
+   // It's possible for a Unicode string to occupy 0 terminal columns, so this
+   // function allows (maxWidth == 0).
+   if (maxWidth < 0)
+      maxWidth = INT_MAX;
+
+#ifdef HAVE_LIBNCURSESW
+   // If the character takes zero columns, include the character in the
+   // substring if the working encoding is UTF-8, and ignore it otherwise.
+   // In Unicode, combining characters are always placed after the base
+   // character, but some legacy 8-bit encodings instead place combining
+   // characters before the base character.
+   const bool isUnicode = CRT_utf8;
+#else
+   const bool isUnicode = false;
+#endif
+
+   int totalWidth = 0;
+
+   MBStringDecoder decoder = {0};
+   decoder.str = *str;
+   decoder.maxLen = maxLen;
+
+   bool inSpaces = true;
+   const char* breakPos = NULL;
+   int breakWidth = 0;
+
+   while (totalWidth < maxWidth || isUnicode) {
+      assert(totalWidth <= maxWidth);
+
+      if (!MBStringDecoder_nextWChar(&decoder))
+         break;
+      if (decoder.ch == 0)
+         break;
+
+      if (decoder.ch == ' ' && separator == ' ' && !inSpaces) {
+         inSpaces = true;
+         breakPos = *str;
+         breakWidth = totalWidth;
+      }
+
+#ifdef HAVE_LIBNCURSESW
+      int cw = wcwidth((wchar_t)decoder.ch);
+      if (cw < 0) {
+         // This function should not be used with string containing unprintable
+         // characters. Tolerate them on release build, however.
+         assert(cw >= 0);
+         break;
+      }
+#else
+      assert(isprint(decoder.ch));
+      const int cw = 1;
+#endif
+
+      if (cw > maxWidth - totalWidth) {
+         // This character cannot fit the line with the given maxWidth.
+         if (breakPos) {
+            // Rewind the scanning state to the last found separator.
+            totalWidth = breakWidth;
+            *str = breakPos;
+         }
+         break;
+      }
+
+      if (cw <= 0 && !isUnicode)
+         continue;
+
+      totalWidth += cw;
+
+      // (*str - start) will represent the length of the substring bounded
+      // by the width limit.
+      *str = decoder.str;
+
+      if (decoder.ch != ' ')
+         inSpaces = false;
+
+#ifdef HAVE_LIBNCURSESW
+      bool isSeparator = decoder.ch == (wint_t)separator;
+#else
+      bool isSeparator = decoder.ch == (int)separator;
+#endif
+      if (isSeparator && separator != ' ') {
+         breakPos = *str;
+         breakWidth = totalWidth;
+      }
+   }
+
+   return totalWidth;
+}
+
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
+#ifdef HAVE_LIBNCURSESW
+   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
+#else
+   assert(*str || maxLen == 0);
+
+   if (maxWidth < 0)
+      maxWidth = INT_MAX;
+
+   maxLen = MINIMUM((size_t)maxWidth, maxLen);
+   size_t len = strnlen(*str, maxLen);
+   *str += len;
+   return (int)len;
+#endif
+}
+
 int xAsprintf(char** strp, const char* fmt, ...) {
    *strp = NULL;
 

diff --git a/XUtils.h b/XUtils.h
@@ -22,7 +22,36 @@ in the source distribution for its full text.
 #include <string.h> // IWYU pragma: keep
 
 #include "Macros.h"
+#include "ProvideCurses.h"
+
+
+typedef struct WCharEncoderState_ {
+   size_t pos;
+   size_t size;
+   void* buf;
+   mbstate_t mbState;
+} WCharEncoderState;
+
+/* Object for reading wide characters from a multibyte string.
+   "str" and "maxLen" are input but will be modified during process.
+   "str" will be set to NULL when the decoding is finished with the
+   terminating L'\0' character. */
+typedef struct MBStringDecoder_ {
+   const char* str;
+   size_t maxLen;
+#ifdef HAVE_LIBNCURSESW
+   wint_t ch;
+   mbstate_t mbState;
+#else
+   int ch;
+#endif
+} MBStringDecoder;
 
+#ifdef HAVE_LIBNCURSESW
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
+#else
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
+#endif
 
 ATTR_NORETURN
 void fail(void);
@@ -108,6 +137,27 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
 size_t strnlen(const char* str, size_t maxLen);
 #endif
 
+ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);
+
+ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
+char* String_makePrintable(const char* str, size_t maxLen);
+
+ATTR_NONNULL
+bool MBStringDecoder_nextWChar(MBStringDecoder* ps);
+
+ATTR_NONNULL ATTR_ACCESS2_RW(1)
+int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator);
+
+/* Count the number of terminal columns needed to display a string, or
+   count how many characters from the string that can be displayed
+   with the column limit ("maxWidth").
+   "maxLen" is in bytes.
+   maxLen = SIZE_MAX to take the whole string.
+   maxWidth = INT_MAX for no terminal column limit. */
+ATTR_NONNULL ATTR_ACCESS2_RW(1)
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth);
+
 ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
 int xAsprintf(char** strp, const char* fmt, ...);