diff --git a/Action.c b/Action.c
index d630cb80d..846e1a15b 100644
--- a/Action.c
+++ b/Action.c
@@ -432,7 +432,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
    int rem = x - SCREEN_TAB_MARGIN_LEFT;
    for (unsigned int i = 0; i < settings->nScreens; i++) {
       const char* tab = settings->screens[i]->heading;
-      int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0;
+      const char* ptr = tab;
+      int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0;
       if (width >= rem - bracketWidth + 1) {
          settings->ssIndex = i;
          Htop_Reaction reaction = HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR;
diff --git a/ScreenManager.c b/ScreenManager.c
index f79596c38..356207294 100644
--- a/ScreenManager.c
+++ b/ScreenManager.c
@@ -177,9 +177,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
    (*x)++;
    if (*x >= l)
       return false;
-   int nameWidth = (int)strnlen(name, l - *x);
+   const char* ptr = name;
+   int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x);
    attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
-   mvaddnstr(*y, *x, name, nameWidth);
+   mvaddnstr(*y, *x, name, (int)(ptr - name));
    *x += nameWidth;
    if (*x >= l)
       return false;
diff --git a/XUtils.c b/XUtils.c
index 59d006391..f87042015 100644
--- a/XUtils.c
+++ b/XUtils.c
@@ -10,6 +10,7 @@ in the source distribution for its full text.
 #include "XUtils.h"
 
 #include
+#include // IWYU pragma: keep
 #include
 #include
 #include
@@ -259,6 +260,319 @@ size_t strnlen(const char* str, size_t maxLen) {
 }
 #endif
 
+/* Append one character to the encoder output. If ps->buf is NULL, only
+   ps->pos is advanced so that the caller can measure the needed size. */
+#ifdef HAVE_LIBNCURSESW
+static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   char tempBuf[MB_LEN_MAX];
+
+   // This function will null terminate the string only upon a call
+   // with (wc == 0). It might take more than a single NUL byte to
+   // terminate a string when using the C multibyte functions and a
+   // non-Unicode encoding, thus this function won't support truncation
+   // of a string. The caller must provide the right size in ps->size
+   // if ps->buf is not NULL.
+   size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
+   assert(len != 0);
+   if (len == (size_t)-1) {
+      assert(len != (size_t)-1);
+      fail();
+   }
+   if (ps->buf) {
+      if (len > ps->size - ps->pos) {
+         fail();
+      }
+      memcpy((char*)ps->buf + ps->pos, tempBuf, len);
+   }
+   ps->pos += len;
+}
+#else
+static void String_encodeWChar(WCharEncoderState* ps, int c) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   char* buf = ps->buf;
+   if (buf) {
+      // Mirror the bounds check of the multibyte variant so that release
+      // builds fail instead of writing past the end of the buffer.
+      if (ps->pos >= ps->size) {
+         fail();
+      }
+      buf[ps->pos] = (char)c;
+   }
+
+   ps->pos += 1;
+}
+#endif
+
+/* Pass the characters of "src" (at most "maxLen" bytes) to "encodeWChar",
+   substituting a replacement character for undecodable or unprintable
+   input. A run of consecutive replacements is emitted only once. The
+   final call to "encodeWChar" always carries the zero character. */
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
+   assert(src || maxLen == 0);
+
+   size_t pos = 0;
+   bool wasReplaced = false;
+
+#ifdef HAVE_LIBNCURSESW
+   const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
+   wchar_t ch;
+
+   mbstate_t decState = {0};
+#else
+   const char replacementChar = '?';
+   char ch;
+#endif
+
+   do {
+      size_t len = 0;
+      bool shouldReplace = false;
+      ch = 0;
+
+      if (pos < maxLen) {
+         // Read the next character from the byte sequence
+#ifdef HAVE_LIBNCURSESW
+         mbstate_t newState;
+         memcpy(&newState, &decState, sizeof(newState));
+         len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);
+
+         assert(len != 0 || ch == 0);
+         switch (len) {
+            case (size_t)-2:
+               errno = EILSEQ;
+               shouldReplace = true;
+               len = maxLen - pos;
+               break;
+
+            case (size_t)-1:
+               shouldReplace = true;
+               len = 1;
+               break;
+
+            default:
+               memcpy(&decState, &newState, sizeof(decState));
+         }
+#else
+         len = 1;
+         ch = src[pos];
+#endif
+      }
+
+      pos += len;
+
+      // Filter unprintable characters
+      if (!shouldReplace && ch != 0) {
+#ifdef HAVE_LIBNCURSESW
+         shouldReplace = !iswprint(ch);
+#else
+         shouldReplace = !isprint((unsigned char)ch);
+#endif
+      }
+
+      if (shouldReplace) {
+         ch = replacementChar;
+         if (wasReplaced)
+            continue;
+      }
+      wasReplaced = shouldReplace;
+
+      encodeWChar(ps, ch);
+   } while (ch != 0);
+}
+
+/* Build a printable copy of "str" in a buffer obtained from xMalloc().
+   The string is encoded twice: first without a buffer to learn the
+   size, then into the allocated buffer. */
+char* String_makePrintable(const char* str, size_t maxLen) {
+   WCharEncoderState encState = {0};
+
+   EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
+   size_t size = encState.pos;
+   assert(size > 0);
+
+   memset(&encState, 0, sizeof(encState));
+   char* buf = xMalloc(size);
+   encState.size = size;
+   encState.buf = buf;
+   EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
+   assert(encState.pos == size);
+
+   return buf;
+}
+
+/* Decode the next character of decoder->str into decoder->ch.
+   Returns false if there is no input left, or if the input is an
+   invalid or incomplete multibyte sequence. */
+bool MBStringDecoder_nextWChar(MBStringDecoder* decoder) {
+   if (!decoder->str || decoder->maxLen == 0)
+      return false;
+
+   // If the previous call of this function encounters an invalid sequence,
+   // do not continue (because the "mbState" object for mbrtowc() is
+   // undefined). The caller is supposed to reset the state.
+#ifdef HAVE_LIBNCURSESW
+   if (decoder->ch == WEOF)
+      return false;
+#endif
+
+#ifdef HAVE_LIBNCURSESW
+   wchar_t ch;
+   size_t len = mbrtowc(&ch, decoder->str, decoder->maxLen, &decoder->mbState);
+
+   // These assertions ensure the mbrtowc() implementation is correct
+   assert(len == 0 || len >= (size_t)-2 || ch != 0);
+   assert(len != 0 || ch == 0);
+
+   switch (len) {
+      case (size_t)-1:
+         // Invalid sequence. decoder->str remains at the position where
+         // the first byte of the invalid sequence is found.
+         decoder->ch = WEOF;
+         return false;
+
+      case (size_t)-2:
+         // Incomplete sequence
+         decoder->str += decoder->maxLen;
+         decoder->maxLen = 0;
+         return false;
+
+      case 0:
+         // End of string. This assignment is an optimization hint.
+         ch = 0;
+   }
+#else
+   char ch = *decoder->str;
+   const size_t len = 1;
+#endif
+
+   if (ch == 0) {
+      // Setting "str" to NULL prevents subsequent calls from reading
+      // out of bounds.
+      decoder->str = NULL;
+      decoder->maxLen = 0;
+   } else {
+      decoder->str += len;
+      decoder->maxLen -= len;
+   }
+   decoder->ch = ch;
+   return true;
+}
+
+/* Measure how many terminal columns of "*str" fit within "maxWidth",
+   reading at most "maxLen" bytes, and advance "*str" past the accepted
+   part. If a character does not fit, the result is rewound to the last
+   break position recorded for "separator", if any. */
+int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator) {
+   assert(*str || maxLen == 0);
+
+   // The caller should ensure (maxWidth >= 0).
+   // It's possible for a Unicode string to occupy 0 terminal columns, so this
+   // function allows (maxWidth == 0).
+   if (maxWidth < 0)
+      maxWidth = INT_MAX;
+
+#ifdef HAVE_LIBNCURSESW
+   // If the character takes zero columns, include the character in the
+   // substring if the working encoding is UTF-8, and ignore it otherwise.
+   // In Unicode, combining characters are always placed after the base
+   // character, but some legacy 8-bit encodings instead place combining
+   // characters before the base character.
+   const bool isUnicode = CRT_utf8;
+#else
+   const bool isUnicode = false;
+#endif
+
+   int totalWidth = 0;
+
+   MBStringDecoder decoder = {0};
+   decoder.str = *str;
+   decoder.maxLen = maxLen;
+
+   bool inSpaces = true;
+   const char* breakPos = NULL;
+   int breakWidth = 0;
+
+   while (totalWidth < maxWidth || isUnicode) {
+      assert(totalWidth <= maxWidth);
+
+      if (!MBStringDecoder_nextWChar(&decoder))
+         break;
+      if (decoder.ch == 0)
+         break;
+
+      if (decoder.ch == ' ' && separator == ' ' && !inSpaces) {
+         inSpaces = true;
+         breakPos = *str;
+         breakWidth = totalWidth;
+      }
+
+#ifdef HAVE_LIBNCURSESW
+      int cw = wcwidth((wchar_t)decoder.ch);
+      if (cw < 0) {
+         // This function should not be used with string containing unprintable
+         // characters. Tolerate them on release build, however.
+         assert(cw >= 0);
+         break;
+      }
+#else
+      assert(isprint((unsigned char)decoder.ch));
+      const int cw = 1;
+#endif
+
+      if (cw > maxWidth - totalWidth) {
+         // This character cannot fit the line with the given maxWidth.
+         if (breakPos) {
+            // Rewind the scanning state to the last found separator.
+            totalWidth = breakWidth;
+            *str = breakPos;
+         }
+         break;
+      }
+
+      if (cw <= 0 && !isUnicode)
+         continue;
+
+      totalWidth += cw;
+
+      // Advance "*str" past this character; the caller can subtract its
+      // original pointer to get the byte length of the accepted substring.
+      *str = decoder.str;
+
+      if (decoder.ch != ' ')
+         inSpaces = false;
+
+#ifdef HAVE_LIBNCURSESW
+      bool isSeparator = decoder.ch == (wint_t)separator;
+#else
+      bool isSeparator = decoder.ch == (int)separator;
+#endif
+      if (isSeparator && separator != ' ') {
+         breakPos = *str;
+         breakWidth = totalWidth;
+      }
+   }
+
+   return totalWidth;
+}
+
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
+#ifdef HAVE_LIBNCURSESW
+   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
+#else
+   assert(*str || maxLen == 0);
+
+   if (maxWidth < 0)
+      maxWidth = INT_MAX;
+
+   maxLen = MINIMUM((size_t)maxWidth, maxLen);
+   size_t len = strnlen(*str, maxLen);
+   *str += len;
+   return (int)len;
+#endif
+}
+
 int xAsprintf(char** strp, const char* fmt, ...) {
    *strp = NULL;
 
diff --git a/XUtils.h b/XUtils.h
index 57bb60adf..08d15ab00 100644
--- a/XUtils.h
+++ b/XUtils.h
@@ -22,7 +22,39 @@ in the source distribution for its full text.
 #include // IWYU pragma: keep
 
 #include "Macros.h"
+#include "ProvideCurses.h"
+
+
+/* Output state for EncodePrintableString(). "pos" is the number of bytes
+   produced so far; "buf" may be NULL to only measure, otherwise it must
+   hold "size" bytes. */
+typedef struct WCharEncoderState_ {
+   size_t pos;
+   size_t size;
+   void* buf;
+   mbstate_t mbState;
+} WCharEncoderState;
+
+/* Object for reading wide characters from a multibyte string.
+   "str" and "maxLen" are input but will be modified during process.
+   "str" will be set to NULL when the decoding is finished with the
+   terminating L'\0' character. */
+typedef struct MBStringDecoder_ {
+   const char* str;
+   size_t maxLen;
+#ifdef HAVE_LIBNCURSESW
+   wint_t ch;
+   mbstate_t mbState;
+#else
+   int ch;
+#endif
+} MBStringDecoder;
 
+#ifdef HAVE_LIBNCURSESW
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
+#else
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
+#endif
 
 ATTR_NORETURN void fail(void);
 
@@ -108,6 +140,33 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
 size_t strnlen(const char* str, size_t maxLen);
 #endif
 
+/* Feed a printable rendering of "src" (at most "maxLen" bytes) to
+   "encodeWChar", one character per call, ending with a zero character. */
+ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_RW(1) ATTR_ACCESS3_R(2, 3)
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);
+
+/* Return a printable copy of "str" in a buffer obtained from xMalloc(). */
+ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
+char* String_makePrintable(const char* str, size_t maxLen);
+
+/* Read one character into decoder->ch; returns false if none was read. */
+ATTR_NONNULL
+bool MBStringDecoder_nextWChar(MBStringDecoder* decoder);
+
+/* Like String_mbswidth(), but if a character does not fit the width
+   limit, rewind to the last break position found for "separator". */
+ATTR_NONNULL ATTR_ACCESS2_RW(1)
+int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator);
+
+/* Count the number of terminal columns needed to display a string, or
+   the number of columns that can be displayed within the column limit
+   ("maxWidth"). On return, "*str" points past the measured part.
+   "maxLen" is in bytes.
+   maxLen = SIZE_MAX to take the whole string.
+   maxWidth = INT_MAX for no terminal column limit. */
+ATTR_NONNULL ATTR_ACCESS2_RW(1)
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth);
+
 ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
 int xAsprintf(char** strp, const char* fmt, ...);
 