Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Action.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
int rem = x - SCREEN_TAB_MARGIN_LEFT;
for (unsigned int i = 0; i < settings->nScreens; i++) {
const char* tab = settings->screens[i]->heading;
int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0;
const char* ptr = tab;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why introduce this extra variable? Was this patch written by AI? ;-)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically no AI is used :)

The ptr is a temporary variable holding a pointer because of the API design I explained in https://github.com/htop-dev/htop/pull/1642/changes/BASE..6b82bcef3b8f7aecd9be6c83d66e84af152a1d58#r3211053429

int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0;
if (width >= rem - bracketWidth + 1) {
settings->ssIndex = i;
Htop_Reaction reaction = HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR;
Expand Down
5 changes: 3 additions & 2 deletions ScreenManager.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
(*x)++;
if (*x >= l)
return false;
int nameWidth = (int)strnlen(name, l - *x);
const char* ptr = name;
int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x);
attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
mvaddnstr(*y, *x, name, nameWidth);
mvaddnstr(*y, *x, name, (int)(ptr - name));
*x += nameWidth;
if (*x >= l)
return false;
Expand Down
292 changes: 292 additions & 0 deletions XUtils.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ in the source distribution for its full text.
#include "XUtils.h"

#include <assert.h>
#include <ctype.h> // IWYU pragma: keep
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
Expand Down Expand Up @@ -259,6 +260,297 @@ size_t strnlen(const char* str, size_t maxLen) {
}
#endif

#ifdef HAVE_LIBNCURSESW
static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
assert(!ps->buf || ps->pos < ps->size);

char tempBuf[MB_LEN_MAX];

// This function will null terminate the string only upon a call
// with (wc == 0). It might take more than a single NUL byte to
// terminate a string when using the C multibyte functions and a
// non-Unicode encoding, thus this function won't support truncation
// of a string. The caller must provide the right size in ps->size
// if ps->buf is not NULL.
size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
assert(len != 0);
if (len == (size_t)-1) {
assert(len != (size_t)-1);
fail();
}
if (ps->buf) {
if (len > ps->size - ps->pos) {
fail();
}
memcpy((char*)ps->buf + ps->pos, tempBuf, len);
}
ps->pos += len;
}
#else
static void String_encodeWChar(WCharEncoderState* ps, int c) {
assert(!ps->buf || ps->pos < ps->size);

char* buf = ps->buf;
if (buf)
buf[ps->pos] = (char)c;

ps->pos += 1;
}
#endif

void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
assert(src || maxLen == 0);

size_t pos = 0;
bool wasReplaced = false;

#ifdef HAVE_LIBNCURSESW
Comment thread
Explorer09 marked this conversation as resolved.
const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
wchar_t ch;

mbstate_t decState = {0};
#else
const char replacementChar = '?';
char ch;
#endif

do {
size_t len = 0;
bool shouldReplace = false;
ch = 0;

if (pos < maxLen) {
// Read the next character from the byte sequence
#ifdef HAVE_LIBNCURSESW
mbstate_t newState;
memcpy(&newState, &decState, sizeof(newState));
len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);

assert(len != 0 || ch == 0);
switch (len) {
case (size_t)-2:
errno = EILSEQ;
shouldReplace = true;
len = maxLen - pos;
break;

case (size_t)-1:
shouldReplace = true;
len = 1;
break;

default:
memcpy(&decState, &newState, sizeof(decState));
}
#else
len = 1;
ch = src[pos];
#endif
}

pos += len;

// Filter unprintable characters
if (!shouldReplace && ch != 0) {
#ifdef HAVE_LIBNCURSESW
shouldReplace = !iswprint(ch);
#else
shouldReplace = !isprint((unsigned char)ch);
#endif
}

if (shouldReplace) {
ch = replacementChar;
if (wasReplaced)
continue;
}
wasReplaced = shouldReplace;

encodeWChar(ps, ch);
} while (ch != 0);
}

char* String_makePrintable(const char* str, size_t maxLen) {
WCharEncoderState encState = {0};

EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
size_t size = encState.pos;
assert(size > 0);

memset(&encState, 0, sizeof(encState));
char* buf = xMalloc(size);
encState.size = size;
encState.buf = buf;
EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
assert(encState.pos == size);

return buf;
}

bool MBStringDecoder_nextWChar(MBStringDecoder* decoder) {
if (!decoder->str || decoder->maxLen == 0)
return false;

// If the previous call of this function encounters an invalid sequence,
// do not continue (because the "mbState" object for mbrtowc() is
// undefined). The caller is supposed to reset the state.
#ifdef HAVE_LIBNCURSESW
if (decoder->ch == WEOF)
return false;
#endif

#ifdef HAVE_LIBNCURSESW
wchar_t ch;
size_t len = mbrtowc(&ch, decoder->str, decoder->maxLen, &decoder->mbState);

// These assertions ensure the mbrtowc() implementation is correct
assert(len == 0 || len >= (size_t)-2 || ch != 0);
assert(len != 0 || ch == 0);

switch (len) {
case (size_t)-1:
// Invalid sequence. decoder->str remains at the position where
// the first byte of the invalid sequence is found.
decoder->ch = WEOF;
return false;

Comment on lines +411 to +416
Copy link
Copy Markdown

@coderabbitai coderabbitai Bot May 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify callers that may loop on String_lineBreakWidth/String_mbswidth
# and require forward progress when malformed bytes are present.
rg -n -C3 '\bString_lineBreakWidth\s*\(|\bString_mbswidth\s*\('

Repository: htop-dev/htop

Length of output: 2394


🏁 Script executed:

# Get the size of XUtils.c first to determine reading strategy
wc -l XUtils.c

Repository: htop-dev/htop

Length of output: 70


🏁 Script executed:

# Read the decoder function around lines 411-415
sed -n '385,420p' XUtils.c

Repository: htop-dev/htop

Length of output: 969


🏁 Script executed:

# Read the String_lineBreakWidth function around lines 442-480
sed -n '442,480p' XUtils.c

Repository: htop-dev/htop

Length of output: 1278


🏁 Script executed:

# Read the full String_decodeNextWChar to understand the complete context
sed -n '370,440p' XUtils.c

Repository: htop-dev/htop

Length of output: 1725


🏁 Script executed:

sed -n '442,533p' XUtils.c

Repository: htop-dev/htop

Length of output: 2586


Consume bytes on invalid multibyte sequences to ensure forward progress.

Invalid UTF-8 at line 411 returns false without advancing ps->str, causing String_lineBreakWidth to return without updating *str (line 495 is unreached). Callers retrying on the same pointer risk infinite loops. The incomplete sequence case (lines 418-421) correctly advances pointers; invalid sequences must do the same.

Proposed fix
   switch (len) {
   case (size_t)-1:
-      // Invalid sequence
-      ps->ch = WEOF;
-      return false;
+      // Invalid sequence: consume one byte to guarantee progress
+      ps->str += 1;
+      ps->maxLen -= 1;
+      ps->ch = WEOF;
+      return false;

   case (size_t)-2:

And in String_lineBreakWidth, update *str on decode failure:

-      if (!String_decodeNextWChar(&state))
+      if (!String_decodeNextWChar(&state)) {
+         *str = state.str;
          break;
+      }

✅ Addressed in commits 1374d7c to 9d9301f

case (size_t)-2:
// Incomplete sequence
decoder->str += decoder->maxLen;
decoder->maxLen = 0;
return false;

case 0:
// End of string. This assignment is an optimization hint.
ch = 0;
}
#else
char ch = *decoder->str;
const size_t len = 1;
#endif

if (ch == 0) {
// Setting "str" to NULL prevents subsequent calls from reading
// out of bounds.
decoder->str = NULL;
decoder->maxLen = 0;
} else {
decoder->str += len;
decoder->maxLen -= len;
}
decoder->ch = ch;
return true;
}

int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator) {
assert(*str || maxLen == 0);

// The caller should ensure (maxWidth >= 0).
// It's possible for a Unicode string to occupy 0 terminal columns, so this
// function allows (maxWidth == 0).
if (maxWidth < 0)
maxWidth = INT_MAX;

#ifdef HAVE_LIBNCURSESW
// If the character takes zero columns, include the character in the
// substring if the working encoding is UTF-8, and ignore it otherwise.
// In Unicode, combining characters are always placed after the base
// character, but some legacy 8-bit encodings instead place combining
// characters before the base character.
const bool isUnicode = CRT_utf8;
#else
const bool isUnicode = false;
#endif

int totalWidth = 0;

MBStringDecoder decoder = {0};
decoder.str = *str;
decoder.maxLen = maxLen;

bool inSpaces = true;
const char* breakPos = NULL;
int breakWidth = 0;

while (totalWidth < maxWidth || isUnicode) {
assert(totalWidth <= maxWidth);

if (!MBStringDecoder_nextWChar(&decoder))
break;
if (decoder.ch == 0)
break;

if (decoder.ch == ' ' && separator == ' ' && !inSpaces) {
inSpaces = true;
breakPos = *str;
breakWidth = totalWidth;
}

#ifdef HAVE_LIBNCURSESW
int cw = wcwidth((wchar_t)decoder.ch);
if (cw < 0) {
// This function should not be used with string containing unprintable
// characters. Tolerate them on release build, however.
assert(cw >= 0);
break;
}
#else
assert(isprint(decoder.ch));
const int cw = 1;
#endif

if (cw > maxWidth - totalWidth) {
// This character cannot fit the line with the given maxWidth.
if (breakPos) {
// Rewind the scanning state to the last found separator.
totalWidth = breakWidth;
*str = breakPos;
}
break;
}

if (cw <= 0 && !isUnicode)
continue;

totalWidth += cw;

// (*str - start) will represent the length of the substring bounded
// by the width limit.
*str = decoder.str;

if (decoder.ch != ' ')
inSpaces = false;

#ifdef HAVE_LIBNCURSESW
bool isSeparator = decoder.ch == (wint_t)separator;
#else
bool isSeparator = decoder.ch == (int)separator;
#endif
if (isSeparator && separator != ' ') {
breakPos = *str;
breakWidth = totalWidth;
}
}

return totalWidth;
}

int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have the public API of this function modify an input argument to move to the end of the function?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function has two outputs: One is the terminal width (i.e. num of terminal columns) and the other is the end character position after calculating the width.

The second output is useful to tell, for example, how many Unicode characters would it take to fit the number of terminal columns.

This functions has two uses combined in a single API: (1) to count the number of terminal columns for a given multibyte string; (2) to count the number characters it takes to fit the given terminal width.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might seem redundant, but I'd prefer if this output parameter was a separate (NULL-able) argument to the function. This separates concerns (input argument vs. optional additional output) and makes the API clearer. This also allows to just skip the extra local variable in case you don't care AND don't have it unexpectedly be changed.

Copy link
Copy Markdown
Contributor Author

@Explorer09 Explorer09 May 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might seem redundant, but I'd prefer if this output parameter was a separate (NULL-able) argument to the function. This separates concerns (input argument vs. optional additional output) and makes the API clearer. This also allows to just skip the extra local variable in case you don't care AND don't have it unexpectedly be changed.

A static inline function that wraps around this String_mbswidth() for a more convenient output interface? That wouldn't hurt, I guess.

By the way, the "input pointer that would be modified on output" is not my original idea. There are precedents in mbsrtowcs(3) and mbsnrtowcs(3) (compare mbsrtowcs(3) with mbstowcs(3) and I would argue that the const char** argument in the former makes the API more flexible).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gave both variants of the API to Copilot to compare (without initially disclosing what the function would do) and except for "if it runs in a loop for parsing" the additional out argument was the preferred variant. This only solidified further once I gave Copilot also some more semantics of the function it just had rated. One interesting aspect was that modern compiler tend to optimize the additional out parameter variant better. So there's several aspects that lean towards that variant of the API.

Using the C standard library's string function API as a baseline for API quality sets a very low bar for quality … ;-)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gave both variants of the API to Copilot to compare (without initially disclosing what the function would do) and except for "if it runs in a loop for parsing" the additional out argument was the preferred variant.

What variants of the API did you give to Copilot in particular?

One interesting aspect was that modern compiler tend to optimize the additional out parameter variant better. So there's several aspects that lean towards that variant of the API.

Again I'm not sure what variant you are referring to.

Using the C standard library's string function API as a baseline for API quality sets a very low bar for quality … ;-)

I personally value utility of the API over the "quality" which I'm not sure what it is. Convenience of passing the arguments and calling the function? Register and stack usage?

It's not I disagree with changing the API, but I didn't see an API suggestion to compare with, thus I cannot judge anything regarding what Copilot said.

#ifdef HAVE_LIBNCURSESW
return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
assert(*str || maxLen == 0);

if (maxWidth < 0)
maxWidth = INT_MAX;

maxLen = MINIMUM((size_t)maxWidth, maxLen);
size_t len = strnlen(*str, maxLen);
*str += len;
return (int)len;
#endif
}

int xAsprintf(char** strp, const char* fmt, ...) {
*strp = NULL;

Expand Down
50 changes: 50 additions & 0 deletions XUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,36 @@ in the source distribution for its full text.
#include <string.h> // IWYU pragma: keep

#include "Macros.h"
#include "ProvideCurses.h"


typedef struct WCharEncoderState_ {
size_t pos;
size_t size;
void* buf;
mbstate_t mbState;
} WCharEncoderState;

/* Object for reading wide characters from a multibyte string.
"str" and "maxLen" are input but will be modified during process.
"str" will be set to NULL when the decoding is finished with the
terminating L'\0' character. */
typedef struct MBStringDecoder_ {
const char* str;
size_t maxLen;
#ifdef HAVE_LIBNCURSESW
wint_t ch;
mbstate_t mbState;
#else
int ch;
#endif
} MBStringDecoder;

#ifdef HAVE_LIBNCURSESW
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
#else
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
#endif

ATTR_NORETURN
void fail(void);
Expand Down Expand Up @@ -108,6 +137,27 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
size_t strnlen(const char* str, size_t maxLen);
#endif

ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Rename EncodePrintableString to module-prefixed style.

This new public function name breaks the project’s C naming convention and makes the API inconsistent with surrounding String_* utilities.

As per coding guidelines, use ModuleName_functionName() naming convention for C functions.


ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
char* String_makePrintable(const char* str, size_t maxLen);

ATTR_NONNULL
bool MBStringDecoder_nextWChar(MBStringDecoder* ps);

ATTR_NONNULL ATTR_ACCESS2_RW(1)
int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator);

/* Count the number of terminal columns needed to display a string, or
count how many characters from the string that can be displayed
with the column limit ("maxWidth").
"maxLen" is in bytes.
maxLen = SIZE_MAX to take the whole string.
maxWidth = INT_MAX for no terminal column limit. */
ATTR_NONNULL ATTR_ACCESS2_RW(1)
int String_mbswidth(const char** str, size_t maxLen, int maxWidth);

ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
int xAsprintf(char** strp, const char* fmt, ...);

Expand Down
Loading