@@ -10,8 +10,10 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
16+ #include <limits.h> // IWYU pragma: keep
1517#include <math.h>
1618#include <stdarg.h>
1719#include <stdint.h>
@@ -235,6 +237,288 @@ size_t strnlen(const char* str, size_t maxLen) {
235237}
236238#endif
237239
240+ #ifdef HAVE_LIBNCURSESW
241+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
242+ assert (!ps -> buf || ps -> pos < ps -> size );
243+
244+ char tempBuf [MB_LEN_MAX ];
245+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
246+
247+ // It is unnecessarily expensive to fix the output string if the caller
248+ // gives an incorrect buffer size. This function would not support any
249+ // truncation of the output string.
250+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
251+ assert (len > 0 );
252+ if (len == (size_t )-1 ) {
253+ assert (len != (size_t )-1 );
254+ fail ();
255+ }
256+ if (ps -> buf && len > ps -> size - ps -> pos ) {
257+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
258+ fail ();
259+ }
260+
261+ ps -> pos += len ;
262+ }
263+ #else
264+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char * buf = ps -> buf ;
268+ if (buf ) {
269+ buf [ps -> pos ] = (char )c ;
270+ }
271+
272+ ps -> pos += 1 ;
273+ }
274+ #endif
275+
276+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
277+ assert (src || maxLen == 0 );
278+
279+ size_t pos = 0 ;
280+ bool wasReplaced = false;
281+
282+ #ifdef HAVE_LIBNCURSESW
283+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
284+ wchar_t ch ;
285+
286+ mbstate_t decState ;
287+ memset (& decState , 0 , sizeof (decState ));
288+ #else
289+ const char replacementChar = '?' ;
290+ char ch ;
291+ #endif
292+
293+ do {
294+ size_t len = 0 ;
295+ bool shouldReplace = false;
296+ ch = 0 ;
297+
298+ if (pos < maxLen ) {
299+ // Read the next character from the byte sequence
300+ #ifdef HAVE_LIBNCURSESW
301+ mbstate_t newState ;
302+ memcpy (& newState , & decState , sizeof (newState ));
303+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
304+
305+ assert (len != 0 || ch == 0 );
306+ switch (len ) {
307+ case (size_t )-2 :
308+ errno = EILSEQ ;
309+ shouldReplace = true;
310+ len = maxLen - pos ;
311+ break ;
312+
313+ case (size_t )-1 :
314+ shouldReplace = true;
315+ len = 1 ;
316+ break ;
317+
318+ default :
319+ memcpy (& decState , & newState , sizeof (decState ));
320+ }
321+ #else
322+ len = 1 ;
323+ ch = src [pos ];
324+ #endif
325+ }
326+
327+ pos += len ;
328+
329+ // Filter unprintable characters
330+ if (!shouldReplace && ch != 0 ) {
331+ #ifdef HAVE_LIBNCURSESW
332+ shouldReplace = !iswprint (ch );
333+ #else
334+ shouldReplace = !isprint ((unsigned char )ch );
335+ #endif
336+ }
337+
338+ if (shouldReplace ) {
339+ ch = replacementChar ;
340+ if (wasReplaced ) {
341+ continue ;
342+ }
343+ }
344+ wasReplaced = shouldReplace ;
345+
346+ encodeWChar (ps , ch );
347+ } while (ch != 0 );
348+ }
349+
350+ char * String_makePrintable (const char * str , size_t maxLen ) {
351+ WCharEncoderState encState ;
352+
353+ memset (& encState , 0 , sizeof (encState ));
354+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
355+ size_t size = encState .pos ;
356+ assert (size > 0 );
357+
358+ memset (& encState , 0 , sizeof (encState ));
359+ char * buf = xMalloc (size );
360+ encState .size = size ;
361+ encState .buf = buf ;
362+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
363+ assert (encState .pos == size );
364+
365+ return buf ;
366+ }
367+
368+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
369+ if (!ps -> str || ps -> maxLen == 0 ) {
370+ return false;
371+ }
372+
373+ // If the previous call of this function encounters an invalid sequence,
374+ // do not continue (because the "mbState" object for mbrtowc() is
375+ // undefined). The caller is supposed to reset the state.
376+ #ifdef HAVE_LIBNCURSESW
377+ bool isStateDefined = ps -> ch != WEOF ;
378+ #else
379+ bool isStateDefined = ps -> ch != EOF ;
380+ #endif
381+ if (!isStateDefined ) {
382+ return false;
383+ }
384+
385+ #ifdef HAVE_LIBNCURSESW
386+ wchar_t wc ;
387+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
388+ switch (len ) {
389+ case (size_t )-1 :
390+ // Invalid sequence
391+ ps -> ch = WEOF ;
392+ return false;
393+
394+ case (size_t )-2 :
395+ // Incomplete sequence
396+ ps -> str += ps -> maxLen ;
397+ ps -> maxLen = 0 ;
398+ return false;
399+
400+ case 0 :
401+ assert (wc == 0 );
402+
403+ ps -> str = NULL ;
404+ ps -> maxLen = 0 ;
405+ ps -> ch = wc ;
406+ return true;
407+
408+ default :
409+ ps -> str += len ;
410+ ps -> maxLen -= len ;
411+ ps -> ch = wc ;
412+ }
413+ return true;
414+ #else
415+ ps -> ch = * ps -> str ;
416+ if (ps -> ch == 0 ) {
417+ ps -> str = NULL ;
418+ ps -> maxLen = 0 ;
419+ } else {
420+ ps -> str ++ ;
421+ ps -> maxLen -- ;
422+ }
423+ return true;
424+ #endif
425+ }
426+
427+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
428+ assert (* str || maxLen == 0 );
429+
430+ if (maxWidth < 0 )
431+ maxWidth = INT_MAX ;
432+
433+ MBStringDecoderState state ;
434+ memset (& state , 0 , sizeof (state ));
435+ state .str = * str ;
436+ state .maxLen = maxLen ;
437+
438+ int totalWidth = 0 ;
439+ int breakWidth = 0 ;
440+
441+ const char * breakPos = NULL ;
442+ bool inSpaces = true;
443+
444+ while (String_decodeNextWChar (& state )) {
445+ if (state .ch == 0 )
446+ break ;
447+
448+ if (state .ch == ' ' && separator == ' ' && !inSpaces ) {
449+ breakWidth = totalWidth ;
450+ breakPos = * str ;
451+ inSpaces = true;
452+ }
453+
454+ #ifdef HAVE_LIBNCURSESW
455+ int w = wcwidth ((wchar_t )state .ch );
456+ if (w < 0 ) {
457+ // This function should not be used with string containing unprintable
458+ // characters. Tolerate them on release build, however.
459+ assert (w >= 0 );
460+ break ;
461+ }
462+ #else
463+ assert (isprint (state .ch ));
464+ int w = 1 ;
465+ #endif
466+
467+ if (w > maxWidth - totalWidth ) {
468+ // This character cannot fit the line with the given maxWidth.
469+ if (breakPos ) {
470+ // Rewind the scanning state to the last found separator.
471+ totalWidth = breakWidth ;
472+ * str = breakPos ;
473+ }
474+ break ;
475+ }
476+
477+ #ifdef HAVE_LIBNCURSESW
478+ // If the character takes zero columns, include the character in the
479+ // substring if the working encoding is UTF-8, and ignore it otherwise.
480+ // In Unicode, combining characters are always placed after the base
481+ // character, but some legacy 8-bit encodings instead place combining
482+ // characters before the base character.
483+ if (w <= 0 && !CRT_utf8 ) {
484+ continue ;
485+ }
486+ #endif
487+
488+ totalWidth += w ;
489+
490+ // (*str - start) will represent the length of the substring bounded
491+ // by the width limit.
492+ * str = state .str ;
493+
494+ if (state .ch != ' ' )
495+ inSpaces = false;
496+
497+ if (state .ch == separator && separator != ' ' ) {
498+ breakWidth = totalWidth ;
499+ breakPos = * str ;
500+ }
501+ }
502+
503+ return totalWidth ;
504+ }
505+
/*
 * Measures the display width of a string, limited to maxWidth.
 *
 * str:      in: start of the string; out: end of the measured substring
 * maxLen:   maximum number of bytes to read
 * maxWidth: width limit; a negative value means INT_MAX
 *
 * Returns the width of the measured substring. Without wide-character
 * support every byte counts as one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   // Same scan as line breaking, with '\0' passed as the separator.
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   const size_t widthCap = (maxWidth < 0) ? (size_t)INT_MAX : (size_t)maxWidth;
   const size_t count = strnlen(*str, MINIMUM(widthCap, maxLen));

   *str += count;
   return (int)count;
#endif
}
521+
238522int xAsprintf (char * * strp , const char * fmt , ...) {
239523 va_list vl ;
240524 va_start (vl , fmt );
0 commit comments