diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e6ef9be1dd..54a17b98e3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21858,3 +21858,10 @@ test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1", # fread with quotes and single column #7366 test(2345, fread('"this_that"\n"2025-01-01 00:00:01"'), data.table(this_that = as.POSIXct("2025-01-01 00:00:01", tz="UTC"))) + +# one-byte stack overflow in strlim() to be tested with sanitizers, #7408 +text = paste0( + strrep("mary had a little lamb\n", 100), + strrep("a", 500), "\n", "a" +) +test(2346, data.table::fread(text = text), data.table(mary = rep("mary", 99), had = "had", a = "a", little = "little", lamb = "lamb"), warning = "First discarded non-empty line") diff --git a/src/fread.c b/src/fread.c index 51b08af3fc..beca9a2f43 100644 --- a/src/fread.c +++ b/src/fread.c @@ -219,15 +219,16 @@ static inline int64_t clamp_i64t(int64_t x, int64_t lower, int64_t upper) /** * Helper for error and warning messages to extract an input line starting at * `*ch` and until an end of line, but no longer than `limit` characters. - * This function returns the string copied into an internal static buffer. Cannot - * be called more than twice per single printf() invocation. - * Parameter `limit` cannot exceed 500. + * This function returns the string copied into a caller-allocated buffer (typically on the stack). + * Parameter `limit` should not exceed STRLIM_BUF_SIZE-1 (500); larger values are clamped to that maximum. * The data might contain % characters. Therefore, careful to ensure that if the msg * is constructed manually (using say snprintf) that warning(), stop() * and Rprintf() are all called as warning(_("%s"), msg) and not warning(msg). 
*/ -static const char* strlim(const char *ch, char buf[static 500], size_t limit) +#define STRLIM_BUF_SIZE 501 +static const char* strlim(const char *ch, char buf[static STRLIM_BUF_SIZE], size_t limit) { + if (limit >= STRLIM_BUF_SIZE) limit = STRLIM_BUF_SIZE-1; char *ch2 = buf; for (size_t width = 0; (*ch > '\r' || (*ch != '\0' && *ch != '\r' && *ch != '\n')) && width < limit; width++) { *ch2++ = *ch++; @@ -1776,7 +1777,7 @@ int freadMain(freadMainArgs _args) if (ch >= eof) STOP(_("Input is either empty, fully whitespace, or skip has been set after the last non-whitespace.")); if (verbose) { if (lineStart > ch) DTPRINT(_(" Moved forward to first non-blank line (%d)\n"), row1line); - DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[500]) {0}, 30)); + DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[STRLIM_BUF_SIZE]) {0}, 30)); } ch = pos = lineStart; } @@ -1982,7 +1983,7 @@ int freadMain(freadMainArgs _args) if (!fill && tt != ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov if (verbose) { DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"), - tt, row1line, strlim(pos, (char[500]) {0}, 30)); + tt, row1line, strlim(pos, (char[STRLIM_BUF_SIZE]) {0}, 30)); DTPRINT(_(" Quote rule picked = %d\n"), quoteRule); DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol); } @@ -2950,23 +2951,23 @@ int freadMain(freadMainArgs _args) ch = skip_to_nextline(ch, eof); while (ch < eof && isspace(*ch)) ch++; if (ch == eof) { - DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {0}, 500)); + DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500)); } else { ch = headPos; int tt = countfields(&ch); if (fill > 0) { DTWARN(_("Stopped early on line %"PRId64". 
Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"), - DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {0}, 500)); + DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500)); } else { DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"), - DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {0}, 500)); + DTi + row1line, ncol, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500)); } } } } if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos) { - DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {0}, 500)); + DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[STRLIM_BUF_SIZE]) {0}, 500)); } if (verbose) {