Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21858,3 +21858,10 @@ test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1",

# fread with quotes and single column #7366
test(2345, fread('"this_that"\n"2025-01-01 00:00:01"'), data.table(this_that = as.POSIXct("2025-01-01 00:00:01", tz="UTC")))

# one-byte stack overflow in strlim() to be tested with sanitizers, #7408
# Build the input: the line "mary had a little lamb" repeated 100 times,
# followed by one line of 500 "a" characters and a final line holding a single "a".
text = paste0(
strrep("mary had a little lamb\n", 100),
strrep("a", 500), "\n", "a"
)
# Expected result: 99 rows in columns mary/had/a/little/lamb (the first of the 100
# repeated lines is presumably consumed as the header), plus a warning that quotes
# the first discarded non-empty line.
# NOTE(review): the 500-character line appears chosen to match the limit of 500 used
# when that warning text is built with strlim() -- confirm against src/fread.c.
test(2346, data.table::fread(text = text), data.table(mary = rep("mary", 99), had = "had", a = "a", little = "little", lamb = "lamb"), warning = "First discarded non-empty line")
21 changes: 11 additions & 10 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,15 +219,16 @@ static inline int64_t clamp_i64t(int64_t x, int64_t lower, int64_t upper)
/**
* Helper for error and warning messages to extract an input line starting at
* `*ch` and until an end of line, but no longer than `limit` characters.
* This function returns the string copied into an internal static buffer. Cannot
* be called more than twice per single printf() invocation.
* Parameter `limit` cannot exceed 500.
* This function returns the string copied into a caller-allocated buffer (typically on the stack).
* Parameter `limit` should not exceed STRLIM_BUF_SIZE-1 (500); larger values are clamped to that maximum.
* The data might contain % characters. Therefore, if the msg is constructed
* manually (using, say, snprintf), take care that warning(), stop()
* and Rprintf() are all called as warning(_("%s"), msg) and not warning(msg).
*/
static const char* strlim(const char *ch, char buf[static 500], size_t limit)
#define STRLIM_BUF_SIZE 501
static const char* strlim(const char *ch, char buf[static STRLIM_BUF_SIZE], size_t limit)
{
if (limit >= STRLIM_BUF_SIZE) limit = STRLIM_BUF_SIZE-1;
char *ch2 = buf;
for (size_t width = 0; (*ch > '\r' || (*ch != '\0' && *ch != '\r' && *ch != '\n')) && width < limit; width++) {
*ch2++ = *ch++;
Expand Down Expand Up @@ -1776,7 +1777,7 @@ int freadMain(freadMainArgs _args)
if (ch >= eof) STOP(_("Input is either empty, fully whitespace, or skip has been set after the last non-whitespace."));
if (verbose) {
if (lineStart > ch) DTPRINT(_(" Moved forward to first non-blank line (%d)\n"), row1line);
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[500]) {0}, 30));
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[STRLIM_BUF_SIZE]) {0}, 30));
}
ch = pos = lineStart;
}
Expand Down Expand Up @@ -1982,7 +1983,7 @@ int freadMain(freadMainArgs _args)
if (!fill && tt != ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov
if (verbose) {
DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"),
tt, row1line, strlim(pos, (char[500]) {0}, 30));
tt, row1line, strlim(pos, (char[STRLIM_BUF_SIZE]) {0}, 30));
DTPRINT(_(" Quote rule picked = %d\n"), quoteRule);
DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol);
}
Expand Down Expand Up @@ -2950,23 +2951,23 @@ int freadMain(freadMainArgs _args)
ch = skip_to_nextline(ch, eof);
while (ch < eof && isspace(*ch)) ch++;
if (ch == eof) {
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {0}, 500));
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
}
else {
ch = headPos;
int tt = countfields(&ch);
if (fill > 0) {
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {0}, 500));
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
} else {
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {0}, 500));
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
}
}
}
}
if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos) {
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {0}, 500));
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[STRLIM_BUF_SIZE]) {0}, 500));
}

if (verbose) {
Expand Down
Loading