Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21855,3 +21855,10 @@ test(2344.03, setkey(d1[, .(V1, label = c("one", "zero", "one"), V2)][data.table
# keep sub-key in case of multiple keys, even with new columns and changing column order
DT = data.table(V1 = 1:2, V2 = 3:4, V3 = 5:6, key = c("V1", "V2", "V3"))
test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1", "V2"))

# one-byte stack overflow in strlim() to be tested with sanitizers, #7408
# Input layout: 100 identical "mary had a little lamb" lines (the first one is
# consumed as the header, leaving the 99 data rows expected below), followed by
# a line of exactly 500 characters and a final one-character line.
# The 500-character line has a single field, so it is discarded and quoted in the
# "First discarded non-empty line" warning; fread.c builds that quote with
# strlim(..., 500), i.e. at the maximum allowed limit, which is the case that
# overran the former 500-byte buffer by one byte.
text = paste0(
strrep("mary had a little lamb\n", 100),
strrep("a", 500), "\n", "a"
)
# Expect the 99 well-formed rows plus the early-stop warning quoting the discarded line.
test(2345, data.table::fread(text = text), data.table(mary = rep("mary", 99), had = "had", a = "a", little = "little", lamb = "lamb"), warning = "First discarded non-empty line")
21 changes: 11 additions & 10 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,15 +219,16 @@ static inline int64_t clamp_i64t(int64_t x, int64_t lower, int64_t upper)
/**
* Helper for error and warning messages to extract an input line starting at
* `*ch` and until an end of line, but no longer than `limit` characters.
* This function returns the string copied into an internal static buffer. Cannot
* be called more than twice per single printf() invocation.
* Parameter `limit` cannot exceed 500.
* This function returns the string copied into a caller-allocated buffer (typically on the stack).
* Parameter `limit` should not exceed STRLIM_BUF_SIZE-1 (500).
* The data might contain % characters. Therefore, if the msg is constructed
* manually (using, say, snprintf), be careful to ensure that warning(), stop()
* and Rprintf() are all called as warning(_("%s"), msg) and not warning(msg).
*/
static const char* strlim(const char *ch, char buf[static 500], size_t limit)
#define STRLIM_BUF_SIZE 501
static const char* strlim(const char *ch, char buf[static STRLIM_BUF_SIZE], size_t limit)
{
if (limit >= STRLIM_BUF_SIZE) limit = STRLIM_BUF_SIZE-1;
char *ch2 = buf;
for (size_t width = 0; (*ch > '\r' || (*ch != '\0' && *ch != '\r' && *ch != '\n')) && width < limit; width++) {
*ch2++ = *ch++;
Expand Down Expand Up @@ -1776,7 +1777,7 @@ int freadMain(freadMainArgs _args)
if (ch >= eof) STOP(_("Input is either empty, fully whitespace, or skip has been set after the last non-whitespace."));
if (verbose) {
if (lineStart > ch) DTPRINT(_(" Moved forward to first non-blank line (%d)\n"), row1line);
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[500]) {0}, 30));
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[STRLIM_BUF_SIZE]) {0}, 30));
}
ch = pos = lineStart;
}
Expand Down Expand Up @@ -1967,7 +1968,7 @@ int freadMain(freadMainArgs _args)
if (!fill && tt != ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov
if (verbose) {
DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"),
tt, row1line, strlim(pos, (char[500]) {0}, 30));
tt, row1line, strlim(pos, (char[STRLIM_BUF_SIZE]) {0}, 30));
DTPRINT(_(" Quote rule picked = %d\n"), quoteRule);
DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol);
}
Expand Down Expand Up @@ -2935,23 +2936,23 @@ int freadMain(freadMainArgs _args)
ch = skip_to_nextline(ch, eof);
while (ch < eof && isspace(*ch)) ch++;
if (ch == eof) {
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {0}, 500));
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
}
else {
ch = headPos;
int tt = countfields(&ch);
if (fill > 0) {
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {0}, 500));
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
} else {
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {0}, 500));
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
}
}
}
}
if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos) {
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {0}, 500));
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[STRLIM_BUF_SIZE]) {0}, 500));
}

if (verbose) {
Expand Down