Skip to content

Commit c20fd09

Browse files
aitapMichaelChiricoben-schwen
authored andcommitted
rbindlist: unprotect longestLevels inside the loop (#7794)
* test case * Protect coercedForFactor deeper in the stack * Unprotect longestLevels inside the loop * NEWS entry * Keep listNames protected * Clarify test comment Co-Authored-By: Michael Chirico <chiricom@google.com> * Detect --max-ppsize Co-Authored-By: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --------- Co-authored-by: Michael Chirico <chiricom@google.com> Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
1 parent 69b33aa commit c20fd09

3 files changed

Lines changed: 37 additions & 16 deletions

File tree

NEWS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@
5858

5959
12. `print.data.table()` now truncates long character columns and list-column summaries by default to avoid horizontal console overflow, [#7718](https://github.com/Rdatatable/data.table/issues/7718). When `datatable.prettyprint.char` is `NULL` (the default), the truncation limit is now dynamically calculated based on the available console width. Use `options(datatable.prettyprint.char=Inf)` for the old default behavior (never truncate). Thanks @tdhock for the report and @venom1204 for the fix.
6060

61-
13. `as.IDate()` and `as.ITime()` now preserve names, matching base `as.Date()` behavior, [#7252](https://github.com/Rdatatable/data.table/issues/7252). Thanks @DavisVaughan for the report and @venom1204 for the PR.
61+
13. `rbindlist()` (and therefore the `rbind()` method for `data.table`s) no longer raises an error upon encountering more than approximately 50000 columns in a list entry, [#7793](https://github.com/Rdatatable/data.table/issues/7793). The bug was introduced in `data.table` version 1.18.2.1. Thanks to @rickhelmus for the report and @aitap for the fix.
62+
63+
14. `as.IDate()` and `as.ITime()` now preserve names, matching base `as.Date()` behavior, [#7252](https://github.com/Rdatatable/data.table/issues/7252). Thanks @DavisVaughan for the report and @venom1204 for the PR.
6264

6365
### Notes
6466

inst/tests/tests.Rraw

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21670,14 +21670,29 @@ test(2375.4, print(data.table(x="abcdefghijklmnopqrstuvwxyz")), output="abcdefgh
2167021670
test(2375.5, print(data.table(id=1L, score=99.1, txt="abcdefghijklmnopqrstuvwxyz")), output="abcdefghijklmn...", options=list(width=20, datatable.prettyprint.char=NULL))
2167121671
test(2375.6, print(data.table(x=rep("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e6)), topn=1), output="1000000: ABCDEFGHIJKLM...", options=list(width=25, datatable.prettyprint.char=NULL))
2167221672

21673+
# rbindlist() used to put O(ncol(...)) > R_PPStackSize items on the protect stack, #7793
21674+
# Assuming 50000 as the default protection stack size, although R --max-ppsize=... can increase it to 500000.
21675+
max_ppsize = local({
21676+
arg = grep("^--max-ppsize=", commandArgs(FALSE), value = TRUE)
21677+
if (length(arg)) {
21678+
v = suppressWarnings(as.integer(sub("^--max-ppsize=", "", tail(arg, 1L))))
21679+
if (!is.na(v)) return(v)
21680+
}
21681+
50000L
21682+
})
21683+
x = as.data.table(as.list(1:max_ppsize))
21684+
test(2376, rbindlist(list(x)), x)
21685+
rm(x, max_ppsize)
21686+
2167321687
# #7252 as.IDate()/as.ITime preserve names
21674-
test(2376.01, names(as.IDate(c(a = "2019-01-01"))), "a")
21675-
test(2376.02, names(c(a = as.IDate("2019-01-01"))), "a")
21676-
test(2376.03, names(as.ITime(c(a = "12:00:00"))), "a")
21677-
test(2376.04, names(as.IDate(structure(as.POSIXct("2019-01-01 12:00:00"), names = "a"))), "a")
21678-
test(2376.05, names(as.ITime(structure(3600, names = "a"))), "a")
21679-
test(2376.06, names(as.IDate(c(a = 18000))), "a")
21680-
test(2376.07, names(as.IDate(c(a = 1), origin = "2020-01-01")), "a")
21681-
test(2376.08, names(as.ITime(c(a = "12-00-00"), format = "%H-%M-%S")), "a")
21682-
test(2376.09, names(as.IDate(as.POSIXct(c(a = "2019-01-01"), tz="UTC"))), "a")
21683-
test(2376.10, names(as.IDate(as.POSIXct(c(a = "2019-01-01"), tz="America/New_York"))), "a")
21688+
test(2377.01, names(as.IDate(c(a = "2019-01-01"))), "a")
21689+
test(2377.02, names(c(a = as.IDate("2019-01-01"))), "a")
21690+
test(2377.03, names(as.ITime(c(a = "12:00:00"))), "a")
21691+
test(2377.04, names(as.IDate(structure(as.POSIXct("2019-01-01 12:00:00"), names = "a"))), "a")
21692+
test(2377.05, names(as.ITime(structure(3600, names = "a"))), "a")
21693+
test(2377.06, names(as.IDate(c(a = 18000))), "a")
21694+
test(2377.07, names(as.IDate(c(a = 1), origin = "2020-01-01")), "a")
21695+
test(2377.08, names(as.ITime(c(a = "12-00-00"), format = "%H-%M-%S")), "a")
21696+
test(2377.09, names(as.IDate(as.POSIXct(c(a = "2019-01-01"), tz="UTC"))), "a")
21697+
test(2377.10, names(as.IDate(as.POSIXct(c(a = "2019-01-01"), tz="America/New_York"))), "a")
21698+
test(2376.11, names(as.IDate(c(a = 1), origin = c(b = "2020-01-01"))), "b")

src/rbindlist.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
250250
setAttrib(ans, R_NamesSymbol, ansNames);
251251
if (idcol) {
252252
SET_STRING_ELT(ansNames, 0, STRING_ELT(idcolArg, 0));
253-
SEXP idval, listNames=getAttrib(l, R_NamesSymbol);
253+
SEXP idval, listNames=PROTECT(getAttrib(l, R_NamesSymbol));
254254
if (length(listNames)) {
255255
SET_VECTOR_ELT(ans, 0, idval=allocVector(STRSXP, nrow));
256256
for (int i=0,ansloc=0; i<LENGTH(l); ++i) {
@@ -270,16 +270,19 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
270270
for (int k=0; k<thisnrow; ++k) idvald[ansloc++] = i+1;
271271
}
272272
}
273+
UNPROTECT(1); // listNames
273274
}
274275

275-
SEXP coercedForFactor = NULL;
276+
PROTECT_INDEX IcoercedForFactor;
277+
SEXP coercedForFactor = R_NilValue;
278+
PROTECT_WITH_INDEX(coercedForFactor, &IcoercedForFactor); nprotect++;
276279
for(int j=0; j<ncol; ++j) {
277280
int maxType=LGLSXP; // initialize with LGLSXP for test 2002.3 which has col x NULL in both lists to be filled with NA for #1871
278281
bool factor=false, orderedFactor=false; // ordered factor is class c("ordered","factor"). isFactor() is true when isOrdered() is true.
279282
int longestLen=-1, longestW=-1, longestI=-1; // just for ordered factor; longestLen must be initialized as -1 so that rbind zero-length ordered factor could work #4795
280283
PROTECT_INDEX ILongestLevels;
281284
SEXP longestLevels=R_NilValue; // just for ordered factor
282-
PROTECT_WITH_INDEX(longestLevels, &ILongestLevels); nprotect++;
285+
PROTECT_WITH_INDEX(longestLevels, &ILongestLevels);
283286
bool int64=false, date=false, posixct=false, itime=false, asis=false;
284287
const char *foundName=NULL;
285288
bool anyNotStringOrFactor=false;
@@ -349,7 +352,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
349352
if (factor && anyNotStringOrFactor) {
350353
// in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1);
351354
// some coercing from (likely) integer/numeric to character will be needed. But this coerce can feasibly fail with out-of-memory, so we have to do it up-front
352-
if (coercedForFactor==NULL) { coercedForFactor=PROTECT(allocVector(VECSXP, LENGTH(l))); nprotect++; }
355+
if (coercedForFactor==R_NilValue) REPROTECT(coercedForFactor=allocVector(VECSXP, LENGTH(l)), IcoercedForFactor);
353356
for (int i=0; i<LENGTH(l); ++i) {
354357
SEXP li = VECTOR_ELT(l, i);
355358
int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1); // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
@@ -563,7 +566,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
563566
ansloc += thisnrow;
564567
}
565568
}
569+
UNPROTECT(1); // longestLevels
566570
}
567-
UNPROTECT(nprotect); // ans, ansNames, longestLevels? coercedForFactor?
571+
UNPROTECT(nprotect); // ans, ansNames, coercedForFactor
568572
return(ans);
569573
}

0 commit comments

Comments
 (0)