diff --git a/NEWS.md b/NEWS.md index 62b19f5d33..093be68697 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,8 @@ 12. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070). +13. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR. + ### NOTES 1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a0175738c5..dcaa55bce5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2281,15 +2281,38 @@ test(753.1, DT[,c("x1","x2"):=4:6, verbose = TRUE], data.table(a=letters[1:3],x= output = "RHS for item 2 has been duplicated") test(753.2, DT[2,x2:=7L], data.table(a=letters[1:3],x=3:1,x1=4:6,x2=c(4L,7L,6L),key="a")) DT = data.table(a=letters[3:1],x=1:3,y=4:6) -test(754.1, DT[,c("x1","y1","x2"):=list(x,y)], error="Supplied 3 columns to be assigned 2 items. Please see NEWS for v1.12.2") -test(754.2, DT[,c("x1","y1","x2"):=list(x,y,x)], data.table(a=letters[3:1],x=1:3,y=4:6,x1=1:3,y1=4:6,x2=1:3)) +test(754.01, DT[,c("x1","y1","x2"):=list(x,y)], error="Supplied 3 columns to be assigned 2 items. Please see NEWS for v1.12.2") +test(754.02, DT[,c("x1","y1","x2"):=list(x,y,x)], data.table(a=letters[3:1],x=1:3,y=4:6,x1=1:3,y1=4:6,x2=1:3)) # And non-recycling i.e. that a single column copy does copy the column DT = data.table(a=1:3) -test(754.3, DT[,b:=a][1,a:=4L][2,b:=5L], data.table(a=INT(4,2,3),b=INT(1,5,3))) -test(754.4, DT[,b:=a][3,b:=6L], data.table(a=INT(4,2,3),b=INT(4,2,6))) -test(754.5, DT[,a:=as.character(a),verbose=TRUE], output="Direct plonk.*no copy") +test(754.03, DT[, b := a][1, a := 4L][2, b := 5L], data.table(a=INT(4,2,3),b=INT(1,5,3))) +test(754.04, DT[, b := a][3, b := 6L], data.table(a=INT(4,2,3),b=INT(4,2,6))) +test(754.05, DT[, a := as.numeric(a), verbose=TRUE], output="Direct plonk.*no copy") RHS = as.integer(DT$a) -test(754.6, DT[,a:=RHS,verbose=TRUE], output="RHS for item 1 has been duplicated") +test(754.06, DT[, a:= RHS, verbose=TRUE], output="RHS for item 1 has been duplicated") +# Expand ALTREPS in assign.c, #5400 +# String conversion gets deferred +## first, a regression test of R itself -- we want to make sure our own test continues to be useful & testing its intended purpose +test(754.07, {a = 1:10; .Internal(inspect(a)); b = as.character(a); .Internal(inspect(b))}, output = "\\bcompact\\b.*\\bdeferred string conversion\\b") +test(754.08, DT[, a := as.character(a), verbose=TRUE], output="RHS for item 1 has been duplicated") +# Executing the code inside of test expands the ALTREP so we repeat the code +# in order to check the result after a further assignment +DT = data.table(a=1:3) +DT[, b := as.character(a)] +DT[, a := 5L] +test(754.09, DT, data.table(a=5L, b=as.character(1:3))) +# This function returns an ALTREP wrapper if the input is at least length 64 +testFun = function(x) { + x[FALSE] = 1 + x +} +DT = data.table(id=1:64, col1=0, col2=0) +test(754.10, DT[, col1 := testFun(col2), verbose = TRUE], output="RHS for item 1 has been duplicated") +DT = data.table(id=1:64, col1=0, col2=0) +DT[, col1 := testFun(col2)] +DT[, col2 := 999] +test(754.11, DT, data.table(id=1:64, col1=0, col2=999)) +rm(testFun) # Used to test warning on redundant by (#2282) but by=.EACHI has now superseded DT = data.table(a=letters[1:3],b=rep(c("d","e"),each=3),x=1:6,key=c('a', 'b')) diff --git a/src/assign.c b/src/assign.c index 9f133d6f88..b7e13aa4fe 100644 --- a/src/assign.c +++ b/src/assign.c @@ -550,11 +550,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) if (length(rows)==0 && targetlen==vlen && (vlen>0 || nrow==0)) { if ( MAYBE_SHARED(thisvalue) || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list (TYPEOF(values)==VECSXP && i>LENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185. - (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540 + (TYPEOF(values)!=VECSXP && i>0) || // assigning the same values to a second column. Have to ensure a copy #2540 + ALTREP(thisvalue) // Some ALTREP wrappers have survived to here, e.g. #5400 ) { if (verbose) { - Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d\n"), - i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols)); + Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d ALTREP==%d, but then is being plonked. length(values)==%d; length(cols)==%d\n"), + i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), ALTREP(thisvalue), length(values), length(cols)); } thisvalue = copyAsPlain(thisvalue); // PROTECT not needed as assigned as element to protected list below. } else {