Rdatatable · MichaelChirico · Jul 14, 2025 · Jan 12, 2024 · Jan 13, 2024 · Jan 13, 2024
diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd
@@ -195,15 +195,15 @@ R CMD build .
 export GITHUB_PAT="f1c.. github personal access token ..7ad"
 # avoids many too-many-requests in --as-cran's ping-all-URLs step (20 mins) inside the `checking CRAN incoming feasibility...` step.
 # Many thanks to Dirk for the tipoff that setting this env variable solves the problem, #4832.
-R CMD check data.table_1.14.99.tar.gz --as-cran
-R CMD INSTALL data.table_1.14.99.tar.gz --html
+R CMD check data.table_1.15.99.tar.gz --as-cran
+R CMD INSTALL data.table_1.15.99.tar.gz --html
 
 # Test C locale doesn't break test suite (#2771)
 echo LC_ALL=C > ~/.Renviron
 R
 Sys.getlocale()=="C"
 q("no")
-R CMD check data.table_1.14.99.tar.gz
+R CMD check data.table_1.15.99.tar.gz
 rm ~/.Renviron
 
 # Test non-English does not break test.data.table() due to translation of messages; #3039, #630
@@ -220,9 +220,9 @@ q("no")
 
 # User supplied PKG_CFLAGS and PKG_LIBS passed through, #4664
 # Next line from https://mac.r-project.org/openmp/. Should see the arguments passed through and then fail with gcc on linux.
-PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.14.99.tar.gz
+PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.15.99.tar.gz
 # Next line should work on Linux, just using superfluous and duplicate but valid parameters here to see them retained and work 
-PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.14.99.tar.gz
+PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.15.99.tar.gz
 
 R
 remove.packages("xml2")    # we checked the URLs; don't need to do it again (many minutes)
@@ -266,7 +266,7 @@ alias R310=~/build/R-3.1.0/bin/R
 ### END ONE TIME BUILD
 
 cd ~/GitHub/data.table
-R310 CMD INSTALL ./data.table_1.14.99.tar.gz
+R310 CMD INSTALL ./data.table_1.15.99.tar.gz
 R310
 require(data.table)
 test.data.table(script="*.Rraw")
@@ -278,15 +278,15 @@ test.data.table(script="*.Rraw")
 vi ~/.R/Makevars
 # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp
 R CMD build .
-R CMD INSTALL data.table_1.14.99.tar.gz   # ensure that -fopenmp is missing and there are no warnings
+R CMD INSTALL data.table_1.15.99.tar.gz   # ensure that -fopenmp is missing and there are no warnings
 R
 require(data.table)   # observe startup message about no OpenMP detected
 test.data.table()
 q("no")
 vi ~/.R/Makevars
 # revert change above
 R CMD build .
-R CMD check data.table_1.14.99.tar.gz
+R CMD check data.table_1.15.99.tar.gz
 
 
 #####################################################
@@ -341,11 +341,11 @@ alias Rdevel-strict-gcc='~/build/R-devel-strict-gcc/bin/R --vanilla'
 alias Rdevel-strict-clang='~/build/R-devel-strict-clang/bin/R --vanilla'
 
 cd ~/GitHub/data.table
-Rdevel-strict-[gcc|clang] CMD INSTALL data.table_1.14.99.tar.gz
+Rdevel-strict-[gcc|clang] CMD INSTALL data.table_1.15.99.tar.gz
 # Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so they should be
 # passed through to here. However, our configure script seems to get in the way and gets them from {R_HOME}/bin/R
 # So I needed to edit my ~/.R/Makevars to get CFLAGS the way I needed.
-Rdevel-strict-[gcc|clang] CMD check data.table_1.14.99.tar.gz
+Rdevel-strict-[gcc|clang] CMD check data.table_1.15.99.tar.gz
 # Use the (failed) output to get the list of currently needed packages and install them
 Rdevel-strict-[gcc|clang]
 isTRUE(.Machine$sizeof.longdouble==0)  # check noLD is being tested
@@ -354,7 +354,7 @@ install.packages(c("bit64", "bit", "R.utils", "xts", "zoo", "yaml", "knitr", "ma
                  Ncpus=4)
 # Issue #5491 showed that CRAN is running UBSAN on .Rd examples which found an error so we now run full R CMD check
 q("no")
-Rdevel-strict-[gcc|clang] CMD check data.table_1.14.99.tar.gz
+Rdevel-strict-[gcc|clang] CMD check data.table_1.15.99.tar.gz
 # UBSAN errors occur on stderr and don't affect R CMD check result. Made many failed attempts to capture them. So grep for them. 
 find data.table.Rcheck -name "*Rout*" -exec grep -H "runtime error" {} \;
 
@@ -391,7 +391,7 @@ cd R-devel-valgrind
 make
 cd ~/GitHub/data.table
 vi ~/.R/Makevars  # make the -O2 -g line active, for info on source lines with any problems
-Rdevel-valgrind CMD INSTALL data.table_1.14.99.tar.gz
+Rdevel-valgrind CMD INSTALL data.table_1.15.99.tar.gz
 R_DONT_USE_TK=true Rdevel-valgrind -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite,possible --gen-suppressions=all --suppressions=./.dev/valgrind.supp -s"
 # the default for --show-leak-kinds is 'definite,possible' which we're setting explicitly here as a reminder. CRAN uses the default too.
 #   including 'reachable' (as 'all' does) generates too much output from R itself about by-design permanent blocks
@@ -429,7 +429,7 @@ cd ~/build/rchk/trunk
 . ../scripts/config.inc
 . ../scripts/cmpconfig.inc
 vi ~/.R/Makevars   # set CFLAGS=-O0 -g so that rchk can provide source line numbers
-echo 'install.packages("~/GitHub/data.table/data.table_1.14.99.tar.gz",repos=NULL)' | ./bin/R --slave
+echo 'install.packages("~/GitHub/data.table/data.table_1.15.99.tar.gz",repos=NULL)' | ./bin/R --slave
 # objcopy warnings (if any) can be ignored: https://github.com/kalibera/rchk/issues/17#issuecomment-497312504
 . ../scripts/check_package.sh data.table
 cat packages/lib/data.table/libs/*check
@@ -622,10 +622,10 @@ bunzip2 inst/tests/*.Rraw.bz2  # decompress *.Rraw again so as not to commit com
 # 3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on <today>)" on the released heading.
 # 4. Bump minor version in dllVersion() in init.c
 # 5. Bump 3 minor version numbers in Makefile
-# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.99 to 1.15.99 inc below, 1.15.0 to 1.16.0 above, 1.14.0 to 1.15.0 below
+# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.15.99 to 1.16.99 inc below, 1.16.0 to 1.17.0 above, 1.15.0 to 1.16.0 below
 # 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
-# 8. Push to master with this consistent commit message: "1.15.0 on CRAN. Bump to 1.14.10"
-# 9. Take sha from step 8 and run `git tag 1.15.0 96c..sha..d77` then `git push origin 1.15.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
+# 8. Push to master with this consistent commit message: "1.16.0 on CRAN. Bump to 1.16.99"
+# 9. Take sha from step 8 and run `git tag 1.16.0 96c..sha..d77` then `git push origin 1.16.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
 ######
 
 ###### Bump dev for PATCH RELEASE

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -3,12 +3,12 @@
 on:
   push:
     branches:
-      - main
       - master
+      - 1-15-99
   pull_request:
     branches:
-      - main
       - master
+      - 1-15-99
 
 name: R-CMD-check
 

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -1,12 +1,12 @@
 on:
   push:
     branches:
-      - main
       - master
+      - 1-15-99
   pull_request:
     branches:
-      - main
       - master
+      - 1-15-99
 
 name: test-coverage
 

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: data.table
-Version: 1.14.99
+Version: 1.15.99
 Title: Extension of `data.frame`
-Depends: R (>= 3.1.0)
+Depends: R (>= 3.2.0)
 Imports: methods
 Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown
 Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development.

diff --git a/Makefile b/Makefile
@@ -18,7 +18,7 @@ some:
 
 .PHONY: clean
 clean:
-	$(RM) data.table_1.14.99.tar.gz
+	$(RM) data.table_1.15.99.tar.gz
 	$(RM) src/*.o
 	$(RM) src/*.so
 
@@ -28,7 +28,7 @@ build:
 
 .PHONY: install
 install:
-	$(R) CMD INSTALL data.table_1.14.99.tar.gz
+	$(R) CMD INSTALL data.table_1.15.99.tar.gz
 
 .PHONY: uninstall
 uninstall:
@@ -40,7 +40,7 @@ test:
 
 .PHONY: check
 check:
-	_R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.14.99.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error
+	_R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.15.99.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error
 
 .PHONY: revision
 revision:

@@ -51,6 +51,7 @@ S3method(cube, data.table)
 S3method(rollup, data.table)
 export(frollmean)
 export(frollsum)
+export(frollmax)
 export(frollapply)
 export(nafill)
 export(setnafill)
@@ -202,3 +203,4 @@ S3method(format_list_item, default)
 
 export(fdroplevels)
 S3method(droplevels, data.table)
+export(frev)
@@ -1,5 +1,31 @@
 **If you are viewing this file on CRAN, please check [latest news on GitHub](https://github.com/Rdatatable/data.table/blob/master/NEWS.md) where the formatting is also better.**
 
+# data.table [v1.15.99]() (in development)
+
+## NEW FEATURES
+
+1. `print.data.table()` shows empty (`NULL`) list column entries as `[NULL]` for emphasis. Previously they would just print nothing (same as for empty string). Part of [#4198](https://github.com/Rdatatable/data.table/issues/4198). Thanks @sritchie73 for the proposal and fix.
+
+    ```R
+    data.table(a=list(NULL, ""))
+    #         a
+    #    <list>
+    # 1: [NULL]
+    # 2:
+    ```
+
+2. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`.
+
+## NOTES
+
+1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1.
+
+2. The documentation for the `fill` argument in `rbind()` and `rbindlist()` now notes the expected behaviour for missing `list` columns when `fill=TRUE`, namely to use `NULL` (not `NA`), [#4198](https://github.com/Rdatatable/data.table/pull/4198). Thanks @sritchie73 for the proposal and fix.
+
+3. data.table now depends on R 3.2.0 (2015) instead of 3.1.0 (2014). 1.17.0 will likely move to R 3.3.0 (2016). Recent versions of R have good features that we would gradually like to incorporate, and we see next to no usage of these very old versions of R.
+
+4. Erroneous assignment calls in `[` with a trailing comma (e.g. ``DT[, `:=`(a = 1, b = 2,)]``) get a friendlier error since this situation is common during refactoring and easy to miss visually. Thanks @MichaelChirico for the fix.
+
 # data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/29)  (in development)
 
 ## BREAKING CHANGE
@@ -293,6 +319,24 @@
 
 41. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`.
 
+42. `base::rev` gains a fast method `frev(x, copy)` for atomic vectors/list, [#5885](https://github.com/Rdatatable/data.table/issues/5885). Thanks to Benjamin Schwendinger for suggesting and implementing.
+
+    ```R
+    x = sample(2e8)
+    microbenchmark::microbenchmark(
+      base = rev(x),
+      frev_copy = frev(x, copy=TRUE),
+      frev_inplace = frev(x, copy=FALSE),
+      times = 10L,
+      unit = "s"
+    )
+    # Unit: seconds
+    #          expr   min    lq  mean median   uq   max neval cld
+    #          base 1.376 1.397 1.864 1.544 1.917 4.274    10 a  
+    #     frev_copy 0.529 0.591 0.769 0.659 0.727 1.351    10  b 
+    #  frev_inplace 0.064 0.065 0.066 0.066 0.067 0.070    10   c
+    ```
+
 ## BUG FIXES
 
 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
@@ -560,6 +604,8 @@
 
 55. `fread(URL)` with `https:` and `ftps:` could timeout if proxy settings were not guessed right by `curl::curl_download`, [#1686](https://github.com/Rdatatable/data.table/issues/1686). `fread(URL)` now uses `download.file()` as default for downloading files from urls. Thanks to @cderv for the report and Benjamin Schwendinger for the fix.
 
+56. `split.data.table()` works for downstream methods that don't implement `DT[i]` form (i.e., requiring `DT[i, j]` form, like plain `data.frame`s), for example `sf`'s `[.sf`, [#5365](https://github.com/Rdatatable/data.table/issues/5365). Thanks @barryrowlingson for the report and @michaelchirico for the fix.
+
 ## NOTES
 
 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.<type>()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example :

diff --git a/R/cedta.R b/R/cedta.R
@@ -22,6 +22,19 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow
 #   package authors could set it using assignInNamespace and then not revert its value properly which would
 #   cause subsequent calls from other packages to fail.
 
+# nocov start: very hard to reach this within our test suite -- the call stack within a call generated by e.g. knitr
+# for loop, not any(vapply_1b(.)), to allow early exit
+.any_eval_calls_in_stack <- function() {
+  calls = sys.calls()
+  # likelier to be close to the end of the call stack, right?
+  for (ii in length(calls):1) { # rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406.
+    the_call <- calls[[ii]][[1L]]
+    if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE)
+  }
+  return(FALSE)
+}
+# nocov end
+
 # cedta = Calling Environment Data.Table-Aware
 cedta = function(n=2L) {
   # Calling Environment Data Table Aware
@@ -41,7 +54,7 @@ cedta = function(n=2L) {
       (exists("debugger.look", parent.frame(n+1L)) ||
       (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972
     (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply
-    (nsname %chin% cedta.pkgEvalsUserCode && any(vapply_1b(sys.calls(), function(x) is.name(x[[1L]]) && (x[[1L]]=="eval" || x[[1L]]=="evalq")))) ||
+    (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) ||
     nsname %chin% cedta.override ||
     isTRUE(ns$.datatable.aware) ||  # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist
     tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE)  # both ns$.Depends and get(.Depends,ns) are not sufficient

diff --git a/R/data.table.R b/R/data.table.R
@@ -1128,7 +1128,16 @@ replace_dot_alias = function(e) {
         } else {
           # `:=`(c2=1L,c3=2L,...)
           lhs = names(jsub)[-1L]
-          if (any(lhs=="")) stopf("In %s(col1=val1, col2=val2, ...) form, all arguments must be named.", if (root == "let") "let" else "`:=`")
+          if (!all(named_idx <- nzchar(lhs))) {
+            # friendly error for common case: trailing terminal comma
+            n_lhs = length(lhs)
+            root_name <- if (root == "let") "let" else "`:=`"
+            if (!named_idx[n_lhs] && all(named_idx[-n_lhs])) {
+              stopf("In %s(col1=val1, col2=val2, ...) form, all arguments must be named, but the last argument has no name. Did you forget a trailing comma?", root_name)
+            } else {
+              stopf("In %s(col1=val1, col2=val2, ...) form, all arguments must be named, but these arguments lack names: %s.", root_name, brackify(which(!named_idx)))
+            }
+          }
           names(jsub)=""
           jsub[[1L]]=as.name("list")
         }
@@ -2345,25 +2354,10 @@ transform.data.table = function (`_data`, ...)
 # basically transform.data.frame with data.table instead of data.frame, and retains key
 {
   if (!cedta()) return(NextMethod()) # nocov
-  e = eval(substitute(list(...)), `_data`, parent.frame())
-  tags = names(e)
-  inx = chmatch(tags, names(`_data`))
-  matched = !is.na(inx)
-  if (any(matched)) {
-    .Call(C_unlock, `_data`) # fix for #1641, now covered by test 104.2
-    `_data`[,inx[matched]] = e[matched]
-    `_data` = as.data.table(`_data`)
-  }
-  if (!all(matched)) {
-    ans = do.call("data.table", c(list(`_data`), e[!matched]))
-  } else {
-    ans = `_data`
-  }
-  key.cols = key(`_data`)
-  if (!any(tags %chin% key.cols)) {
-    setattr(ans, "sorted", key.cols)
-  }
-  ans
+  `_data` = copy(`_data`)
+  e = eval(substitute(list(...)), `_data`, parent.frame()) 
+  set(`_data`, ,names(e), e)
+  `_data`
 }
 
 subset.data.table = function (x, subset, select, ...)
@@ -2469,7 +2463,8 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
     if (!missing(by))
       stopf("passing 'f' argument together with 'by' is not allowed, use 'by' when split by column in data.table and 'f' when split by external factor")
     # same as split.data.frame - handling all exceptions, factor orders etc, in a single stream of processing was a nightmare in factor and drop consistency
-    return(lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...), function(ind) x[ind]))
+    # be sure to use x[ind, , drop = FALSE], not x[ind], in case downstream methods don't follow the same subsetting semantics (#5365)
+    return(lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...), function(ind) x[ind, , drop = FALSE]))
   }
   if (missing(by)) stopf("Either 'by' or 'f' argument must be supplied")
   # check reserved column names during processing