Merge branch 'master' into fcoalesce_types

ben-schwen · ben-schwen · commit a1b10a52921f · 2026-04-28T09:21:01.000+02:00
diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R
@@ -120,6 +120,13 @@ test.list <- atime::atime_test_list(
       file.path("src", "init.c"),
       paste0("R_init_", Package_regex),
       paste0("R_init_", gsub("[.]", "_", new.Package_)))
+    # require C<23 for empty prototype declarations to work, #7689
+    descfile = file.path(new.pkg.path, "DESCRIPTION")
+    desc = as.data.frame(read.dcf(descfile))
+    desc$SystemRequirements = paste(
+      c(desc$SystemRequirements, "USE_C99"),
+      collapse = "; ")
+    write.dcf(desc, descfile)
     # allow compilation on new R versions where 'Calloc' is not defined
     pkg_find_replace(
       file.path("src", "*.c"),
@@ -199,10 +206,11 @@ test.list <- atime::atime_test_list(
         v2 = sample(5L, N, TRUE)
       )
     },
-    expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id),
+    PR7401="0216983c51e03e3f61d5e6f08f4ba0c42cceb22c", # Merge commit (https://github.com/Rdatatable/data.table/commit/0216983c51e03e3f61d5e6f08f4ba0c42cceb22c) of a PR (https://github.com/Rdatatable/data.table/pull/7401) which increased speed and memory usage of this test (https://github.com/Rdatatable/data.table/issues/7687)
     Before = "7a9eaf62ede487625200981018d8692be8c6f134", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/515de90a6068911a148e54343a3503043b8bb87c) in the PR (https://github.com/Rdatatable/data.table/pull/4164/commits) that introduced the regression
     Regression = "c152ced0e5799acee1589910c69c1a2c6586b95d", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/15f0598b9828d3af2eb8ddc9b38e0356f42afe4f) in the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
-    Fixed = "f750448a2efcd258b3aba57136ee6a95ce56b302"), # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
+    Fixed = "f750448a2efcd258b3aba57136ee6a95ce56b302", # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
+    expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id)),
 
   # Issue with sorting again when already sorted, as reported in https://github.com/Rdatatable/data.table/issues/4498
   # Test case adapted from https://github.com/Rdatatable/data.table/pull/4501#issue-625311918 which is the fix PR.
diff --git a/.github/ISSUE_TEMPLATE/revdep_check_failure.yml b/.github/ISSUE_TEMPLATE/revdep_check_failure.yml
@@ -0,0 +1,55 @@
+name: Revdep check failure
+description: Report a reverse dependency (revdep) check failure that should be fixed before the next CRAN release
+title: "PACKAGE check TYPE fails after PR_DESCRIPTION"
+labels: ["revdep"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Use this template to report a new revdep check failure found via
+        [revdep checks](https://github.com/Rdatatable/data.table/wiki/Revdep-checks).
+        Please verify the failure is real before filing; see checklist on that page.
+
+  - type: input
+    id: package
+    attributes:
+      label: Affected package
+      description: Link to the package's development repository on GitHub.
+      placeholder: e.g. https://github.com/NorskRegnesentral/shapr
+    validations:
+      required: true
+
+  - type: textarea
+    id: check-output
+    attributes:
+      label: Failing check output
+      description: |
+        Paste the relevant `R CMD check` output showing the failure.
+        The output can be found in the
+        [Monsoon results](https://rcdata.nau.edu/genomic-ml/data.table-revdeps/analyze/)
+        or from a local revdep check.
+      render: text
+    validations:
+      required: true
+
+  - type: input
+    id: first-bad-commit
+    attributes:
+      label: First bad commit/PR
+      description: |
+        Link to the commit or PR identified by `git bisect` as the
+        first bad commit (from the `first.bad.commit` column in the
+        Monsoon significant differences table).
+      placeholder: "https://github.com/Rdatatable/data.table/pull/1234"
+    validations:
+      required: true
+
+  - type: textarea
+    id: additional-context
+    attributes:
+      label: Additional context
+      description: |
+        Any other relevant information: @mentions of the commit/PR
+        author(s), links to Monsoon result pages, whether the fix
+        should come from data.table or from the revdep package, etc.
+        Minimal reproducible examples (MRE) can also be included here.
diff --git a/.github/workflows/pkgup.yaml b/.github/workflows/pkgup.yaml
@@ -58,10 +58,10 @@ jobs:
           Rscript -e 'tools::write_PACKAGES("public/src/contrib", fields="Revision")'
       - name: upload
         if: github.ref == 'refs/heads/master'
-        uses: actions/upload-pages-artifact@v4
+        uses: actions/upload-pages-artifact@v5
         with:
           path: "public"
       - name: deploy
         if: github.ref == 'refs/heads/master'
         id: deployment
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@v5
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -51,7 +51,7 @@ jobs:
           covr::to_cobertura(cov)
         shell: Rscript {0}
 
-      - uses: codecov/codecov-action@v5
+      - uses: codecov/codecov-action@v6
         with:
           fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
           files: ./cobertura.xml
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -15,7 +15,7 @@ variables:
   RUN_ALL_DATATABLE_TESTS: "yes"  ## run optional tests in CI
   R_REL_VERSION:  "4.5" # only raise when RTOOLS for REL is available
   R_REL_WIN_BIN:  "https://cloud.r-project.org/bin/windows/base/old/4.5.0/R-4.5.0-win.exe"
-  R_DEV_VERSION:  "4.6"
+  R_DEV_VERSION:  "4.7"
   R_DEV_WIN_BIN:  "https://cloud.r-project.org/bin/windows/base/R-devel-win.exe"
   R_OLD_VERSION:  "4.4"
   R_OLD_WIN_BIN:  "https://cloud.r-project.org/bin/windows/base/old/4.4.3/R-4.4.3-win.exe"
diff --git a/NEWS.md b/NEWS.md
@@ -30,6 +30,8 @@
 
 5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR
 
+6. `yearqtr()` and `yearmon()` gain an optional `format` argument, [#7694](https://github.com/Rdatatable/data.table/issues/7694). The default, `format="numeric"`, preserves the original behavior, while `format="character"` formats `yearqtr()` as YYYYQ# (e.g. 2025Q2) and `yearmon()` as YYYYM## (e.g. 2025M02, 2025M10). Thanks to @jan-swissre for the report and @LunaticSage218 for the implementation.
+
 ### BUG FIXES
 
 1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
@@ -48,7 +50,11 @@
 
 8. `frollapply()` no longer produces output longer than the input when the window length is also longer than the input [#7646](https://github.com/Rdatatable/data.table/issues/7646). Thanks to @hadley-johnson for reporting and @jangorecki for the fix.
 
-9. `fcoalesce()` and `setcoalesce()` could fail for inputs during implicit type coercions when items had different but still compatible underlying storage types (e.g., `Date` and `IDate`), #7545 (https://github.com/Rdatatable/data.table/issues/7545). This was particularly unexpected because `Date` objects may be stored as either integer or double. Thanks to @ethanbsmith for the report and @ben-schwen for the fix.
+9. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix.
+
+10. `fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix.
+
+11. `fcoalesce()` and `setcoalesce()` could fail during implicit type coercion when inputs had different but still compatible underlying storage types (e.g., `Date` and `IDate`), [#7545](https://github.com/Rdatatable/data.table/issues/7545). This was particularly unexpected because `Date` objects may be stored as either integer or double. Thanks to @ethanbsmith for the report and @ben-schwen for the fix.
 
 ### Notes
 
@@ -120,15 +126,15 @@
 
 5. Negative and missing values of `n` argument of adaptive rolling functions trigger an error.
 
-### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES 
+### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES
 
 1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
 
 2. The behavior of `week()` will be changed in a future release to calculate weeks sequentially (days 1-7 as week 1), which is a potential breaking change. For now, the current "legacy" behavior, where week numbers advance every 7th day of the year (e.g., day 7 starts week 2), remains the default, and a deprecation warning will be issued when the old and new behaviors differ. Users can control this behavior with the temporary option `options(datatable.week = "...")`:
     *   `"sequential"`: Opt-in to the new, sequential behavior (no warning).
     *   `"legacy"`: Continue using the legacy behavior but suppress the deprecation warning.
 See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. Thanks @MichaelChirico for the report and @venom1204 for the implementation.
-    
+
 ### NEW FEATURES
 
 1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
@@ -407,7 +413,7 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
 9. Fixed incorrect sorting of merges where the first column of a key is a factor with non-`sort()`-ed levels (e.g. `factor(1:2, 2:1)` and it is joined to a character column, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report, Benjamin Schwendinger for the fix, and @MichaelChirico for a follow-up fix caught by revdep testing.
 
 10. Spurious warnings from internal code in `cube()`, `rollup()`, and `groupingsets()` are no longer surfaced to the caller, [#6964](https://github.com/Rdatatable/data.table/issues/6964). Thanks @ferenci-tamas for the report and @venom1204 for the fix.
- 
+
 11. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix.
 
 12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR.
@@ -589,7 +595,7 @@ rowwiseDT(
 3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for concatenation of named lists during grouping, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. Additionally, naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any names specified within the list call. This bug only affected queries with (1) `by=` grouping (2) `getOption("datatable.optimize") >= 1L` and (3) `lapply(.SD, FUN)` in `j`.
 
     While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, note that names which are completely unspecified will still be named positionally, matching the typical behavior in `j` and `data.table()`. according to position in `j` (e.g. `V1`, `V2`).
-    
+
     Thanks to @franknarf1 for reporting and @myoung3 for the PR.
 
     ```r
diff --git a/R/IDateTime.R b/R/IDateTime.R
@@ -365,8 +365,30 @@ isoyear = function(x) as.integer(format(as.IDate(x), "%G"))
 month   = function(x) convertDate(as.IDate(x), "month")
 quarter = function(x) convertDate(as.IDate(x), "quarter")
 year    = function(x) convertDate(as.IDate(x), "year")
-yearmon = function(x) convertDate(as.IDate(x), "yearmon")
-yearqtr = function(x) convertDate(as.IDate(x), "yearqtr")
+yearmon = function(x, format = c("numeric", "character")) {  # year-month of x: numeric value from convertDate(), or a YYYYM## string
+  format = match.arg(format)  # an unspecified format resolves to the first choice, numeric
+  x_as_idate = as.IDate(x)
+  ymon = convertDate(x_as_idate, "yearmon")
+  if (format == "numeric") return(ymon)  # numeric result is returned exactly as convertDate() produced it
+  ans = rep(NA_character_, length(x_as_idate))  # positions with NA dates keep NA_character_
+  ok = !is.na(x_as_idate)
+  yr = floor(ymon[ok])  # integer part is taken as the year
+  mon = round((ymon[ok] - yr) * 12) + 1L  # fraction scaled to a 1-based month; presumably ymon is year + (month-1)/12 -- TODO confirm in convertDate()
+  ans[ok] = sprintf("%dM%02d", as.integer(yr), as.integer(mon))  # month is zero-padded to two digits, e.g. 2025M02
+  ans
+}
+yearqtr = function(x, format = c("numeric", "character")) {  # year-quarter of x: numeric value from convertDate(), or a YYYYQ# string
+  format = match.arg(format)  # an unspecified format resolves to the first choice, numeric
+  x_as_idate = as.IDate(x)
+  yqtr = convertDate(x_as_idate, "yearqtr")
+  if (format == "numeric") return(yqtr)  # numeric result is returned exactly as convertDate() produced it
+  ans = rep(NA_character_, length(x_as_idate))  # positions with NA dates keep NA_character_
+  ok = !is.na(x_as_idate)
+  yr = floor(yqtr[ok])  # integer part is taken as the year
+  qtr = round((yqtr[ok] - yr) * 4) + 1L  # fraction scaled to a 1-based quarter; presumably yqtr is year + (quarter-1)/4 -- TODO confirm in convertDate()
+  ans[ok] = sprintf("%dQ%d", as.integer(yr), as.integer(qtr))  # single-digit quarter, e.g. 2025Q2
+  ans
+}
 
 convertDate = function(x, type) {
   type = match.arg(type, c("yday", "wday", "mday", "week", "month", "quarter", "year", "yearmon", "yearqtr"))
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
@@ -9,11 +9,9 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
   froll = data.table:::froll
 }
 
-exact_NaN = isTRUE(capabilities()["long.double"]) && identical(as.integer(.Machine$longdouble.digits), 64L)
+exact_NaN = identical(NA_real_+0, NA_real_)
 if (!exact_NaN) {
-  cat("\n**** Skipping 8 NaN/NA algo='exact' tests because .Machine$longdouble.digits==", .Machine$longdouble.digits, " (!=64); e.g. under valgrind\n\n", sep="")
-  # for Matt when he runs valgrind it is 53, but 64 when running regular R
-  # froll.c uses long double and appears to require full long double accuracy in the algo='exact'
+  cat("\n**** Skipping 10 NaN/NA algo='exact' tests because NaN payload doesn't propagate through arithmetic operations\n\n")
 }
 
 ## rolling features
@@ -1456,8 +1454,10 @@ test(6001.731, between(frollvar(y, 3)[4L], 0, 1e-7))
 test(6001.732, between(frollsd(y, 3)[4L], 0, 1e-7))
 test(6001.733, frollvar(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
 test(6001.734, frollsd(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
-test(6001.740, frollvar(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.25,NA), c(NA,NA,0.25,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # ensure no nested parallelism in rolling functions #7352
-test(6001.741, frollsd(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.5,NA), c(NA,NA,0.5,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
+if (exact_NaN) {
+  test(6001.740, frollvar(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.25,NA), c(NA,NA,0.25,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # ensure no nested parallelism in rolling functions #7352
+  test(6001.741, frollsd(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.5,NA), c(NA,NA,0.5,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
+}
 test(6001.742, frollvar(c(1.5,2.5,2,1.5), c(3,3)), list(c(NA,NA,0.25,0.25), c(NA,NA,0.25,0.25)), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # no NA - no fallback to exact
 test(6001.743, frollsd(c(1.5,2.5,2,1.5), c(3,3)), list(c(NA,NA,0.5,0.5), c(NA,NA,0.5,0.5)), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
 test(6001.744, frollvar(c(1.5,2.5,2,NA), 3), c(NA,NA,0.25,NA), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # not vectorized - no outer parallelism
diff --git a/inst/tests/frollBatch.Rraw b/inst/tests/frollBatch.Rraw
@@ -9,14 +9,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
   froll = data.table:::froll
 }
 
-exact_NaN = isTRUE(capabilities()["long.double"]) && identical(as.integer(.Machine$longdouble.digits), 64L)
-if (!exact_NaN) {
-  cat("\n**** Skipping 7 NaN/NA algo='exact' tests because .Machine$longdouble.digits==", .Machine$longdouble.digits, " (!=64); e.g. under valgrind\n\n", sep="")
-  # for Matt when he runs valgrind it is 53, but 64 when running regular R
-  # froll.c uses long double and appears to require full long double accuracy in the algo='exact'
-}
-
-
 ## batch validation
 set.seed(108)
 makeNA = function(x, ratio=0.1, nf=FALSE) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
diff --git a/man/IDateTime.Rd b/man/IDateTime.Rd
diff --git a/src/fread.c b/src/fread.c