Skip to content

Commit a1b10a5

Browse files
committed
Merge branch 'master' into fcoalesce_types
2 parents 3dbc611 + 20f3be0 commit a1b10a5

12 files changed

Lines changed: 172 additions & 39 deletions

File tree

.ci/atime/tests.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ test.list <- atime::atime_test_list(
120120
file.path("src", "init.c"),
121121
paste0("R_init_", Package_regex),
122122
paste0("R_init_", gsub("[.]", "_", new.Package_)))
123+
# require C<23 for empty prototype declarations to work, #7689
124+
descfile = file.path(new.pkg.path, "DESCRIPTION")
125+
desc = as.data.frame(read.dcf(descfile))
126+
desc$SystemRequirements = paste(
127+
c(desc$SystemRequirements, "USE_C99"),
128+
collapse = "; ")
129+
write.dcf(desc, descfile)
123130
# allow compilation on new R versions where 'Calloc' is not defined
124131
pkg_find_replace(
125132
file.path("src", "*.c"),
@@ -199,10 +206,11 @@ test.list <- atime::atime_test_list(
199206
v2 = sample(5L, N, TRUE)
200207
)
201208
},
202-
expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id),
209+
PR7401="0216983c51e03e3f61d5e6f08f4ba0c42cceb22c", # Merge commit (https://github.com/Rdatatable/data.table/commit/0216983c51e03e3f61d5e6f08f4ba0c42cceb22c) of a PR (https://github.com/Rdatatable/data.table/pull/7401) which increased speed and memory usage of this test (https://github.com/Rdatatable/data.table/issues/7687)
203210
Before = "7a9eaf62ede487625200981018d8692be8c6f134", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/515de90a6068911a148e54343a3503043b8bb87c) in the PR (https://github.com/Rdatatable/data.table/pull/4164/commits) that introduced the regression
204211
Regression = "c152ced0e5799acee1589910c69c1a2c6586b95d", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/15f0598b9828d3af2eb8ddc9b38e0356f42afe4f) in the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
205-
Fixed = "f750448a2efcd258b3aba57136ee6a95ce56b302"), # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
212+
Fixed = "f750448a2efcd258b3aba57136ee6a95ce56b302", # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
213+
expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id)),
206214

207215
# Issue with sorting again when already sorted, as reported in https://github.com/Rdatatable/data.table/issues/4498
208216
# Test case adapted from https://github.com/Rdatatable/data.table/pull/4501#issue-625311918 which is the fix PR.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
name: Revdep check failure
2+
description: Report a reverse dependency (revdep) check failure that should be fixed before next CRAN release
3+
title: "PACKAGE check TYPE fails after PR_DESCRIPTION"
4+
labels: ["revdep"]
5+
body:
6+
- type: markdown
7+
attributes:
8+
value: |
9+
Use this template to report a new revdep check failure found via
10+
[revdep checks](https://github.com/Rdatatable/data.table/wiki/Revdep-checks).
11+
Please verify the failure is real before filing; see checklist on that page.
12+
13+
- type: input
14+
id: package
15+
attributes:
16+
label: Affected package
17+
description: Link to package dev on github.
18+
placeholder: e.g. https://github.com/NorskRegnesentral/shapr
19+
validations:
20+
required: true
21+
22+
- type: textarea
23+
id: check-output
24+
attributes:
25+
label: Failing check output
26+
description: |
27+
Paste the relevant `R CMD check` output showing the failure.
28+
The output can be found in the
29+
[Monsoon results](https://rcdata.nau.edu/genomic-ml/data.table-revdeps/analyze/)
30+
or from a local revdep check.
31+
render: text
32+
validations:
33+
required: true
34+
35+
- type: input
36+
id: first-bad-commit
37+
attributes:
38+
label: First bad commit/PR
39+
description: |
40+
Link to the commit or PR identified by `git bisect` as the
41+
first bad commit (from the `first.bad.commit` column in the
42+
Monsoon significant differences table).
43+
placeholder: "https://github.com/Rdatatable/data.table/pull/1234"
44+
validations:
45+
required: true
46+
47+
- type: textarea
48+
id: additional-context
49+
attributes:
50+
label: Additional context
51+
description: |
52+
Any other relevant information: @mentions of the commit/PR
53+
author(s), links to Monsoon result pages, whether the fix
54+
should come from data.table or from the revdep package, etc.
55+
Minimal reproducible examples (MRE) can also be included here.

.github/workflows/pkgup.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ jobs:
5858
Rscript -e 'tools::write_PACKAGES("public/src/contrib", fields="Revision")'
5959
- name: upload
6060
if: github.ref == 'refs/heads/master'
61-
uses: actions/upload-pages-artifact@v4
61+
uses: actions/upload-pages-artifact@v5
6262
with:
6363
path: "public"
6464
- name: deploy
6565
if: github.ref == 'refs/heads/master'
6666
id: deployment
67-
uses: actions/deploy-pages@v4
67+
uses: actions/deploy-pages@v5

.github/workflows/test-coverage.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
covr::to_cobertura(cov)
5252
shell: Rscript {0}
5353

54-
- uses: codecov/codecov-action@v5
54+
- uses: codecov/codecov-action@v6
5555
with:
5656
fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
5757
files: ./cobertura.xml

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ variables:
1515
RUN_ALL_DATATABLE_TESTS: "yes" ## run optional tests in CI
1616
R_REL_VERSION: "4.5" # only raise when RTOOLS for REL is available
1717
R_REL_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.5.0/R-4.5.0-win.exe"
18-
R_DEV_VERSION: "4.6"
18+
R_DEV_VERSION: "4.7"
1919
R_DEV_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/R-devel-win.exe"
2020
R_OLD_VERSION: "4.4"
2121
R_OLD_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.4.3/R-4.4.3-win.exe"

NEWS.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR.
3232

33+
6. `yearqtr()` and `yearmon()` now gain an optional format specifier, [#7694](https://github.com/Rdatatable/data.table/issues/7694). 'numeric' is the default, which preserves the original behavior, but 'character' formats `yearqtr()` as YYYYQ# (e.g. 2025Q2) and `yearmon()` as YYYYM## (e.g. 2025M02, 2025M10). Thanks to @jan-swissre for the report and @LunaticSage218 for the implementation.
34+
3335
### BUG FIXES
3436

3537
1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
@@ -48,7 +50,11 @@
4850

4951
8. `frollapply()` no longer produces output longer than the input when the window length is also longer than the input [#7646](https://github.com/Rdatatable/data.table/issues/7646). Thanks to @hadley-johnson for reporting and @jangorecki for the fix.
5052

51-
9. `fcoalesce()` and `setcoalesce()` could fail for inputs during implicit type coercions when items had different but still compatible underlying storage types (e.g., `Date` and `IDate`), #7545 (https://github.com/Rdatatable/data.table/issues/7545). This was particularly unexpected because `Date` objects may be stored as either integer or double. Thanks to @ethanbsmith for the report and @ben-schwen for the fix.
53+
9. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix.
54+
55+
10. `fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix.
56+
57+
11. `fcoalesce()` and `setcoalesce()` could fail for inputs during implicit type coercions when items had different but still compatible underlying storage types (e.g., `Date` and `IDate`), [#7545](https://github.com/Rdatatable/data.table/issues/7545). This was particularly unexpected because `Date` objects may be stored as either integer or double. Thanks to @ethanbsmith for the report and @ben-schwen for the fix.
5258

5359
### Notes
5460

@@ -120,15 +126,15 @@
120126
121127
5. Negative and missing values of `n` argument of adaptive rolling functions trigger an error.
122128
123-
### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES
129+
### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES
124130
125131
1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
126132
127133
2. The behavior of `week()` will be changed in a future release to calculate weeks sequentially (days 1-7 as week 1), which is a potential breaking change. For now, the current "legacy" behavior, where week numbers advance every 7th day of the year (e.g., day 7 starts week 2), remains the default, and a deprecation warning will be issued when the old and new behaviors differ. Users can control this behavior with the temporary option `options(datatable.week = "...")`:
128134
* `"sequential"`: Opt-in to the new, sequential behavior (no warning).
129135
* `"legacy"`: Continue using the legacy behavior but suppress the deprecation warning.
130136
See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. Thanks @MichaelChirico for the report and @venom1204 for the implementation.
131-
137+
132138
### NEW FEATURES
133139
134140
1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
@@ -407,7 +413,7 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
407413
9. Fixed incorrect sorting of merges where the first column of a key is a factor with non-`sort()`-ed levels (e.g. `factor(1:2, 2:1)`) and it is joined to a character column, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report, Benjamin Schwendinger for the fix, and @MichaelChirico for a follow-up fix caught by revdep testing.
408414
409415
10. Spurious warnings from internal code in `cube()`, `rollup()`, and `groupingsets()` are no longer surfaced to the caller, [#6964](https://github.com/Rdatatable/data.table/issues/6964). Thanks @ferenci-tamas for the report and @venom1204 for the fix.
410-
416+
411417
11. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix.
412418
413419
12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR.
@@ -589,7 +595,7 @@ rowwiseDT(
589595
3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for concatenation of named lists during grouping, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. Additionally, naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any names specified within the list call. This bug only affected queries with (1) `by=` grouping (2) `getOption("datatable.optimize") >= 1L` and (3) `lapply(.SD, FUN)` in `j`.
590596
591597
While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, note that names which are completely unspecified will still be named according to position in `j` (e.g. `V1`, `V2`), matching the typical behavior in `j` and `data.table()`.
592-
598+
593599
Thanks to @franknarf1 for reporting and @myoung3 for the PR.
594600
595601
```r

R/IDateTime.R

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,30 @@ isoyear = function(x) as.integer(format(as.IDate(x), "%G"))
365365
month = function(x) convertDate(as.IDate(x), "month")
366366
quarter = function(x) convertDate(as.IDate(x), "quarter")
367367
year = function(x) convertDate(as.IDate(x), "year")
368-
yearmon = function(x) convertDate(as.IDate(x), "yearmon")
369-
yearqtr = function(x) convertDate(as.IDate(x), "yearqtr")
368+
# Year-month of `x`, after coercing `x` with as.IDate().
#   format = "numeric" (default): the value of convertDate(<IDate>, "yearmon"), returned as-is.
#   format = "character": strings built with "%dM%02d" (e.g. 2025M02, 2025M10);
#     elements whose coerced date is NA come back as NA_character_.
yearmon = function(x, format = c("numeric", "character")) {
  format = match.arg(format)
  dates = as.IDate(x)
  ym_num = convertDate(dates, "yearmon")
  if (format != "character") return(ym_num)
  # Only non-missing dates are formatted, so NA never reaches sprintf() and cannot turn into the text "NA".
  present = which(!is.na(dates))
  ym_present = ym_num[present]
  year_part = floor(ym_present)
  # The month is recovered from the fractional part: fraction * 12 is rounded, then shifted to start at 1.
  month_part = round((ym_present - year_part) * 12) + 1L
  out = rep(NA_character_, length(dates))
  out[present] = sprintf("%dM%02d", as.integer(year_part), as.integer(month_part))
  out
}
380+
# Year-quarter of `x`, after coercing `x` with as.IDate().
#   format = "numeric" (default): the value of convertDate(<IDate>, "yearqtr"), returned as-is.
#   format = "character": strings built with "%dQ%d" (e.g. 2025Q2);
#     elements whose coerced date is NA come back as NA_character_.
yearqtr = function(x, format = c("numeric", "character")) {
  format = match.arg(format)
  dates = as.IDate(x)
  yq_num = convertDate(dates, "yearqtr")
  if (format != "character") return(yq_num)
  # Only non-missing dates are formatted, so NA never reaches sprintf() and cannot turn into the text "NA".
  present = which(!is.na(dates))
  yq_present = yq_num[present]
  year_part = floor(yq_present)
  # The quarter is recovered from the fractional part: fraction * 4 is rounded, then shifted to start at 1.
  quarter_part = round((yq_present - year_part) * 4) + 1L
  out = rep(NA_character_, length(dates))
  out[present] = sprintf("%dQ%d", as.integer(year_part), as.integer(quarter_part))
  out
}
370392

371393
convertDate = function(x, type) {
372394
type = match.arg(type, c("yday", "wday", "mday", "week", "month", "quarter", "year", "yearmon", "yearqtr"))

inst/tests/froll.Rraw

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
99
froll = data.table:::froll
1010
}
1111

12-
exact_NaN = isTRUE(capabilities()["long.double"]) && identical(as.integer(.Machine$longdouble.digits), 64L)
12+
exact_NaN = identical(NA_real_+0, NA_real_)
1313
if (!exact_NaN) {
14-
cat("\n**** Skipping 8 NaN/NA algo='exact' tests because .Machine$longdouble.digits==", .Machine$longdouble.digits, " (!=64); e.g. under valgrind\n\n", sep="")
15-
# for Matt when he runs valgrind it is 53, but 64 when running regular R
16-
# froll.c uses long double and appears to require full long double accuracy in the algo='exact'
14+
cat("\n**** Skipping 10 NaN/NA algo='exact' tests because NaN payload doesn't propagate through arithmetic operations\n\n")
1715
}
1816

1917
## rolling features
@@ -1456,8 +1454,10 @@ test(6001.731, between(frollvar(y, 3)[4L], 0, 1e-7))
14561454
test(6001.732, between(frollsd(y, 3)[4L], 0, 1e-7))
14571455
test(6001.733, frollvar(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
14581456
test(6001.734, frollsd(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
1459-
test(6001.740, frollvar(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.25,NA), c(NA,NA,0.25,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # ensure no nested parallelism in rolling functions #7352
1460-
test(6001.741, frollsd(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.5,NA), c(NA,NA,0.5,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
1457+
if (exact_NaN) {
1458+
test(6001.740, frollvar(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.25,NA), c(NA,NA,0.25,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # ensure no nested parallelism in rolling functions #7352
1459+
test(6001.741, frollsd(c(1.5,2.5,2,NA), c(3,3)), list(c(NA,NA,0.5,NA), c(NA,NA,0.5,NA)), output="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
1460+
}
14611461
test(6001.742, frollvar(c(1.5,2.5,2,1.5), c(3,3)), list(c(NA,NA,0.25,0.25), c(NA,NA,0.25,0.25)), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # no NA - no fallback to exact
14621462
test(6001.743, frollsd(c(1.5,2.5,2,1.5), c(3,3)), list(c(NA,NA,0.5,0.5), c(NA,NA,0.5,0.5)), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE))
14631463
test(6001.744, frollvar(c(1.5,2.5,2,NA), 3), c(NA,NA,0.25,NA), notOutput="running sequentially, because outer parallelism has been used", options=c(datatable.verbose=TRUE)) # not vectorized - no outer parallelism

inst/tests/frollBatch.Rraw

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
99
froll = data.table:::froll
1010
}
1111

12-
exact_NaN = isTRUE(capabilities()["long.double"]) && identical(as.integer(.Machine$longdouble.digits), 64L)
13-
if (!exact_NaN) {
14-
cat("\n**** Skipping 7 NaN/NA algo='exact' tests because .Machine$longdouble.digits==", .Machine$longdouble.digits, " (!=64); e.g. under valgrind\n\n", sep="")
15-
# for Matt when he runs valgrind it is 53, but 64 when running regular R
16-
# froll.c uses long double and appears to require full long double accuracy in the algo='exact'
17-
}
18-
19-
2012
## batch validation
2113
set.seed(108)
2214
makeNA = function(x, ratio=0.1, nf=FALSE) {

0 commit comments

Comments
 (0)