Skip to content

Commit 97b4536

Browse files
including helper in [.data.table
1 parent f792b15 commit 97b4536

3 files changed

Lines changed: 76 additions & 45 deletions

File tree

R/data.table.R

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,56 +1036,80 @@ replace_dot_alias = function(e) {
10361036
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
10371037
# fix for R-Forge #5190. colsub[[1L]] gave error when it's a symbol.
10381038
# NB: _unary_ '-', not _binary_ '-' (#5826). Test for '!' length-2 should be redundant but low-cost & keeps code concise.
1039-
if (colsub %iscall% c("!", "-") && length(colsub) == 2L) {
1040-
negate_sdcols = TRUE
1041-
colsub = colsub[[2L]]
1042-
} else negate_sdcols = FALSE
1043-
# fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
1044-
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
1045-
if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) {
1046-
# .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231
1047-
.SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
1048-
} else {
1049-
if (colsub %iscall% 'patterns') {
1050-
patterns_list_or_vector = eval_with_cols(colsub, names_x)
1051-
.SDcols = if (is.list(patterns_list_or_vector)) {
1052-
# each pattern gives a new filter condition, intersect the end result
1053-
Reduce(intersect, patterns_list_or_vector)
1039+
try_processSDcols = !(colsub %iscall% c("!", "-") && length(colsub) == 2L) && !(colsub %iscall% ':') && !(colsub %iscall% 'patterns')
1040+
if (try_processSDcols) {
1041+
tryCatch({
1042+
sdcols_result = .processSDcols(
1043+
SDcols_sub = colsub,
1044+
SDcols_missing = FALSE,
1045+
x = x,
1046+
jsub = jsub,
1047+
by = union(bynames, allbyvars),
1048+
enclos = parent.frame()
1049+
)
1050+
if (!is.null(sdcols_result)) {
1051+
ansvars = sdvars = sdcols_result$ansvars
1052+
ansvals = sdcols_result$ansvals
10541053
} else {
1055-
patterns_list_or_vector
1054+
try_processSDcols = FALSE
10561055
}
1056+
}, error = function(e) {
1057+
try_processSDcols <<- FALSE
1058+
})
1059+
}
1060+
if (!try_processSDcols) {
1061+
1062+
if (colsub %iscall% c("!", "-") && length(colsub) == 2L) {
1063+
negate_sdcols = TRUE
1064+
colsub = colsub[[2L]]
1065+
} else negate_sdcols = FALSE
1066+
# fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
1067+
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
1068+
if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) {
1069+
# .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231
1070+
.SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
10571071
} else {
1058-
.SDcols = eval(colsub, parent.frame(), parent.frame())
1059-
# allow filtering via function in .SDcols, #3950
1060-
if (is.function(.SDcols)) {
1061-
.SDcols = lapply(x, .SDcols)
1062-
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
1063-
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
1064-
.SDcols = unlist(.SDcols, use.names = FALSE)
1072+
if (colsub %iscall% 'patterns') {
1073+
patterns_list_or_vector = eval_with_cols(colsub, names_x)
1074+
.SDcols = if (is.list(patterns_list_or_vector)) {
1075+
# each pattern gives a new filter condition, intersect the end result
1076+
Reduce(intersect, patterns_list_or_vector)
1077+
} else {
1078+
patterns_list_or_vector
1079+
}
1080+
} else {
1081+
.SDcols = eval(colsub, parent.frame(), parent.frame())
1082+
# allow filtering via function in .SDcols, #3950
1083+
if (is.function(.SDcols)) {
1084+
.SDcols = lapply(x, .SDcols)
1085+
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
1086+
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
1087+
.SDcols = unlist(.SDcols, use.names = FALSE)
1088+
}
10651089
}
10661090
}
1067-
}
1068-
if (anyNA(.SDcols))
1069-
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
1070-
if (is.logical(.SDcols)) {
1071-
if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x))
1072-
ansvals = which_(.SDcols, !negate_sdcols)
1073-
ansvars = sdvars = names_x[ansvals]
1074-
} else if (is.numeric(.SDcols)) {
1075-
.SDcols = as.integer(.SDcols)
1076-
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
1077-
if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices")
1078-
if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L))
1079-
stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx)))
1080-
ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols]
1081-
ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols
1082-
} else {
1083-
if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names")
1084-
if (!all(idx <- .SDcols %chin% names_x))
1085-
stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx]))
1086-
ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols
1087-
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
1088-
ansvals = chmatch(ansvars, names_x)
1091+
if (anyNA(.SDcols))
1092+
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
1093+
if (is.logical(.SDcols)) {
1094+
if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x))
1095+
ansvals = which_(.SDcols, !negate_sdcols)
1096+
ansvars = sdvars = names_x[ansvals]
1097+
} else if (is.numeric(.SDcols)) {
1098+
.SDcols = as.integer(.SDcols)
1099+
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
1100+
if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices")
1101+
if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L))
1102+
stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx)))
1103+
ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols]
1104+
ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols
1105+
} else {
1106+
if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names")
1107+
if (!all(idx <- .SDcols %chin% names_x))
1108+
stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx]))
1109+
ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols
1110+
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
1111+
ansvals = chmatch(ansvars, names_x)
1112+
}
10891113
}
10901114
}
10911115
# fix for long standing FR/bug, #495 and #484

R/groupingsets.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
3636
} else {
3737
.SDcols = eval(sub.result, enclos)
3838
}
39+
if (anyNA(.SDcols))
40+
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
3941
if (is.character(.SDcols)) {
4042
idx = .SDcols %chin% names_x
4143
if (!all(idx))

inst/tests/tests.Rraw

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11468,6 +11468,11 @@ sets = local({
1146811468
by=c("color","year","status")
1146911469
lapply(length(by):0, function(i) by[0:i])
1147011470
})
11471+
test(1750.25,
11472+
cube(copy(dt), j = lapply(.SD, mean), by = "color", .SDcols = 4, id=TRUE),
11473+
groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount",
11474+
sets = list("color", character(0)), id = TRUE)
11475+
)
1147111476
test(1750.31,
1147211477
rollup(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), id=TRUE),
1147311478
groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), sets=sets, id=TRUE)

0 commit comments

Comments
 (0)