diff --git a/R/bmerge.R b/R/bmerge.R index dffca5e44f..90a669b650 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -86,6 +86,20 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } + handle_complex_merge_type = function(dt, col, side_name, other_name) { + # a complex key is only joinable when no element has a non-zero imaginary part (NAs are skipped via na.rm); it is then handed to coerce_col() for complex -> double and the new merge type is returned + has_imaginary = any(Im(dt[[col]]) != 0, na.rm = TRUE) + if (has_imaginary) stopf("Joining on complex numbers with non-zero imaginary part is not supported. Column: %s", side_name) + zero_im_note = gettext(" (complex with zero imaginary part)") + coerce_col(dt, col, "complex", "double", side_name, other_name, from_detail = zero_im_note, verbose = verbose) + "double" + } + if (i_merge_type == "complex") { + i_merge_type = handle_complex_merge_type(i, icol, iname, xname) + } + if (x_merge_type == "complex") { + x_merge_type = handle_complex_merge_type(x, xcol, xname, iname) + } # we check factors first to cater for the case when trying to do rolling joins on factors if (x_merge_type == i_merge_type) { if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d5801b923e..1edd9d8a32 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15746,7 +15746,7 @@ DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15 DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE)) test(2069.32, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) DT = data.table(z = 1i) -test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' is not supported for joining/merging") +test(2069.33, DT[DT, on = 'z'], error = "Joining on complex numbers with non-zero imaginary part is not supported. 
Column: i.z") # forder verbose message when !isReallyReal Date, #1738 date_dbl = as.Date(as.double(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days")), origin="1970-01-01") @@ -17326,7 +17326,7 @@ test(2182.75, melt(data.table(a=10, b=20), measure.vars=list(n="a"), variable.fa measurev = function(cols)cols # user-defined function for computing measure.vars, same name as data.table::measure but user-defined version should be used. test(2183.00001, melt(DT.wide, measure.vars=measurev()), data.table(variable=factor(c("a2","b1","b2")), value=c(2,1,2))) measurev = list("foo", "bar")#measurev below should not use this since it is not a function. -test(2183.00002, melt(DTid, measure.vars=measurev(list(value.name=NULL, num=as.complex), pattern="([ab])([12])")), error="Type 'complex' is not supported for joining/merging") +test(2183.00002, melt(DTid, measure.vars=measurev(list(value.name=NULL, num=as.complex), pattern="([ab])([12])")), error="variable_table does not support column type 'complex' for column 'num'") test(2183.00004, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=NULL), pattern="([ab])([12])"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2))) test(2183.00005, melt(DTid, measure.vars=measurev(list(column=NULL, istr=NULL), pattern="([ab])([12])", multiple.keyword="column"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2)))#same computation but different multiple.keyword iris.dt = data.table(iris) @@ -17349,7 +17349,7 @@ test(2183.00060, melt(DTid, measure.vars=measurev(list(letter=myfac, value.name= measure = function(cols)cols # user-defined function for computing measure.vars, same name as data.table::measure but user-defined version should be used. test(2183.01, melt(DT.wide, measure.vars=measure()), data.table(variable=factor(c("a2","b1","b2")), value=c(2,1,2))) measure = list("foo", "bar")#measure below should not use this since it is not a function. 
-test(2183.02, melt(DTid, measure.vars=measure(value.name, num=as.complex, pattern="([ab])([12])")), error="Type 'complex' is not supported for joining/merging") +test(2183.02, melt(DTid, measure.vars=measure(value.name, num=as.complex, pattern="([ab])([12])")), error="variable_table does not support column type 'complex' for column 'num'") test(2183.03, melt(DTid, measure.vars=structure(list(a=c(NA,"a2"),b=c("b1","b2")), variable_table=data.table(number=as.complex(1:2)))), error="variable_table does not support column type 'complex' for column 'number'") test(2183.04, melt(DTid, measure.vars=measure(value.name, istr, pattern="([ab])([12])"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2))) test(2183.05, melt(DTid, measure.vars=measure(column, istr, pattern="([ab])([12])", multiple.keyword="column"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2)))#same computation but different multiple.keyword @@ -21167,3 +21167,41 @@ test(2317.6, DT1[DF1, on='a', .(d = x.a + i.d)]$d, 5) test(2317.7, DT1[DF2, on='a', e := i.e]$e, 5) test(2317.8, DT1[DF2, on='a', e2 := x.a + i.e]$e2, 6) test(2317.9, DT1[DF2, on='a', .(e = x.a + i.e)]$e, 6) + +#6627 +test(2318.1, { + DT1 = data.table(a = complex(real = 1:3, imaginary = 0), x = letters[1:3]) + DT2 = data.table(a = 2L) + res = DT1[DT2, on = "a"] + identical(res$x, "b") && is.numeric(res$a) && res$a == 2 +}) +test(2318.2, { + DT1 = data.table(a = complex(real = c(1, 2, 3), imaginary = 0), x = letters[1:3]) + DT2 = data.table(a = c(2.0, 3.0)) # double + res = DT1[DT2, on = "a"] + identical(res$x, c("b", "c")) && typeof(res$a) == "double" +}) +test(2318.3, { + DT1 = data.table(a = c(1L, 2L, 3L)) # integer + DT2 = data.table(a = complex(real = c(2, 3), imaginary = 0), y = letters[1:2]) + res = DT1[DT2, on = "a"] + identical(res$y, c("a", "b")) && typeof(res$a) == "integer" +}) +test(2318.4, { + DT1 = data.table(a = complex(real = c(1, 2, 3), imaginary = 0), x = letters[1:3]) + DT2 = 
data.table(a = complex(real = c(2, 3), imaginary = 0)) + res = DT1[DT2, on = "a"] + identical(res$x, c("b", "c")) && typeof(res$a) == "complex" +}) +test(2318.5, { + DT1 = data.table(a = complex(real = c(1, 2), imaginary = c(0, 1)), x = letters[1:2]) + DT2 = data.table(a = 2L) + msg = tryCatch(DT1[DT2, on = "a"], error = function(e) e$message) + grepl("non-zero imaginary part", msg) +}) +test(2318.6, { + DT1 = data.table(a = 2L) + DT2 = data.table(a = complex(real = c(1, 2), imaginary = c(0, 1)), y = c("a", "b")) + msg = tryCatch(DT1[DT2, on = "a"], error = function(e) e$message) + grepl("non-zero imaginary part", msg) +}) \ No newline at end of file diff --git a/man/data.table.Rd b/man/data.table.Rd index 22b8223e95..5bda30376f 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -231,6 +231,12 @@ A \code{data.table} is a \code{list} of vectors, just like a \code{data.frame}. See the \code{see also} section for the several other \emph{methods} that are available for operating on data.tables efficiently. + +Joins on columns of type \code{complex} are supported in a limited form. The join logic is as follows: +\itemize{ + \item If every value in a \code{complex} join column has a zero imaginary part (e.g., \code{10+0i}), the column is treated as \code{double} for the join, so it can match \code{integer} and \code{double} columns. + \item If any value in a \code{complex} join column has a non-zero imaginary part (e.g., \code{10+2i}), the join stops with an error, because there is no defined way to sort or match such a value against a real number. +} } \references{ \url{https://r-datatable.com} (\code{data.table} homepage)\cr @@ -441,6 +447,27 @@ DT[, c(.(y=max(y)), lapply(.SD, min)), by=rleid(v), .SDcols=v:b] # Support guide and links: # https://github.com/Rdatatable/data.table/wiki/Support +# Example: Joining with a system that uses complex IDs (#6627) + +# Case 1: Joining a "clean" set of products. 
+# Here, `products_clean` only contains IDs with zero imaginary parts. +products_clean = data.table(id = c(101+0i, 103+0i), name = c("widget", "thingamajig")) +sales = data.table(product_id = c(101, 103), units_sold = c(50, 75)) + +# This join works because every value in the 'id' column of `products_clean` has a zero imaginary part. +products_clean[sales, on = .(id = product_id), nomatch = 0] + +# Case 2: Joining a list that includes "bad" IDs. +# Here, `products_all` contains an ID with a non-zero imaginary part (102+1i). +products_all = data.table(id = c(101+0i, 102+1i, 103+0i), name = c("widget", "gadget", "thingamajig")) + +# The join fails: the whole 'id' column is checked before any matching, and a single non-zero imaginary part is enough to stop it. +try(products_all[sales, on = .(id = product_id), nomatch = 0]) + +\dontshow{ + rm(products_clean, sales, products_all) +} + \dontrun{ if (interactive()) { vignette(package="data.table") # 9 vignettes