Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiledb
Type: Package
Version: 0.33.0
Version: 0.33.0.1
Title: Modern Database Engine for Complex Data Based on Multi-Dimensional Arrays
Authors@R: c(
person("TileDB, Inc.", role = c("aut", "cph")),
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# tiledb (development version)

## Bug Fixes

* The factor levels are now remapped as expected when updating an array with values that include no additional factor levels (@cgiachalis in [#844](https://github.com/TileDB-Inc/TileDB-R/pull/844))

# tiledb 0.33.0

* This release of the R package builds against [TileDB 2.29.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.29.0), and has also been tested against earlier releases as well as the development version
Expand Down
8 changes: 8 additions & 0 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,14 @@ setMethod(
ase <- tiledb_array_schema_evolution_extend_enumeration(ase, arr, allnames[k], added_enums)
tiledb::tiledb_array_schema_evolution_array_evolve(ase, uri)
value[[k]] <- factor(value[[k]], levels = unique(c(dictionary, added_enums)), ordered = is.ordered(value[[k]]))

} else if (!setequal(new_levels, dictionary) && !is.null(new_levels)) {
# relevel when having a subset of existing levels, e.g "c" out of c("a","b","c")
# See issue: https://github.com/TileDB-Inc/TileDB-R/issues/843
levels <- unique(c(dictionary, new_levels))
is_ordered <- tiledb_attribute_is_ordered_enumeration_ptr(attr, arrptr)
value[[k]] <- factor(value[[k]], levels = levels, ordered = is_ordered)
spdl::trace("[tiledb_array] '[<-' releveled column {} {}", k, is_ordered)
}
}

Expand Down
73 changes: 73 additions & 0 deletions inst/tinytest/test_arrayschemaevolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,79 @@ ref <- rbind(df1, df2)
expect_equivalent(res, ref) # equivalent because of query status attribute


## Regression test for issue 843 (unordered factor, subset of levels):
## the appended rows carry only "B" and "C" out of the stored "A", "B", "C";
## reading back must give all three levels with matching integer codes.
uri <- tempfile()
df1 <- data.frame(id = 1:3, obs = factor(c("A", "B", "C")))
df2 <- data.frame(id = 4:5, obs = factor(c("B", "C")))
fromDataFrame(df1, uri, col_index = 1, tile_domain = c(1L, 5L))
fromDataFrame(df2, uri, col_index = 1, mode = "append")

res <- tiledb_array(uri, return_as = "data.frame")[]
ref <- rbind(df1, df2)

## row count first, then the factor column that was read back
expect_equal(nrow(res), 5)
obs <- res[["obs"]]
expect_equal(nlevels(obs), 3)
expect_equal(levels(obs), c("A", "B", "C"))
expect_equal(as.integer(obs), c(1L, 2L, 3L, 2L, 3L))

## equivalent (not equal) because of query status attribute
expect_equivalent(res, ref)

## Companion case (unordered factor, all current levels): the appended rows
## use every stored level, but in a different order ("B", "C", "A"); the
## levels and integer codes read back must still follow "A", "B", "C".
uri <- tempfile()
df1 <- data.frame(id = 1:3, obs = factor(c("A", "B", "C")))
df2 <- data.frame(id = 4:6, obs = factor(c("B", "C", "A")))
fromDataFrame(df1, uri, col_index = 1, tile_domain = c(1L, 6L))
fromDataFrame(df2, uri, col_index = 1, mode = "append")

res <- tiledb_array(uri, return_as = "data.frame")[]
ref <- rbind(df1, df2)

## row count first, then the factor column that was read back
expect_equal(nrow(res), 6)
obs <- res[["obs"]]
expect_equal(nlevels(obs), 3)
expect_equal(levels(obs), c("A", "B", "C"))
expect_equal(as.integer(obs), c(1L, 2L, 3L, 2L, 3L, 1L))

## equivalent (not equal) because of query status attribute
expect_equivalent(res, ref)


## Regression test for issue 843 (ordered factor, subset of levels):
## the appended rows carry only "B" and "C" out of the stored "A", "B", "C";
## reading back must give all three levels with matching integer codes.
uri <- tempfile()
df1 <- data.frame(id = 1:3, obs = ordered(c("A", "B", "C")))
df2 <- data.frame(id = 4:5, obs = ordered(c("B", "C")))
fromDataFrame(df1, uri, col_index = 1, tile_domain = c(1L, 5L))
fromDataFrame(df2, uri, col_index = 1, mode = "append")

res <- tiledb_array(uri, return_as = "data.frame")[]
ref <- rbind(df1, df2)

## row count first, then the ordered column that was read back
expect_equal(nrow(res), 5)
obs <- res[["obs"]]
expect_equal(nlevels(obs), 3)
expect_equal(levels(obs), c("A", "B", "C"))
expect_equal(as.integer(obs), c(1L, 2L, 3L, 2L, 3L))

## equivalent (not equal) because of query status attribute
expect_equivalent(res, ref)

## Companion case (ordered factor, all current levels): the appended rows
## use every stored level, but in a different order ("B", "C", "A"); the
## levels and integer codes read back must still follow "A", "B", "C".
uri <- tempfile()
df1 <- data.frame(id = 1:3, obs = ordered(c("A", "B", "C")))
df2 <- data.frame(id = 4:6, obs = ordered(c("B", "C", "A")))
fromDataFrame(df1, uri, col_index = 1, tile_domain = c(1L, 6L))
fromDataFrame(df2, uri, col_index = 1, mode = "append")

res <- tiledb_array(uri, return_as = "data.frame")[]
ref <- rbind(df1, df2)

## row count first, then the ordered column that was read back
expect_equal(nrow(res), 6)
obs <- res[["obs"]]
expect_equal(nlevels(obs), 3)
expect_equal(levels(obs), c("A", "B", "C"))
expect_equal(as.integer(obs), c(1L, 2L, 3L, 2L, 3L, 1L))

## equivalent (not equal) because of query status attribute
expect_equivalent(res, ref)

## another test for growing
uri <- tempfile()
df1 <- data.frame(rows=11:14, a=200+0:3, b=factor(rep(c("blue", "ornage"), each=2)))
Expand Down