Skip to content

Commit 01de444

Browse files
Relevel a factor column when adding a subset of current factors (#844)
* Relevel a factor column when adding a subset of current factors * Keep first relevelling before schema evolution * New levels should be not null * Switch `isFALSE` -> `!` Co-authored-by: Paul Hoffman <mojaveazure@users.noreply.github.com> * Update DESCRIPTION and NEWS.md --------- Co-authored-by: Paul Hoffman <mojaveazure@users.noreply.github.com>
1 parent 2b6c0c0 commit 01de444

4 files changed

Lines changed: 88 additions & 1 deletion

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: tiledb
22
Type: Package
3-
Version: 0.33.0
3+
Version: 0.33.0.1
44
Title: Modern Database Engine for Complex Data Based on Multi-Dimensional Arrays
55
Authors@R: c(
66
person("TileDB, Inc.", role = c("aut", "cph")),

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# tiledb (development version)
2+
3+
## Bug Fixes
4+
5+
* Factor levels are now remapped correctly when updating an array with values whose factor levels are only a subset of the existing levels, i.e. when no new levels are added (@cgiachalis in [#844](https://github.com/TileDB-Inc/TileDB-R/pull/844))
6+
17
# tiledb 0.33.0
28

39
* This release of the R package builds against [TileDB 2.29.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.29.0), and has also been tested against earlier releases as well as the development version

R/TileDBArray.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,6 +1498,14 @@ setMethod(
14981498
ase <- tiledb_array_schema_evolution_extend_enumeration(ase, arr, allnames[k], added_enums)
14991499
tiledb::tiledb_array_schema_evolution_array_evolve(ase, uri)
15001500
value[[k]] <- factor(value[[k]], levels = unique(c(dictionary, added_enums)), ordered = is.ordered(value[[k]]))
1501+
1502+
} else if (!setequal(new_levels, dictionary) && !is.null(new_levels)) {
1503+
# relevel when the new values use only a subset of the existing levels, e.g. "c" out of c("a","b","c")
1504+
# See issue: https://github.com/TileDB-Inc/TileDB-R/issues/843
1505+
levels <- unique(c(dictionary, new_levels))
1506+
is_ordered <- tiledb_attribute_is_ordered_enumeration_ptr(attr, arrptr)
1507+
value[[k]] <- factor(value[[k]], levels = levels, ordered = is_ordered)
1508+
spdl::trace("[tiledb_array] '[<-' releveled column {} {}", k, is_ordered)
15011509
}
15021510
}
15031511

inst/tinytest/test_arrayschemaevolution.R

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,79 @@ ref <- rbind(df1, df2)
179179
expect_equivalent(res, ref) # equivalent because of query status attribute
180180

181181

182+
## test that factor levels are re-leveled with new updates
183+
## that use only existing levels (case with a subset of the current levels; fixes issue 843)
184+
uri <- tempfile()
185+
df1 <- data.frame(id = 1:3, obs = factor(c("A", "B", "C")))
186+
fromDataFrame(df1, uri, col_index=1, tile_domain=c(1L, 5L))
187+
df2 <- data.frame(id = 4:5, obs = factor(c("B", "C")))
188+
fromDataFrame(df2, uri, col_index=1, mode="append")
189+
190+
res <- tiledb_array(uri, return_as="data.frame")[]
191+
192+
expect_equal(nrow(res), 5)
193+
expect_equal(nlevels(res[["obs"]]), 3)
194+
expect_equal(levels(res[["obs"]]), c("A", "B", "C"))
195+
expect_equal(as.integer(res[["obs"]]), c(1L, 2L, 3L, 2L, 3L))
196+
197+
ref <- rbind(df1, df2)
198+
expect_equivalent(res, ref) # equivalent because of query status attribute
199+
200+
## test that factor levels are re-leveled with new updates
201+
## that use only existing levels (case with all current levels)
202+
uri <- tempfile()
203+
df1 <- data.frame(id = 1:3, obs = factor(c("A", "B", "C")))
204+
fromDataFrame(df1, uri, col_index=1, tile_domain=c(1L, 6L))
205+
df2 <- data.frame(id = 4:6, obs = factor(c("B", "C", "A")))
206+
fromDataFrame(df2, uri, col_index=1, mode="append")
207+
208+
res <- tiledb_array(uri, return_as="data.frame")[]
209+
210+
expect_equal(nrow(res), 6)
211+
expect_equal(nlevels(res[["obs"]]), 3)
212+
expect_equal(levels(res[["obs"]]), c("A", "B", "C"))
213+
expect_equal(as.integer(res[["obs"]]), c(1L, 2L, 3L, 2L, 3L, 1L))
214+
215+
ref <- rbind(df1, df2)
216+
expect_equivalent(res, ref) # equivalent because of query status attribute
217+
218+
219+
## test that ordered factor levels are re-leveled with new updates
220+
## that use only existing levels (case with a subset of the current levels; fixes issue 843)
221+
uri <- tempfile()
222+
df1 <- data.frame(id = 1:3, obs = ordered(c("A", "B", "C")))
223+
fromDataFrame(df1, uri, col_index=1, tile_domain=c(1L, 5L))
224+
df2 <- data.frame(id = 4:5, obs = ordered(c("B", "C")))
225+
fromDataFrame(df2, uri, col_index=1, mode="append")
226+
227+
res <- tiledb_array(uri, return_as="data.frame")[]
228+
229+
expect_equal(nrow(res), 5)
230+
expect_equal(nlevels(res[["obs"]]), 3)
231+
expect_equal(levels(res[["obs"]]), c("A", "B", "C"))
232+
expect_equal(as.integer(res[["obs"]]), c(1L, 2L, 3L, 2L, 3L))
233+
234+
ref <- rbind(df1, df2)
235+
expect_equivalent(res, ref) # equivalent because of query status attribute
236+
237+
## test that ordered factor levels are re-leveled with new updates
238+
## that use only existing levels (case with all current levels)
239+
uri <- tempfile()
240+
df1 <- data.frame(id = 1:3, obs = ordered(c("A", "B", "C")))
241+
fromDataFrame(df1, uri, col_index=1, tile_domain=c(1L, 6L))
242+
df2 <- data.frame(id = 4:6, obs = ordered(c("B", "C", "A")))
243+
fromDataFrame(df2, uri, col_index=1, mode="append")
244+
245+
res <- tiledb_array(uri, return_as="data.frame")[]
246+
247+
expect_equal(nrow(res), 6)
248+
expect_equal(nlevels(res[["obs"]]), 3)
249+
expect_equal(levels(res[["obs"]]), c("A", "B", "C"))
250+
expect_equal(as.integer(res[["obs"]]), c(1L, 2L, 3L, 2L, 3L, 1L))
251+
252+
ref <- rbind(df1, df2)
253+
expect_equivalent(res, ref) # equivalent because of query status attribute
254+
182255
## another test for growing
183256
uri <- tempfile()
184257
df1 <- data.frame(rows=11:14, a=200+0:3, b=factor(rep(c("blue", "ornage"), each=2)))

0 commit comments

Comments
 (0)