Skip to content

Commit 6a74096

Browse files
committed
polish based on comments in 0602237
1 parent fdd40d0 commit 6a74096

7 files changed

Lines changed: 572 additions & 89 deletions

File tree

RcppTskit/NEWS.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,21 @@ and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html
7575
`pointer` to `xptr`.
7676
- Ensured `TableCollection$tree_sequence()` matches `tskit Python` API:
7777
it now builds indexes on the `TableCollection`, if indexes are not present.
78+
- Expanded `TableCollection$*_table_get_row()` documentation with runnable
79+
examples, Python `__getitem__` references, and explicit note that numeric
80+
row indices are validated and converted to 32-bit integers.
81+
- Aligned low-level row getters with `tskit C` caveats by returning
82+
`mutations = NULL` for `rtsk_site_table_get_row()`, exposing `edge` and
83+
`inherited_state` (as `NULL` for table access) in
84+
`rtsk_mutation_table_get_row()`, and exposing `nodes` in
85+
`rtsk_individual_table_get_row()`.
86+
- Kept high-level `TableCollection$individual_table_get_row()`,
87+
`TableCollection$site_table_get_row()`, and
88+
`TableCollection$mutation_table_get_row()` aligned with `tskit Python`
89+
row-shape semantics.
90+
- Refined integer validators to use a single 32-bit integer-like validation
91+
path with explicit `strict` handling for integer-only versus numeric-allowed
92+
inputs across scalar and optional vector arguments.
7893
- We now use `bit64::integer64` (signed 64 bit integer) instead of `int` aiming
7994
to approach `tsk_size_t` in `tskit C` (unsigned 64 bit integer); in low-level
8095
`rtsk_treeseq_get_num_*()` wrappers and count/metadata-length fields.

RcppTskit/R/Class-TableCollection.R

Lines changed: 96 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ TableCollection <- R6Class(
138138
rtsk_table_collection_get_num_provenances(self$xptr)
139139
},
140140

141-
#' @description Add a row to the provenances table.
141+
#' @description Add a row to the provenance table.
142142
#' @param timestamp character string timestamp for the new provenance.
143143
#' @param record character string record for the new provenance.
144144
#' @details See the \code{tskit Python} equivalent at
@@ -163,10 +163,18 @@ TableCollection <- R6Class(
163163
)
164164
},
165165

166-
#' @description Get one row from the provenances table.
167-
#' @param index integer scalar row index (0-based).
166+
#' @description Get one row from the provenance table.
167+
#' @param index integer or numeric scalar row index (0-based).
168+
#' @details See the \code{tskit Python} equivalent at
169+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.ProvenanceTable.__getitem__}.
170+
#' The function accepts numeric \code{index} for ease of use, but converts
171+
#' it to integer after checking that conversion to 32-bit integer succeeds.
168172
#' @return A named list with fields \code{id}, \code{timestamp},
169173
#' and \code{record}.
174+
#' @examples
175+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
176+
#' tc <- tc_load(ts_file)
177+
#' tc$provenance_table_get_row(0)
170178
provenance_table_get_row = function(index) {
171179
validate_row_index(index)
172180
rtsk_provenance_table_get_row(self$xptr, index = as.integer(index))
@@ -204,9 +212,17 @@ TableCollection <- R6Class(
204212
)
205213
},
206214

207-
#' @description Get one row from the populations table.
208-
#' @param index integer scalar row index (0-based).
215+
#' @description Get one row from the population table.
216+
#' @param index integer or numeric scalar row index (0-based).
217+
#' @details See the \code{tskit Python} equivalent at
218+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.PopulationTable.__getitem__}.
219+
#' The function accepts numeric \code{index} for ease of use, but converts
220+
#' it to integer after checking that conversion to 32-bit integer succeeds.
209221
#' @return A named list with fields \code{id} and \code{metadata}.
222+
#' @examples
223+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
224+
#' tc <- tc_load(ts_file)
225+
#' tc$population_table_get_row(0)
210226
population_table_get_row = function(index) {
211227
validate_row_index(index)
212228
rtsk_population_table_get_row(self$xptr, index = as.integer(index))
@@ -287,11 +303,24 @@ TableCollection <- R6Class(
287303
)
288304
},
289305

290-
#' @description Get one row from the migrations table.
291-
#' @param index integer scalar row index (0-based).
306+
#' @description Get one row from the migration table.
307+
#' @param index integer or numeric scalar row index (0-based).
308+
#' @details See the \code{tskit Python} equivalent at
309+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.MigrationTable.__getitem__}.
310+
#' The function accepts numeric \code{index} for ease of use, but converts
311+
#' it to integer after checking that conversion to 32-bit integer succeeds.
292312
#' @return A named list with fields \code{id}, \code{left}, \code{right},
293313
#' \code{node}, \code{source}, \code{dest}, \code{time}, and
294314
#' \code{metadata}.
315+
#' @examples
316+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
317+
#' tc <- tc_load(ts_file)
318+
#' if (as.integer(tc$num_migrations()) == 0L) {
319+
#' tc$migration_table_add_row(
320+
#' left = 0, right = 1, node = 0L, source = 0L, dest = 0L, time = 1
321+
#' )
322+
#' }
323+
#' tc$migration_table_get_row(0)
295324
migration_table_get_row = function(index) {
296325
validate_row_index(index)
297326
rtsk_migration_table_get_row(self$xptr, index = as.integer(index))
@@ -348,13 +377,22 @@ TableCollection <- R6Class(
348377
)
349378
},
350379

351-
#' @description Get one row from the individuals table.
352-
#' @param index integer scalar row index (0-based).
380+
#' @description Get one row from the individual table.
381+
#' @param index integer or numeric scalar row index (0-based).
382+
#' @details See the \code{tskit Python} equivalent at
383+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.IndividualTable.__getitem__}.
384+
#' The function accepts numeric \code{index} for ease of use, but converts
385+
#' it to integer after checking that conversion to 32-bit integer succeeds.
353386
#' @return A named list with fields \code{id}, \code{flags},
354387
#' \code{location}, \code{parents}, and \code{metadata}.
388+
#' @examples
389+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
390+
#' tc <- tc_load(ts_file)
391+
#' tc$individual_table_get_row(0)
355392
individual_table_get_row = function(index) {
356393
validate_row_index(index)
357-
rtsk_individual_table_get_row(self$xptr, index = as.integer(index))
394+
row <- rtsk_individual_table_get_row(self$xptr, index = as.integer(index))
395+
row[c("id", "flags", "location", "parents", "metadata")]
358396
},
359397

360398
#' @description Get the number of nodes in a table collection.
@@ -432,11 +470,13 @@ TableCollection <- R6Class(
432470
# TODO: Similarly with add_row method on the R side, maybe?
433471
# TODO: ALSO, should we use 0-based or 1-based access to elements of an object!? I think not!?
434472
# And should we allow characters as ID names?
435-
#' @description Get one row from the nodes table.
436-
#' @param index integer scalar row index (0-based).
473+
#' @description Get one row from the node table.
474+
#' @param index integer or numeric scalar row index (0-based).
437475
#' @details In \code{tskit Python}, rows are accessed by indexing a
438476
#' \code{NodeTable}, for example \code{tables.nodes[index]}; see
439477
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.NodeTable}.
478+
#' The function accepts numeric \code{index} for ease of use, but converts
479+
#' it to integer after checking that conversion to 32-bit integer succeeds.
440480
#' @return A named list with fields \code{id}, \code{flags}, \code{time},
441481
#' \code{population}, \code{individual}, and \code{metadata}.
442482
#' @examples
@@ -510,10 +550,18 @@ TableCollection <- R6Class(
510550
)
511551
},
512552

513-
#' @description Get one row from the edges table.
514-
#' @param index integer scalar row index (0-based).
553+
#' @description Get one row from the edge table.
554+
#' @param index integer or numeric scalar row index (0-based).
555+
#' @details See the \code{tskit Python} equivalent at
556+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.EdgeTable.__getitem__}.
557+
#' The function accepts numeric \code{index} for ease of use, but converts
558+
#' it to integer after checking that conversion to 32-bit integer succeeds.
515559
#' @return A named list with fields \code{id}, \code{left}, \code{right},
516560
#' \code{parent}, \code{child}, and \code{metadata}.
561+
#' @examples
562+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
563+
#' tc <- tc_load(ts_file)
564+
#' tc$edge_table_get_row(0)
517565
edge_table_get_row = function(index) {
518566
validate_row_index(index)
519567
rtsk_edge_table_get_row(self$xptr, index = as.integer(index))
@@ -560,13 +608,22 @@ TableCollection <- R6Class(
560608
)
561609
},
562610

563-
#' @description Get one row from the sites table.
564-
#' @param index integer scalar row index (0-based).
611+
#' @description Get one row from the site table.
612+
#' @param index integer or numeric scalar row index (0-based).
613+
#' @details See the \code{tskit Python} equivalent at
614+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.SiteTable.__getitem__}.
615+
#' The function accepts numeric \code{index} for ease of use, but converts
616+
#' it to integer after checking that conversion to 32-bit integer succeeds.
565617
#' @return A named list with fields \code{id}, \code{position},
566618
#' \code{ancestral_state}, and \code{metadata}.
619+
#' @examples
620+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
621+
#' tc <- tc_load(ts_file)
622+
#' tc$site_table_get_row(0)
567623
site_table_get_row = function(index) {
568624
validate_row_index(index)
569-
rtsk_site_table_get_row(self$xptr, index = as.integer(index))
625+
row <- rtsk_site_table_get_row(self$xptr, index = as.integer(index))
626+
row[c("id", "position", "ancestral_state", "metadata")]
570627
},
571628

572629
#' @description Get the number of mutations in a table collection.
@@ -645,14 +702,31 @@ TableCollection <- R6Class(
645702
)
646703
},
647704

648-
#' @description Get one row from the mutations table.
649-
#' @param index integer scalar row index (0-based).
705+
#' @description Get one row from the mutation table.
706+
#' @param index integer or numeric scalar row index (0-based).
707+
#' @details See the \code{tskit Python} equivalent at
708+
#' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.MutationTable.__getitem__}.
709+
#' The function accepts numeric \code{index} for ease of use, but converts
710+
#' it to integer after checking that conversion to 32-bit integer succeeds.
650711
#' @return A named list with fields \code{id}, \code{site}, \code{node},
651-
#' \code{parent}, \code{time}, \code{derived_state}, and
652-
#' \code{metadata}.
712+
#' \code{derived_state}, \code{parent}, \code{metadata}, and
713+
#' \code{time}.
714+
#' @examples
715+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
716+
#' tc <- tc_load(ts_file)
717+
#' tc$mutation_table_get_row(0)
653718
mutation_table_get_row = function(index) {
654719
validate_row_index(index)
655-
rtsk_mutation_table_get_row(self$xptr, index = as.integer(index))
720+
row <- rtsk_mutation_table_get_row(self$xptr, index = as.integer(index))
721+
row[c(
722+
"id",
723+
"site",
724+
"node",
725+
"derived_state",
726+
"parent",
727+
"metadata",
728+
"time"
729+
)]
656730
},
657731

658732
#' @description Get the sequence length.

RcppTskit/R/RcppTskit.R

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -127,45 +127,56 @@ validate_integer_scalar_arg <- function(
127127
allow_null = FALSE,
128128
strict = FALSE
129129
) {
130-
int_min <- -as.numeric(.Machine$integer.max) - 1
131-
int_max <- .Machine$integer.max
132-
133130
if (is.null(value)) {
134131
if (allow_null) {
135132
return(NULL)
136133
}
137134
stop(name, " cannot be NULL.", call. = FALSE)
138135
}
139136

140-
is_type_ok <- if (strict) is.integer(value) else is.numeric(value)
141-
is_valid_scalar <- is_type_ok &&
142-
length(value) == 1L &&
143-
!is.na(value) &&
144-
is.finite(value) &&
145-
value == trunc(value)
146-
147-
in_range <- is_valid_scalar &&
148-
value >= int_min &&
149-
value <= int_max &&
150-
(is.null(minimum) || value >= minimum)
151-
152-
if (!in_range) {
153-
reqs <- if (identical(minimum, 0L)) {
154-
"a non-NA zero or positive integer scalar within 32-bit range"
155-
} else {
156-
"a non-NA integer scalar within 32-bit range"
157-
}
158-
if (!is.null(minimum) && !identical(minimum, 0L)) {
159-
reqs <- paste0(reqs, " (>= ", minimum, ")")
137+
abort <- function() {
138+
msg <- "a non-NA integer scalar within 32-bit range"
139+
if (identical(minimum, 0L)) {
140+
msg <- "a non-NA zero or positive integer scalar within 32-bit range"
141+
} else if (!is.null(minimum)) {
142+
msg <- paste0(msg, " (>= ", minimum, ")")
160143
}
161144
if (allow_null) {
162-
reqs <- paste("NULL or", reqs)
145+
stop(name, " must be NULL or ", msg, "!", call. = FALSE)
146+
}
147+
stop(name, " must be ", msg, "!", call. = FALSE)
148+
}
149+
150+
is_valid_type <- if (strict) {
151+
is.integer(value)
152+
} else {
153+
is.numeric(value)
154+
}
155+
if (!is_valid_type || length(value) != 1L || is.na(value)) {
156+
abort()
157+
}
158+
159+
if (is.integer(value)) {
160+
if (!is.null(minimum) && value < minimum) {
161+
abort()
163162
}
163+
return(value)
164+
}
165+
166+
value_num <- as.numeric(value)
167+
int_min <- -as.numeric(.Machine$integer.max) - 1
168+
int_max <- as.numeric(.Machine$integer.max)
169+
is_whole <- value_num %% 1 == 0
170+
is_within_bounds <- value_num >= int_min && value_num <= int_max
171+
is_above_min <- is.null(minimum) || value_num >= minimum
164172

165-
stop(name, " must be ", reqs, "!", call. = FALSE)
173+
if (
174+
!is.finite(value_num) || !is_whole || !is_within_bounds || !is_above_min
175+
) {
176+
abort()
166177
}
167178

168-
as.integer(value)
179+
return(as.integer(value_num))
169180
}
170181

171182
# INTERNAL
@@ -207,30 +218,37 @@ validate_optional_numeric_vector_arg <- function(value, name) {
207218
# unless \code{strict = TRUE} (in that case this function throws an error).
208219
# @return No return value; called for side effects.
209220
validate_optional_integer_vector_arg <- function(value, name, strict = FALSE) {
210-
int_min <- -(.Machine$integer.max) - 1
211-
int_max <- .Machine$integer.max
221+
int_min <- -as.numeric(.Machine$integer.max) - 1
222+
int_max <- as.numeric(.Machine$integer.max)
223+
212224
if (is.null(value)) {
213225
return(invisible(NULL))
214226
}
227+
215228
if (strict && !is.integer(value)) {
216229
stop(
217230
name,
218231
" must be NULL or an integer vector with no NA values within 32-bit range!"
219232
)
220233
}
234+
235+
value_num <- suppressWarnings(as.numeric(value))
236+
221237
if (
222238
!is.numeric(value) ||
223-
anyNA(value) ||
224-
!all(is.finite(value)) ||
225-
any(value != trunc(value)) ||
226-
any(value < int_min) ||
227-
any(value > int_max)
239+
anyNA(value_num) ||
240+
!all(is.finite(value_num)) ||
241+
any(value_num != trunc(value_num)) ||
242+
any(value_num < int_min) ||
243+
any(value_num > int_max)
228244
) {
229245
stop(
230246
name,
231247
" must be NULL or an integer vector with no NA values within 32-bit range!"
232248
)
233249
}
250+
251+
invisible(as.integer(value_num))
234252
}
235253

236254
# INTERNAL

0 commit comments

Comments
 (0)