Skip to content

Commit c443f64

Browse files
committed
Merge PR 132 onto current upstream/main
2 parents 45a4656 + 9563bb9 commit c443f64

12 files changed

Lines changed: 1168 additions & 2 deletions

RcppTskit/NEWS.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html
4040
- Added `rtsk_mutation_table_add_row()` and
4141
`TableCollection$mutation_table_add_row()` to append mutation rows from
4242
\code{R}, mirroring `tsk_mutation_table_add_row()`.
43+
- Added `rtsk_node_table_get_row()` and `TableCollection$node_table_get_row()`
44+
to retrieve node-table rows by 0-based row ID.
45+
- Added `rtsk_table_collection_sort()` and `TableCollection$sort()` to sort
46+
table collections with 0-based `edge_start` semantics.
47+
- Added low-level variant iterators
48+
(`rtsk_variant_iterator_init()`/`rtsk_variant_iterator_next()`) and a
49+
user-facing `TreeSequence$variants()` method to iterate over decoded
50+
site-by-site variants from \code{R}, aligned with `tskit` Python API
51+
semantics for `samples`, `isolated_as_missing`, `alleles`, and
52+
`left`/`right` intervals.
53+
- Added `rtsk_treeseq_get_samples()` and `TreeSequence$samples()` to retrieve
54+
sample node IDs from a tree sequence.
4355
- TODO
4456

4557
### Changed

RcppTskit/R/Class-TableCollection.R

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,47 @@ TableCollection <- R6Class(
103103
TreeSequence$new(xptr = ts_xptr)
104104
},
105105

106+
#' @description Sort the tables of this table collection in place.
#' @param edge_start integer scalar; row of the edge table (0-based) that
#' is passed on as the sort start.
#' @param no_check_integrity logical scalar; if \code{TRUE}, the
#' \code{TSK_NO_CHECK_INTEGRITY} option is passed to \code{tskit C}.
#' @details See the \code{tskit Python} equivalent at
#' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TableCollection.sort}.
#' @return No return value; called for side effects.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(ts_file)
#' tc$sort()
sort = function(edge_start = 0L, no_check_integrity = FALSE) {
  # Shape is checked before coercion so that as.integer() only ever sees
  # a length-one, non-NULL value.
  if (is.null(edge_start) || length(edge_start) != 1L) {
    stop("edge_start must be a non-NA integer scalar (0-based)!")
  }
  start_row <- as.integer(edge_start)
  if (is.na(start_row)) {
    stop("edge_start must be a non-NA integer scalar (0-based)!")
  }
  if (start_row < 0L) {
    stop("edge_start must be >= 0 (0-based)!")
  }

  flag_is_valid <- is.logical(no_check_integrity) &&
    length(no_check_integrity) == 1L &&
    !is.na(no_check_integrity)
  if (!flag_is_valid) {
    stop("no_check_integrity must be TRUE/FALSE!")
  }

  # Translate the R-level flag into the integer options value expected by
  # the low-level sort function; 0L means "no options".
  sort_options <- 0L
  if (no_check_integrity) {
    sort_options <- as.integer(rtsk_const_tsk_no_check_integrity())
  }

  # Kept as the last expression so the (invisible) result of the
  # low-level call is what the method returns.
  rtsk_table_collection_sort(
    tc = self$xptr,
    edge_start = start_row,
    options = sort_options
  )
},
146+
106147
#' @description Get the number of provenances in a table collection.
107148
#' @return A signed 64 bit integer \code{bit64::integer64}.
108149
#' @examples
@@ -218,6 +259,27 @@ TableCollection <- R6Class(
218259
rtsk_table_collection_get_num_nodes(self$xptr)
219260
},
220261

262+
#' @description Retrieve a single row of the nodes table.
#' @param row_id integer scalar node row ID (0-based).
#' @details The ID is 0-based, matching \code{tskit C/Python} semantics.
#' @return A named list with fields \code{id}, \code{flags}, \code{time},
#' \code{population}, \code{individual}, and \code{metadata}.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(ts_file)
#' tc$node_table_get_row(0L)
node_table_get_row = function(row_id) {
  # Shape is checked before coercion so that as.integer() only ever sees
  # a length-one, non-NULL value.
  if (is.null(row_id) || length(row_id) != 1L) {
    stop("row_id must be a non-NA integer scalar (0-based)!")
  }
  node_row <- as.integer(row_id)
  if (is.na(node_row)) {
    stop("row_id must be a non-NA integer scalar (0-based)!")
  }
  if (node_row < 0L) {
    stop("row_id must be >= 0 (0-based)!")
  }
  rtsk_node_table_get_row(self$xptr, row_id = node_row)
},
282+
221283
#' @description Add a row to the nodes table.
222284
#' @param flags integer scalar flags for the new node.
223285
#' @param time numeric scalar time value for the new node.

RcppTskit/R/Class-TreeSequence.R

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,90 @@ TreeSequence <- R6Class(
161161
)
162162
},
163163

164+
#' @description Iterate over sites as decoded variants.
#' @param samples Optional integer vector of sample node IDs to decode.
#' @param isolated_as_missing Logical scalar (\code{TRUE}/\code{FALSE},
#' not \code{NA}); decode isolated samples as missing data (\code{TRUE},
#' default) or as ancestral state (\code{FALSE}).
#' @param alleles Optional character vector of allele states; when set,
#' genotypes are indexed to this allele order.
#' @param impute_missing_data Deprecated alias for
#' \code{!isolated_as_missing}. Supplying it triggers a deprecation
#' warning; supplying it together with a contradicting
#' \code{isolated_as_missing} is an error.
#' @param copy Logical; currently only \code{TRUE} is supported.
#' @param left Left genomic coordinate (inclusive).
#' @param right Right genomic coordinate (exclusive). \code{NULL} means
#' sequence length.
#' @details See the \code{tskit Python} equivalent at
#' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.variants}.
#' @return A simple iterator object (a list of class
#' \code{rtsk_variant_iterator}) with methods \code{next()} and
#' \code{next_variant()} that each return either a variant list or
#' \code{NULL} at end.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' ts <- ts_load(ts_file)
#' it <- ts$variants()
#' v1 <- it$next_variant()
#' v2 <- it$next_variant()
#' is.list(v1)
#' is.list(v2)
variants = function(
  samples = NULL,
  isolated_as_missing = TRUE,
  alleles = NULL,
  impute_missing_data = NULL,
  copy = TRUE,
  left = 0,
  right = NULL
) {
  if (!is.logical(copy) || length(copy) != 1 || is.na(copy)) {
    stop("copy must be TRUE/FALSE!")
  }
  if (!copy) {
    stop("copy = FALSE is not supported yet!")
  }
  if (!is.null(impute_missing_data)) {
    if (
      !is.logical(impute_missing_data) ||
        length(impute_missing_data) != 1 ||
        is.na(impute_missing_data)
    ) {
      stop("impute_missing_data must be TRUE/FALSE or NULL!")
    }
    # The deprecated argument is the logical negation of the current one.
    mapped <- !impute_missing_data
    # Only an explicitly supplied isolated_as_missing can conflict; the
    # default value is simply overridden by the deprecated alias.
    if (
      !missing(isolated_as_missing) &&
        !identical(isolated_as_missing, mapped)
    ) {
      stop(
        "isolated_as_missing and impute_missing_data are inconsistent!"
      )
    }
    warning(
      "impute_missing_data is deprecated; use isolated_as_missing",
      call. = FALSE
    )
    isolated_as_missing <- mapped
  }
  # Validate the value that is actually handed to the compiled code. The
  # C++ entry point declares this argument as a plain bool, which cannot
  # represent NA, so NA and non-scalar input is rejected here in the same
  # way as for copy and impute_missing_data.
  if (
    !is.logical(isolated_as_missing) ||
      length(isolated_as_missing) != 1 ||
      is.na(isolated_as_missing)
  ) {
    stop("isolated_as_missing must be TRUE/FALSE!")
  }

  iter_xptr <- rtsk_variant_iterator_init(
    ts = self$xptr,
    samples = samples,
    isolated_as_missing = isolated_as_missing,
    alleles = alleles,
    left = left,
    # The low-level function uses NA as its "no right bound given" value.
    right = if (is.null(right)) NA_real_ else right
  )

  # Keep the iterator pointer in a dedicated environment that the
  # stepping closure reads on every call.
  env <- new.env(parent = emptyenv())
  env$iter_xptr <- iter_xptr
  next_fun <- function() {
    rtsk_variant_iterator_next(env$iter_xptr)
  }
  # Both names refer to the same closure; `next` is a reserved word in R
  # and therefore needs backticks when used as a name.
  structure(
    list(`next` = next_fun, next_variant = next_fun),
    class = "rtsk_variant_iterator"
  )
},
247+
164248
#' @description Get the number of provenances in a tree sequence.
165249
#' @return A signed 64 bit integer \code{bit64::integer64}.
166250
#' @details See the \code{tskit Python} equivalent at
@@ -221,6 +305,18 @@ TreeSequence <- R6Class(
221305
rtsk_treeseq_get_num_samples(self$xptr)
222306
},
223307

308+
#' @description Retrieve the IDs of the sample nodes in this tree sequence.
#' @return An integer vector with sample node IDs (0-based).
#' @details See the \code{tskit Python} equivalent at
#' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.samples}.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' ts <- ts_load(ts_file)
#' ts$samples()
samples = function() {
  # Delegates to the low-level accessor, passing this object's pointer.
  sample_ids <- rtsk_treeseq_get_samples(self$xptr)
  sample_ids
},
319+
224320
#' @description Get the number of nodes in a tree sequence.
225321
#' @return A signed 64 bit integer \code{bit64::integer64}.
226322
#' @details See the \code{tskit Python} equivalent at

RcppTskit/R/RcppExports.R

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,26 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4+
rtsk_variant_iterator_init <- function(ts, samples = NULL, isolated_as_missing = TRUE, alleles = NULL, left = 0.0, right = NA_real_) {
5+
.Call(`_RcppTskit_rtsk_variant_iterator_init`, ts, samples, isolated_as_missing, alleles, left, right)
6+
}
7+
8+
rtsk_variant_iterator_next <- function(iterator) {
9+
.Call(`_RcppTskit_rtsk_variant_iterator_next`, iterator)
10+
}
11+
12+
test_rtsk_variant_iterator_force_null_first_allele <- function(enabled) {
13+
invisible(.Call(`_RcppTskit_test_rtsk_variant_iterator_force_null_first_allele`, enabled))
14+
}
15+
16+
test_rtsk_variant_iterator_set_site_bounds <- function(iterator, next_site_id, stop_site_id) {
17+
invisible(.Call(`_RcppTskit_test_rtsk_variant_iterator_set_site_bounds`, iterator, next_site_id, stop_site_id))
18+
}
19+
20+
test_variant_site_index_range <- function(start, stop) {
21+
invisible(.Call(`_RcppTskit_test_variant_site_index_range`, start, stop))
22+
}
23+
424
test_validate_options <- function(options, supported) {
525
.Call(`_RcppTskit_test_validate_options`, options, supported)
626
}
@@ -31,6 +51,10 @@ tskit_version <- function() {
3151
.Call(`_RcppTskit_tskit_version`)
3252
}
3353

54+
rtsk_const_tsk_no_check_integrity <- function() {
55+
.Call(`_RcppTskit_rtsk_const_tsk_no_check_integrity`)
56+
}
57+
3458
rtsk_treeseq_load <- function(filename, options = 0L) {
3559
.Call(`_RcppTskit_rtsk_treeseq_load`, filename, options)
3660
}
@@ -75,6 +99,10 @@ rtsk_treeseq_get_num_samples <- function(ts) {
7599
.Call(`_RcppTskit_rtsk_treeseq_get_num_samples`, ts)
76100
}
77101

102+
rtsk_treeseq_get_samples <- function(ts) {
103+
.Call(`_RcppTskit_rtsk_treeseq_get_samples`, ts)
104+
}
105+
78106
rtsk_treeseq_get_num_nodes <- function(ts) {
79107
.Call(`_RcppTskit_rtsk_treeseq_get_num_nodes`, ts)
80108
}
@@ -195,6 +223,10 @@ rtsk_table_collection_drop_index <- function(tc, options = 0L) {
195223
invisible(.Call(`_RcppTskit_rtsk_table_collection_drop_index`, tc, options))
196224
}
197225

226+
rtsk_table_collection_sort <- function(tc, edge_start = 0L, options = 0L) {
227+
invisible(.Call(`_RcppTskit_rtsk_table_collection_sort`, tc, edge_start, options))
228+
}
229+
198230
rtsk_table_collection_summary <- function(tc) {
199231
.Call(`_RcppTskit_rtsk_table_collection_summary`, tc)
200232
}
@@ -211,6 +243,10 @@ rtsk_node_table_add_row <- function(tc, flags = 0L, time = 0, population = -1L,
211243
.Call(`_RcppTskit_rtsk_node_table_add_row`, tc, flags, time, population, individual, metadata)
212244
}
213245

246+
rtsk_node_table_get_row <- function(tc, row_id) {
247+
.Call(`_RcppTskit_rtsk_node_table_get_row`, tc, row_id)
248+
}
249+
214250
rtsk_edge_table_add_row <- function(tc, left, right, parent, child, metadata = NULL) {
215251
.Call(`_RcppTskit_rtsk_edge_table_add_row`, tc, left, right, parent, child, metadata)
216252
}

RcppTskit/inst/include/RcppTskit_public.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// RcppTskit.cpp
1010
Rcpp::IntegerVector kastore_version();
1111
Rcpp::IntegerVector tskit_version();
12+
int rtsk_const_tsk_no_check_integrity();
1213

1314
// sync default options with .cpp!
1415
SEXP rtsk_treeseq_load(const std::string &filename, int options = 0);
@@ -18,12 +19,19 @@ void rtsk_table_collection_dump(SEXP tc, const std::string &filename,
1819
int options = 0);
1920
SEXP rtsk_treeseq_copy_tables(SEXP ts, int options = 0);
2021
SEXP rtsk_treeseq_init(SEXP tc, int options = 0);
22+
SEXP rtsk_variant_iterator_init(
23+
SEXP ts, Rcpp::Nullable<Rcpp::IntegerVector> samples = R_NilValue,
24+
bool isolated_as_missing = true,
25+
Rcpp::Nullable<Rcpp::CharacterVector> alleles = R_NilValue,
26+
double left = 0.0, double right = NA_REAL);
27+
SEXP rtsk_variant_iterator_next(SEXP iterator);
2128

2229
SEXP rtsk_treeseq_get_num_provenances(SEXP ts);
2330
SEXP rtsk_treeseq_get_num_populations(SEXP ts);
2431
SEXP rtsk_treeseq_get_num_migrations(SEXP ts);
2532
SEXP rtsk_treeseq_get_num_individuals(SEXP ts);
2633
SEXP rtsk_treeseq_get_num_samples(SEXP ts);
34+
Rcpp::IntegerVector rtsk_treeseq_get_samples(SEXP ts);
2735
SEXP rtsk_treeseq_get_num_nodes(SEXP ts);
2836
SEXP rtsk_treeseq_get_num_edges(SEXP ts);
2937
SEXP rtsk_treeseq_get_num_trees(SEXP ts);
@@ -55,6 +63,7 @@ Rcpp::String rtsk_table_collection_get_file_uuid(SEXP tc);
5563
bool rtsk_table_collection_has_index(SEXP tc, int options = 0);
5664
void rtsk_table_collection_build_index(SEXP tc, int options = 0);
5765
void rtsk_table_collection_drop_index(SEXP tc, int options = 0);
66+
void rtsk_table_collection_sort(SEXP tc, int edge_start = 0, int options = 0);
5867
Rcpp::List rtsk_table_collection_summary(SEXP tc);
5968
Rcpp::List rtsk_table_collection_metadata_length(SEXP tc);
6069
int rtsk_individual_table_add_row(
@@ -65,6 +74,7 @@ int rtsk_individual_table_add_row(
6574
int rtsk_node_table_add_row(
6675
SEXP tc, int flags = 0, double time = 0, int population = -1,
6776
int individual = -1, Rcpp::Nullable<Rcpp::RawVector> metadata = R_NilValue);
77+
Rcpp::List rtsk_node_table_get_row(SEXP tc, int row_id);
6878
int rtsk_edge_table_add_row(
6979
SEXP tc, double left, double right, int parent, int child,
7080
Rcpp::Nullable<Rcpp::RawVector> metadata = R_NilValue);

0 commit comments

Comments
 (0)