Skip to content

Commit 8ad892b

Browse files
committed
add function .variants()
1 parent 8a64bdf commit 8ad892b

9 files changed

Lines changed: 498 additions & 0 deletions

File tree

RcppTskit/NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html
4040
- Added `rtsk_mutation_table_add_row()` and
4141
`TableCollection$mutation_table_add_row()` to append mutation rows from
4242
\code{R}, mirroring `tsk_mutation_table_add_row()`.
43+
- Added low-level variant iterators
44+
(`rtsk_variant_iterator_init()`/`rtsk_variant_iterator_next()`) and a
45+
user-facing `TreeSequence$variants()` method to iterate over decoded
46+
site-by-site variants from \code{R}, aligned with the `tskit` Python API
47+
semantics for `samples`, `isolated_as_missing`, `alleles`, and
48+
`left`/`right` intervals.
4349
- TODO
4450

4551
### Changed

RcppTskit/R/Class-TreeSequence.R

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,90 @@ TreeSequence <- R6Class(
161161
)
162162
},
163163

164+
#' @description Iterate over sites as decoded variants.
#' @param samples Optional integer vector of sample node IDs to decode.
#' @param isolated_as_missing Logical; decode isolated samples as missing
#'   data (\code{TRUE}, default) or as ancestral state (\code{FALSE}).
#'   Must be a single non-missing \code{TRUE}/\code{FALSE}.
#' @param alleles Optional character vector of allele states; when set,
#'   genotypes are indexed to this allele order.
#' @param impute_missing_data Deprecated alias for
#'   \code{!isolated_as_missing}. Supplying it raises a deprecation
#'   warning; supplying it together with an explicitly given,
#'   contradicting \code{isolated_as_missing} is an error.
#' @param copy Logical; currently only \code{TRUE} is supported.
#' @param left Left genomic coordinate (inclusive).
#' @param right Right genomic coordinate (exclusive). \code{NULL} means
#'   sequence length.
#' @details See the \code{tskit Python} equivalent at
#'   \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.variants}.
#' @return A simple iterator object (a list of class
#'   \code{rtsk_variant_iterator}) with methods \code{next()} and
#'   \code{next_variant()} that each return either a variant list or
#'   \code{NULL} at end. Both names refer to the same function. Because
#'   \code{next} is a reserved word in \code{R}, that entry has to be
#'   called with backticks (\code{it$`next`()}); \code{it$next_variant()}
#'   needs no quoting.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' ts <- ts_load(ts_file)
#' it <- ts$variants()
#' v1 <- it$next_variant()
#' v2 <- it$next_variant()
#' is.list(v1)
#' is.list(v2)
variants = function(
  samples = NULL,
  isolated_as_missing = TRUE,
  alleles = NULL,
  impute_missing_data = NULL,
  copy = TRUE,
  left = 0,
  right = NULL
) {
  if (!is.logical(copy) || length(copy) != 1 || is.na(copy)) {
    stop("copy must be TRUE/FALSE!")
  }
  if (!copy) {
    stop("copy = FALSE is not supported yet!")
  }
  # Checked here, like the other flags, so that NA or non-scalar input is
  # rejected with a clear message instead of being left to the conversion
  # performed when the value is handed to the compiled routine.
  if (
    !is.logical(isolated_as_missing) ||
      length(isolated_as_missing) != 1 ||
      is.na(isolated_as_missing)
  ) {
    stop("isolated_as_missing must be TRUE/FALSE!")
  }
  if (!is.null(impute_missing_data)) {
    if (
      !is.logical(impute_missing_data) ||
        length(impute_missing_data) != 1 ||
        is.na(impute_missing_data)
    ) {
      stop("impute_missing_data must be TRUE/FALSE or NULL!")
    }
    # The deprecated flag has the opposite polarity of isolated_as_missing.
    mapped <- !impute_missing_data
    # missing() tells an explicitly supplied isolated_as_missing apart from
    # its default, so only a real contradiction between the two is an error.
    if (
      !missing(isolated_as_missing) &&
        !identical(isolated_as_missing, mapped)
    ) {
      stop(
        "isolated_as_missing and impute_missing_data are inconsistent!"
      )
    }
    warning(
      "impute_missing_data is deprecated; use isolated_as_missing",
      call. = FALSE
    )
    isolated_as_missing <- mapped
  }

  # The low-level initialiser takes NA_real_ rather than NULL for an
  # unspecified right coordinate.
  iter_xptr <- rtsk_variant_iterator_init(
    ts = self$xptr,
    samples = samples,
    isolated_as_missing = isolated_as_missing,
    alleles = alleles,
    left = left,
    right = if (is.null(right)) NA_real_ else right
  )

  # The closure captures the iterator handle; every call advances it.
  next_fun <- function() {
    rtsk_variant_iterator_next(iter_xptr)
  }
  structure(
    list(`next` = next_fun, next_variant = next_fun),
    class = "rtsk_variant_iterator"
  )
},
247+
164248
#' @description Get the number of provenances in a tree sequence.
165249
#' @return A signed 64 bit integer \code{bit64::integer64}.
166250
#' @details See the \code{tskit Python} equivalent at

RcppTskit/R/RcppExports.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4+
# Low-level wrapper (generated): forwards its six arguments unchanged via
# .Call() to the native routine `_RcppTskit_rtsk_variant_iterator_init` and
# returns that routine's result. TreeSequence$variants() stores the result as
# the iterator handle it later passes to rtsk_variant_iterator_next().
# `right = NA_real_` is the "not supplied" default; how NA is interpreted is
# decided in the compiled code, which is not shown here.
rtsk_variant_iterator_init <- function(ts, samples = NULL, isolated_as_missing = TRUE, alleles = NULL, left = 0.0, right = NA_real_) {
5+
.Call(`_RcppTskit_rtsk_variant_iterator_init`, ts, samples, isolated_as_missing, alleles, left, right)
6+
}
7+
8+
# Low-level wrapper (generated): passes `iterator` via .Call() to the native
# routine `_RcppTskit_rtsk_variant_iterator_next` and returns its result.
# `iterator` is expected to be the value returned by
# rtsk_variant_iterator_init(); per the TreeSequence$variants() documentation
# each call yields a variant list, or NULL once the iterator is exhausted.
rtsk_variant_iterator_next <- function(iterator) {
9+
.Call(`_RcppTskit_rtsk_variant_iterator_next`, iterator)
10+
}
11+
412
test_validate_options <- function(options, supported) {
513
.Call(`_RcppTskit_test_validate_options`, options, supported)
614
}

RcppTskit/inst/include/RcppTskit_public.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ void rtsk_table_collection_dump(SEXP tc, std::string &filename,
1818
int options = 0);
1919
SEXP rtsk_treeseq_copy_tables(SEXP ts, int options = 0);
2020
SEXP rtsk_treeseq_init(SEXP tc, int options = 0);
21+
// Creates a variant iterator for the tree sequence `ts` and returns it as a
// SEXP handle to be passed to rtsk_variant_iterator_next(). `samples` and
// `alleles` are optional (R NULL by default); `right` defaults to NA_REAL,
// which the R-level TreeSequence$variants() method passes when no right
// coordinate was given.
SEXP rtsk_variant_iterator_init(
22+
SEXP ts, Rcpp::Nullable<Rcpp::IntegerVector> samples = R_NilValue,
23+
bool isolated_as_missing = true,
24+
Rcpp::Nullable<Rcpp::CharacterVector> alleles = R_NilValue,
25+
double left = 0.0, double right = NA_REAL);
26+
// Advances `iterator` (a handle obtained from rtsk_variant_iterator_init())
// and returns the next result as a SEXP.
SEXP rtsk_variant_iterator_next(SEXP iterator);
2127

2228
SEXP rtsk_treeseq_get_num_provenances(SEXP ts);
2329
SEXP rtsk_treeseq_get_num_populations(SEXP ts);

RcppTskit/man/TreeSequence.Rd

Lines changed: 78 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

RcppTskit/src/RcppExports.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,33 @@ Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
1010
Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
1111
#endif
1212

13+
// rtsk_variant_iterator_init
// Generated .Call entry point: converts each incoming SEXP argument to the
// declared C++ parameter type through Rcpp::traits::input_parameter, calls
// rtsk_variant_iterator_init() with the converted values and returns the
// result wrapped with Rcpp::wrap().
14+
SEXP rtsk_variant_iterator_init(const SEXP ts, const Rcpp::Nullable<Rcpp::IntegerVector> samples, const bool isolated_as_missing, const Rcpp::Nullable<Rcpp::CharacterVector> alleles, const double left, const double right);
15+
RcppExport SEXP _RcppTskit_rtsk_variant_iterator_init(SEXP tsSEXP, SEXP samplesSEXP, SEXP isolated_as_missingSEXP, SEXP allelesSEXP, SEXP leftSEXP, SEXP rightSEXP) {
16+
BEGIN_RCPP
17+
Rcpp::RObject rcpp_result_gen;
18+
Rcpp::RNGScope rcpp_rngScope_gen;
19+
Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP);
20+
Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerVector> >::type samples(samplesSEXP);
21+
Rcpp::traits::input_parameter< const bool >::type isolated_as_missing(isolated_as_missingSEXP);
22+
Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::CharacterVector> >::type alleles(allelesSEXP);
23+
Rcpp::traits::input_parameter< const double >::type left(leftSEXP);
24+
Rcpp::traits::input_parameter< const double >::type right(rightSEXP);
25+
rcpp_result_gen = Rcpp::wrap(rtsk_variant_iterator_init(ts, samples, isolated_as_missing, alleles, left, right));
26+
return rcpp_result_gen;
27+
END_RCPP
28+
}
29+
// rtsk_variant_iterator_next
// Generated .Call entry point: passes the incoming iterator SEXP through
// Rcpp::traits::input_parameter to rtsk_variant_iterator_next() and returns
// the result wrapped with Rcpp::wrap().
30+
SEXP rtsk_variant_iterator_next(const SEXP iterator);
31+
RcppExport SEXP _RcppTskit_rtsk_variant_iterator_next(SEXP iteratorSEXP) {
32+
BEGIN_RCPP
33+
Rcpp::RObject rcpp_result_gen;
34+
Rcpp::RNGScope rcpp_rngScope_gen;
35+
Rcpp::traits::input_parameter< const SEXP >::type iterator(iteratorSEXP);
36+
rcpp_result_gen = Rcpp::wrap(rtsk_variant_iterator_next(iterator));
37+
return rcpp_result_gen;
38+
END_RCPP
39+
}
1340
// test_validate_options
1441
int test_validate_options(const int options, const int supported);
1542
RcppExport SEXP _RcppTskit_test_validate_options(SEXP optionsSEXP, SEXP supportedSEXP) {
@@ -742,6 +769,8 @@ END_RCPP
742769
}
743770

744771
static const R_CallMethodDef CallEntries[] = {
772+
{"_RcppTskit_rtsk_variant_iterator_init", (DL_FUNC) &_RcppTskit_rtsk_variant_iterator_init, 6},
773+
{"_RcppTskit_rtsk_variant_iterator_next", (DL_FUNC) &_RcppTskit_rtsk_variant_iterator_next, 1},
745774
{"_RcppTskit_test_validate_options", (DL_FUNC) &_RcppTskit_test_validate_options, 2},
746775
{"_RcppTskit_test_rtsk_wrap_tsk_size_t_as_integer64", (DL_FUNC) &_RcppTskit_test_rtsk_wrap_tsk_size_t_as_integer64, 2},
747776
{"_RcppTskit_kastore_version", (DL_FUNC) &_RcppTskit_kastore_version, 0},

0 commit comments

Comments
 (0)