-
Notifications
You must be signed in to change notification settings - Fork 5
Adding variants iterator and other functions #132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
c309e3a
8bb475d
094183a
ccaf32a
fef7582
db0b90b
e33fc2d
352d7e5
b343f23
0602237
fdd40d0
6a74096
7ec71f3
67f3173
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -161,6 +161,90 @@ TreeSequence <- R6Class( | |
| ) | ||
| }, | ||
|
|
||
| #' @description Iterate over sites as decoded variants. | ||
| #' @param samples Optional integer vector of sample node IDs to decode. | ||
| #' @param isolated_as_missing Logical; decode isolated samples as missing | ||
| #' data (\code{TRUE}, default) or as ancestral state (\code{FALSE}). | ||
| #' @param alleles Optional character vector of allele states; when set, | ||
| #' genotypes are indexed to this allele order. | ||
| #' @param impute_missing_data Deprecated alias for | ||
| #' \code{!isolated_as_missing}. | ||
| #' @param copy Logical; currently only \code{TRUE} is supported. | ||
| #' @param left Left genomic coordinate (inclusive). | ||
| #' @param right Right genomic coordinate (exclusive). \code{NULL} means | ||
| #' sequence length. | ||
| #' @details See the \code{tskit Python} equivalent at | ||
| #' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.variants}. | ||
| #' @return A simple iterator object with methods \code{next()} and | ||
| #' \code{next_variant()} that each return either a variant list or | ||
| #' \code{NULL} at end. | ||
| #' @examples | ||
| #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") | ||
| #' ts <- ts_load(ts_file) | ||
| #' it <- ts$variants() | ||
| #' v1 <- it$next_variant() | ||
| #' v2 <- it$next_variant() | ||
| #' is.list(v1) | ||
| #' is.list(v2) | ||
| variants = function( | ||
| samples = NULL, | ||
| isolated_as_missing = TRUE, | ||
| alleles = NULL, | ||
| impute_missing_data = NULL, | ||
| copy = TRUE, | ||
| left = 0, | ||
| right = NULL | ||
| ) { | ||
| if (!is.logical(copy) || length(copy) != 1 || is.na(copy)) { | ||
| stop("copy must be TRUE/FALSE!") | ||
| } | ||
| if (!copy) { | ||
| stop("copy = FALSE is not supported yet!") | ||
| } | ||
| if (!is.null(impute_missing_data)) { | ||
| if ( | ||
| !is.logical(impute_missing_data) || | ||
| length(impute_missing_data) != 1 || | ||
| is.na(impute_missing_data) | ||
| ) { | ||
| stop("impute_missing_data must be TRUE/FALSE or NULL!") | ||
| } | ||
| mapped <- !impute_missing_data | ||
| if ( | ||
| !missing(isolated_as_missing) && | ||
| !identical(isolated_as_missing, mapped) | ||
| ) { | ||
| stop( | ||
| "isolated_as_missing and impute_missing_data are inconsistent!" | ||
| ) | ||
| } | ||
| warning( | ||
| "impute_missing_data is deprecated; use isolated_as_missing", | ||
| call. = FALSE | ||
| ) | ||
| isolated_as_missing <- mapped | ||
| } | ||
|
|
||
| iter_xptr <- rtsk_variant_iterator_init( | ||
| ts = self$xptr, | ||
| samples = samples, | ||
| isolated_as_missing = isolated_as_missing, | ||
| alleles = alleles, | ||
| left = left, | ||
| right = if (is.null(right)) NA_real_ else right | ||
| ) | ||
|
|
||
| env <- new.env(parent = emptyenv()) | ||
| env$iter_xptr <- iter_xptr | ||
| next_fun <- function() { | ||
| rtsk_variant_iterator_next(env$iter_xptr) | ||
| } | ||
| structure( | ||
| list(`next` = next_fun, next_variant = next_fun), | ||
| class = "rtsk_variant_iterator" | ||
| ) | ||
| }, | ||
|
|
||
| #' @description Get the number of provenances in a tree sequence. | ||
| #' @return A signed 64 bit integer \code{bit64::integer64}. | ||
| #' @details See the \code{tskit Python} equivalent at | ||
|
|
@@ -221,6 +305,18 @@ TreeSequence <- R6Class( | |
| rtsk_treeseq_get_num_samples(self$xptr) | ||
| }, | ||
|
|
||
| #' @description Get sample node IDs in this tree sequence. | ||
| #' @return An integer vector with sample node IDs (0-based). | ||
| #' @details See the \code{tskit Python} equivalent at | ||
| #' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.samples}. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The Python method has
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that this implementation was driven by the C method, which only accepts TreeSequence, but let's ensure the R API matches the Python API - looking at the source of https://tskit.dev/tskit/docs/latest/_modules/tskit/trees.html#TreeSequence.samples I see that these args are all handled on the Python side (first all sample IDs are obtained by calling the low-level C function and then subsetting is done in Python - suggesting we should also handle these on the R side). Looking at the docs I now also see that
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @LynxJinyangii this one also has a TODO that is not addressed
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| #' @examples | ||
| #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") | ||
| #' ts <- ts_load(ts_file) | ||
| #' ts$samples() | ||
| samples = function() { | ||
| rtsk_treeseq_get_samples(self$xptr) | ||
| }, | ||
|
|
||
| #' @description Get the number of nodes in a tree sequence. | ||
| #' @return A signed 64 bit integer \code{bit64::integer64}. | ||
| #' @details See the \code{tskit Python} equivalent at | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,26 @@ | ||
| # Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||
| # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
|
||
| rtsk_variant_iterator_init <- function(ts, samples = NULL, isolated_as_missing = TRUE, alleles = NULL, left = 0.0, right = NA_real_) { | ||
| .Call(`_RcppTskit_rtsk_variant_iterator_init`, ts, samples, isolated_as_missing, alleles, left, right) | ||
| } | ||
|
|
||
| rtsk_variant_iterator_next <- function(iterator) { | ||
| .Call(`_RcppTskit_rtsk_variant_iterator_next`, iterator) | ||
| } | ||
|
|
||
| test_rtsk_variant_iterator_force_null_first_allele <- function(enabled) { | ||
| invisible(.Call(`_RcppTskit_test_rtsk_variant_iterator_force_null_first_allele`, enabled)) | ||
| } | ||
|
|
||
| test_rtsk_variant_iterator_set_site_bounds <- function(iterator, next_site_id, stop_site_id) { | ||
| invisible(.Call(`_RcppTskit_test_rtsk_variant_iterator_set_site_bounds`, iterator, next_site_id, stop_site_id)) | ||
| } | ||
|
|
||
| test_variant_site_index_range <- function(start, stop) { | ||
| invisible(.Call(`_RcppTskit_test_variant_site_index_range`, start, stop)) | ||
| } | ||
|
|
||
| test_validate_options <- function(options, supported) { | ||
| .Call(`_RcppTskit_test_validate_options`, options, supported) | ||
| } | ||
|
|
@@ -31,6 +51,10 @@ tskit_version <- function() { | |
| .Call(`_RcppTskit_tskit_version`) | ||
| } | ||
|
|
||
| rtsk_const_tsk_no_check_integrity <- function() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the Python sort method does not have options, I assume we do not need
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. removed |
||
| .Call(`_RcppTskit_rtsk_const_tsk_no_check_integrity`) | ||
| } | ||
|
|
||
| rtsk_treeseq_load <- function(filename, options = 0L) { | ||
| .Call(`_RcppTskit_rtsk_treeseq_load`, filename, options) | ||
| } | ||
|
|
@@ -75,6 +99,10 @@ rtsk_treeseq_get_num_samples <- function(ts) { | |
| .Call(`_RcppTskit_rtsk_treeseq_get_num_samples`, ts) | ||
| } | ||
|
|
||
| rtsk_treeseq_get_samples <- function(ts) { | ||
| .Call(`_RcppTskit_rtsk_treeseq_get_samples`, ts) | ||
| } | ||
|
|
||
| rtsk_treeseq_get_num_nodes <- function(ts) { | ||
| .Call(`_RcppTskit_rtsk_treeseq_get_num_nodes`, ts) | ||
| } | ||
|
|
@@ -195,6 +223,10 @@ rtsk_table_collection_drop_index <- function(tc, options = 0L) { | |
| invisible(.Call(`_RcppTskit_rtsk_table_collection_drop_index`, tc, options)) | ||
| } | ||
|
|
||
| rtsk_table_collection_sort <- function(tc, edge_start = 0L, options = 0L) { | ||
| invisible(.Call(`_RcppTskit_rtsk_table_collection_sort`, tc, edge_start, options)) | ||
| } | ||
|
|
||
| rtsk_table_collection_summary <- function(tc) { | ||
| .Call(`_RcppTskit_rtsk_table_collection_summary`, tc) | ||
| } | ||
|
|
@@ -211,6 +243,10 @@ rtsk_node_table_add_row <- function(tc, flags = 0L, time = 0, population = -1L, | |
| .Call(`_RcppTskit_rtsk_node_table_add_row`, tc, flags, time, population, individual, metadata) | ||
| } | ||
|
|
||
| rtsk_node_table_get_row <- function(tc, row_id) { | ||
| .Call(`_RcppTskit_rtsk_node_table_get_row`, tc, row_id) | ||
| } | ||
|
|
||
| rtsk_edge_table_add_row <- function(tc, left, right, parent, child, metadata = NULL) { | ||
| .Call(`_RcppTskit_rtsk_edge_table_add_row`, tc, left, right, parent, child, metadata) | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.