Skip to content

Commit 376f9e5

Browse files
committed
Add get_samples(), node_table_get_row() and table_collection_sort()
1 parent 1bb9b3d commit 376f9e5

11 files changed

Lines changed: 491 additions & 2 deletions

RcppTskit/NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,18 @@ and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html
4040
- Added `rtsk_mutation_table_add_row()` and
4141
`TableCollection$mutation_table_add_row()` to append mutation rows from
4242
\code{R}, mirroring `tsk_mutation_table_add_row()`.
43+
- Added `rtsk_node_table_get_row()` and `TableCollection$node_table_get_row()`
44+
to retrieve node-table rows by 0-based row ID.
45+
- Added `rtsk_table_collection_sort()` and `TableCollection$sort()` to sort
46+
table collections with 0-based `edge_start` semantics.
4347
- Added low-level variant iterators
4448
(`rtsk_variant_iterator_init()`/`rtsk_variant_iterator_next()`) and a
4549
user-facing `TreeSequence$variants()` method to iterate over decoded
4650
site-by-site variants from \code{R}, aligned with `tskit` Python API
4751
semantics for `samples`, `isolated_as_missing`, `alleles`, and
4852
`left`/`right` intervals.
53+
- Added `rtsk_treeseq_get_samples()` and `TreeSequence$samples()` to retrieve
54+
sample node IDs from a tree sequence.
4955
- TODO
5056

5157
### Changed

RcppTskit/R/Class-TableCollection.R

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,47 @@ TableCollection <- R6Class(
103103
TreeSequence$new(xptr = ts_xptr)
104104
},
105105

106+
#' @description Sort this table collection in place.
#' @param edge_start integer scalar edge-table start row (0-based); must be
#'   a whole number \code{>= 0}. Non-whole numeric values (e.g. \code{1.5})
#'   are rejected rather than truncated.
#' @param no_check_integrity logical; when \code{TRUE}, pass
#'   \code{TSK_NO_CHECK_INTEGRITY} to \code{tskit C}.
#' @details See the \code{tskit Python} equivalent at
#'   \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TableCollection.sort}.
#' @return No return value; called for side effects.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(ts_file)
#' tc$sort()
sort = function(edge_start = 0L, no_check_integrity = FALSE) {
  bad_edge_start <- "edge_start must be a non-NA integer scalar (0-based)!"
  if (is.null(edge_start) || length(edge_start) != 1L) {
    stop(bad_edge_start)
  }
  # Coerce exactly once. Only the coercion warning is suppressed, so that
  # unparseable or out-of-range input fails with the error below instead of
  # emitting a warning first.
  start_row <- suppressWarnings(as.integer(edge_start))
  # as.integer() truncates toward zero; a numeric input that differs from its
  # coerced value (e.g. 1.5 or -0.5) is not an integer and must be rejected.
  if (
    is.na(start_row) ||
      (is.numeric(edge_start) && edge_start != start_row)
  ) {
    stop(bad_edge_start)
  }
  if (start_row < 0L) {
    stop("edge_start must be >= 0 (0-based)!")
  }
  if (
    !is.logical(no_check_integrity) ||
      length(no_check_integrity) != 1L ||
      is.na(no_check_integrity)
  ) {
    stop("no_check_integrity must be TRUE/FALSE!")
  }
  # no_check_integrity is a validated non-NA logical scalar at this point.
  options <- if (no_check_integrity) {
    as.integer(rtsk_const_tsk_no_check_integrity())
  } else {
    0L
  }
  rtsk_table_collection_sort(
    tc = self$xptr,
    edge_start = start_row,
    options = options
  )
},
146+
106147
#' @description Get the number of provenances in a table collection.
107148
#' @return A signed 64 bit integer \code{bit64::integer64}.
108149
#' @examples
@@ -202,6 +243,27 @@ TableCollection <- R6Class(
202243
rtsk_table_collection_get_num_nodes(self$xptr)
203244
},
204245

246+
#' @description Get one row from the nodes table.
#' @param row_id integer scalar node row ID (0-based); must be a whole number
#'   \code{>= 0}. Non-whole numeric values (e.g. \code{2.7}) are rejected
#'   rather than truncated.
#' @details The ID is 0-based, matching \code{tskit C/Python} semantics.
#' @return A named list with fields \code{id}, \code{flags}, \code{time},
#'   \code{population}, \code{individual}, and \code{metadata}.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(ts_file)
#' tc$node_table_get_row(0L)
node_table_get_row = function(row_id) {
  bad_row_id <- "row_id must be a non-NA integer scalar (0-based)!"
  if (is.null(row_id) || length(row_id) != 1L) {
    stop(bad_row_id)
  }
  # Coerce exactly once. Only the coercion warning is suppressed, so that
  # unparseable or out-of-range input fails with the error below instead of
  # emitting a warning first.
  row <- suppressWarnings(as.integer(row_id))
  # as.integer() truncates toward zero; a numeric input that differs from its
  # coerced value (e.g. 2.7) would otherwise silently fetch the wrong row.
  if (is.na(row) || (is.numeric(row_id) && row_id != row)) {
    stop(bad_row_id)
  }
  if (row < 0L) {
    stop("row_id must be >= 0 (0-based)!")
  }
  rtsk_node_table_get_row(self$xptr, row_id = row)
},
266+
205267
#' @description Add a row to the nodes table.
206268
#' @param flags integer flags for the new node.
207269
#' @param time numeric time value for the new node.

RcppTskit/R/Class-TreeSequence.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,18 @@ TreeSequence <- R6Class(
305305
rtsk_treeseq_get_num_samples(self$xptr)
306306
},
307307

308+
#' @description Retrieve the IDs of the sample nodes of this tree sequence.
#' @return An integer vector with sample node IDs (0-based).
#' @details See the \code{tskit Python} equivalent at
#'   \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.TreeSequence.samples}.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' ts <- ts_load(ts_file)
#' ts$samples()
samples = function() {
  # Hand the external pointer held by this object to the low-level getter.
  ts_xptr <- self$xptr
  rtsk_treeseq_get_samples(ts_xptr)
},
319+
308320
#' @description Get the number of nodes in a tree sequence.
309321
#' @return A signed 64 bit integer \code{bit64::integer64}.
310322
#' @details See the \code{tskit Python} equivalent at

RcppTskit/R/RcppExports.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ tskit_version <- function() {
5151
.Call(`_RcppTskit_tskit_version`)
5252
}
5353

54+
rtsk_const_tsk_no_check_integrity <- function() {
55+
.Call(`_RcppTskit_rtsk_const_tsk_no_check_integrity`)
56+
}
57+
5458
rtsk_treeseq_load <- function(filename, options = 0L) {
5559
.Call(`_RcppTskit_rtsk_treeseq_load`, filename, options)
5660
}
@@ -95,6 +99,10 @@ rtsk_treeseq_get_num_samples <- function(ts) {
9599
.Call(`_RcppTskit_rtsk_treeseq_get_num_samples`, ts)
96100
}
97101

102+
rtsk_treeseq_get_samples <- function(ts) {
103+
.Call(`_RcppTskit_rtsk_treeseq_get_samples`, ts)
104+
}
105+
98106
rtsk_treeseq_get_num_nodes <- function(ts) {
99107
.Call(`_RcppTskit_rtsk_treeseq_get_num_nodes`, ts)
100108
}
@@ -215,6 +223,10 @@ rtsk_table_collection_drop_index <- function(tc, options = 0L) {
215223
invisible(.Call(`_RcppTskit_rtsk_table_collection_drop_index`, tc, options))
216224
}
217225

226+
rtsk_table_collection_sort <- function(tc, edge_start = 0L, options = 0L) {
227+
invisible(.Call(`_RcppTskit_rtsk_table_collection_sort`, tc, edge_start, options))
228+
}
229+
218230
rtsk_table_collection_summary <- function(tc) {
219231
.Call(`_RcppTskit_rtsk_table_collection_summary`, tc)
220232
}
@@ -231,6 +243,10 @@ rtsk_node_table_add_row <- function(tc, flags = 0L, time = 0, population = -1L,
231243
.Call(`_RcppTskit_rtsk_node_table_add_row`, tc, flags, time, population, individual, metadata)
232244
}
233245

246+
rtsk_node_table_get_row <- function(tc, row_id) {
247+
.Call(`_RcppTskit_rtsk_node_table_get_row`, tc, row_id)
248+
}
249+
234250
rtsk_edge_table_add_row <- function(tc, left, right, parent, child, metadata = NULL) {
235251
.Call(`_RcppTskit_rtsk_edge_table_add_row`, tc, left, right, parent, child, metadata)
236252
}

RcppTskit/inst/include/RcppTskit_public.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// RcppTskit.cpp
1010
Rcpp::IntegerVector kastore_version();
1111
Rcpp::IntegerVector tskit_version();
12+
int rtsk_const_tsk_no_check_integrity();
1213

1314
// sync default options with .cpp!
1415
SEXP rtsk_treeseq_load(std::string &filename, int options = 0);
@@ -30,6 +31,7 @@ SEXP rtsk_treeseq_get_num_populations(SEXP ts);
3031
SEXP rtsk_treeseq_get_num_migrations(SEXP ts);
3132
SEXP rtsk_treeseq_get_num_individuals(SEXP ts);
3233
SEXP rtsk_treeseq_get_num_samples(SEXP ts);
34+
Rcpp::IntegerVector rtsk_treeseq_get_samples(SEXP ts);
3335
SEXP rtsk_treeseq_get_num_nodes(SEXP ts);
3436
SEXP rtsk_treeseq_get_num_edges(SEXP ts);
3537
SEXP rtsk_treeseq_get_num_trees(SEXP ts);
@@ -61,6 +63,7 @@ Rcpp::String rtsk_table_collection_get_file_uuid(SEXP tc);
6163
bool rtsk_table_collection_has_index(SEXP tc, int options = 0);
6264
void rtsk_table_collection_build_index(SEXP tc, int options = 0);
6365
void rtsk_table_collection_drop_index(SEXP tc, int options = 0);
66+
void rtsk_table_collection_sort(SEXP tc, int edge_start = 0, int options = 0);
6467
Rcpp::List rtsk_table_collection_summary(SEXP tc);
6568
Rcpp::List rtsk_table_collection_metadata_length(SEXP tc);
6669
int rtsk_individual_table_add_row(
@@ -71,6 +74,7 @@ int rtsk_individual_table_add_row(
7174
int rtsk_node_table_add_row(
7275
SEXP tc, int flags = 0, double time = 0, int population = -1,
7376
int individual = -1, Rcpp::Nullable<Rcpp::RawVector> metadata = R_NilValue);
77+
Rcpp::List rtsk_node_table_get_row(SEXP tc, int row_id);
7478
int rtsk_edge_table_add_row(
7579
SEXP tc, double left, double right, int parent, int child,
7680
Rcpp::Nullable<Rcpp::RawVector> metadata = R_NilValue);

RcppTskit/man/TableCollection.Rd

Lines changed: 91 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

RcppTskit/man/TreeSequence.Rd

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)