Skip to content

Commit 876eee2

Browse files
committed
Provide access to more ts/tc properties in R
Fixes #68
1 parent eb6e19c commit 876eee2

23 files changed

Lines changed: 1402 additions & 188 deletions

AGENTS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ These notes apply to this repository root and the `RcppTskit/` package.
1414
* We run R CMD check for every code change.
1515
* We keep local quality gates green before handoff.
1616
* We update `RcppTskit/NEWS.md` for user-visible behavior or API changes.
17+
* We aim for a comparable tskit Python API and tskit R API and
18+
similarly for tskit C API and tskit C++ API
19+
(the latter is the RcppTskit C++ binding to the tskit C API).
1720

1821
## Permission
1922

RcppTskit/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Authors@R: c(
1212
Description: 'Tskit' enables efficient storage, manipulation, and analysis
1313
of ancestral recombination graphs (ARGs) using succinct tree sequence
1414
encoding. The tree sequence encoding of an ARG is described in Wong et
15-
al. (2024) <doi:10.1093/genetics/iyae100>, while `tskit` project is
15+
al. (2024) <doi:10.1093/genetics/iyae100>, while the 'tskit' project is
1616
described in Jeffrey et al. (2026) <doi:10.48550/arXiv.2602.09649>.
1717
See also <https://tskit.dev> for project news, documentation, and
1818
tutorials. 'Tskit' provides 'Python', 'C', and 'Rust' application

RcppTskit/NEWS.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,27 @@ All notable changes to RcppTskit are documented in this file.
44
The file format is based on [Keep a Changelog](https://keepachangelog.com),
55
and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [Unreleased]
8+
9+
### Added (new features)
10+
11+
- Added the following scalar getters to match the tskit C/Python API:
12+
- `TreeSequence$discrete_genome()` to query whether genome coordinates
13+
are discrete integer values.
14+
- `TreeSequence$has_reference_sequence()` to query whether a tree sequence
15+
contains a reference genome sequence.
16+
- `TreeSequence$discrete_time()` to query whether time values are discrete
17+
integer values.
18+
- `TreeSequence$file_uuid()` to query the UUID of the source `.trees` file.
19+
- `TableCollection$has_reference_sequence()` to query whether a table
20+
collection contains a reference genome sequence.
21+
- `TableCollection$file_uuid()` to query the UUID of the source `.trees`
22+
file.
23+
- `TableCollection$sequence_length()` to query the sequence length.
24+
- `TableCollection$time_units()` to query the time units.
25+
- `TableCollection$has_index()` to query whether edge indexes are present.
26+
- TODO
27+
728
## [0.2.0] - 2026-02-22
829

930
### Added (new features)

RcppTskit/R/Class-TableCollection.R

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ TableCollection <- R6Class(
1414
#' @param file a string specifying the full path of the tree sequence file.
1515
#' @param skip_tables logical; if \code{TRUE}, load only non-table information.
1616
#' @param skip_reference_sequence logical; if \code{TRUE}, skip loading
17-
#' reference sequence information.
17+
#' reference genome sequence information.
1818
#' @param pointer an external pointer (\code{externalptr}) to a table collection.
1919
#' @details See the corresponding Python function at
2020
#' \url{https://github.com/tskit-dev/tskit/blob/dc394d72d121c99c6dcad88f7a4873880924dd72/python/tskit/tables.py#L3463}.
@@ -88,12 +88,63 @@ TableCollection <- R6Class(
8888
tree_sequence = function() {
8989
# See https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_BUILD_INDEXES
9090
# TSK_TS_INIT_BUILD_INDEXES (1 << 0) is bitwShiftL(1L, 0) or just 1L
91-
# TODO: Should we also use https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_COMPUTE_MUTATION_PARENTS?
91+
# TODO: Should we also use TSK_TS_INIT_COMPUTE_MUTATION_PARENTS in TableCollection$tree_sequence()? #65
92+
# https://github.com/HighlanderLab/RcppTskit/issues/65
9293
init_options <- bitwShiftL(1L, 0)
9394
ts_ptr <- tc_ptr_to_ts_ptr(self$pointer, options = init_options)
9495
TreeSequence$new(pointer = ts_ptr)
9596
},
9697

98+
#' @description Get the sequence length.
99+
#' @examples
100+
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
101+
#' tc <- tc_load(tc_file)
102+
#' tc$sequence_length()
103+
sequence_length = function() {
104+
tc_ptr_sequence_length(self$pointer)
105+
},
106+
107+
#' @description Get the time units string.
108+
#' @examples
109+
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
110+
#' tc <- tc_load(tc_file)
111+
#' tc$time_units()
112+
time_units = function() {
113+
tc_ptr_time_units(self$pointer)
114+
},
115+
116+
#' @description Get whether the table collection has edge indexes.
117+
#' @examples
118+
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
119+
#' tc <- tc_load(tc_file)
120+
#' tc$has_index()
121+
has_index = function() {
122+
tc_ptr_has_index(self$pointer)
123+
},
124+
125+
#' @description Get whether the table collection has a reference genome sequence.
126+
#' @examples
127+
#' tc_file1 <- system.file("examples/test.trees", package = "RcppTskit")
128+
#' tc_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit")
129+
#' tc1 <- tc_load(tc_file1)
130+
#' tc1$has_reference_sequence()
131+
#' tc2 <- tc_load(tc_file2)
132+
#' tc2$has_reference_sequence()
133+
has_reference_sequence = function() {
134+
tc_ptr_has_reference_sequence(self$pointer)
135+
},
136+
137+
#' @description Get the file UUID string.
138+
#' @details Returns the UUID of the file the table collection was loaded from.
139+
#' If unavailable, returns \code{NA_character_}.
140+
#' @examples
141+
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
142+
#' tc <- tc_load(tc_file)
143+
#' tc$file_uuid()
144+
file_uuid = function() {
145+
tc_ptr_file_uuid(self$pointer)
146+
},
147+
97148
#' @description This function saves a table collection from R to disk and
98149
#' loads it into reticulate Python for use with the \code{tskit} Python API.
99150
#' @param tskit_module reticulate Python module of \code{tskit}. By default,

RcppTskit/R/Class-TreeSequence.R

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ TreeSequence <- R6Class(
1515
#' @param file a string specifying the full path of the tree sequence file.
1616
#' @param skip_tables logical; if \code{TRUE}, load only non-table information.
1717
#' @param skip_reference_sequence logical; if \code{TRUE}, skip loading
18-
#' reference sequence information.
18+
#' reference genome sequence information.
1919
#' @param pointer an external pointer (\code{externalptr}) to a tree sequence.
2020
#' @details See the corresponding Python function at
2121
#' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.load}.
@@ -252,6 +252,32 @@ TreeSequence <- R6Class(
252252
ts_ptr_sequence_length(self$pointer)
253253
},
254254

255+
#' @description Get the discrete genome status.
256+
#' @details Returns \code{TRUE} if all genomic coordinates in the tree
257+
#' sequence are discrete integer values.
258+
#' @examples
259+
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
260+
#' ts_file2 <- system.file("examples/test_non_discrete_genome.trees", package = "RcppTskit")
261+
#' ts1 <- ts_load(ts_file1)
262+
#' ts1$discrete_genome()
263+
#' ts2 <- ts_load(ts_file2)
264+
#' ts2$discrete_genome()
265+
discrete_genome = function() {
266+
ts_ptr_discrete_genome(self$pointer)
267+
},
268+
269+
#' @description Get whether the tree sequence has a reference genome sequence.
270+
#' @examples
271+
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
272+
#' ts_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit")
273+
#' ts1 <- ts_load(ts_file1)
274+
#' ts1$has_reference_sequence()
275+
#' ts2 <- ts_load(ts_file2)
276+
#' ts2$has_reference_sequence()
277+
has_reference_sequence = function() {
278+
ts_ptr_has_reference_sequence(self$pointer)
279+
},
280+
255281
#' @description Get the time units string.
256282
#' @examples
257283
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
@@ -261,6 +287,20 @@ TreeSequence <- R6Class(
261287
ts_ptr_time_units(self$pointer)
262288
},
263289

290+
#' @description Get the discrete time status.
291+
#' @details Returns \code{TRUE} if all time values in the tree sequence are
292+
#' discrete integer values.
293+
#' @examples
294+
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
295+
#' ts_file2 <- system.file("examples/test_discrete_time.trees", package = "RcppTskit")
296+
#' ts1 <- ts_load(ts_file1)
297+
#' ts1$discrete_time()
298+
#' ts2 <- ts_load(ts_file2)
299+
#' ts2$discrete_time()
300+
discrete_time = function() {
301+
ts_ptr_discrete_time(self$pointer)
302+
},
303+
264304
#' @description Get the min time in node table and mutation table.
265305
#' @examples
266306
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
@@ -287,6 +327,17 @@ TreeSequence <- R6Class(
287327
#' ts$metadata_length()
288328
metadata_length = function() {
289329
ts_ptr_metadata_length(self$pointer)
330+
},
331+
332+
#' @description Get the file UUID string.
333+
#' @details Returns the UUID of the file the tree sequence was loaded from.
334+
#' If unavailable, returns \code{NA_character_}.
335+
#' @examples
336+
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
337+
#' ts <- ts_load(ts_file)
338+
#' ts$file_uuid()
339+
file_uuid = function() {
340+
ts_ptr_file_uuid(self$pointer)
290341
}
291342
)
292343
)

RcppTskit/R/RcppExports.R

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,22 @@ ts_ptr_sequence_length <- function(ts) {
9191
.Call(`_RcppTskit_ts_ptr_sequence_length`, ts)
9292
}
9393

94+
ts_ptr_discrete_genome <- function(ts) {
95+
.Call(`_RcppTskit_ts_ptr_discrete_genome`, ts)
96+
}
97+
98+
ts_ptr_has_reference_sequence <- function(ts) {
99+
.Call(`_RcppTskit_ts_ptr_has_reference_sequence`, ts)
100+
}
101+
94102
ts_ptr_time_units <- function(ts) {
95103
.Call(`_RcppTskit_ts_ptr_time_units`, ts)
96104
}
97105

106+
ts_ptr_discrete_time <- function(ts) {
107+
.Call(`_RcppTskit_ts_ptr_discrete_time`, ts)
108+
}
109+
98110
ts_ptr_min_time <- function(ts) {
99111
.Call(`_RcppTskit_ts_ptr_min_time`, ts)
100112
}
@@ -103,18 +115,42 @@ ts_ptr_max_time <- function(ts) {
103115
.Call(`_RcppTskit_ts_ptr_max_time`, ts)
104116
}
105117

106-
ts_ptr_summary <- function(ts) {
107-
.Call(`_RcppTskit_ts_ptr_summary`, ts)
118+
ts_ptr_file_uuid <- function(ts) {
119+
.Call(`_RcppTskit_ts_ptr_file_uuid`, ts)
108120
}
109121

110-
tc_ptr_summary <- function(tc) {
111-
.Call(`_RcppTskit_tc_ptr_summary`, tc)
122+
ts_ptr_summary <- function(ts) {
123+
.Call(`_RcppTskit_ts_ptr_summary`, ts)
112124
}
113125

114126
ts_ptr_metadata_length <- function(ts) {
115127
.Call(`_RcppTskit_ts_ptr_metadata_length`, ts)
116128
}
117129

130+
tc_ptr_sequence_length <- function(tc) {
131+
.Call(`_RcppTskit_tc_ptr_sequence_length`, tc)
132+
}
133+
134+
tc_ptr_has_reference_sequence <- function(tc) {
135+
.Call(`_RcppTskit_tc_ptr_has_reference_sequence`, tc)
136+
}
137+
138+
tc_ptr_time_units <- function(tc) {
139+
.Call(`_RcppTskit_tc_ptr_time_units`, tc)
140+
}
141+
142+
tc_ptr_file_uuid <- function(tc) {
143+
.Call(`_RcppTskit_tc_ptr_file_uuid`, tc)
144+
}
145+
146+
tc_ptr_has_index <- function(tc) {
147+
.Call(`_RcppTskit_tc_ptr_has_index`, tc)
148+
}
149+
150+
tc_ptr_summary <- function(tc) {
151+
.Call(`_RcppTskit_tc_ptr_summary`, tc)
152+
}
153+
118154
tc_ptr_metadata_length <- function(tc) {
119155
.Call(`_RcppTskit_tc_ptr_metadata_length`, tc)
120156
}

0 commit comments

Comments
 (0)