Skip to content

Commit ac19636

Browse files
authored
Merge pull request #565 from petrelharp/tskit_v1b2
update to tskit 1.0.0
2 parents 727770c + 2f6ce23 commit ac19636

9 files changed

Lines changed: 1604 additions & 854 deletions

File tree

core/species.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7468,7 +7468,7 @@ void Species::WriteTreeSequence(std::string &p_recording_tree_path, bool p_simpl
74687468
// Add in the mutation.parent information; valid tree sequences need parents, but we don't keep them while running
74697469
ret = tsk_table_collection_build_index(&output_tables, 0);
74707470
if (ret < 0) handle_error("tsk_table_collection_build_index", ret);
7471-
ret = tsk_table_collection_compute_mutation_parents(&output_tables, 0);
7471+
ret = tsk_table_collection_compute_mutation_parents(&output_tables, TSK_NO_CHECK_INTEGRITY);
74727472
if (ret < 0) handle_error("tsk_table_collection_compute_mutation_parents", ret);
74737473

74747474
{
@@ -7855,7 +7855,7 @@ void Species::CrosscheckTreeSeqIntegrity(void)
78557855
ret = tsk_table_collection_build_index(tables_copy, 0);
78567856
if (ret < 0) handle_error("tsk_table_collection_build_index", ret);
78577857

7858-
ret = tsk_table_collection_compute_mutation_parents(tables_copy, 0);
7858+
ret = tsk_table_collection_compute_mutation_parents(tables_copy, TSK_NO_CHECK_INTEGRITY);
78597859
if (ret < 0) handle_error("tsk_table_collection_compute_mutation_parents", ret);
78607860

78617861
}

treerec/_README

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ The code in `tskit/kastore` is from [kastore/c](https://github.com/tskit-dev/kas
88

99
**Changelog:**
1010

11+
- *30 November 2025*: updated to tskit 1.0.0
12+
- *24 September 2025*: updated to tskit C_1.2.0 (=1.0.0b2) (still kastore C_2.1.1)
1113
- *15 April 2025*: updated to tskit C_1.1.4 (=0.6.2) (still kastore C_2.1.1)
1214
- *7 November 2023*: updated to tskit C_1.1.2 (=0.5.5) (still kastore C_2.1.1)
1315
- *2 June 2022*: updated to tskit C_1.0.0 and kastore C_2.1.1

treerec/tskit/core.c

Lines changed: 87 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ tsk_strerror_internal(int err)
168168
break;
169169
case TSK_ERR_FILE_VERSION_TOO_OLD:
170170
ret = "tskit file version too old. Please upgrade using the "
171-
"'tskit upgrade' command. (TSK_ERR_FILE_VERSION_TOO_OLD)";
171+
"'tskit upgrade' command from tskit version<0.6.2. "
172+
"(TSK_ERR_FILE_VERSION_TOO_OLD)";
172173
break;
173174
case TSK_ERR_FILE_VERSION_TOO_NEW:
174175
ret = "tskit file version is too new for this instance. "
@@ -346,6 +347,12 @@ tsk_strerror_internal(int err)
346347
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
347348
break;
348349

350+
case TSK_ERR_BAD_MUTATION_PARENT:
351+
ret = "A mutation's parent is not consistent with the topology of the tree. "
352+
"Use compute_mutation_parents to set the parents correctly."
353+
"(TSK_ERR_BAD_MUTATION_PARENT)";
354+
break;
355+
349356
/* Migration errors */
350357
case TSK_ERR_UNSORTED_MIGRATIONS:
351358
ret = "Migrations must be sorted by time. (TSK_ERR_UNSORTED_MIGRATIONS)";
@@ -519,9 +526,13 @@ tsk_strerror_internal(int err)
519526
"(TSK_ERR_BAD_SAMPLE_PAIR_TIMES)";
520527
break;
521528
case TSK_ERR_BAD_TIME_WINDOWS:
522-
ret = "Time windows must be strictly increasing and end at infinity. "
529+
ret = "Time windows must start at zero and be strictly increasing. "
523530
"(TSK_ERR_BAD_TIME_WINDOWS)";
524531
break;
532+
case TSK_ERR_BAD_TIME_WINDOWS_END:
533+
ret = "Time windows must end at infinity for this method. "
534+
"(TSK_ERR_BAD_TIME_WINDOWS_END)";
535+
break;
525536
case TSK_ERR_BAD_NODE_TIME_WINDOW:
526537
ret = "Node time does not fall within assigned time window. "
527538
"(TSK_ERR_BAD_NODE_TIME_WINDOW)";
@@ -573,6 +584,14 @@ tsk_strerror_internal(int err)
573584
ret = "Must have at least one allele when specifying an allele map. "
574585
"(TSK_ERR_ZERO_ALLELES)";
575586
break;
587+
case TSK_ERR_BAD_ALLELE_LENGTH:
588+
ret = "Alleles used when decoding alignments must have length one. "
589+
"(TSK_ERR_BAD_ALLELE_LENGTH)";
590+
break;
591+
case TSK_ERR_MISSING_CHAR_COLLISION:
592+
ret = "Alleles used when decoding alignments must not match the missing "
593+
"data character. (TSK_ERR_MISSING_CHAR_COLLISION)";
594+
break;
576595

577596
/* Distance metric errors */
578597
case TSK_ERR_SAMPLE_SIZE_MISMATCH:
@@ -1249,16 +1268,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
12491268
}
12501269

12511270
// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
1252-
// Currently implemented as an array of 32-bit unsigned integers for ease of counting.
1271+
// Currently implemented as an array of 32-bit unsigned integers.
12531272

12541273
int
1255-
tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length)
1274+
tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
12561275
{
12571276
int ret = 0;
12581277

1259-
self->size = (num_bits >> TSK_BIT_ARRAY_CHUNK)
1260-
+ (num_bits % TSK_BIT_ARRAY_NUM_BITS ? 1 : 0);
1261-
self->data = tsk_calloc(self->size * length, sizeof(*self->data));
1278+
self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 1 : 0);
1279+
self->len = length;
1280+
self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
12621281
if (self->data == NULL) {
12631282
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
12641283
goto out;
@@ -1267,96 +1286,111 @@ tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length
12671286
return ret;
12681287
}
12691288

1270-
void
1271-
tsk_bit_array_get_row(const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out)
1272-
{
1273-
out->size = self->size;
1274-
out->data = self->data + (row * self->size);
1275-
}
1289+
#define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)
12761290

12771291
void
1278-
tsk_bit_array_intersect(
1279-
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out)
1292+
tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
1293+
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
12801294
{
1281-
for (tsk_size_t i = 0; i < self->size; i++) {
1282-
out->data[i] = self->data[i] & other->data[i];
1295+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1296+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1297+
tsk_bitset_val_t *restrict out_d = out->data;
1298+
for (tsk_size_t i = 0; i < self->row_len; i++) {
1299+
out_d[i] = self_d[i] & other_d[i];
12831300
}
12841301
}
12851302

12861303
void
1287-
tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other)
1304+
tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1305+
tsk_size_t other_row)
12881306
{
1289-
for (tsk_size_t i = 0; i < self->size; i++) {
1290-
self->data[i] &= ~(other->data[i]);
1307+
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1308+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1309+
for (tsk_size_t i = 0; i < self->row_len; i++) {
1310+
self_d[i] &= ~(other_d[i]);
12911311
}
12921312
}
12931313

12941314
void
1295-
tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other)
1315+
tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1316+
tsk_size_t other_row)
12961317
{
1297-
for (tsk_size_t i = 0; i < self->size; i++) {
1298-
self->data[i] |= other->data[i];
1318+
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1319+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1320+
for (tsk_size_t i = 0; i < self->row_len; i++) {
1321+
self_d[i] |= other_d[i];
12991322
}
13001323
}
13011324

13021325
void
1303-
tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
1326+
tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
13041327
{
1305-
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
1306-
self->data[i] |= (tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i));
1328+
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
1329+
*(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1
1330+
<< (bit - (TSK_BITSET_BITS * i));
13071331
}
13081332

13091333
bool
1310-
tsk_bit_array_contains(const tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
1334+
tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
13111335
{
1312-
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
1313-
return self->data[i]
1314-
& ((tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i)));
1336+
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
1337+
return *(BITSET_DATA_ROW(self, row) + i)
1338+
& ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));
13151339
}
13161340

1317-
tsk_size_t
1318-
tsk_bit_array_count(const tsk_bit_array_t *self)
1341+
static inline uint32_t
1342+
popcount(tsk_bitset_val_t v)
13191343
{
1320-
// Utilizes 12 operations per bit array. NB this only works on 32 bit integers.
1344+
// Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
13211345
// Taken from:
13221346
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
13231347
// There's a nice breakdown of this algorithm here:
13241348
// https://stackoverflow.com/a/109025
1325-
// Could probably do better with explicit SIMD (instead of SWAR), but not as
1326-
// portable: https://arxiv.org/pdf/1611.07612.pdf
13271349
//
1328-
// There is one solution to explore further, which uses __builtin_popcountll.
1329-
// This option is relatively simple, but requires a 64 bit bit array and also
1330-
// involves some compiler flag plumbing (-mpopcnt)
1350+
// The gcc/clang compiler flag will -mpopcnt will convert this code to a
1351+
// popcnt instruction (most if not all modern CPUs will support this). The
1352+
// popcnt instruction will yield some speed improvements, which depend on
1353+
// the tree sequence.
1354+
//
1355+
// NB: 32bit counting is typically faster than 64bit counting for this task.
1356+
// (at least on x86-64)
13311357

1332-
tsk_bit_array_value_t tmp;
1333-
tsk_size_t i, count = 0;
1358+
v = v - ((v >> 1) & 0x55555555);
1359+
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
1360+
return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
1361+
}
1362+
1363+
tsk_size_t
1364+
tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
1365+
{
1366+
tsk_size_t i = 0;
1367+
tsk_size_t count = 0;
1368+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
13341369

1335-
for (i = 0; i < self->size; i++) {
1336-
tmp = self->data[i] - ((self->data[i] >> 1) & 0x55555555);
1337-
tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333);
1338-
count += (((tmp + (tmp >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
1370+
for (i = 0; i < self->row_len; i++) {
1371+
count += popcount(self_d[i]);
13391372
}
13401373
return count;
13411374
}
13421375

13431376
void
1344-
tsk_bit_array_get_items(
1345-
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items)
1377+
tsk_bitset_get_items(
1378+
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
13461379
{
13471380
// Get the items stored in the row of a bitset.
1348-
// Uses a de Bruijn sequence lookup table to determine the lowest bit set. See the
1349-
// wikipedia article for more info: https://w.wiki/BYiF
1381+
// Uses a de Bruijn sequence lookup table to determine the lowest bit set.
1382+
// See the wikipedia article for more info: https://w.wiki/BYiF
13501383

13511384
tsk_size_t i, n, off;
1352-
tsk_bit_array_value_t v, lsb; // least significant bit
1385+
tsk_bitset_val_t v, lsb; // least significant bit
13531386
static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
13541387
17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
1388+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
13551389

13561390
n = 0;
1357-
for (i = 0; i < self->size; i++) {
1358-
v = self->data[i];
1359-
off = i * ((tsk_size_t) TSK_BIT_ARRAY_NUM_BITS);
1391+
for (i = 0; i < self->row_len; i++) {
1392+
v = self_d[i];
1393+
off = i * TSK_BITSET_BITS;
13601394
if (v == 0) {
13611395
continue;
13621396
}
@@ -1370,7 +1404,7 @@ tsk_bit_array_get_items(
13701404
}
13711405

13721406
void
1373-
tsk_bit_array_free(tsk_bit_array_t *self)
1407+
tsk_bitset_free(tsk_bitset_t *self)
13741408
{
13751409
tsk_safe_free(self->data);
13761410
}

treerec/tskit/core.h

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ sizes and types of externally visible structs.
147147
The library minor version. Incremented when non-breaking backward-compatible changes
148148
to the API or ABI are introduced, i.e., the addition of a new function.
149149
*/
150-
#define TSK_VERSION_MINOR 1
150+
#define TSK_VERSION_MINOR 3
151151
/**
152152
The library patch version. Incremented when any changes not relevant to the
153153
to the API or ABI are introduced, i.e., internal refactors of bugfixes.
154154
*/
155-
#define TSK_VERSION_PATCH 4
155+
#define TSK_VERSION_PATCH 0
156156
/** @} */
157157

158158
/*
@@ -283,7 +283,7 @@ A file could not be read because it is in the wrong format
283283
/**
284284
The file is in tskit format, but the version is too old for the
285285
library to read. The file should be upgraded to the latest version
286-
using the ``tskit upgrade`` command line utility.
286+
using the ``tskit upgrade`` command line utility from tskit version<0.6.2.
287287
*/
288288
#define TSK_ERR_FILE_VERSION_TOO_OLD -101
289289
/**
@@ -510,6 +510,12 @@ Some mutations have TSK_UNKNOWN_TIME in an algorithm where that's
510510
disallowed (use compute_mutation_times?).
511511
*/
512512
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
513+
514+
/**
515+
A mutation's parent was not consistent with the topology of the tree.
516+
*/
517+
#define TSK_ERR_BAD_MUTATION_PARENT -511
518+
513519
/** @} */
514520

515521
/**
@@ -740,13 +746,17 @@ Sample times do not all equal the start of first time window
740746
*/
741747
#define TSK_ERR_BAD_SAMPLE_PAIR_TIMES -923
742748
/**
743-
Time windows are not strictly increasing ending at infinity
749+
Time windows are not strictly increasing
744750
*/
745751
#define TSK_ERR_BAD_TIME_WINDOWS -924
746752
/**
753+
Time windows do not end at infinity
754+
*/
755+
#define TSK_ERR_BAD_TIME_WINDOWS_END -925
756+
/**
747757
Node time does not fall within assigned time window
748758
*/
749-
#define TSK_ERR_BAD_NODE_TIME_WINDOW -925
759+
#define TSK_ERR_BAD_NODE_TIME_WINDOW -926
750760
/** @} */
751761

752762
/**
@@ -793,6 +803,14 @@ More than 2147483647 alleles were specified.
793803
A user-specified allele map was used, but it contained zero alleles.
794804
*/
795805
#define TSK_ERR_ZERO_ALLELES -1103
806+
/**
807+
An allele used when decoding alignments had length other than one.
808+
*/
809+
#define TSK_ERR_BAD_ALLELE_LENGTH -1104
810+
/**
811+
An allele used when decoding alignments matched the missing data character.
812+
*/
813+
#define TSK_ERR_MISSING_CHAR_COLLISION -1105
796814
/** @} */
797815

798816
/**
@@ -1094,29 +1112,31 @@ FILE *tsk_get_debug_stream(void);
10941112

10951113
/* Bit Array functionality */
10961114

1097-
typedef uint32_t tsk_bit_array_value_t;
1115+
// define a 32-bit chunk size for our bitsets.
1116+
// this means we'll be able to hold 32 distinct items in each 32 bit uint
1117+
#define TSK_BITSET_BITS ((tsk_size_t) 32)
1118+
typedef uint32_t tsk_bitset_val_t;
1119+
10981120
typedef struct {
1099-
tsk_size_t size; // Number of chunks per row
1100-
tsk_bit_array_value_t *data; // Array data
1101-
} tsk_bit_array_t;
1102-
1103-
#define TSK_BIT_ARRAY_CHUNK 5U
1104-
#define TSK_BIT_ARRAY_NUM_BITS (1U << TSK_BIT_ARRAY_CHUNK)
1105-
1106-
int tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length);
1107-
void tsk_bit_array_free(tsk_bit_array_t *self);
1108-
void tsk_bit_array_get_row(
1109-
const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out);
1110-
void tsk_bit_array_intersect(
1111-
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out);
1112-
void tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other);
1113-
void tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other);
1114-
void tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
1115-
bool tsk_bit_array_contains(
1116-
const tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
1117-
tsk_size_t tsk_bit_array_count(const tsk_bit_array_t *self);
1118-
void tsk_bit_array_get_items(
1119-
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items);
1121+
tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
1122+
tsk_size_t len; // Number of rows
1123+
tsk_bitset_val_t *data;
1124+
} tsk_bitset_t;
1125+
1126+
int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
1127+
void tsk_bitset_free(tsk_bitset_t *self);
1128+
void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
1129+
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
1130+
void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
1131+
const tsk_bitset_t *other, tsk_size_t other_row);
1132+
void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1133+
tsk_size_t other_row);
1134+
void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
1135+
bool tsk_bitset_contains(
1136+
const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
1137+
tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
1138+
void tsk_bitset_get_items(
1139+
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);
11201140

11211141
#ifdef __cplusplus
11221142
}

0 commit comments

Comments
 (0)