Skip to content

Commit 196affc

Browse files
authored
Fix abort on inverted time-travel range (#890)
Time-travel queries are supposed to return an empty result set when the time range is empty. This extends to inverted ranges as well, i.e. an empty result set is always returned when the start time is after the end time. However, the underlying TileDB query actually aborts when an inverted range is given. This was usually missed by CI because the test was so small that the start and end wall clock times used were the same, resulting in the expected behaviour. This commit fixes the issue by short circuiting and skipping the TileDB query altogether when the range is inverted.
1 parent ca5a8b9 commit 196affc

3 files changed

Lines changed: 55 additions & 8 deletions

File tree

libtiledbvcf/src/dataset/tiledbvcfdataset.cc

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <cstdlib>
3030
#include <future>
3131
#include <map>
32+
#include <optional>
3233
#include <string>
3334
#include <vector>
3435

@@ -571,12 +572,12 @@ void TileDBVCFDataset::open(
571572
// 'vcf.end_timestamp' in tiledb_config. If either timestamp is provided,
572573
// reopen the arrays.
573574
bool reopen = false;
575+
std::optional<uint64_t> start_timestamp;
576+
std::optional<uint64_t> end_timestamp;
574577

575578
try {
576-
auto start_timestamp = std::stoull(cfg_.get("vcf.start_timestamp"));
577-
data_array_->set_open_timestamp_start(start_timestamp);
578-
vcf_header_array_->set_open_timestamp_start(start_timestamp);
579-
LOG_INFO("Using vcf.start_timestamp from config: {}", start_timestamp);
579+
start_timestamp = std::stoull(cfg_.get("vcf.start_timestamp"));
580+
LOG_INFO("Using vcf.start_timestamp from config: {}", *start_timestamp);
580581
reopen = true;
581582
} catch (const tiledb::TileDBError& ex) {
582583
LOG_TRACE("'vcf.start_timestamp' not specified in config, using default");
@@ -587,10 +588,8 @@ void TileDBVCFDataset::open(
587588
}
588589

589590
try {
590-
auto end_timestamp = std::stoull(cfg_.get("vcf.end_timestamp"));
591-
data_array_->set_open_timestamp_end(end_timestamp);
592-
vcf_header_array_->set_open_timestamp_end(end_timestamp);
593-
LOG_INFO("Using vcf.end_timestamp from config: {}", end_timestamp);
591+
end_timestamp = std::stoull(cfg_.get("vcf.end_timestamp"));
592+
LOG_INFO("Using vcf.end_timestamp from config: {}", *end_timestamp);
594593
reopen = true;
595594
} catch (const tiledb::TileDBError& ex) {
596595
LOG_TRACE("'vcf.end_timestamp' not specified in config, using default");
@@ -600,6 +599,29 @@ void TileDBVCFDataset::open(
600599
cfg_.get("vcf.end_timestamp"));
601600
}
602601

602+
// If the user supplied an inverted range (start > end), short-circuit:
603+
// mark the dataset as having an empty time range and skip the reopen.
604+
// Calling reopen() with start > end would surface as an uncaught
605+
// "Range is empty" exception in some TileDB versions and abort the process.
606+
if (start_timestamp && end_timestamp && *start_timestamp > *end_timestamp) {
607+
LOG_INFO(
608+
"vcf.start_timestamp ({}) > vcf.end_timestamp ({}); treating as "
609+
"empty time range",
610+
*start_timestamp,
611+
*end_timestamp);
612+
empty_time_range_ = true;
613+
reopen = false;
614+
} else {
615+
if (start_timestamp) {
616+
data_array_->set_open_timestamp_start(*start_timestamp);
617+
vcf_header_array_->set_open_timestamp_start(*start_timestamp);
618+
}
619+
if (end_timestamp) {
620+
data_array_->set_open_timestamp_end(*end_timestamp);
621+
vcf_header_array_->set_open_timestamp_end(*end_timestamp);
622+
}
623+
}
624+
603625
if (reopen) {
604626
data_array_->reopen();
605627
vcf_header_array_->reopen();

libtiledbvcf/src/dataset/tiledbvcfdataset.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,16 @@ class TileDBVCFDataset {
812812
/** Returns true if the dataset is tiledb cloud URI. */
813813
bool tiledb_cloud_dataset() const;
814814

815+
/**
816+
* Returns true if the user requested an inverted time-travel range
817+
* (vcf.start_timestamp > vcf.end_timestamp). In that case the arrays were
818+
* never reopened with the inverted range, and callers should treat reads
819+
* as producing zero results.
820+
*/
821+
bool empty_time_range() const {
822+
return empty_time_range_;
823+
}
824+
815825
/**
816826
* Gets the datatype of a particular exportable attribute that is not fmt or
817827
* info
@@ -881,6 +891,11 @@ class TileDBVCFDataset {
881891
/** Set to true when the dataset is opened. */
882892
bool open_;
883893

894+
/** Set to true when open() was called with an inverted time-travel range
895+
* (start_timestamp > end_timestamp). When true, reads should short-circuit
896+
* to zero results without consulting the underlying arrays. */
897+
bool empty_time_range_ = false;
898+
884899
/** The dataset's general metadata (does not contain sample header data). */
885900
Metadata metadata_;
886901

libtiledbvcf/src/read/reader.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,12 @@ void Reader::read() {
365365
throw std::runtime_error(
366366
"Error exporting records; reader has not been initialized.");
367367

368+
if (dataset_->empty_time_range()) {
369+
read_state_.status = ReadStatus::COMPLETED;
370+
buffers_a.reset(nullptr);
371+
return;
372+
}
373+
368374
bool pending_work = true;
369375
switch (read_state_.status) {
370376
case ReadStatus::COMPLETED:
@@ -2587,6 +2593,10 @@ void Reader::info_attribute_count(int32_t* count) {
25872593
void Reader::sample_count(int32_t* count) {
25882594
if (count == nullptr)
25892595
throw std::runtime_error("Error getting sample count");
2596+
if (dataset_->empty_time_range()) {
2597+
*count = 0;
2598+
return;
2599+
}
25902600
*count = dataset_->sample_names().size();
25912601
}
25922602

0 commit comments

Comments
 (0)