Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions libtiledbvcf/src/utils/shared_ptr_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/**
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2026 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#ifndef TILEDB_VCF_SHARED_PTR_POOL_H
#define TILEDB_VCF_SHARED_PTR_POOL_H

#include <memory>
#include <queue>
#include <string>
#include <utility>

#include "utils/logger_public.h"

namespace tiledb {
namespace vcf {

/**
* Manages a pool of `std::shared_ptr<T>` instances, enabling instances of `T`
* to be reused. The pool can be managed either manually or automatically. In
* MANUAL mode, shared pointers must be returned using `return_ptr_to_pool()` to
* prevent them from being automatically destroyed when the pointer's use count
* reaches 0. In AUTOMATIC mode, `SharedPtrPool` keeps a copy of each
* `std::shared_ptr<T>` and releases it for reuse when its use count reaches 1.
*
* Note that AUTOMATIC mode uses a lazy algorithm for releasing pointers, so
* it is only appropriate when pointers can be released in roughly the same
* order that they were added to the pool's queue.
*
* The class can be used as follows:
* ```
* class SharedPtrUser : public SharedPtrPool<RecordHeapV4::Node> {
* std::shared_ptr<RecordHeapV4::Node> automatic_create_or_reuse() {
* return get_ptr_from_pool();
* }
* std::shared_ptr<RecordHeapV4::Node> manual_create_or_reuse() {
* if (ptr_pool_empty()) {
* return create_pool_ptr(new RecordHeapV4::Node());
* } else {
* return reuse_pool_ptr();
* }
* }
* }
* ```
*/
template <typename T>
class SharedPtrPool {
 public:
  /** Selects how pooled pointers become available for reuse. */
  enum SharingMode {
    /** Pointers are only reusable after `return_ptr_to_pool()` is called. */
    MANUAL,
    /**
     * The pool keeps its own copy of every pointer it creates and treats the
     * front pointer as reusable once the pool holds the only reference.
     */
    AUTOMATIC,
  };

  /**
   * Returns the string equivalent of the given `SharingMode`.
   *
   * @param mode The mode to convert to a string
   * @return "MANUAL" or "AUTOMATIC". For any other value an error is logged
   *    and an empty string is returned.
   */
  static std::string modeToString(SharingMode mode) {
    switch (mode) {
      case MANUAL:
        return "MANUAL";
      case AUTOMATIC:
        return "AUTOMATIC";
      default:
        break;
    }
    // Only reached for a value outside the enumeration.
    // NOTE(review): whether LOG_ERROR returns or terminates is not visible
    // from this header, so a fallback value is returned either way.
    LOG_ERROR("{} is not a valid SharingMode", mode);
    return "";
  }

  /**
   * Constructor that determines the management mode to be used.
   *
   * @param mode The mode to use for managing the pooled pointers
   */
  SharedPtrPool(SharingMode mode = SharingMode::AUTOMATIC)
      : mode_(mode) {
  }

 protected:
  /**
   * Returns a pointer to the pool for reuse. Only valid in MANUAL mode; in any
   * other mode an error is logged and `ptr` is left untouched.
   *
   * @param ptr The pointer to return to the pool. It is moved from on success.
   */
  void return_ptr_to_pool(std::shared_ptr<T>& ptr) {
    if (mode_ != SharingMode::MANUAL) {
      LOG_ERROR(
          "SharedPtrPool::return_ptr_to_pool pointers cannot be manually "
          "returned for mode {}",
          modeToString(mode_));
      return;
    }
    // An empty (e.g. already moved-from) pointer must never enter the pool,
    // otherwise it would later be handed out as a "reusable" object.
    if (ptr == nullptr) {
      return;
    }
    shared_ptr_pool_.push(std::move(ptr));
  }

  /**
   * Creates a new `std::shared_ptr<T>` and, in AUTOMATIC mode, stores a copy of
   * it in the pool so it can be reused later. In MANUAL mode the pool does not
   * keep a reference.
   *
   * @tparam Args The types of the arguments forwarded to the
   *    `std::shared_ptr<T>` constructor
   * @param args The arguments forwarded to the `std::shared_ptr<T>`
   *    constructor, e.g. a raw pointer and an optional deleter
   * @return The pointer created
   */
  template <typename... Args>
  std::shared_ptr<T> create_pool_ptr(Args&&... args) {
    // Perfect forwarding keeps move-only arguments (e.g. std::unique_ptr<T>)
    // usable and avoids copying deleters.
    std::shared_ptr<T> ptr(std::forward<Args>(args)...);
    if (mode_ == SharingMode::AUTOMATIC) {
      shared_ptr_pool_.push(ptr);
    }
    return ptr;
  }

  /**
   * Removes the front pointer from the pool and returns it. In AUTOMATIC mode
   * a copy of the pointer is re-queued at the back of the pool.
   *
   * This method does not check whether the front pointer is still in use;
   * call `ptr_pool_empty()` first to determine if a pointer is ready to be
   * reused.
   *
   * @return The pointer being reused, or an empty pointer if the pool holds
   *    no pointers at all
   */
  std::shared_ptr<T> reuse_pool_ptr() {
    // front()/pop() on an empty queue is undefined behaviour.
    if (shared_ptr_pool_.empty()) {
      return nullptr;
    }
    // Move out of the slot that is about to be popped to avoid a reference
    // count increment/decrement pair.
    std::shared_ptr<T> ptr = std::move(shared_ptr_pool_.front());
    shared_ptr_pool_.pop();
    if (mode_ == SharingMode::AUTOMATIC) {
      shared_ptr_pool_.push(ptr);
    }
    return ptr;
  }

  /**
   * Checks if the pool is empty, i.e. there isn't a pointer ready to be reused.
   * In AUTOMATIC mode only the front pointer is inspected, and it counts as
   * ready only when the pool holds the sole reference to it.
   *
   * @return True if no pointer is ready to be reused
   */
  bool ptr_pool_empty() const {
    if (shared_ptr_pool_.empty()) {
      return true;
    }
    return mode_ == SharingMode::AUTOMATIC &&
           shared_ptr_pool_.front().use_count() > 1;
  }

  /**
   * Checks if there's a pointer ready to be reused and, if yes, returns it.
   * Otherwise, a new default-constructed `T` is created.
   *
   * @return The `std::shared_ptr<T>` that was created or reused
   */
  std::shared_ptr<T> get_ptr_from_pool() {
    if (ptr_pool_empty()) {
      return create_pool_ptr(new T());
    }
    return reuse_pool_ptr();
  }

  /** Clears the pointers from the pool, dropping the pool's references. */
  void clear_ptr_pool() {
    std::queue<std::shared_ptr<T>>().swap(shared_ptr_pool_);
  }

  /** Swap all fields with the given `SharedPtrPool` instance. */
  void swap(SharedPtrPool<T>& other) noexcept {
    std::swap(mode_, other.mode_);
    shared_ptr_pool_.swap(other.shared_ptr_pool_);
  }

 private:
  /** The mode used to manage the pooled pointers. */
  SharingMode mode_;

  /**
   * MANUAL mode: pointers that were returned and are available for reuse.
   * AUTOMATIC mode: a copy of every pointer created by the pool.
   */
  std::queue<std::shared_ptr<T>> shared_ptr_pool_;
};

} // namespace vcf
} // namespace tiledb

#endif // TILEDB_VCF_SHARED_PTR_POOL_H
26 changes: 12 additions & 14 deletions libtiledbvcf/src/vcf/vcf_v4.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
*/

#include "vcf/vcf_v4.h"
#include "utils/logger_public.h"

namespace tiledb {
namespace vcf {

VCFV4::VCFV4()
: open_(false)
VCFV4::VCFV4(SharingMode mode)
: SharedPtrPool<bcf1_t>(mode)
, open_(false)
, inited_(false)
, max_record_buffer_size_(10000)
, hdr_(nullptr)
Expand Down Expand Up @@ -102,7 +102,7 @@ void VCFV4::open(const std::string& file, const std::string& index_file) {
void VCFV4::close() {
// Clear the record queue and associated allocation pool.
std::queue<SafeSharedBCFRec>().swap(record_queue_);
std::queue<SafeSharedBCFRec>().swap(record_queue_pool_);
clear_ptr_pool();

if (index_hts_ != nullptr) {
hts_idx_destroy(index_hts_);
Expand Down Expand Up @@ -156,7 +156,7 @@ void VCFV4::pop_record() {
}

void VCFV4::return_record(SafeSharedBCFRec& record) {
record_queue_pool_.emplace(std::move(record));
return_ptr_to_pool(record);
}

std::string VCFV4::contig_name(bcf1_t* const r) const {
Expand Down Expand Up @@ -301,17 +301,15 @@ void VCFV4::read_records() {
break;
}

if (!record_queue_pool_.empty()) {
// Pop a stale record for re-use. Note that `bcf_copy`
// destroys (frees) the stale data to prevent a memory
// leak.
SafeSharedBCFRec r = record_queue_pool_.front();
record_queue_pool_.pop();
bcf_copy(r.get(), tmp_r.get());
// Add a new record to the pool or reuse a record from the pool
if (ptr_pool_empty()) {
SafeSharedBCFRec r = create_pool_ptr(bcf_dup(tmp_r.get()), bcf_destroy);
bcf_unpack(r.get(), BCF_UN_ALL);
record_queue_.emplace(std::move(r));
} else {
SafeSharedBCFRec r(bcf_dup(tmp_r.get()), bcf_destroy);
SafeSharedBCFRec r = reuse_pool_ptr();
// Use `bcf_copy` to destroy (free) the stale data to prevent memory leaks
bcf_copy(r.get(), tmp_r.get());
bcf_unpack(r.get(), BCF_UN_ALL);
record_queue_.emplace(std::move(r));
}
Expand All @@ -328,11 +326,11 @@ void VCFV4::read_records() {
}

void VCFV4::swap(VCFV4& other) {
SharedPtrPool<bcf1_t>::swap(other);
std::swap(open_, other.open_);
std::swap(path_, other.path_);
std::swap(index_path_, other.index_path_);
std::swap(record_queue_, other.record_queue_);
std::swap(record_queue_pool_, other.record_queue_pool_);
record_iter_.swap(other.record_iter_);
std::swap(hdr_, other.hdr_);
std::swap(index_tbx_, other.index_tbx_);
Expand Down
24 changes: 19 additions & 5 deletions libtiledbvcf/src/vcf/vcf_v4.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#include <unordered_map>
#include <vector>

#include "utils/logger_public.h"
#include "utils/shared_ptr_pool.h"
#include "utils/utils.h"
#include "vcf/htslib_value.h"
#include "vcf/region.h"
Expand All @@ -52,9 +54,24 @@ namespace vcf {
/**
* Class wrapping a BCF/VCF file to allow iteration over records.
*/
class VCFV4 {
class VCFV4 : public SharedPtrPool<bcf1_t> {
public:
VCFV4();
typedef SharedPtrPool::SharingMode SharingMode;

/**
* Constructor that determines how `SafeSharedBCFRec` records are managed. In
* MANUAL mode, records must be returned using `return_record()` to prevent
* them from being automatically destroyed when the `SafeSharedBCFRec` counter
* reaches 0. In AUTOMATIC mode, `VCFV4` keeps a copy of each
* `SafeSharedBCFRec` and releases it for reuse when the counter reaches 1.
*
* Note that AUTOMATIC mode uses a lazy algorithm for releasing records, so
* it is only appropriate when records can be released in roughly the same
* order that they were popped.
*
* @param mode The mode to use for managing SafeSharedBCFRec pointers
*/
VCFV4(SharingMode mode = SharingMode::MANUAL);
~VCFV4();

VCFV4(VCFV4&& other) = delete;
Expand Down Expand Up @@ -181,9 +198,6 @@ class VCFV4 {
/** The buffered records. */
std::queue<SafeSharedBCFRec> record_queue_;

/** Stale records available for re-use in `record_queue_`. */
std::queue<SafeSharedBCFRec> record_queue_pool_;

/** The BCF/TBX record iterator. */
Iter record_iter_;

Expand Down
Loading