#include <algorithm>
#include <cstdlib>
#include <mutex>

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>
#include <executorch/extension/memory_allocator/memory_allocator_utils.h>

namespace executorch::extension {

| 8 | +CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size) |
| 9 | + : MemoryAllocator(0, nullptr) { |
| 10 | + max_size_ = max_size; |
| 11 | + current_size_ = 0; |
| 12 | +} |
| 13 | + |
| 14 | +void* CPUCachingAllocator::allocate(size_t size, size_t alignment) { |
| 15 | + EXECUTORCH_TRACK_ALLOCATION(prof_id(), size); |
| 16 | + |
| 17 | + if (!isPowerOf2(alignment)) { |
| 18 | + ET_LOG(Error, "Alignment %zu is not a power of 2", alignment); |
| 19 | + return nullptr; |
| 20 | + } |
| 21 | + alignment = std::max(alignment, kCachingAllocatorDefaultAlignment); |
| 22 | + auto adjusted_size_value = |
| 23 | + executorch::extension::utils::get_aligned_size(size, alignment); |
| 24 | + if (!adjusted_size_value.ok()) { |
| 25 | + return nullptr; |
| 26 | + } |
| 27 | + size = adjusted_size_value.get(); |
| 28 | + |
| 29 | + std::lock_guard<std::mutex> guard(mutex_); |
| 30 | + const auto& it = available_map_.find(size); |
| 31 | + // Two choices here. |
| 32 | + // 1. Return cached memory |
| 33 | + // 2. Allocate new memory |
| 34 | + // 2 can lead to current_size > max_size_ |
| 35 | + if (it == available_map_.end() || it->second.empty()) { |
| 36 | + void* ptr = std::malloc(size); |
| 37 | + if (ptr == nullptr) { |
| 38 | + ET_LOG(Error, "Failed to allocate memory"); |
| 39 | + return nullptr; |
| 40 | + } |
| 41 | + current_size_ += size; |
| 42 | + allocation_map_[ptr] = size; |
| 43 | + return alignPointer(ptr, alignment); |
| 44 | + } |
| 45 | + void* ptr = it->second.back(); |
| 46 | + it->second.pop_back(); |
| 47 | + allocation_map_[ptr] = size; |
| 48 | + return alignPointer(ptr, alignment); |
| 49 | +} |
| 51 | +void CPUCachingAllocator::free_everything() { |
| 52 | + // We dont lock mutex_ here because it will cause deadlock otherwise |
| 53 | + // we could use recursive_mutex but we just design this differently since |
| 54 | + // free_cache is not a public API anyways |
| 55 | + for (const auto& it : available_map_) { |
| 56 | + for (const auto ptr : it.second) { |
| 57 | + std::free(ptr); |
| 58 | + } |
| 59 | + } |
| 60 | + available_map_.clear(); |
| 61 | + for (const auto& it : allocation_map_) { |
| 62 | + void* ptr = it.first; |
| 63 | + std::free(ptr); |
| 64 | + } |
| 65 | + allocation_map_.clear(); |
| 66 | + // Note that purely by the design, clearing available map does not |
| 67 | + // mean that our current allocated size is zero. |
| 68 | + current_size_ = 0; |
| 69 | +} |
| 71 | +void CPUCachingAllocator::reset() { |
| 72 | + std::lock_guard<std::mutex> guard(mutex_); |
| 73 | + // We make the default allocations, via allcate to be either |
| 74 | + // a. gotten via cached memory OR |
| 75 | + // b. allocated via malloced and not yet cached |
| 76 | + // So if current_size_ (allocated) is larger than the max_size_ |
| 77 | + // for now we simply deallocate everything. |
| 78 | + if (current_size_ > max_size_) { |
| 79 | + free_everything(); |
| 80 | + } else { |
| 81 | + for (auto& it : allocation_map_) { |
| 82 | + void* ptr = it.first; |
| 83 | + size_t alloc_size = it.second; |
| 84 | + // Cache the memory |
| 85 | + available_map_[alloc_size].push_back(ptr); |
| 86 | + } |
| 87 | + allocation_map_.clear(); |
| 88 | + } |
| 89 | +} |
| 91 | +CPUCachingAllocator::~CPUCachingAllocator() { |
| 92 | + // destructor must be called in thread safe manner |
| 93 | + reset(); |
| 94 | + free_everything(); |
| 95 | +} |

} // namespace executorch::extension