8 changes: 5 additions & 3 deletions include/hwmalloc/detail/fixed_size_heap.hpp
@@ -37,7 +37,8 @@ class fixed_size_heap

public:
fixed_size_heap(Context* context, std::size_t block_size, std::size_t segment_size,
bool never_free, std::size_t num_reserve_segments)
bool never_free, std::size_t num_reserve_segments,
const typename pool_type::segment_alloc_cb_type& segment_alloc_cb = nullptr)
: m_context(context)
, m_block_size(block_size)
, m_segment_size(segment_size)
@@ -52,12 +53,13 @@ class fixed_size_heap
for (auto [n, i] : numa().local_nodes())
{
m_pools[i] = std::make_unique<pool_type>(m_context, m_block_size, m_segment_size, n,
m_never_free, m_num_reserve_segments);
m_never_free, m_num_reserve_segments, segment_alloc_cb);
#if HWMALLOC_ENABLE_DEVICE
for (unsigned int j = 0; j < m_num_devices; ++j)
{
m_device_pools[i * m_num_devices + j] = std::make_unique<pool_type>(m_context,
m_block_size, m_segment_size, n, (int)j, m_never_free, m_num_reserve_segments);
m_block_size, m_segment_size, n, (int)j, m_never_free, m_num_reserve_segments,
segment_alloc_cb);
}
#endif
}
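For illustration, a minimal sketch of how the new callback parameter might be used by calling code. segment_alloc_cb_type is an alias for std::function<void()> (introduced in pool.hpp below); MyContext, ctx and the numeric arguments are placeholders. Note that, per the review discussion in pool.hpp, the callback is currently held by const reference and must therefore outlive the heap:

#include <atomic>
#include <cstddef>
#include <functional>

std::atomic<std::size_t> g_segment_count{0};

// Must outlive any heap that references it: the pool stores the callback
// by const reference (see the review comment in pool.hpp below).
const std::function<void()> g_segment_cb = [] {
    g_segment_count.fetch_add(1, std::memory_order_relaxed);
};

// hwmalloc::detail::fixed_size_heap<MyContext> h(&ctx,
//     /*block_size=*/128, /*segment_size=*/0x4000,
//     /*never_free=*/false, /*num_reserve_segments=*/1, g_segment_cb);
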
38 changes: 24 additions & 14 deletions include/hwmalloc/detail/pool.hpp
@@ -14,6 +14,7 @@
#include <mutex>
#include <memory>
#include <stdexcept>
#include <functional>

namespace hwmalloc
{
@@ -27,6 +28,7 @@ class pool
using block_type = typename segment_type::block;
using stack_type = boost::lockfree::stack<block_type>;
using segment_map = std::unordered_map<segment_type*, std::unique_ptr<segment_type>>;
using segment_alloc_cb_type = std::function<void()>;

private:
static std::size_t num_pages(std::size_t segment_size) noexcept
@@ -48,20 +50,25 @@
}

private:
Context* m_context;
std::size_t m_block_size;
std::size_t m_segment_size;
std::size_t m_numa_node;
bool m_never_free;
std::size_t m_num_reserve_segments;
stack_type m_free_stack;
segment_map m_segments;
std::mutex m_mutex;
int m_device_id = 0;
bool m_allocate_on_device = false;
Context* m_context;
std::size_t m_block_size;
std::size_t m_segment_size;
std::size_t m_numa_node;
bool m_never_free;
std::size_t m_num_reserve_segments;
stack_type m_free_stack;
segment_map m_segments;
std::mutex m_mutex;
int m_device_id = 0;
bool m_allocate_on_device = false;
// Callback executed when a segment is allocated. Can be used to track the number of actual
// memory allocations.
const segment_alloc_cb_type& m_segment_alloc_cb;
Contributor:
Please correct me if I'm missing something: I think this will be a dangling reference when using the default value in the fixed_size_heap constructor. The nullptr is materialized into a temporary std::function for the call to the constructor. The const& parameter keeps that temporary alive only until the end of the full-expression containing the constructor call. The const& member will then point to a temporary that has already been destroyed.

Options:

  • make the default point to a static variable
  • pass and store the callback by value

The second option is seemingly simple, but the reference to the parent class is hidden in the captured this. Safe in the default case, but may be too implicit in the non-default case?
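A reduced sketch of the lifetime issue and of the second option; pool_like, pool_fixed and cb_t are hypothetical stand-ins for pool and segment_alloc_cb_type. The temporary std::function materialized from the nullptr default lives only until the end of the full-expression:

#include <functional>
#include <utility>

using cb_t = std::function<void()>;

struct pool_like
{
    const cb_t& m_cb; // dangles when bound to a temporary
    pool_like(const cb_t& cb = nullptr) : m_cb(cb) {}
};

pool_like p;       // nullptr materializes a temporary cb_t; m_cb dangles
                   // as soon as this full-expression ends
// if (p.m_cb) ... // undefined behavior: reads a destroyed temporary

struct pool_fixed  // second option: pass and store the callback by value
{
    cb_t m_cb;     // owns its copy; safe with the nullptr default
    pool_fixed(cb_t cb = nullptr) : m_cb(std::move(cb)) {}
};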


void add_segment()
{
if (m_segment_alloc_cb)
m_segment_alloc_cb();
auto a =
check_allocation(numa().allocate(num_pages(m_segment_size), m_numa_node), m_numa_node);
#if HWMALLOC_ENABLE_DEVICE
@@ -90,21 +97,24 @@

public:
pool(Context* context, std::size_t block_size, std::size_t segment_size, std::size_t numa_node,
bool never_free, std::size_t num_reserve_segments)
bool never_free, std::size_t num_reserve_segments,
const segment_alloc_cb_type& segment_alloc_cb)
: m_context{context}
, m_block_size{block_size}
, m_segment_size{segment_size}
, m_numa_node{numa_node}
, m_never_free{never_free}
, m_num_reserve_segments{std::max(num_reserve_segments, 1ul)}
, m_free_stack(segment_size / block_size)
, m_segment_alloc_cb(segment_alloc_cb)
{
}

#if HWMALLOC_ENABLE_DEVICE
pool(Context* context, std::size_t block_size, std::size_t segment_size, std::size_t numa_node,
int device_id, bool never_free, std::size_t num_reserve_segments)
: pool(context, block_size, segment_size, numa_node, never_free, num_reserve_segments)
int device_id, bool never_free, std::size_t num_reserve_segments,
const segment_alloc_cb_type& segment_alloc_cb)
: pool(context, block_size, segment_size, numa_node, never_free, num_reserve_segments, segment_alloc_cb)
{
m_device_id = device_id;
m_allocate_on_device = true;
163 changes: 73 additions & 90 deletions include/hwmalloc/heap.hpp
@@ -1,7 +1,7 @@
/*
* ghex-org
*
* Copyright (c) 2014-2023, ETH Zurich
* Copyright (c) 2014-2025, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
@@ -14,9 +14,11 @@
#include <hwmalloc/fancy_ptr/void_ptr.hpp>
#include <hwmalloc/fancy_ptr/const_void_ptr.hpp>
#include <hwmalloc/fancy_ptr/unique_ptr.hpp>
#include <hwmalloc/heap_config.hpp>
#include <hwmalloc/allocator.hpp>
#include <vector>
#include <unordered_map>
#include <iostream>

namespace hwmalloc
{
@@ -56,6 +58,9 @@ class heap
template<typename T>
using unique_ptr = unique_ptr<T, block_type>;

// Note: sizes below are defaults and can be changed through heap_config and
// environment variables.
//
// There are 5 size classes that the heap uses. For each size class it relies on a
// fixed_size_heap. The size classes are:
// - tiny: heaps with linearly increasing block sizes, each heap backed by 16KiB segments
@@ -98,78 +103,71 @@
// : : m_huge_heaps: map

private:
static constexpr std::size_t log2_c(std::size_t n) noexcept
static std::size_t tiny_bucket_index(std::size_t n, std::size_t tiny_increment,
std::size_t tiny_increment_shift) noexcept
{
return ((n < 2) ? 1 : 1 + log2_c(n >> 1));
return ((n + tiny_increment - 1) >> tiny_increment_shift) - 1;
}

static const std::size_t s_tiny_limit = (1u << 7); // 128
static const std::size_t s_small_limit = (1u << 10); // 1024
static const std::size_t s_large_limit = (1u << 16); // 65536

static const std::size_t s_bucket_shift = log2_c(s_tiny_limit) - 1;

static const std::size_t s_tiny_segment = 0x04000; // 16KiB
static const std::size_t s_small_segment = 0x08000; // 32KiB
static const std::size_t s_large_segment = 0x10000; // 64KiB

static const std::size_t s_tiny_increment_shift = 3;
static const std::size_t s_tiny_increment = (1u << s_tiny_increment_shift); // = 8

static const std::size_t s_num_tiny_heaps = s_tiny_limit / s_tiny_increment;
static const std::size_t s_num_small_heaps = log2_c(s_small_limit) - log2_c(s_tiny_limit);
static const std::size_t s_num_large_heaps = log2_c(s_large_limit) - log2_c(s_small_limit);

static std::size_t tiny_bucket_index(std::size_t n) noexcept
static std::size_t bucket_index(std::size_t n, std::size_t bucket_shift) noexcept
{
return ((n + s_tiny_increment - 1) >> s_tiny_increment_shift) - 1;
}

static std::size_t bucket_index(std::size_t n) noexcept
{
return log2_c((n - 1) >> s_bucket_shift) - 1;
}

static constexpr std::size_t round_to_pow_of_2(std::size_t n) noexcept
{
return 1u << log2_c(n - 1);
return detail::log2_c((n - 1) >> bucket_shift) - 1;
}
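// Worked example, assuming heap_config keeps the former defaults
// (tiny_increment = 8, tiny_increment_shift = 3, bucket_shift = 7):
//   tiny_bucket_index(1..8)  -> 0 (block size 8)
//   tiny_bucket_index(9..16) -> 1 (block size 16)
//   bucket_index(129..256)   -> 0 (first small heap, block size 256)
//   bucket_index(257..512)   -> 1 (block size 512)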

private:
heap_config m_config;
Context* m_context;
std::size_t m_max_size;
bool m_never_free;
std::size_t m_num_reserve_segments;
heap_vector m_tiny_heaps;
heap_vector m_heaps;
heap_map m_huge_heaps;
std::mutex m_mutex;
std::size_t m_num_huge_alloc = 0;
bool m_num_huge_alloc_did_warn = false;
typename fixed_size_heap_type::pool_type::segment_alloc_cb_type
m_huge_segment_alloc_cb;

public:
heap(Context* context, bool never_free = false, std::size_t num_reserve_segments = 1)
: m_context{context}
, m_max_size(std::max(round_to_pow_of_2(s_large_limit * 2), s_large_limit))
, m_never_free{never_free}
, m_num_reserve_segments{num_reserve_segments}
, m_tiny_heaps(s_tiny_limit / s_tiny_increment)
, m_heaps(bucket_index(m_max_size) + 1)
heap(Context* context, heap_config const& config = get_default_heap_config())
: m_config{config}
, m_context{context}
, m_max_size(
std::max(detail::round_to_pow_of_2(m_config.m_large_limit * 2), m_config.m_large_limit))
, m_tiny_heaps(m_config.m_tiny_limit / m_config.m_tiny_increment)
, m_heaps(bucket_index(m_max_size, m_config.m_bucket_shift) + 1)
{
for (std::size_t i = 0; i < m_tiny_heaps.size(); ++i)
m_tiny_heaps[i] = std::make_unique<fixed_size_heap_type>(m_context,
s_tiny_increment * (i + 1), s_tiny_segment, m_never_free, m_num_reserve_segments);
m_config.m_tiny_increment * (i + 1), m_config.m_tiny_segment_size,
m_config.m_never_free, m_config.m_num_reserve_segments);

for (std::size_t i = 0; i < s_num_small_heaps; ++i)
for (std::size_t i = 0; i < m_config.m_num_small_heaps; ++i)
m_heaps[i] = std::make_unique<fixed_size_heap_type>(m_context,
(s_tiny_limit << (i + 1)), s_small_segment, m_never_free, m_num_reserve_segments);
(m_config.m_tiny_limit << (i + 1)), m_config.m_small_segment_size,
m_config.m_never_free, m_config.m_num_reserve_segments);

for (std::size_t i = 0; i < m_config.m_num_large_heaps; ++i)
m_heaps[i + m_config.m_num_small_heaps] = std::make_unique<fixed_size_heap_type>(
m_context, (m_config.m_small_limit << (i + 1)), m_config.m_large_segment_size,
m_config.m_never_free, m_config.m_num_reserve_segments);

for (std::size_t i = 0; i < s_num_large_heaps; ++i)
m_heaps[i + s_num_small_heaps] = std::make_unique<fixed_size_heap_type>(m_context,
(s_small_limit << (i + 1)), s_large_segment, m_never_free, m_num_reserve_segments);
for (std::size_t i = 0;
i < m_heaps.size() - (m_config.m_num_small_heaps + m_config.m_num_large_heaps); ++i)
m_heaps[i + m_config.m_num_small_heaps + m_config.m_num_large_heaps] =
std::make_unique<fixed_size_heap_type>(m_context,
(m_config.m_large_limit << (i + 1)), (m_config.m_large_limit << (i + 1)),
m_config.m_never_free, m_config.m_num_reserve_segments);

for (std::size_t i = 0; i < m_heaps.size() - (s_num_small_heaps + s_num_large_heaps); ++i)
m_heaps[i + s_num_small_heaps + s_num_large_heaps] =
std::make_unique<fixed_size_heap_type>(m_context, (s_large_limit << (i + 1)),
(s_large_limit << (i + 1)), m_never_free, m_num_reserve_segments);
if (m_config.m_num_huge_alloc_warn_threshold > 0)
m_huge_segment_alloc_cb = [this] () {
m_num_huge_alloc += 1;
Contributor:
Open question that we didn't find a final answer for: Does this need to be atomic since this may be incremented concurrently for different heaps?

I think our conclusion was that each pool is thread safe, but e.g. updating a map of pools is not under a lock, so the assumption is that at this level allocations are anyway serialized? Thus no need for an atomic?
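If the count does need to be safe against concurrent increments, a self-contained sketch of an atomic variant; warn_counter is a hypothetical helper, not part of this PR:

#include <atomic>
#include <cstddef>
#include <functional>
#include <iostream>

struct warn_counter
{
    std::atomic<std::size_t> count{0};
    std::atomic<bool>        warned{false};
    std::size_t              threshold;

    explicit warn_counter(std::size_t t) : threshold(t) {}

    std::function<void()> make_callback()
    {
        return [this] {
            const auto n = count.fetch_add(1, std::memory_order_relaxed) + 1;
            // exchange(true) returns the previous value, so exactly one
            // thread observes false and prints the warning
            if (n >= threshold && !warned.exchange(true))
                std::cerr << "WARNING: huge allocation count exceeds "
                          << threshold << std::endl;
        };
    }
};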

if (!m_num_huge_alloc_did_warn && m_num_huge_alloc >= m_config.m_num_huge_alloc_warn_threshold) {
m_num_huge_alloc_did_warn = true;
std::cerr
<< "WARNING: huge allocation count exceeds HWMALLOC_NUM_HUGE_ALLOC_WARN_THERESHOLD="
Contributor:
Suggested change
-                << "WARNING: huge allocation count exceeds HWMALLOC_NUM_HUGE_ALLOC_WARN_THERESHOLD="
+                << "WARNING: huge allocation count exceeds HWMALLOC_NUM_HUGE_ALLOC_WARN_THRESHOLD="

<< m_config.m_num_huge_alloc_warn_threshold << std::endl;
}
};
}

heap(heap const&) = delete;
@@ -194,20 +192,23 @@ class heap

pointer allocate(std::size_t size, std::size_t numa_node)
{
if (size <= s_tiny_limit)
return {m_tiny_heaps[tiny_bucket_index(size)]->allocate(numa_node)};
if (size <= m_config.m_tiny_limit)
return {m_tiny_heaps[tiny_bucket_index(size, m_config.m_tiny_increment,
m_config.m_tiny_increment_shift)]
->allocate(numa_node)};
else if (size <= m_max_size)
return {m_heaps[bucket_index(size)]->allocate(numa_node)};
return {m_heaps[bucket_index(size, m_config.m_bucket_shift)]->allocate(numa_node)};
else
{
fixed_size_heap_type* h;
{
std::lock_guard<std::mutex> lock(m_mutex);
const auto s = round_to_pow_of_2(size);
const auto s = detail::round_to_pow_of_2(size);
auto& u_ptr = m_huge_heaps[s];
if (!u_ptr)
u_ptr = std::make_unique<fixed_size_heap_type>(m_context, s, s, m_never_free,
m_num_reserve_segments);
if (!u_ptr) {
u_ptr = std::make_unique<fixed_size_heap_type>(m_context, s, s,
m_config.m_never_free, m_config.m_num_reserve_segments, m_huge_segment_alloc_cb);
}
h = u_ptr.get();
}
return {h->allocate(numa_node)};
@@ -223,20 +224,27 @@ class heap
#if HWMALLOC_ENABLE_DEVICE
pointer allocate(std::size_t size, std::size_t numa_node, int device_id)
{
if (size <= s_tiny_limit)
return {m_tiny_heaps[tiny_bucket_index(size)]->allocate(numa_node, device_id)};
if (size <= m_config.m_tiny_limit)
return {m_tiny_heaps[tiny_bucket_index(size, m_config.m_tiny_increment,
m_config.m_tiny_increment_shift)]
->allocate(numa_node, device_id)};
else if (size <= m_max_size)
return {m_heaps[bucket_index(size)]->allocate(numa_node, device_id)};
return {m_heaps[bucket_index(size, m_config.m_bucket_shift)]->allocate(numa_node,
device_id)};
else
{
fixed_size_heap_type* h;
{
std::lock_guard<std::mutex> lock(m_mutex);
const auto s = round_to_pow_of_2(size);
const auto s = detail::round_to_pow_of_2(size);
auto& u_ptr = m_huge_heaps[s];
if (!u_ptr)
u_ptr = std::make_unique<fixed_size_heap_type>(m_context, s, s, m_never_free,
m_num_reserve_segments);
if (!u_ptr) {
u_ptr = std::make_unique<fixed_size_heap_type>(m_context, s, s,
m_config.m_never_free, m_config.m_num_reserve_segments, m_huge_segment_alloc_cb);
}
h = u_ptr.get();
}
return {h->allocate(numa_node, device_id)};
@@ -293,29 +301,4 @@ class heap
}
};

template<typename Context>
const std::size_t heap<Context>::s_tiny_limit;
template<typename Context>
const std::size_t heap<Context>::s_small_limit;
template<typename Context>
const std::size_t heap<Context>::s_large_limit;
template<typename Context>
const std::size_t heap<Context>::s_bucket_shift;
template<typename Context>
const std::size_t heap<Context>::s_tiny_segment;
template<typename Context>
const std::size_t heap<Context>::s_small_segment;
template<typename Context>
const std::size_t heap<Context>::s_large_segment;
template<typename Context>
const std::size_t heap<Context>::s_tiny_increment_shift;
template<typename Context>
const std::size_t heap<Context>::s_tiny_increment;
template<typename Context>
const std::size_t heap<Context>::s_num_tiny_heaps;
template<typename Context>
const std::size_t heap<Context>::s_num_small_heaps;
template<typename Context>
const std::size_t heap<Context>::s_num_large_heaps;

} // namespace hwmalloc
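For completeness, an illustrative sketch of constructing a heap with the new heap_config interface. heap_config and get_default_heap_config are declared in hwmalloc/heap_config.hpp, which is not part of this diff, so the field set shown here is inferred from its usage above; MyContext and ctx are placeholders:

hwmalloc::heap_config cfg = hwmalloc::get_default_heap_config();
cfg.m_never_free = true;                   // never return segments to the OS
cfg.m_num_huge_alloc_warn_threshold = 100; // warn after 100 huge segment allocations

hwmalloc::heap<MyContext> h(&ctx, cfg);
auto p = h.allocate(256, /*numa_node=*/0); // size 256 maps to the first small-size heap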