diff --git a/docs/specs/bonwick_slab.pdf b/docs/specs/bonwick_slab.pdf new file mode 100644 index 00000000..9af19213 Binary files /dev/null and b/docs/specs/bonwick_slab.pdf differ diff --git a/include/kernel/kmalloc.h b/include/kernel/kmalloc.h index f373c701..6dbd5023 100644 --- a/include/kernel/kmalloc.h +++ b/include/kernel/kmalloc.h @@ -51,6 +51,8 @@ #include +#include + /** * @enum kmalloc_flags * @brief Feature flags passed to the kmalloc function family @@ -59,17 +61,56 @@ typedef enum kmalloc_flags { KMALLOC_KERNEL = 0, /* Default allocation flags. */ } kmalloc_flags; -/** - * Allocate @c size bytes and return a pointer to the allocated memory. +#define KMALLOC_CACHE_MIN_SIZE 16 +#define KMALLOC_CACHE_MAX_SIZE 16384 +#define KMALLOC_CACHE_COUNT 11 + +/* + * @return The index of the smallest cache that can contain a @size bytes object * - * An area allocated using this function MUST be freed manually. + * This function is inlined and marked 'const' so that it is optimized out by + * the compiler when passing a value known at compile time. + */ +static ALWAYS_INLINE __attribute__((const)) int kmalloc_cache_index(size_t size) +{ + if (size <= 16) return 0; + if (size <= 32) return 1; + if (size <= 64) return 2; + if (size <= 128) return 3; + if (size <= 256) return 4; + if (size <= 512) return 5; + if (size <= 1024) return 6; + if (size <= 2048) return 7; + if (size <= 4096) return 8; + if (size <= 8192) return 9; + if (size <= 16384) return 10; + + return -1; +} + +/** Allocate kernel memory from one of the global memory caches. * - * @param size The number of bytes to allocate - * @param flags Feature flags, must be a combination of @ref kmalloc_flags + * @see kmalloc_cache_index() to know which cache_index to use. * - * @return The starting address of the newly allocated area + * @param cache_index Index of the cache to allocate an object from. + * @param flags Allocation flags to use. */ -void *kmalloc(size_t size, int flags); +void *kmalloc_from_cache(int cache_index, int flags); + +/* + * + */ +static ALWAYS_INLINE void *kmalloc(size_t size, int flags) +{ + int cache_index; + + cache_index = kmalloc_cache_index(size); + if (cache_index < 0) + return NULL; + + return kmalloc_from_cache(cache_index, flags); +} + /** * Allocate @c nmemb members of @c size bytes and initialize its content to 0. @@ -127,4 +168,6 @@ void *kmalloc_dma(size_t size); /** Free a buffer allocated through @ref kmalloc_dma */ void kfree_dma(void *dma_ptr); +void kmalloc_api_init(void); + /** @} */ diff --git a/include/kernel/memory.h b/include/kernel/memory.h index ae985e24..eec008e7 100644 --- a/include/kernel/memory.h +++ b/include/kernel/memory.h @@ -170,4 +170,20 @@ extern u32 _kernel_code_end; #define PAGE_ALIGN_UP(_ptr) align_up_ptr(_ptr, PAGE_SIZE) #define PAGE_ALIGNED(_ptr) is_aligned_ptr(_ptr, PAGE_SIZE) +#ifndef __ASSEMBLER__ + +struct multiboot_info; + +/** + * Initialize the memory subsystem. + * + * After this function returns it is safe to call the different memory + * allocation APIs (vm_*, kmalloc). + * + * @param mbt System information structure passed by the bootloader. 
+ */ +void memory_init(struct multiboot_info *); + +#endif /* !__ASSEMBLER__ */ + #endif /* KERNEL_MEMORY_H */ diff --git a/include/kernel/memory/slab.h b/include/kernel/memory/slab.h new file mode 100644 index 00000000..8fa4ac9e --- /dev/null +++ b/include/kernel/memory/slab.h @@ -0,0 +1,72 @@ +#ifndef KERNEL_MEMORY_SLAB_H +#define KERNEL_MEMORY_SLAB_H + +#include +#include +#include + +#include +#include + +/* + * + */ +struct kmem_cache { + llist_t slabs_full; + llist_t slabs_partial; + llist_t slabs_free; + spinlock_t lock; + + size_t obj_size; + int obj_align; + size_t obj_real_size; + unsigned int coloring_offset_next; + + void (*constructor)(void *data); + void (*destructor)(void *data); + + const char *name; + int flags; +}; + +/* + * + */ +struct kmem_slab { + void *page; + struct kmem_bufctl *free; + struct kmem_cache *cache; + atomic_t refcount; + unsigned int coloring_offset; + node_t this; +}; + +/** Create a new cache. */ +struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size, + int obj_align, void (*constructor)(void *), + void (*destructor)(void *)); + +/** Allocate an object from a cache + * + * @param cache The cache + * @param flags Combination of allocation flags + */ +void *kmem_cache_alloc(struct kmem_cache *cache, int flags); + +/** Free a cache and all its slabs. + * + * NOTE: The caller should be sure that no objects allocated from this cache + * are still being used when calling this function. + */ +void kmem_cache_destroy(struct kmem_cache *cache); + +/** Free an object allocated by a cache. + * + * @param cache The cache the object was allocated from + * @param obj The object to free + */ +void kmem_cache_free(struct kmem_cache *cache, void *obj); + +int kmem_cache_api_init(void); + +#endif /* KERNEL_MEMORY_SLAB_H */ diff --git a/include/kernel/pmm.h b/include/kernel/pmm.h index d475341c..f5c14e0e 100644 --- a/include/kernel/pmm.h +++ b/include/kernel/pmm.h @@ -63,6 +63,7 @@ enum page_flags { PAGE_AVAILABLE = BIT(0), ///< This page has not been allocated PAGE_COW = BIT(1), ///< Currently used in a CoW mapping + PAGE_SLAB = BIT(2), ///< Page allocated by the slab allocator }; /** Represents a physical pageframe @@ -71,6 +72,15 @@ enum page_flags { struct page { uint8_t flags; ///< Combination of @ref page_flags uint8_t refcount; ///< How many processes reference that page + + union { + /* + * Data for pages allocated by the slab allocator (flags & PAGE_SLAB). + */ + struct { + struct kmem_cache *cache; + } slab; + }; }; /** diff --git a/include/kernel/spinlock.h b/include/kernel/spinlock.h index c503269e..c40a6cec 100644 --- a/include/kernel/spinlock.h +++ b/include/kernel/spinlock.h @@ -92,6 +92,12 @@ static ALWAYS_INLINE void spinlock_release(spinlock_t *lock) __atomic_clear(&lock->locked, __ATOMIC_RELEASE); } +/** Check whether a lock is currently held by someone. */ +static ALWAYS_INLINE bool spinlock_is_held(const spinlock_t *lock) +{ + return __atomic_load_n(&lock->locked, __ATOMIC_ACQUIRE); +} + typedef struct { spinlock_t *lock; bool done; diff --git a/include/kernel/vm.h b/include/kernel/vm.h index b15a62d1..0999329c 100644 --- a/include/kernel/vm.h +++ b/include/kernel/vm.h @@ -133,6 +133,9 @@ struct vm_segment_driver { * located. */ error_t (*vm_fault)(struct address_space *, struct vm_segment *); + + /** Map this segment onto a physical address. */ + error_t (*vm_map)(struct address_space *, struct vm_segment *, vm_flags_t); }; /** Kernel-only address-space. 
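For reference, here is a minimal usage sketch of the cache API declared in include/kernel/memory/slab.h above. The struct foo type, its constructor and the foo_* helpers are hypothetical and only illustrate the calls; the sketch assumes nothing beyond the kmem_cache_* declarations above and the existing llist/IS_ERR/PANIC helpers, and follows slab.c below in treating kmem_cache_create() failures as error-encoded pointers.

#include <kernel/memory/slab.h>

/* Hypothetical object type, used only to illustrate the API. */
struct foo {
    int id;
    llist_t users;
};

/* Constructors run once per object, when its backing slab is created. */
static void foo_ctor(void *data)
{
    struct foo *foo = data;
    INIT_LLIST(foo->users);
}

static struct kmem_cache *foo_cache;

void foo_api_init(void)
{
    /* kmem_cache_create() returns an error-encoded pointer on failure. */
    foo_cache = kmem_cache_create("foo", sizeof(struct foo), 8, foo_ctor, NULL);
    if (IS_ERR(foo_cache))
        PANIC("failed to create the foo cache");
}

struct foo *foo_alloc(void)
{
    return kmem_cache_alloc(foo_cache, 0); /* no special allocation flags */
}

void foo_free(struct foo *foo)
{
    kmem_cache_free(foo_cache, foo);
}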
@@ -224,4 +227,10 @@ void vm_free(struct address_space *, void *); /** Find the address space's segment that contains the given address */ struct vm_segment *vm_find(const struct address_space *, void *); +/** Map a virtual address segment onto a physical page. + * + * @return E_EXIST if the virtual address contained a previous mapping. + */ +error_t vm_map(struct address_space *, void *); + #endif /* KERNEL_VM_H */ diff --git a/include/utils/math.h b/include/utils/math.h index 96531d1a..27eb665e 100644 --- a/include/utils/math.h +++ b/include/utils/math.h @@ -38,6 +38,9 @@ _tmp < 0 ? -_tmp : _tmp; \ }) +/** @return whether a value is a power of two */ +#define is_power_of_2(_x) (_x != 0 && ((_x & (_x - 1)) == 0)) + #define __align_mask(_value, _power) ((__typeof__(_value))((_power)-1)) /** @brief Align @c _value to the next multiple of @c _power diff --git a/kernel/arch/i686/mmu.c b/kernel/arch/i686/mmu.c index 179fef92..332b8322 100644 --- a/kernel/arch/i686/mmu.c +++ b/kernel/arch/i686/mmu.c @@ -609,10 +609,16 @@ static INTERRUPT_HANDLER_FUNCTION(page_fault) if (!error.present || is_cow) { as = IS_KERNEL_ADDRESS(faulty_address) ? &kernel_address_space : current->process->as; + if (unlikely(!as)) { + log_err("page_fault: address space is NULL"); + goto page_fault_panic; + } + if (!address_space_fault(as, faulty_address, is_cow)) return E_SUCCESS; } +page_fault_panic: PANIC("PAGE FAULT at " FMT32 ": %s access on a %s page %s", faulty_address, error.write ? "write" : "read", error.present ? "protected" : "non-present", diff --git a/kernel/build.mk b/kernel/build.mk index 46ddfb74..61e086dd 100644 --- a/kernel/build.mk +++ b/kernel/build.mk @@ -36,10 +36,12 @@ KERNEL_SRCS := \ misc/worker.c \ misc/semaphore.c \ misc/uacpi.c \ + memory/memory.c \ memory/pmm.c \ memory/vmm.c \ memory/address_space.c \ memory/vm_normal.c \ + memory/slab.c \ net/net.c \ net/packet.c \ net/socket.c \ diff --git a/kernel/fs/tar.c b/kernel/fs/tar.c index 85980004..bb465a28 100644 --- a/kernel/fs/tar.c +++ b/kernel/fs/tar.c @@ -193,7 +193,7 @@ static void tar_node_free(tree_node_t *node) /* Do not free vnode, let it be free'd by vnode_release(). */ if (tar_node->vnode) tar_node->vnode->pdata = NULL; - kfree(node); + kfree(tar_node); } /* diff --git a/kernel/main.c b/kernel/main.c index 328fc026..c51fcbf4 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -13,10 +13,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include @@ -170,16 +169,11 @@ void kernel_main(struct multiboot_info *mbt, unsigned int magic) /* * Now that we have a minimal working setup, we can enable paging * and initialize the virtual memory allocation API. - * After this step, we are able to allocate & free kernel memory as usual. + * + * After this step, we are able to allocate & free kernel memory. 
*/ - if (!pmm_init(mbt)) - PANIC("Could not initialize the physical memory manager"); + memory_init(mbt); - log_info("Initializing MMU"); - if (!mmu_init()) - PANIC("Failed to initialize virtual address space"); - - address_space_init(&kernel_address_space); process_init_kernel_process(); /* diff --git a/kernel/memory/address_space.c b/kernel/memory/address_space.c index e72ebd7f..f7654216 100644 --- a/kernel/memory/address_space.c +++ b/kernel/memory/address_space.c @@ -358,6 +358,32 @@ void vm_free(struct address_space *as, void *addr) } } +error_t vm_map(struct address_space *as, void *addr) +{ + struct vm_segment *segment; + error_t ret; + + if (addr == NULL) + return E_SUCCESS; + + if ((vaddr_t)addr % PAGE_SIZE) { + log_warn("freeing unaligned virtual address: %p (skipping)", addr); + return E_INVAL; + } + + locked_scope (&as->lock) { + segment = vm_find(as, addr); + if (!segment) { + log_dbg("free: no backing segment for %p", addr); + return E_NOENT; + } + + ret = segment->driver->vm_map(as, segment, segment->flags); + } + + return ret; +} + static int vm_segment_contains(const void *this, const void *addr) { const struct vm_segment *segment = to_segment(this); diff --git a/kernel/memory/kmalloc.c b/kernel/memory/kmalloc.c index 39ed02c2..869ef8a1 100644 --- a/kernel/memory/kmalloc.c +++ b/kernel/memory/kmalloc.c @@ -1,180 +1,26 @@ -#include +#define LOG_DOMAIN "kmalloc" + #include #include #include +#include #include #include -#include -#include -#include -#include - -#include -#include -#include -#include -#include +#include #include -/** - * @defgroup kmalloc_internals Kmalloc - Internals - * @ingroup kmalloc - * - * Internal functions and structures used for managing buckets. - * - * @{ - */ - -static DECLARE_LLIST(kernel_buckets); - -/** All returned addresses are aligned on a 32B boundary */ -#define KMALLOC_ALIGNMENT (32) - -/** - * @brief Magic value to detect if a block is free - * - * To prevent corrupting the metadata by freeing the same block multiple times - * we write this number right after the linked list node when freeing a block. - * We then check for this arbitrary value before freeing it. If it's present - * this means we're freeing an already free block. - */ -#define KMALLOC_FREE_MAGIC (0x3402CECE) -#define BLOCK_FREE_MAGIC(_block) \ - ((uint32_t *)(((void *)_block) + sizeof(node_t))) - -/** - * @brief The metadata for a single bucket - * @struct bucket_meta - * - * A bucket's metadata is stored at the beginning of its mapped area, - * inside its first block. 
- * - */ -typedef struct bucket_meta { - u32 block_size; ///< The size of each blocks inside this bucket - u16 block_count; ///< Number of blocks currently malloc'd - u16 flags; ///< Flags for this bucket - llist_t free; ///< Head of the freelist - node_t this; - char data[] __attribute__((aligned(KMALLOC_ALIGNMENT))); -} bucket_t; - -static_assert(sizeof(bucket_t) <= KMALLOC_ALIGNMENT, "Bucket metadata MUST fit " - "into a single block"); - -static inline struct bucket_meta *to_bucket(node_t *this) -{ - return container_of(this, bucket_t, this); -} - -static inline size_t bucket_compute_size(size_t block_size) -{ - return align_up(KMALLOC_ALIGNMENT + block_size, PAGE_SIZE); -} +static struct kmem_cache *kmalloc_size_caches[KMALLOC_CACHE_COUNT]; -/** Find a bucket containing with at least one free block of the given size */ -static bucket_t *bucket_find(llist_t *buckets, size_t size, const u16 flags) -{ - FOREACH_LLIST (node, buckets) { - bucket_t *bucket = to_bucket(node); - if (bucket->block_size == size && !llist_is_empty(&bucket->free) && - bucket->flags == flags) - return bucket; - } - - return NULL; -} +static const char *kmalloc_cache_names[] = { + "size-16", "size-32", "size-64", "size-128", + "size-256", "size-512", "size-1024", "size-2048", + "size-4096", "size-8192", "size-16384", +}; -/** Reserve a free block inside a bucket */ -static void *bucket_get_free_block(bucket_t *bucket) +void *kmalloc_from_cache(int cache_index, int flags) { - void *block = llist_pop(&bucket->free); - bucket->block_count += 1; - // remove KMALLOC_FREE_MAGIC - *BLOCK_FREE_MAGIC(block) = 0x0; - return block; -} - -/** Create a new empty bucket for blocks of size @c block_size */ -static struct bucket_meta *bucket_create(struct address_space *as, - llist_t *buckets, size_t block_size, - const u16 flags) -{ - size_t bucket_size = bucket_compute_size(block_size); - bucket_t *bucket = vm_alloc(as, bucket_size, VM_KERNEL_RW | flags); - - if (IS_ERR(bucket)) - return NULL; - - bucket->block_size = block_size; - bucket->block_count = 0; - bucket->flags = flags; - - INIT_LLIST(bucket->free); - - // Generate the intrusive freelist - node_t *node = (node_t *)bucket->data; - size_t nb_blocks = (bucket_size - sizeof(bucket_t)) / block_size; - for (size_t i = 0; i < nb_blocks; ++i) { - *BLOCK_FREE_MAGIC(node) = KMALLOC_FREE_MAGIC; - llist_add(&bucket->free, node); - node = (void *)node + block_size; - } - - llist_add(buckets, &bucket->this); - - return bucket; -} - -/** Free a block inside a bucket */ -static void -bucket_free_block(struct address_space *as, bucket_t *bucket, void *block) -{ - // Check if block is already free or not - uint32_t *const block_free_magic = BLOCK_FREE_MAGIC(block); - if (*block_free_magic == KMALLOC_FREE_MAGIC) - return; // block is already free - - // If all blocks are free, unmap the bucket to avoid hording memory - if (bucket->block_count == 1) { - llist_remove(&bucket->this); - vm_free(as, bucket); - return; - } - - *block_free_magic = KMALLOC_FREE_MAGIC; - llist_add(&bucket->free, block); - bucket->block_count -= 1; -} - -static ALWAYS_INLINE bucket_t *bucket_from_block(void *block) -{ - return (bucket_t *)align_down((u32)block, PAGE_SIZE); -} - -/** @} */ - -void *kmalloc(size_t size, int flags) -{ - struct address_space *as; - - if (size == 0) - return NULL; - - as = &kernel_address_space; - - size = align_up(size, KMALLOC_ALIGNMENT); - size = bit_next_pow32(size); - - bucket_t *bucket = bucket_find(&kernel_buckets, size, flags); - if (bucket == NULL) - bucket = 
bucket_create(as, &kernel_buckets, size, flags); - - if (bucket == NULL) - return NULL; - - return bucket_get_free_block(bucket); + return kmem_cache_alloc(kmalloc_size_caches[cache_index], flags); } void *kcalloc(size_t nmemb, size_t size, int flags) @@ -191,40 +37,45 @@ void *kcalloc(size_t nmemb, size_t size, int flags) void kfree(void *ptr) { - struct address_space *as; + struct page *page; + paddr_t paddr; if (ptr == NULL) return; - as = &kernel_address_space; - bucket_free_block(as, bucket_from_block(ptr), ptr); + paddr = mmu_find_physical((vaddr_t)ptr); + page = address_to_page(paddr); + + kmem_cache_free(page->slab.cache, ptr); } void *krealloc(void *ptr, size_t size, int flags) { - if (size == 0) { - kfree(ptr); - return NULL; - } + struct page *page; + paddr_t paddr; if (ptr == NULL) return kmalloc(size, flags); - size = align_up(size, KMALLOC_ALIGNMENT); - size = bit_next_pow32(size); + paddr = mmu_find_physical((vaddr_t)ptr); + page = address_to_page(paddr); - // Reuse same block if it is large enough - bucket_t *bucket = bucket_from_block(ptr); - if (bucket->block_size >= size) + if (!(page->flags & PAGE_SLAB)) { + WARN("reallocating an invalid pointer: %p", ptr); return ptr; + } - void *new = kmalloc(size, flags); - if (new != NULL) - memcpy(new, ptr, size); + if (size == 0) { + kfree(ptr); + return NULL; + } - kfree(ptr); + /* No need to reallocate, current slab object is large enough already. */ + if (page->slab.cache->obj_size >= size) + return ptr; - return new; + kfree(ptr); + return kmalloc(size, flags); } void *krealloc_array(void *ptr, size_t nmemb, size_t size, int flags) @@ -262,3 +113,18 @@ void kfree_dma(void *dma_ptr) vm_free(&kernel_address_space, dma_ptr); } + +void kmalloc_api_init(void) +{ + struct kmem_cache *cache; + size_t obj_size = KMALLOC_CACHE_MIN_SIZE; + + for (int i = 0; i < KMALLOC_CACHE_COUNT; ++i, obj_size <<= 1) { + cache = kmem_cache_create(kmalloc_cache_names[i], obj_size, 16, NULL, + NULL); + if (!cache) + PANIC("failed to init kmalloc cache: '%s'", kmalloc_cache_names[i]); + + kmalloc_size_caches[i] = cache; + } +} diff --git a/kernel/memory/memory.c b/kernel/memory/memory.c new file mode 100644 index 00000000..793fd94b --- /dev/null +++ b/kernel/memory/memory.c @@ -0,0 +1,27 @@ +#define LOG_DOMAIN "memory" + +#include +#include +#include +#include +#include +#include +#include + +#include + +void memory_init(struct multiboot_info *mbt) +{ + log_info("Initializing pageframe allocator"); + if (!pmm_init(mbt)) + PANIC("Failed to initialize the physical memory manager"); + + log_info("Initializing MMU"); + if (!mmu_init()) + PANIC("Failed to initialize virtual address space"); + + address_space_init(&kernel_address_space); + + kmem_cache_api_init(); + kmalloc_api_init(); +} diff --git a/kernel/memory/pmm.c b/kernel/memory/pmm.c index 14d13c18..60bf9d14 100644 --- a/kernel/memory/pmm.c +++ b/kernel/memory/pmm.c @@ -225,8 +225,6 @@ void page_put(struct page *page) bool pmm_init(struct multiboot_info *mbt) { - log_info("Initializing pageframe allocator"); - if (!pmm_initialize_pages(mbt)) { return false; } diff --git a/kernel/memory/slab.c b/kernel/memory/slab.c new file mode 100644 index 00000000..e55f0c14 --- /dev/null +++ b/kernel/memory/slab.c @@ -0,0 +1,562 @@ +/* + * SunOs' slab allocator implementation. + * + * TODO: kmem_cache_reap() for reclaiming memory from caches when running low. 
+ * + * References: + * - Bonwick94 + */ + +#define LOG_DOMAIN "slab" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +enum kmem_cache_flag { + CACHE_F_EXTERNAL, /* slab & bufctl structs stored in an external buffer. */ +}; + +/* + * + */ +struct kmem_bufctl { + union { + struct kmem_bufctl *next; /* Used when bufctl is inside the freelist. */ + struct kmem_slab *slab; /* Used when the object has been allocated. */ + }; + + /* + * NOTE: The object and the hash table entry are placed inside a union + * so that the object and the entry's key can be used interchangeably. + */ + union { + void *obj; + struct hashtable_entry hash; + }; +}; + +static struct kmem_cache kmem_cache_cache; +static struct kmem_cache kmem_slab_cache; +static struct kmem_cache kmem_bufctl_cache; + +#define KMEM_SLAB_MIN_SIZE sizeof(struct kmem_bufctl) +#define KMEM_SLAB_MIN_ALIGN 1 + +/* + * Slabs whose objects are larger than this one are considered 'large slabs'. + * + * Large slabs do not store the kmem_bufctl structures inside the slab directly, + * but keep them stored inside a dedicated buffer. + */ +#define KMEM_SLAB_LARGE_SIZE (PAGE_SIZE / 8) + +/* + * Address to kmem_bufctl hashmap. + */ +static DECLARE_HASHTABLE(kmem_bufctl_hashmap, 256); +static DECLARE_SPINLOCK(kmem_bufctl_hashmap_lock); + +/* + * NOTE: Using a spinlock to protect against simultaneous accesses to the + * slab lists makes it unsafe to use the kmem_cache API in uninterruptible + * contexts. We should switch to disabling interrupts instead if we need + * to allocate memory while inside an interrupt handler. + */ +static inline void kmem_cache_lock(struct kmem_cache *cache) +{ + spinlock_acquire(&cache->lock); +} + +/* + * + */ +static inline void kmem_cache_unlock(struct kmem_cache *cache) +{ + spinlock_release(&cache->lock); +} + +/* + * Find the start address of a slab object. + */ +static inline void *kmem_slab_obj_start(const struct kmem_slab *slab, void *obj) +{ + off_t offset = obj - slab->page; + + return obj - (offset % slab->cache->obj_real_size); +} + +/* + * Find the start address of the next object in a slab. + * + * This function assumes that @c obj points to the start of the object. + */ +static inline void *kmem_slab_obj_next(const struct kmem_slab *slab, void *obj) +{ + return obj + slab->cache->obj_real_size; +} + +/* + * Check whether @c has been allocated by @c slab. + */ +static inline bool +kmem_slab_contains_obj(const struct kmem_slab *slab, void *obj) +{ + return PAGE_ALIGN_DOWN(obj) == slab->page; +} + +/* + * Free a single slab and all the bufctl structure it contains. 
+ */ +static void kmem_slab_destroy(struct kmem_slab *slab) +{ + struct kmem_cache *cache = slab->cache; + struct kmem_bufctl *bufctl; + struct kmem_bufctl *next; + + if (atomic_read(&slab->refcount)) { + log_warn("%s: slab@%p has %d active entries when destroying", + cache->name, slab->page, atomic_read(&slab->refcount)); + } + + llist_remove(&slab->this); + vm_free(&kernel_address_space, slab->page); + + next = slab->free; + while ((bufctl = next) != NULL) { + next = bufctl->next; + locked_scope (&kmem_bufctl_hashmap_lock) { + hashtable_remove(&kmem_bufctl_hashmap, &bufctl->hash.key); + } + if (cache->destructor) + cache->destructor(bufctl->obj); + if (BIT_READ(cache->flags, CACHE_F_EXTERNAL)) + kmem_cache_free(&kmem_bufctl_cache, bufctl); + } + + kmem_cache_free(&kmem_slab_cache, slab); +} + +/* + * + */ +static void +kmem_slab_free_obj(struct kmem_slab *slab, struct kmem_bufctl *bufctl) +{ + bufctl->next = slab->free; + SWAP(slab->free, bufctl); + + if (unlikely(atomic_dec(&slab->refcount) <= 1)) { + /* All objects in the slab are now free. */ + llist_remove(&slab->this); + llist_add(&slab->cache->slabs_free, &slab->this); + } else if (unlikely(bufctl == NULL)) { + /* Slab was full before freeing the object. */ + llist_remove(&slab->this); + llist_add(&slab->cache->slabs_partial, &slab->this); + } +} + +/* + * Generate the slab's initial freelist and initialize each object. + * + * This function only works for normal sized objects. For larger objects, + * use kmem_slab_init_large_objects(). + */ +static void kmem_slab_init_objects(struct kmem_slab *slab) +{ + struct kmem_cache *cache = slab->cache; + struct kmem_bufctl *bufctl; + struct kmem_bufctl **next_bufctl; + void *end = slab; + void *obj; + + obj = slab->page + slab->coloring_offset; + next_bufctl = &slab->free; + + while (obj + cache->obj_real_size <= end) { + bufctl = obj + cache->obj_size; + *next_bufctl = bufctl; + next_bufctl = &bufctl->next; + bufctl->obj = obj; + + locked_scope (&kmem_bufctl_hashmap_lock) { + bufctl->hash.key = bufctl->obj; + hashtable_insert(&kmem_bufctl_hashmap, &bufctl->hash); + } + + if (cache->constructor) + cache->constructor(obj); + obj += cache->obj_real_size; + } + + *next_bufctl = NULL; +} + +/* + * Generate the slab's initial freelist and initiliaze each object. + * + * This is the large object version of @ref kmem_slab_init_objects() for slabs + * in which bufctl structures are stored in a dedicated external page. 
+ */ +static error_t kmem_slab_init_large_objects(struct kmem_slab *slab) +{ + struct kmem_cache *cache = slab->cache; + size_t slab_size = align_up(cache->obj_size, PAGE_SIZE); + struct kmem_bufctl *bufctl; + struct kmem_bufctl **next_bufctl; + void *end = slab->page + slab_size; + void *obj; + + obj = slab->page + slab->coloring_offset; + next_bufctl = &slab->free; + + while (obj + cache->obj_real_size <= end) { + bufctl = kmem_cache_alloc(&kmem_bufctl_cache, 0); + if (!bufctl) { + *next_bufctl = NULL; + goto err; + } + + *next_bufctl = bufctl; + next_bufctl = &bufctl->next; + bufctl->obj = obj; + + locked_scope (&kmem_bufctl_hashmap_lock) { + bufctl->hash.key = bufctl->obj; + hashtable_insert(&kmem_bufctl_hashmap, &bufctl->hash); + } + + if (cache->constructor) + cache->constructor(obj); + obj += cache->obj_real_size; + } + + *next_bufctl = NULL; + + return 0; + +err: + bufctl = slab->free; + while (bufctl) { + struct kmem_bufctl *to_free = bufctl; + bufctl = bufctl->next; + kmem_cache_free(&kmem_bufctl_cache, to_free); + } + + return -1; +} + +/* + * Allocate, construct and add a new slab to the cache. + * + * The frontend slab API should call this function when all slabs are full when + * allocating. + * + * This function must be called with the cache's lock held. + * + * @return The added slab, or an pointer encoded error. + */ +static struct kmem_slab *kmem_cache_grow(struct kmem_cache *cache, int flags) +{ + struct kmem_slab *slab; + void *page; + paddr_t paddr; + size_t slab_size; + unsigned int max_color_offset; + + UNUSED(flags); + + slab_size = align_up(cache->obj_size, PAGE_SIZE); + + page = vm_alloc(&kernel_address_space, slab_size, VM_KERNEL_RW); + if (!page) + return PTR_ERR(E_NOMEM); + + if (BIT_READ(cache->flags, CACHE_F_EXTERNAL)) { + max_color_offset = slab_size % cache->obj_real_size; + slab = kmem_cache_alloc(&kmem_slab_cache, 0); + if (!slab) { + slab = PTR_ERR(E_NOMEM); + goto err; + } + } else { + /* The slab struct is placed the end of the slab */ + slab = page + slab_size - sizeof(*slab); + max_color_offset = (slab_size - sizeof(*slab)) % cache->obj_real_size; + } + + slab->page = page; + slab->cache = cache; + + /* + * Set this slab's cache coloring offset and update the kmem_cache + * offset for the next allocated slab. + */ + slab->coloring_offset = cache->coloring_offset_next; + cache->coloring_offset_next += cache->obj_align; + if (cache->coloring_offset_next > max_color_offset) + cache->coloring_offset_next = 0; + + if (BIT_READ(cache->flags, CACHE_F_EXTERNAL)) { + /* The page is not guaranteed to be accessed in this case, but we need + * a physical page to initialize the page structure later. */ + vm_map(&kernel_address_space, page); + if (kmem_slab_init_large_objects(slab)) + goto err; + } else { + kmem_slab_init_objects(slab); + } + + paddr = mmu_find_physical((vaddr_t)page); + for (size_t off = 0; off < slab_size; off += PAGE_SIZE) { + address_to_page(paddr + off)->flags |= PAGE_SLAB; + address_to_page(paddr + off)->slab.cache = cache; + } + + atomic_write(&slab->refcount, 0); + llist_add_tail(&cache->slabs_free, &slab->this); + + return slab; + +err: + vm_free(&kernel_address_space, page); + return slab; +} + +/* + * + */ +void *kmem_cache_alloc(struct kmem_cache *cache, int flags) +{ + struct kmem_slab *slab; + struct kmem_bufctl *bufctl; + llist_t *slabs; + void *obj = NULL; + + kmem_cache_lock(cache); + + /* + * Find a slab with at least one free object, and if none are present + * allocate one. 
+ */ + if (!llist_is_empty(&cache->slabs_partial)) { + slabs = &cache->slabs_partial; + slab = container_of(llist_first(slabs), struct kmem_slab, this); + } else { + slabs = &cache->slabs_free; + if (!llist_is_empty(&cache->slabs_free)) { + slab = container_of(llist_first(slabs), struct kmem_slab, this); + } else { + slab = kmem_cache_grow(cache, flags); + if (IS_ERR(slab)) + goto out; + } + } + + /* Pop the free object from the slab's freelist. */ + bufctl = slab->free; + atomic_inc(&slab->refcount); + slab->free = bufctl->next; + + /* Make the now active bufctl point to its slab. */ + bufctl->slab = slab; + obj = bufctl->obj; + + if (slab->free == NULL) { + llist_remove(&slab->this); + llist_add(&cache->slabs_full, &slab->this); + } else if (slabs == &cache->slabs_free) { + llist_remove(&slab->this); + llist_add(&cache->slabs_partial, &slab->this); + } + +out: + kmem_cache_unlock(cache); + return obj; +} + +/* + * + */ +void kmem_cache_free(struct kmem_cache *cache, void *obj) +{ + struct kmem_slab *slab; + struct kmem_bufctl *bufctl; + struct hashtable_entry *hash_entry; + struct page *page; + paddr_t paddr; + + UNUSED(cache); + + /* + * Slab pages always have a backing physical page since we access them + * to create the freelist in kmem_cache_grow(). If this is not verified + * it either means this page is not a slab page or that something went + * wrong somewhere. + */ + paddr = mmu_find_physical((vaddr_t)obj); + if (WARN_ON_MSG(IS_ERR(paddr), "free: no backing page for object at %p", + obj)) { + return; + } + + page = address_to_page(paddr); + if (WARN_ON_MSG(!(page->flags & PAGE_SLAB), + "free: object at %p is not a slab object (flags: %04x)", + obj, page->flags)) { + return; + } + + locked_scope (&kmem_bufctl_hashmap_lock) { + obj = align_down_ptr(obj, cache->obj_align); + hash_entry = hashtable_find(&kmem_bufctl_hashmap, obj); + if (WARN_ON_MSG(!hash_entry, "free: no bufctl found for %p", obj)) + return; + } + + bufctl = container_of(hash_entry, struct kmem_bufctl, hash); + slab = bufctl->slab; + if (WARN_ON(slab->cache != cache)) + return; + + kmem_cache_lock(cache); + kmem_slab_free_obj(slab, bufctl); + kmem_cache_unlock(cache); +} + +/* + * Initialize a kmem_cache structure. + */ +static void kmem_cache_init(struct kmem_cache *cache, const char *name, + size_t obj_size, int obj_align, + void (*constructor)(void *), + void (*destructor)(void *)) +{ + cache->name = name; + cache->obj_size = obj_size; + cache->obj_align = obj_align; + cache->constructor = constructor; + cache->destructor = destructor; + + cache->coloring_offset_next = 0; + cache->flags = 0; + + /* + * Slabs and bufctl struct for large objects are stored inside a dedicated + * external buffer. Regular slabs append the kmem_bufctl struct after + * the object directly inside the slab. 
+ */ + if (obj_size >= KMEM_SLAB_LARGE_SIZE) { + BIT_SET(cache->flags, CACHE_F_EXTERNAL); + cache->obj_real_size = align_up(obj_size, obj_align); + } else { + cache->obj_real_size = align_up(obj_size + sizeof(struct kmem_bufctl), + obj_align); + } +} + +/* + * + */ +struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size, + int obj_align, void (*constructor)(void *), + void (*destructor)(void *)) +{ + struct kmem_cache *cache; + + if (obj_size < KMEM_SLAB_MIN_SIZE) + return PTR_ERR(E_INVAL); + + if (obj_align < KMEM_SLAB_MIN_ALIGN || !is_power_of_2(obj_align)) + return PTR_ERR(E_INVAL); + + cache = kmem_cache_alloc(&kmem_cache_cache, 0); + if (!cache) { + log_err("failed to allocate %s cache", name); + return PTR_ERR(E_NOMEM); + } + + kmem_cache_init(cache, name, obj_size, obj_align, constructor, destructor); + + return cache; +} + +/* + * + */ +void kmem_cache_destroy(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + struct kmem_slab *next; + + FOREACH_LLIST_ENTRY_SAFE (slab, next, &cache->slabs_partial, this) { + kmem_slab_destroy(slab); + } + + FOREACH_LLIST_ENTRY_SAFE (slab, next, &cache->slabs_full, this) { + kmem_slab_destroy(slab); + } + + FOREACH_LLIST_ENTRY_SAFE (slab, next, &cache->slabs_free, this) { + kmem_slab_destroy(slab); + } +} + +/* + * Constructor for kmem_cache structures. + */ +static void kmem_cache_constructor(void *data) +{ + struct kmem_cache *cache = data; + + INIT_LLIST(cache->slabs_full); + INIT_LLIST(cache->slabs_partial); + INIT_LLIST(cache->slabs_free); + INIT_SPINLOCK(cache->lock); +} + +/* + * Destructor for kmem_cache structures. + */ +static void kmem_cache_destructor(void *data) +{ + UNUSED(data); +} + +/* + * + */ +int kmem_cache_api_init(void) +{ + /* + * Larger slabs require a separate hashtable, wihch we would need to + * dynamically allocate entries for. This is not feasible for the bootstrap + * cache of kmem_cache structures. + */ + static_assert(sizeof(struct kmem_cache) < KMEM_SLAB_LARGE_SIZE); + + hashtable_init(&kmem_bufctl_hashmap); + INIT_SPINLOCK(kmem_bufctl_hashmap_lock); + + kmem_cache_constructor(&kmem_cache_cache); + kmem_cache_init(&kmem_cache_cache, "kmem_cache", sizeof(struct kmem_cache), + 1, kmem_cache_constructor, kmem_cache_destructor); + + kmem_cache_constructor(&kmem_slab_cache); + kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab), 1, + NULL, NULL); + + kmem_cache_constructor(&kmem_bufctl_cache); + kmem_cache_init(&kmem_bufctl_cache, "kmem_bufctl", + sizeof(struct kmem_bufctl), 1, NULL, NULL); + + return 0; +} diff --git a/kernel/memory/vm_direct.c b/kernel/memory/vm_direct.c deleted file mode 100644 index e69de29b..00000000 diff --git a/kernel/memory/vm_normal.c b/kernel/memory/vm_normal.c index 378e45d3..96059653 100644 --- a/kernel/memory/vm_normal.c +++ b/kernel/memory/vm_normal.c @@ -13,6 +13,37 @@ struct vm_segment *vm_normal_alloc(struct address_space *as, vaddr_t addr, return vmm_allocate(as->vmm, addr, size, flags); } +static error_t vm_normal_map(struct address_space *as, + struct vm_segment *segment, vm_flags_t flags) +{ + size_t size = segment->size; + paddr_t phys; + error_t err; + + AS_ASSERT_OWNED(as); + + for (size_t off = 0; off < size; off += PAGE_SIZE) { + + /* If the address already contained a mapping keep it. 
*/
+        if (mmu_is_mapped(segment->start + off))
+            continue;
+
+        phys = pmm_allocate();
+        if (phys == PMM_INVALID_PAGEFRAME) {
+            err = E_NOMEM;
+            goto exit_error;
+        }
+
+        mmu_map(segment->start + off, phys, flags);
+    }
+
+    return E_SUCCESS;
+
+exit_error:
+    /* Don't free pages here; they will be released by vm_free(). */
+    return err;
+}
+
 struct vm_segment *vm_normal_alloc_at(struct address_space *as, paddr_t phys,
                                       size_t size, vm_flags_t flags)
 {
@@ -143,4 +174,5 @@ const struct vm_segment_driver vm_normal = {
     .vm_free = vm_normal_free,
     .vm_fault = vm_normal_fault,
     .vm_resize = vm_normal_resize,
+    .vm_map = vm_normal_map,
 };
diff --git a/kernel/sys/process.c b/kernel/sys/process.c
index a0b6fb21..d75df4d0 100644
--- a/kernel/sys/process.c
+++ b/kernel/sys/process.c
@@ -425,7 +425,10 @@ static void thread_free(thread_t *thread)
     llist_remove(&thread->proc_this);
     vm_free(&kernel_address_space, thread_get_kernel_stack(thread));
-    kfree(thread);
+
+    /* The initial thread is statically allocated, so we can't kfree() it. */
+    if (thread != &kernel_process_initial_thread)
+        kfree(thread);
 
     /*
      * Release the reference this thread holds onto the process.
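The slab.c header above leaves kmem_cache_reap() as a TODO. Purely as an illustration of what free-slab reclaim could look like with the structures introduced by this patch (a sketch under those assumptions, not the planned implementation), it could reuse kmem_slab_destroy() the same way kmem_cache_destroy() already does:

/*
 * Sketch only: reclaim the fully free slabs of a cache.
 *
 * Reuses kmem_slab_destroy(), which unlinks the slab, frees its backing
 * pages and releases its bufctl structures, exactly as kmem_cache_destroy()
 * does for the slabs_free list. Full and partial slabs are left untouched.
 *
 * NOTE: with this naive locking it must not be called on kmem_slab_cache or
 * kmem_bufctl_cache themselves, since kmem_slab_destroy() frees back into
 * those caches and would re-take their lock.
 */
static size_t kmem_cache_reap(struct kmem_cache *cache)
{
    struct kmem_slab *slab;
    struct kmem_slab *next;
    size_t reclaimed = 0;

    kmem_cache_lock(cache);

    FOREACH_LLIST_ENTRY_SAFE (slab, next, &cache->slabs_free, this) {
        kmem_slab_destroy(slab);
        reclaimed += 1;
    }

    kmem_cache_unlock(cache);

    return reclaimed;
}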