From 86005faacad3db9f96e3a1d326c4702a3cfb66e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Thu, 25 Dec 2025 22:32:32 +0100
Subject: [PATCH 1/8] i686: cpu: add macros for fields of control registers

Remove the previous magic values, and add bit macros with explicit
names. This makes reading the code easier, and is more in sync with
what's done for device registers.
---
 include/kernel/arch/i686/cpu.h | 17 +++++++++++++++++
 kernel/arch/i686/mmu.c         |  4 ++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h
index 74912f98..2c40b5da 100644
--- a/include/kernel/arch/i686/cpu.h
+++ b/include/kernel/arch/i686/cpu.h
@@ -3,9 +3,14 @@
 
 #include 
 
+#include 
 #include 
 #include 
 
+/*
+ * Register read/write wrappers.
+ */
+
 // Read from a 32-bits register
 #define READ_REGISTER_OPS(_reg)                 \
     static ALWAYS_INLINE u32 read_##_reg()      \
@@ -32,6 +37,18 @@ MAP(WRITE_REGISTER_OPS, CPU_32BIT_REGISTERS)
 #undef WRITE_REGISTER_OPS
 #undef READ_REGISTER_OPS
 
+/*
+ * CPU control registers.
+ */
+
+#define CR0_PG BIT(31) /* Paging enable */
+
+#define CR4_PAE BIT(5) /* PAE paging enable */
+
+/*
+ * ASM instruction wrappers.
+ */
+
 /* Write a single byte at a given I/O port address. */
 static ALWAYS_INLINE void outb(uint16_t port, uint8_t val)
 {
diff --git a/kernel/arch/i686/mmu.c b/kernel/arch/i686/mmu.c
index 332b8322..4e16498b 100644
--- a/kernel/arch/i686/mmu.c
+++ b/kernel/arch/i686/mmu.c
@@ -556,12 +556,12 @@ bool mmu_init(void)
     // According to 4.3, to activate 32-bit mode paging we must:
     // 1. set CR4.PAE to 0 (de-activate PAE)
     u32 cr4 = read_cr4();
-    BIT_CLEAR(cr4, 5); // PAE = bit 6
+    cr4 &= ~CR4_PAE;
     write_cr4(cr4);
 
     // 2. set CR0.PG to 1 (activate paging)
     u32 cr0 = read_cr0();
-    BIT_SET(cr0, 31); // PG = bit 32
+    cr0 |= CR0_PG;
     write_cr0(cr0);
 
     paging_enabled = true;

From 8cc8c1cefd69b8635da3e1cde6d70d113e22096f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Sat, 21 Jun 2025 18:37:59 +0200
Subject: [PATCH 2/8] i686: cpu: add CPUID macro

We do not check whether the running CPU supports the CPUID instruction,
but this should only be an issue when running on ancient hardware.
---
 include/kernel/arch/i686/cpu.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h
index 2c40b5da..311536ab 100644
--- a/include/kernel/arch/i686/cpu.h
+++ b/include/kernel/arch/i686/cpu.h
@@ -126,4 +126,31 @@ static ALWAYS_INLINE void insl(uint16_t port, uint32_t *buffer, size_t size)
         : "memory");
 }
 
+#include <cpuid.h> /* provided by GCC */
+
+#define cpuid(leaf, eax, ebx, ecx, edx) __get_cpuid(leaf, eax, ebx, ecx, edx)
+
+/*
+ * Define quick helper functions for CPUID calls that only need to access one
+ * of the result registers.
+ */
+#define CPUID_FUNCTION(_reg)                               \
+    static inline uint32_t cpuid_##_reg(uint32_t leaf)     \
+    {                                                      \
+        uint32_t eax;                                      \
+        uint32_t ebx;                                      \
+        uint32_t ecx;                                      \
+        uint32_t edx;                                      \
+                                                           \
+        cpuid(leaf, &eax, &ebx, &ecx, &edx);               \
+        return _reg;                                       \
+    }
+
+CPUID_FUNCTION(eax)
+CPUID_FUNCTION(ebx)
+CPUID_FUNCTION(ecx)
+CPUID_FUNCTION(edx)
+
+#undef CPUID_FUNCTION
+
 #endif /* KERNEL_I686_UTILS_CPU_OPS_H */

From 2459f1170c80a259111d5c62d85b6e82738041aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Sat, 21 Jun 2025 18:40:35 +0200
Subject: [PATCH 3/8] i686: cpu: add MSR read/write instruction wrappers
---
 include/kernel/arch/i686/cpu.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h
index 311536ab..f241dc51 100644
--- a/include/kernel/arch/i686/cpu.h
+++ b/include/kernel/arch/i686/cpu.h
@@ -153,4 +153,22 @@ CPUID_FUNCTION(edx)
 
 #undef CPUID_FUNCTION
 
+
+/* Read from a model-specific register */
+static inline uint64_t rdmsr(uint32_t msr)
+{
+    uint32_t eax;
+    uint32_t edx;
+    ASM("rdmsr" : "=a"(eax), "=d"(edx) : "c"(msr));
+    return (((uint64_t)edx) << 32) | eax;
+}
+
+/* Write into a model-specific register */
+static inline void wrmsr(uint32_t msr, uint64_t val)
+{
+    uint32_t eax = val;
+    uint32_t edx = val >> 32;
+    ASM("wrmsr" : : "a"(eax), "d"(edx), "c"(msr));
+}
+
 #endif /* KERNEL_I686_UTILS_CPU_OPS_H */

From eac690ce2d8ead87e13cc15b91e84845dff2c762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Fri, 26 Dec 2025 01:11:21 +0100
Subject: [PATCH 4/8] i686: cpu: add cpuinfo structure

This structure contains information about the system's CPU. It is
intended to be used to fetch the CPU's capabilities instead of issuing
and parsing the CPUID instruction each time.

We will surely be adding more fields in the future (e.g. the processor
model).
---
 include/kernel/arch/i686/cpu.h | 111 +++++++++++++++++++++++++++++++++
 kernel/arch/i686/build.mk      |   1 +
 kernel/arch/i686/cpu.c         | 109 ++++++++++++++++++++++++++++++++
 3 files changed, 221 insertions(+)
 create mode 100644 kernel/arch/i686/cpu.c

diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h
index f241dc51..f70fee29 100644
--- a/include/kernel/arch/i686/cpu.h
+++ b/include/kernel/arch/i686/cpu.h
@@ -7,6 +7,17 @@
 #include 
 #include 
 
+#define CPU_CACHE_ALIGN 64 /* 64B L1 cache lines */
+
+#define X86_FEATURE_WORDS 2 /* Number of CPUID leaves that contain features. */
+
+struct x86_cpuinfo {
+    const char *vendor;
+    u32 features[X86_FEATURE_WORDS];
+};
+
+extern struct x86_cpuinfo cpuinfo;
+
 /*
  * Register read/write wrappers.
  */
@@ -153,6 +164,106 @@ CPUID_FUNCTION(edx)
 
 #undef CPUID_FUNCTION
 
+#define CPUID_LEAF_GETVENDOR 0
+#define CPUID_LEAF_GETFEATURES 1
+#define CPUID_LEAF_GETFEATURES_EXT 7
+
+/* Vendor codes used by popular hypervisors. */
+#define signature_QEMU_ebx          0x47435443 // [TCGT]CGTCGTCG
+#define signature_KVM_ebx           0x4D564B20 // [ KVM]KVMKVM
+#define signature_VMWARE_ebx        0x61774D56 // [VMwa]reVMware
+#define signature_VIRTUALBOX_ebx    0x786F4256 // [VBox]VBoxVBox
+#define signature_XEN_ebx           0x566E6558 // [XenV]MMXenVMM
+#define signature_HYPERV_ebx        0x7263694D // [Micr]osoft Hv
+#define signature_PARALLELS_ebx     0x6C727020 // [ prl] hyperv
+#define signature_PARALLELS_ALT_ebx 0x6570726C // [lrpe]pyh vr
+#define signature_BHYVE_ebx         0x76796862 // [bhyv]e bhyve
+#define signature_QNX_ebx           0x20584E51 // [ QNX]QVMBSQG
+
+#define X86_FEATURES(F) \
+    \
+    /* Features in %ecx for leaf 1 */ \
+    F(SSE3, 0, 0), \
+    F(PCLMUL, 0, 1), \
+    F(DTES64, 0, 2), \
+    F(MONITOR, 0, 3), \
+    F(DSCPL, 0, 4), \
+    F(VMX, 0, 5), \
+    F(SMX, 0, 6), \
+    F(EIST, 0, 7), \
+    F(TM2, 0, 8), \
+    F(SSSE3, 0, 9), \
+    F(CNXTID, 0, 10), \
+    F(FMA, 0, 12), \
+    F(CMPXCHG16B, 0, 13), \
+    F(xTPR, 0, 14), \
+    F(PDCM, 0, 15), \
+    F(PCID, 0, 17), \
+    F(DCA, 0, 18), \
+    F(SSE41, 0, 19), \
+    F(SSE42, 0, 20), \
+    F(x2APIC, 0, 21), \
+    F(MOVBE, 0, 22), \
+    F(POPCNT, 0, 23), \
+    F(TSCDeadline, 0, 24), \
+    F(AES, 0, 25), \
+    F(XSAVE, 0, 26), \
+    F(OSXSAVE, 0, 27), \
+    F(AVX, 0, 28), \
+    F(F16C, 0, 29), \
+    F(RDRND, 0, 30), \
+    \
+    /* Features in %edx for leaf 1 */ \
+    F(FPU, 1, 0), \
+    F(VME, 1, 1), \
+    F(DE, 1, 2), \
+    F(PSE, 1, 3), \
+    F(TSC, 1, 4), \
+    F(MSR, 1, 5), \
+    F(PAE, 1, 6), \
+    F(MCE, 1, 7), \
+    F(CMPXCHG8B, 1, 8), \
+    F(APIC, 1, 9), \
+    F(SEP, 1, 11), \
+    F(MTRR, 1, 12), \
+    F(PGE, 1, 13), \
+    F(MCA, 1, 14), \
+    F(CMOV, 1, 15), \
+    F(PAT, 1, 16), \
+    F(PSE36, 1, 17), \
+    F(PSN, 1, 18), \
+    F(CLFSH, 1, 19), \
+    F(DS, 1, 21), \
+    F(ACPI, 1, 22), \
+    F(MMX, 1, 23), \
+    F(FXSAVE, 1, 24), \
+    F(SSE, 1, 25), \
+    F(SSE2, 1, 26), \
+    F(SS, 1, 27), \
+    F(HTT, 1, 28), \
+    F(TM, 1, 29), \
+    F(PBE, 1, 31), \
+
+#define X86_FEATURE_NAME(_feature) X86_FEATURE_##_feature
+#define X86_FEATURE_VAL(_word, _bit) ((_word << X86_FEATURE_WORD_OFF) | (_bit & 0xff))
+#define X86_FEATURE_WORD_OFF 8
+
+enum x86_cpu_feature {
+#define DEFINE_X86_FEATURE(_name, _word, _bit) \
+    X86_FEATURE_NAME(_name) = X86_FEATURE_VAL(_word, _bit)
+X86_FEATURES(DEFINE_X86_FEATURE)
+#undef DEFINE_X86_FEATURE
+};
+
+static inline bool cpu_test_feature(enum x86_cpu_feature feature)
+{
+    int leaf = (feature >> X86_FEATURE_WORD_OFF);
+    int bit = feature & (BIT(X86_FEATURE_WORD_OFF) - 1);
+
+    return BIT_READ(cpuinfo.features[leaf], bit);
+}
+
+#define cpu_has_feature(_feature) cpu_test_feature(X86_FEATURE_NAME(_feature))
 
 /* Read from a model-specific register */
 static inline uint64_t rdmsr(uint32_t msr)
 {
diff --git a/kernel/arch/i686/build.mk b/kernel/arch/i686/build.mk
index 7ee0f3db..b25ab07b 100644
--- a/kernel/arch/i686/build.mk
+++ b/kernel/arch/i686/build.mk
@@ -5,6 +5,7 @@ QEMU := qemu-system-i386
 
 KERNEL_ARCH_SRCS := \
     crt0.S \
+    cpu.c \
     gdt.S \
     gdt.c \
     interrupts.c \
diff --git a/kernel/arch/i686/cpu.c b/kernel/arch/i686/cpu.c
new file mode 100644
index 00000000..10be31f2
--- /dev/null
+++ b/kernel/arch/i686/cpu.c
@@ -0,0 +1,109 @@
+#define LOG_DOMAIN "i686"
+
+#include 
+#include 
+#include 
+
+#include 
+
+struct x86_cpuinfo cpuinfo;
+
+struct x86_cpu_vendor {
+    const char *vendor;
+    u32 ebx;
+    u32 ecx;
+    u32 edx;
+};
+
+#define CPU_VENDOR(_vendor, _name)                \
+    {                                             \
+        .vendor = _name,                          \
+        .ebx = signature_##_vendor##_ebx,         \
+        .ecx = signature_##_vendor##_ecx,         \
+        .edx = signature_##_vendor##_edx,         \
+    }
+
+#define CPU_VENDOR_HV(_vendor, _name)             \
+    {                                             \
+        .vendor = _name,                          \
+        .ebx = signature_##_vendor##_ebx,         \
+    }
+
+static struct x86_cpu_vendor cpu_vendors[] = {
+    CPU_VENDOR(AMD, "AMD"),
+    CPU_VENDOR(INTEL, "Intel"),
+    CPU_VENDOR_HV(KVM, "KVM"),
+    CPU_VENDOR_HV(VMWARE, "VMWare"),
+    CPU_VENDOR_HV(VIRTUALBOX, "VirtualBox"),
+    CPU_VENDOR_HV(XEN, "Xen"),
+    CPU_VENDOR_HV(HYPERV, "Microsoft Hypervisor"),
+};
+
+static const char *feature_name[32 * X86_FEATURE_WORDS] = {
+#define X86_FEATURE_STRING(_name, _word, _bit) [_word * 32 + _bit] = stringify(_name)
+    X86_FEATURES(X86_FEATURE_STRING)
+#undef X86_FEATURE_STRING
+};
+
+/*
+ * Dump the CPU's vendor and detected features to the kernel log.
+ */
+static void cpu_dump_info(enum log_level level, const struct x86_cpuinfo *cpu)
+{
+    log(level, LOG_DOMAIN, "CPU Information");
+    log(level, LOG_DOMAIN, "Vendor: %s", cpu->vendor);
+
+    log(level, LOG_DOMAIN, "Features: ");
+    for (int leaf = 0; leaf < X86_FEATURE_WORDS; ++leaf) {
+        for (int bit = 0; bit < 32; ++bit) {
+            if (cpu_test_feature(X86_FEATURE_VAL(leaf, bit))) {
+                if (feature_name[leaf * 32 + bit])
+                    printk("%s ", feature_name[leaf * 32 + bit]);
+            }
+        }
+    }
+    printk("\n");
+}
+
+/*
+ * Identify the CPU's vendor and cache the feature words returned by CPUID.
+ */
+static void cpu_init_info(struct x86_cpuinfo *cpu)
+{
+    unsigned int eax;
+    unsigned int ebx;
+    unsigned int ecx;
+    unsigned int edx;
+
+    cpu->vendor = "unknown";
+
+    /* Find vendor information */
+    cpuid(CPUID_LEAF_GETVENDOR, &eax, &ebx, &ecx, &edx);
+    for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); ++i) {
+        if (cpu_vendors[i].ebx != ebx)
+            continue;
+        if (cpu_vendors[i].ecx && cpu_vendors[i].ecx != ecx)
+            continue;
+        if (cpu_vendors[i].edx && cpu_vendors[i].edx != edx)
+            continue;
+        cpu->vendor = cpu_vendors[i].vendor;
+        break;
+    }
+
+    cpu->features[0] = cpuid_ecx(CPUID_LEAF_GETFEATURES);
+    cpu->features[1] = cpuid_edx(CPUID_LEAF_GETFEATURES);
+
+    cpu_dump_info(LOG_LEVEL_INFO, cpu);
+}
+
+/*
+ * Initialize the CPU and configure it in a known state.
+ */
+error_t cpu_init(void)
+{
+    cpu_init_info(&cpuinfo);
+
+    return E_SUCCESS;
+}
+
+DECLARE_INITCALL(INIT_BOOTSTRAP, cpu_init);

From 7ed20709bc0759b116d7341655ca33d45a74526a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Fri, 26 Dec 2025 13:03:32 +0100
Subject: [PATCH 5/8] i686: mmu: refactor mmu initialization

Split the mmu_init() function into distinct parts using sub-functions
to make it more readable. This will also help when initializing the PAT
table in the next commits.
---
 kernel/arch/i686/mmu.c | 68 +++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/kernel/arch/i686/mmu.c b/kernel/arch/i686/mmu.c
index 4e16498b..1037b3db 100644
--- a/kernel/arch/i686/mmu.c
+++ b/kernel/arch/i686/mmu.c
@@ -508,23 +508,11 @@ error_t mmu_copy_on_write(vaddr_t addr)
     return ret;
 }
 
-bool mmu_init(void)
+/*
+ * Initialize the content of the page directory.
+ */
+static void mmu_init_page_directory(paddr_t page_directory)
 {
-    paddr_t page_directory;
-    paddr_t page_table;
-
-    if (paging_enabled) {
-        log_warn("Trying to re-enable paging. Skipping.");
-        return false;
-    }
-
-    interrupts_set_handler(PAGE_FAULT, INTERRUPT_HANDLER(page_fault), NULL);
-
-    page_directory = KERNEL_HIGHER_HALF_PHYSICAL(kernel_startup_page_directory);
-
-    // Initialize the kernel's page directory
-    kernel_address_space.mmu = page_directory;
-
     // Mark all PDEs as "absent" (present = 0), and writable
     for (size_t entry = 0; entry < MMU_PDE_COUNT; entry++) {
         kernel_startup_page_directory[entry] = (mmu_pde_t){
@@ -550,25 +538,49 @@ bool mmu_init(void)
     // (soon hopefully)
     mmu_offset_map(0, KERNEL_HIGHER_HALF_PHYSICAL(KERNEL_CODE_END),
                    KERNEL_HIGHER_HALF_OFFSET, PROT_EXEC | PROT_READ);
+}
+
+bool mmu_init(void)
+{
+    paddr_t page_directory;
+    paddr_t page_table;
+    u32 val;
+
+    if (paging_enabled) {
+        log_warn("Trying to re-enable paging. Skipping.");
+        return false;
+    }
+
+    interrupts_set_handler(PAGE_FAULT, INTERRUPT_HANDLER(page_fault), NULL);
+
+    page_directory = KERNEL_HIGHER_HALF_PHYSICAL(kernel_startup_page_directory);
+    kernel_address_space.mmu = page_directory;
+
+    mmu_init_page_directory(page_directory);
+
+    /*
+     * Enable 32-bit mode paging.
+     */
     mmu_load(page_directory);
 
-    // According to 4.3, to activate 32-bit mode paging we must:
-    // 1. set CR4.PAE to 0 (de-activate PAE)
-    u32 cr4 = read_cr4();
-    cr4 &= ~CR4_PAE;
-    write_cr4(cr4);
+    val = read_cr4();
+    val &= ~CR4_PAE;
+    write_cr4(val);
 
-    // 2. set CR0.PG to 1 (activate paging)
-    u32 cr0 = read_cr0();
-    cr0 |= CR0_PG;
-    write_cr0(cr0);
+    val = read_cr0();
+    val |= CR0_PG;
+    write_cr0(val);
 
     paging_enabled = true;
 
-    // Pre-allocate all shared kernel page table entries
-    // We NEED to allocate them now for them to be present inside the IDLE
-    // task's page table.
+    /*
+     * Pre-allocate all shared kernel page table entries.
+     *
+     * We MUST allocate them now for them to be present inside the IDLE
+     * task's page table.
+     */
+
     for (size_t i = MMU_PDE_KERNEL_FIRST; i < MMU_PDE_COUNT - 1; i++) {
         if (kernel_startup_page_directory[i].present)
             continue;

From 6028326f83cc96e719e97b9cc52146de5e57247a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Fri, 26 Dec 2025 13:05:18 +0100
Subject: [PATCH 6/8] i686: mmu: add caching policy configuration

Reference: Intel SDM 2023 volume 3 chapter 12 (Memory cache control)
---
 include/kernel/arch/i686/cpu.h |   6 ++
 include/kernel/mmu.h           |  36 ++++++-
 kernel/arch/i686/cpu.c         |  20 ++++
 kernel/arch/i686/mmu.c         | 199 ++++++++++++++++++++++++++++-----
 4 files changed, 233 insertions(+), 28 deletions(-)

diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h
index f70fee29..37b391a6 100644
--- a/include/kernel/arch/i686/cpu.h
+++ b/include/kernel/arch/i686/cpu.h
@@ -53,6 +53,8 @@ MAP(WRITE_REGISTER_OPS, CPU_32BIT_REGISTERS)
  */
 
 #define CR0_PG BIT(31) /* Paging enable */
+#define CR0_CD BIT(30) /* Cache disable */
+#define CR0_NW BIT(29) /* Not write-through */
 
 #define CR4_PAE BIT(5) /* PAE paging enable */
 
@@ -265,6 +267,10 @@ static inline bool cpu_test_feature(enum x86_cpu_feature feature)
 
 #define cpu_has_feature(_feature) cpu_test_feature(X86_FEATURE_NAME(_feature))
 
+enum x86_msr {
+    MSR_PAT = 0x277,
+};
+
 /* Read from a model-specific register */
 static inline uint64_t rdmsr(uint32_t msr)
 {
diff --git a/include/kernel/mmu.h b/include/kernel/mmu.h
index e2d64c5a..fa0db4ac 100644
--- a/include/kernel/mmu.h
+++ b/include/kernel/mmu.h
@@ -29,6 +29,8 @@
 #include 
 #include 
 
+#include 
+
 #include 
 
 /**
@@ -44,6 +46,16 @@ typedef enum mmu_prot {
     PROT_KERNEL = 0x8, /*!< Pages should be accessible only from the kernel */
 } mmu_prot;
 
+/** @enum mmu_caching_policy
+ *  @brief Caching policies.
+ */
+typedef enum mmu_caching_policy {
+    POLICY_UC = BIT(6), /*!< Uncacheable memory. */
+    POLICY_WC = BIT(7), /*!< Write-combining memory. */
+    POLICY_WT = BIT(8), /*!< Write-through memory. */
+    POLICY_WB = BIT(9), /*!< Write-back memory. */
+} mmu_policy_t;
+
 /** Initialize the MMU's paging system
  *
  * This function is responsible for setting any required bit inside the CPU's
@@ -87,7 +99,7 @@ void mmu_load(paddr_t mmu);
  * @param virt The virtual address
  * @param physical Its physical equivalent
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
  *
  * @return False if the address was already mapped before
  */
@@ -100,7 +112,7 @@ bool mmu_map(vaddr_t virt, paddr_t physical, int prot);
  * @param physical Its physical equivalent
  * @param size The size of the region to map
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
 *
 * @return False if the address was already mapped before
 */
@@ -136,7 +148,7 @@ void mmu_unmap_range(vaddr_t start, vaddr_t end);
  * @param start the starting page of the address range
  * @param end the ending address of the address range
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
  */
 void mmu_identity_map(paddr_t start, paddr_t end, int prot);
 
@@ -151,4 +163,22 @@ static inline bool mmu_is_mapped(vaddr_t addr)
     return !IS_ERR(mmu_find_physical(addr));
 }
 
+/** Configure the caching policy in effect when accessing a page.
+ *
+ * @param vaddr The page's virtual address
+ * @param policy Caching policy applied to this page.
+ * A combination of @ref mmu_caching_policy flags. + */ +error_t mmu_set_policy(vaddr_t, mmu_policy_t policy); + +/** Configure the caching policy in effect when accessing a range of pages. + * + * @param start The virtual address of the first page in the address range + * @param size The size of the address range + * @param policy Caching policy applied to this page. + * A combination of @ref mmu_caching_policy flags. + */ +error_t mmu_set_policy_range(vaddr_t range_start, size_t range_size, + mmu_policy_t policy); + #endif /* KERNEL_MMU_H */ diff --git a/kernel/arch/i686/cpu.c b/kernel/arch/i686/cpu.c index 10be31f2..953141ad 100644 --- a/kernel/arch/i686/cpu.c +++ b/kernel/arch/i686/cpu.c @@ -8,6 +8,25 @@ struct x86_cpuinfo cpuinfo; +/* + * + */ +static void cpu_init_caches(void) +{ + u32 val; + + /* + * Enable caching globally. + * + * Caching policies can still be selectively configured using page table + * entries or MTRR registers. + */ + val = read_cr0(); + val &= ~CR0_CD; + val &= ~CR0_NW; + write_cr0(val); +} + struct x86_cpu_vendor { const char *vendor; u32 ebx; @@ -102,6 +121,7 @@ static void cpu_init_info(struct x86_cpuinfo *cpu) error_t cpu_init(void) { cpu_init_info(&cpuinfo); + cpu_init_caches(); return E_SUCCESS; } diff --git a/kernel/arch/i686/mmu.c b/kernel/arch/i686/mmu.c index 1037b3db..5d954625 100644 --- a/kernel/arch/i686/mmu.c +++ b/kernel/arch/i686/mmu.c @@ -190,7 +190,7 @@ void mmu_load(paddr_t page_directory) * [0x0000; 0x00FF] to the virtual range [0xFF00; 0xFFFF] */ static void -mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int prot); +mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int flags); /** @brief Inititialize a new page directory * @return The physical address of the new page_directory, 0 if error. @@ -226,9 +226,110 @@ void mmu_destroy(paddr_t mmu) pmm_free(mmu); } -// TODO: We do not have a way to quickly map and access an arbitrary physical address. -// This prevents us from cloning an arbitrary MMU instance. This is the reason why -// this function currently only takes in the destination MMU as a parameter. +/* + * + */ +static inline mmu_pde_t *mmu_get_active_page_directory(void) +{ + if (unlikely(!paging_enabled)) + return kernel_startup_page_directory; + + return MMU_RECURSIVE_PAGE_DIRECTORY_ADDRESS; +} + +/* + * Configure the caching policy on a page level. The caller must invalidate + * the address' TLB entry after calling this function. + */ +static error_t +__mmu_set_policy(mmu_pde_t *page_directory, vaddr_t vaddr, int policy) +{ + mmu_decode_t address = {.raw = vaddr}; + mmu_pte_t *pte; + bool pat = false; + bool pwt = false; + bool pcd = false; + + /* Sanitize input in case we were called from mmu_map(). */ + policy &= POLICY_UC | POLICY_WT | POLICY_WB | POLICY_WC; + if (!policy) + policy = POLICY_WB; /* Enable caching by default. 
+
+    if (!page_directory[address.pde].present)
+        return E_NOENT;
+
+    pte = &MMU_RECURSIVE_PAGE_TABLE_ADDRESS(address.pde)[address.pte];
+    switch (policy) {
+    case POLICY_WB:
+        break;
+    case POLICY_WT:
+        pwt = true;
+        break;
+    case POLICY_UC:
+        pcd = true;
+        break;
+    case POLICY_WC:
+        pat = true;
+        break;
+
+    default:
+        WARN("invalid caching policy: %02x\n", policy);
+        return E_INVAL;
+    }
+
+    if (pat && !cpu_has_feature(PAT)) {
+        log_warn("unsupported policy: %02x (requires PAT support)", policy);
+        return E_NOT_SUPPORTED;
+    }
+
+    pte->pat = pat;
+    pte->pcd = pcd;
+    pte->pwt = pwt;
+
+    return E_SUCCESS;
+}
+
+/*
+ * Set the caching policy for a single page and flush its TLB entry.
+ */
+error_t mmu_set_policy(vaddr_t vaddr, mmu_policy_t policy)
+{
+    mmu_pde_t *page_directory;
+    error_t err;
+
+    page_directory = mmu_get_active_page_directory();
+    err = __mmu_set_policy(page_directory, vaddr, policy);
+    if (err)
+        return err;
+
+    mmu_flush_tlb(vaddr);
+    return E_SUCCESS;
+}
+
+/*
+ * Set the caching policy for every page inside a range.
+ */
+error_t mmu_set_policy_range(vaddr_t range_start, size_t range_size,
+                             mmu_policy_t policy)
+{
+    error_t ret = E_SUCCESS;
+
+    range_size = align_down(range_size, PAGE_SIZE);
+    for (size_t off = 0; off < range_size; off += PAGE_SIZE) {
+        /* Keep going when an error happens but return the first error code. */
+        error_t err = mmu_set_policy(range_start + off, policy);
+        if (err && !ret)
+            ret = err;
+    }
+
+    return ret;
+}
+
+// TODO: We do not have a way to quickly map and access an arbitrary physical
+// address.
+//       This prevents us from cloning an arbitrary MMU instance. This is the
+//       reason why this function currently only takes in the destination MMU
+//       as a parameter.
 void mmu_clone(paddr_t destination)
 {
     page_directory_t src_page_directory;
@@ -269,24 +370,15 @@ void mmu_clone(paddr_t destination)
     vm_free(&kernel_address_space, dst_page_directory);
 }
 
-bool mmu_map(vaddr_t virtual, paddr_t pageframe, int prot)
+bool mmu_map(vaddr_t virtual, paddr_t pageframe, int flags)
 {
     mmu_decode_t address = {.raw = virtual};
-
-    if (virtual % PAGE_SIZE)
-        return false;
-
-    // TODO: We hardcode the pde/pte to be un-accessible when in user mode.
-    // This will also cause an issue when reaching userspace later.
-
-    page_directory_t page_directory;
+    page_directory_t page_directory = mmu_get_active_page_directory();
     page_table_t page_table;
     bool new_page_table = false;
 
-    if (paging_enabled)
-        page_directory = MMU_RECURSIVE_PAGE_DIRECTORY_ADDRESS;
-    else
-        page_directory = kernel_startup_page_directory;
+    if (virtual % PAGE_SIZE)
+        return false;
 
     if (!page_directory[address.pde].present) {
         u32 page_table = pmm_allocate();
@@ -321,14 +413,17 @@ bool mmu_map(vaddr_t virtual, paddr_t pageframe, int prot)
     page_table[address.pte] = (mmu_pte_t){
         .present = 1,
         .page_frame = TO_PFN(pageframe),
-        .writable = boolean(prot & PROT_WRITE),
-        .user = !(prot & PROT_KERNEL),
+        .writable = boolean(flags & PROT_WRITE),
+        .user = !(flags & PROT_KERNEL),
     };
 
+    /* No need to flush since the entry has not been cached yet. */
+    __mmu_set_policy(page_directory, virtual, flags);
+
     return true;
 }
 
-bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int prot)
+bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int flags)
 {
     size_t range;
 
@@ -338,7 +433,7 @@ bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int prot)
     }
 
     for (range = 0; range < size; range += PAGE_SIZE) {
-        if (!mmu_map(virtual + range, physical + range, prot))
+        if (!mmu_map(virtual + range, physical + range, flags))
             break;
     }
 
@@ -412,16 +507,17 @@ void mmu_unmap_range(vaddr_t start, vaddr_t end)
         mmu_unmap(start);
 }
 
-static void mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int prot)
+static void
+mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int flags)
 {
     for (; start < end; start += PAGE_SIZE) {
-        mmu_map(start + offset, start, prot);
+        mmu_map(start + offset, start, flags);
     }
 }
 
-void mmu_identity_map(paddr_t start, paddr_t end, int prot)
+void mmu_identity_map(paddr_t start, paddr_t end, int flags)
 {
-    mmu_offset_map(start, end, 0, prot);
+    mmu_offset_map(start, end, 0, flags);
 }
 
 paddr_t mmu_find_physical(vaddr_t virtual)
@@ -508,6 +604,55 @@ error_t mmu_copy_on_write(vaddr_t addr)
     return ret;
 }
 
+/* Memory types used to configure the MTRR and PAT tables. */
+enum memory_type {
+    MEM_UC = 0x00,
+    MEM_WC = 0x01,
+    MEM_WT = 0x04,
+    MEM_WP = 0x05,
+    MEM_WB = 0x06,
+    MEM_UC_MINUS = 0x07, /* Valid only for the PAT table. */
+};
+
+/*
+ * Fill the page attribute table.
+ */
+static void mmu_init_pat(void)
+{
+    u64 pat = 0;
+
+    if (!cpu_has_feature(PAT)) {
+        log_info("PAT not present on this platform");
+        return;
+    }
+
+#define PAT(n, val) (((u64)val & 0xff) << (n * 8))
+
+    /* Configure a PAT entry for each caching related bit inside a PTE. */
+    pat |= PAT(0, MEM_WB);
+    pat |= PAT(1, MEM_WT);
+    pat |= PAT(2, MEM_UC);
+    pat |= PAT(4, MEM_WC);
+
+    wrmsr(MSR_PAT, pat);
+
+#undef PAT
+}
+
+/*
+ * Configure the memory type range registers.
+ */
+static void mmu_init_mtrr(void)
+{
+    if (!cpu_has_feature(MTRR)) {
+        log_info("MTRR not present on this platform");
+        return;
+    }
+
+    /* TODO add support for MTRRs. */
+    not_implemented("MTRR");
+}
+
 /*
  * Initialize the content of the page directory.
  */
@@ -553,6 +698,10 @@ bool mmu_init(void)
 
     interrupts_set_handler(PAGE_FAULT, INTERRUPT_HANDLER(page_fault), NULL);
 
+    /* Initialize caching structures. */
+    mmu_init_pat();
+    mmu_init_mtrr();
+
     page_directory = KERNEL_HIGHER_HALF_PHYSICAL(kernel_startup_page_directory);
     kernel_address_space.mmu = page_directory;
 

From 4867e1041a07d11f81545c1b55d0a035c2b130d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Fri, 26 Dec 2025 16:11:50 +0100
Subject: [PATCH 7/8] memory/vm: add cache policy configuration flags

The caching policy used for a vm_segment is now explicitly configured
through the segment's flags. The chosen policy is forwarded to the MMU
API when mapping an address inside the segment.

This commit also adds the vm_set_policy() function, which can be called
to dynamically re-configure the policy of an already existing segment.

When no policy is specified, the API defaults back to write-back
caching (regular caching).
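
For instance, a driver could request write-combining at allocation time,
or re-configure an existing segment later. A sketch (fb_phys and fb_size
are hypothetical placeholders, not names from this series):

    /* Map a linear framebuffer write-combining up front... */
    void *fb = vm_alloc_at(&kernel_address_space, fb_phys,
                           align_up(fb_size, PAGE_SIZE),
                           VM_KERNEL_RW | VM_CACHE_WC);

    /* ...or change the effective policy of the segment dynamically. */
    error_t err = vm_set_policy(&kernel_address_space, fb, VM_CACHE_WT);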
---
 include/kernel/vm.h           | 15 +++++++
 kernel/memory/address_space.c | 78 +++++++++++++++++++++++++++++++++++
 kernel/memory/vm_normal.c     | 17 +++++++-
 3 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/include/kernel/vm.h b/include/kernel/vm.h
index 0999329c..0b5b8506 100644
--- a/include/kernel/vm.h
+++ b/include/kernel/vm.h
@@ -62,6 +62,12 @@ typedef enum vm_flags {
     VM_KERNEL = BIT(3), /*!< Pages should only be accessible from kernel */
     VM_CLEAR = BIT(4),  /*!< Page content should be reset when allocating */
     VM_FIXED = BIT(5),  /*!< Start address in vm_alloc_at() is not a hint */
+
+    /* Caching policies. */
+    VM_CACHE_UC = BIT(6), /*!< Uncacheable. */
+    VM_CACHE_WC = BIT(7), /*!< Write-combining. */
+    VM_CACHE_WT = BIT(8), /*!< Write-through. */
+    VM_CACHE_WB = BIT(9), /*!< Write-back (default). */
 } vm_flags_t;
 
 #define VM_KERNEL_RO (VM_KERNEL | VM_READ)
@@ -72,6 +78,8 @@ typedef enum vm_flags {
 #define VM_USER_WO (VM_WRITE)
 #define VM_USER_RW (VM_READ | VM_WRITE)
 
+#define VM_CACHE_MASK (VM_CACHE_UC | VM_CACHE_WT | VM_CACHE_WB | VM_CACHE_WC)
+
 /** Segment driver
  *
  * There exists different types of memory segments. A segment driver defines
@@ -136,6 +144,10 @@ struct vm_segment_driver {
 
     /** Map this segment onto a physical address. */
     error_t (*vm_map)(struct address_space *, struct vm_segment *, vm_flags_t);
+
+    /** Configure the effective caching policy for a segment. */
+    error_t (*vm_set_policy)(struct address_space *, struct vm_segment *,
+                             vm_flags_t policy);
 };
 
 /** Kernel-only address-space.
@@ -233,4 +245,7 @@ struct vm_segment *vm_find(const struct address_space *, void *);
  */
 error_t vm_map(struct address_space *, void *);
 
+/** Change the effective caching policy for addresses inside a segment. */
+error_t vm_set_policy(struct address_space *, void *, vm_flags_t policy);
+
 #endif /* KERNEL_VM_H */
diff --git a/kernel/memory/address_space.c b/kernel/memory/address_space.c
index 7f3ace84..facb5674 100644
--- a/kernel/memory/address_space.c
+++ b/kernel/memory/address_space.c
@@ -24,6 +24,10 @@ static_assert((int)VM_EXEC == (int)PROT_EXEC);
 static_assert((int)VM_READ == (int)PROT_READ);
 static_assert((int)VM_WRITE == (int)PROT_WRITE);
 static_assert((int)VM_KERNEL == (int)PROT_KERNEL);
+static_assert((int)VM_CACHE_UC == (int)POLICY_UC);
+static_assert((int)VM_CACHE_WB == (int)POLICY_WB);
+static_assert((int)VM_CACHE_WT == (int)POLICY_WT);
+static_assert((int)VM_CACHE_WC == (int)POLICY_WC);
 
 static DECLARE_LLIST(kernel_segments);
 
@@ -274,6 +278,33 @@ error_t address_space_fault(struct address_space *as, void *addr, bool is_cow)
     return segment->driver->vm_fault(as, segment);
 }
 
+/*
+ * Check whether the provided combination of flags is valid.
+ */
+static inline bool
+vm_flags_validate(struct address_space *as, vm_flags_t *flags)
+{
+    UNUSED(as);
+
+    switch (*flags & VM_CACHE_MASK) {
+    case 0:
+        /* Default caching policy: write-back. */
+        *flags |= VM_CACHE_WB;
+        break;
+
+    case VM_CACHE_UC:
+    case VM_CACHE_WC:
+    case VM_CACHE_WT:
+    case VM_CACHE_WB:
+        break;
+
+    default:
+        return false;
+    }
+
+    return true;
+}
+
 void *vm_alloc_start(struct address_space *as, void *addr, size_t size,
                      vm_flags_t flags)
 {
@@ -283,6 +314,9 @@ void *vm_alloc_start(struct address_space *as, void *addr, size_t size,
     if (size % PAGE_SIZE)
         return NULL;
 
+    if (!vm_flags_validate(as, &flags))
+        return NULL;
+
     driver = vm_find_driver(flags);
     if (!driver)
         return NULL;
@@ -317,6 +351,9 @@ void *vm_alloc_at(struct address_space *as, paddr_t phys, size_t size,
     if (phys % PAGE_SIZE)
         return NULL;
 
+    if (!vm_flags_validate(as, &flags))
+        return NULL;
+
     driver = vm_find_driver(flags);
     if (!driver)
         return NULL;
@@ -353,6 +390,11 @@ void vm_free(struct address_space *as, void *addr)
         return;
     }
 
+    /* NOTE: We should not be freeing the whole segment at once. We may want
+     *       to free only part of a segment. This means treating each memory
+     *       segment as a single big 'object', and could also cause issues
+     *       when implementing VMA merging later.
+     */
     vm_segment_remove(as, segment);
     segment->driver->vm_free(as, segment);
 }
@@ -486,3 +528,39 @@ void *sys_sbrk(intptr_t increment)
     return vm_brk(current->process->as,
                   current->process->as->brk_end + increment);
 }
+
+/*
+ * Change the caching policy of the segment containing an address.
+ */
+error_t vm_set_policy(struct address_space *as, void *addr, vm_flags_t policy)
+{
+    struct vm_segment *segment;
+    error_t ret;
+
+    AS_ASSERT_OWNED(as);
+
+    policy &= VM_CACHE_MASK;
+    if (!vm_flags_validate(as, &policy))
+        return E_INVAL;
+
+    locked_scope (&as->lock) {
+        segment = vm_find(as, addr);
+        if (!segment)
+            return E_NOENT;
+
+        /* NOTE: We should be adding a size parameter if we ever want to change
+         *       the policy for part of a segment only.
+         */
+        ret = segment->driver->vm_set_policy(as, segment, policy);
+        if (ret) {
+            log_warn("failed to set caching policy for [%p-%p]: %pe",
+                     (void *)segment->start, (void *)segment_end(segment),
+                     &ret);
+        }
+
+        segment->flags &= ~VM_CACHE_MASK;
+        segment->flags |= policy;
+    }
+
+    return E_SUCCESS;
+}
diff --git a/kernel/memory/vm_normal.c b/kernel/memory/vm_normal.c
index 96059653..cc645608 100644
--- a/kernel/memory/vm_normal.c
+++ b/kernel/memory/vm_normal.c
@@ -7,8 +7,9 @@
 
 #include 
 
-struct vm_segment *vm_normal_alloc(struct address_space *as, vaddr_t addr,
-                                   size_t size, vm_flags_t flags)
+static struct vm_segment *vm_normal_alloc(struct address_space *as,
+                                          vaddr_t addr, size_t size,
+                                          vm_flags_t flags)
 {
     return vmm_allocate(as->vmm, addr, size, flags);
 }
@@ -168,6 +169,17 @@ static error_t vm_normal_resize(struct address_space *as,
     return E_SUCCESS;
 }
 
+/*
+ * Forward the policy change to the MMU for every page inside the segment.
+ */
+static error_t vm_normal_set_policy(struct address_space *as,
+                                    struct vm_segment *segment,
+                                    vm_flags_t policy)
+{
+    AS_ASSERT_OWNED(as);
+    return mmu_set_policy_range(segment->start, segment->size, (int)policy);
+}
+
 const struct vm_segment_driver vm_normal = {
     .vm_alloc = vm_normal_alloc,
     .vm_alloc_at = vm_normal_alloc_at,
@@ -175,4 +187,5 @@ const struct vm_segment_driver vm_normal = {
     .vm_fault = vm_normal_fault,
     .vm_resize = vm_normal_resize,
     .vm_map = vm_normal_map,
+    .vm_set_policy = vm_normal_set_policy,
 };

From 50bb8bdab95e1640b9b20c418a4d2eb6223cb3d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20DUBOIN?=
Date: Fri, 26 Dec 2025 17:34:23 +0100
Subject: [PATCH 8/8] memory: configure memory mapped I/O registers as
 uncacheable

Accesses to memory-mapped I/O often have side-effects and should not be
combined or delayed.
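
For instance, with a write-back mapping the second read below could be
served from the CPU cache without ever reaching the device (mmio_base
and STATUS are hypothetical names, for illustration only):

    volatile u32 *mmio_base = device->bars[0].data;

    u32 first = mmio_base[STATUS];  /* reaches the device */
    u32 second = mmio_base[STATUS]; /* may be a stale cache hit */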

Add a new VM_IOMEM flag that should be used with memory-mapped
registers.
---
 include/kernel/vm.h  | 6 ++++++
 kernel/devices/pci.c | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/kernel/vm.h b/include/kernel/vm.h
index 0b5b8506..efa51fbf 100644
--- a/include/kernel/vm.h
+++ b/include/kernel/vm.h
@@ -78,6 +78,12 @@ typedef enum vm_flags {
 #define VM_USER_WO (VM_WRITE)
 #define VM_USER_RW (VM_READ | VM_WRITE)
 
+/*
+ * Memory-mapped I/O should be mapped as uncacheable since reads and writes
+ * can have side-effects and should not be combined or delayed.
+ */
+#define VM_IOMEM (VM_KERNEL_RW | VM_CACHE_UC)
+
 #define VM_CACHE_MASK (VM_CACHE_UC | VM_CACHE_WT | VM_CACHE_WB | VM_CACHE_WC)
 
 /** Segment driver
diff --git a/kernel/devices/pci.c b/kernel/devices/pci.c
index 3f9d9229..18f18e2f 100644
--- a/kernel/devices/pci.c
+++ b/kernel/devices/pci.c
@@ -190,7 +190,7 @@ static void pci_device_setup_bars(struct pci_device *device)
         device->bars[i].phys = bar & PCI_BAR_MEMORY_ADDRESS_MASK;
         device->bars[i].data = vm_alloc_at(
             &kernel_address_space, device->bars[i].phys,
-            align_up(size, PAGE_SIZE), VM_READ | VM_WRITE);
+            align_up(size, PAGE_SIZE), VM_IOMEM);
         if (IS_ERR(device->bars[i].data))
             log_warn("failed to allocate bar[%d]: %d", i,
                      ERR_FROM_PTR(device->bars[i].data));