diff --git a/include/kernel/arch/i686/cpu.h b/include/kernel/arch/i686/cpu.h index 74912f98..37b391a6 100644 --- a/include/kernel/arch/i686/cpu.h +++ b/include/kernel/arch/i686/cpu.h @@ -3,9 +3,25 @@ #include +#include #include #include +#define CPU_CACHE_ALIGN 64 /* 64B L1 cache lines */ + +#define X86_FEATURE_WORDS 2 /* Number of CPUID leaves that contain features. */ + +struct x86_cpuinfo { + const char *vendor; + u32 features[X86_FEATURE_WORDS]; +}; + +extern struct x86_cpuinfo cpuinfo; + +/* + * Register read/write wrappers. + */ + // Read from a 32-bits register #define READ_REGISTER_OPS(_reg) \ static ALWAYS_INLINE u32 read_##_reg() \ @@ -32,6 +48,20 @@ MAP(WRITE_REGISTER_OPS, CPU_32BIT_REGISTERS) #undef WRITE_REGISTER_OPS #undef READ_REGISTER_OPS +/* + * CPU control registers. + */ + +#define CR0_PG BIT(31) /* Paging enable */ +#define CR0_CD BIT(30) /* Cache disable */ +#define CR0_NW BIT(29) /* Not write-through */ + +#define CR4_PAE BIT(5) /* PAE paging enable */ + +/* + * ASM instruction wrappers. + */ + /* Write a single byte at a given I/O port address. */ static ALWAYS_INLINE void outb(uint16_t port, uint8_t val) { @@ -109,4 +139,153 @@ static ALWAYS_INLINE void insl(uint16_t port, uint32_t *buffer, size_t size) : "memory"); } +#include /* provided by GCC */ + +#define cpuid(leaf, eax, ebx, ecx, edx) __get_cpuid(leaf, eax, ebx, ecx, edx) + +/* + * Define quick helper functions for CPUID calls that only need to access one + * of the result registers. + */ +#define CPUID_FUNCTION(_reg) \ + static inline uint32_t cpuid_##_reg(uint32_t leaf) \ + { \ + uint32_t eax; \ + uint32_t ebx; \ + uint32_t ecx; \ + uint32_t edx; \ + \ + cpuid(leaf, &eax, &ebx, &ecx, &edx); \ + return _reg; \ + } + +CPUID_FUNCTION(eax) +CPUID_FUNCTION(ebx) +CPUID_FUNCTION(ecx) +CPUID_FUNCTION(edx) + +#undef CPUID_FUNCTION + +#define CPUID_LEAF_GETVENDOR 0 +#define CPUID_LEAF_GETFEATURES 1 +#define CPUID_LEAF_GETFEATURES_EXT 7 + +/* Vendor codes used by popular hypervisors. 
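+ *
+ * Each value packs the first four ASCII characters of the vendor signature
+ * into %ebx (shown between brackets in the comments below). cpu_init_info()
+ * compares them against the registers returned by the vendor identification
+ * leaf in order to put a name on the platform we are running on.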
*/ +#define signature_QEMU_ebx 0x47435443 // [TCGT]CGTCGTCG +#define signature_KVM_ebx 0x4D564B20 // [ KVM]KVMKVM +#define signature_VMWARE_ebx 0x61774D56 // [VMwa]reVMware +#define signature_VIRTUALBOX_ebx 0x786F4256 // [VBox]VBoxVBox +#define signature_XEN_ebx 0x566E6558 // [XenV]MMXenVMM +#define signature_HYPERV_ebx 0x7263694D // [Micr]osoft Hv +#define signature_PARALLELS_ebx 0x6C727020 // [ prl] hyperv +#define signature_PARALLELS_ALT_ebx 0x6570726C // [lrpe]pyh vr +#define signature_BHYVE_ebx 0x76796862 // [bhyv]e bhyve +#define signature_QNX_ebx 0x20584E51 // [ QNX]QVMBSQG + +#define X86_FEATURES(F) \ + \ + /* Features in %ecx for leaf 1 */ \ + F(SSE3, 0, 0), \ + F(PCLMUL, 0, 1), \ + F(DTES64, 0, 2), \ + F(MONITOR, 0, 3), \ + F(DSCPL, 0, 4), \ + F(VMX, 0, 5), \ + F(SMX, 0, 6), \ + F(EIST, 0, 7), \ + F(TM2, 0, 8), \ + F(SSSE3, 0, 9), \ + F(CNXTID, 0, 10), \ + F(FMA, 0, 12), \ + F(CMPXCHG16B, 0, 13), \ + F(xTPR, 0, 14), \ + F(PDCM, 0, 15), \ + F(PCID, 0, 17), \ + F(DCA, 0, 18), \ + F(SSE41, 0, 19), \ + F(SSE42, 0, 20), \ + F(x2APIC, 0, 21), \ + F(MOVBE, 0, 22), \ + F(POPCNT, 0, 23), \ + F(TSCDeadline, 0, 24), \ + F(AES, 0, 25), \ + F(XSAVE, 0, 26), \ + F(OSXSAVE, 0, 27), \ + F(AVX, 0, 28), \ + F(F16C, 0, 29), \ + F(RDRND, 0, 30), \ + \ + /* Features in %edx for leaf 1 */ \ + F(FPU, 1, 0), \ + F(VME, 1, 1), \ + F(DE, 1, 2), \ + F(PSE, 1, 3), \ + F(TSC, 1, 4), \ + F(MSR, 1, 5), \ + F(PAE, 1, 6), \ + F(MCE, 1, 7), \ + F(CMPXCHG8B, 1, 8), \ + F(APIC, 1, 9), \ + F(SEP, 1, 11), \ + F(MTRR, 1, 12), \ + F(PGE, 1, 13), \ + F(MCA, 1, 14), \ + F(CMOV, 1, 15), \ + F(PAT, 1, 16), \ + F(PSE36, 1, 17), \ + F(PSN, 1, 18), \ + F(CLFSH, 1, 19), \ + F(DS, 1, 21), \ + F(ACPI, 1, 22), \ + F(MMX, 1, 23), \ + F(FXSAVE, 1, 24), \ + F(SSE, 1, 25), \ + F(SSE2, 1, 26), \ + F(SS, 1, 27), \ + F(HTT, 1, 28), \ + F(TM, 1, 29), \ + F(PBE, 1, 31), \ + +#define X86_FEATURE_NAME(_feature) X86_FEATURE_##_feature +#define X86_FEATURE_VAL(_word, _bit) ((_word << X86_FEATURE_WORD_OFF) | (_bit & 0xff)) +#define X86_FEATURE_WORD_OFF 8 + +enum x86_cpu_feature { +#define DEFINE_X86_FEATURE(_name, _word, _bit) \ + X86_FEATURE_NAME(_name) = X86_FEATURE_VAL(_word, _bit) +X86_FEATURES(DEFINE_X86_FEATURE) +#undef DEFINE_X86_FEATURE +}; + +static inline bool cpu_test_feature(enum x86_cpu_feature feature) +{ + int leaf = (feature >> X86_FEATURE_WORD_OFF); + int bit = feature & (BIT(X86_FEATURE_WORD_OFF) - 1); + + return BIT_READ(cpuinfo.features[leaf], bit); +} + +#define cpu_has_feature(_feature) cpu_test_feature(X86_FEATURE_NAME(_feature)) + +enum x86_msr { + MSR_PAT = 0x277, +}; + +/* Read from specific register */ +static inline uint64_t rdmsr(uint32_t msr) +{ + uint32_t eax; + uint32_t edx; + ASM("rdmsr" : "=a"(eax), "=d"(edx) : "c"(msr)); + return (((uint64_t)edx) << 32) | eax; +} + +/* Write into model specific register */ +static inline void wrmsr(uint32_t msr, uint64_t val) +{ + uint32_t eax = val; + uint32_t edx = val >> 32; + ASM("wrmsr" : : "a"(eax), "d"(edx), "c"(msr)); +} + #endif /* KERNEL_I686_UTILS_CPU_OPS_H */ diff --git a/include/kernel/mmu.h b/include/kernel/mmu.h index e2d64c5a..fa0db4ac 100644 --- a/include/kernel/mmu.h +++ b/include/kernel/mmu.h @@ -29,6 +29,8 @@ #include #include +#include + #include /** @@ -44,6 +46,16 @@ typedef enum mmu_prot { PROT_KERNEL = 0x8, /*!< Pages should be accessible only from the kernel */ } mmu_prot; +/** @enum mmu_caching_policy + * @brief Caching policies. + */ +typedef enum mmu_caching_policy { + POLICY_UC = BIT(6), /*!< Uncachealbe memory. 
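+                          Accesses bypass the cache entirely, as required
+                          for memory-mapped I/O registers.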
+                          */
+    POLICY_WC = BIT(7), /*!< Write-combining memory. */
+    POLICY_WT = BIT(8), /*!< Write-through memory. */
+    POLICY_WB = BIT(9), /*!< Write-back memory (default). */
+} mmu_policy_t;
+
 /** Initialize the MMU's paging system
  *
  * This function is responsible for setting any required bit inside the CPU's
@@ -87,7 +99,7 @@ void mmu_load(paddr_t mmu);
  * @param virt The virtual address
  * @param physical Its physical equivalent
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
  *
  * @return False if the address was already mapped before
  */
@@ -100,7 +112,7 @@ bool mmu_map(vaddr_t virt, paddr_t physical, int prot);
  * @param physical Its physical equivalent
  * @param size The size of the region to map
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
  *
  * @return False if the address was already mapped before
  */
@@ -136,7 +148,7 @@ void mmu_unmap_range(vaddr_t start, vaddr_t end);
  * @param start the starting page of the address range
  * @param end the ending address of the address range
  * @param prot Protection rule in use for this page.
- *             A combination of @ref mmu_prot flags.
+ *             A combination of @ref mmu_prot and @ref mmu_caching_policy flags.
  */
 void mmu_identity_map(paddr_t start, paddr_t end, int prot);
 
@@ -151,4 +163,22 @@ static inline bool mmu_is_mapped(vaddr_t addr)
     return !IS_ERR(mmu_find_physical(addr));
 }
 
+/** Configure the caching policy in effect when accessing a page.
+ *
+ * @param vaddr The page's virtual address
+ * @param policy Caching policy applied to this page.
+ *               A combination of @ref mmu_caching_policy flags.
+ */
+error_t mmu_set_policy(vaddr_t vaddr, mmu_policy_t policy);
+
+/** Configure the caching policy in effect when accessing a range of pages.
+ *
+ * @param range_start The virtual address of the first page in the range
+ * @param range_size The size of the address range
+ * @param policy Caching policy applied to each page in the range.
+ *               A combination of @ref mmu_caching_policy flags.
+ */
+error_t mmu_set_policy_range(vaddr_t range_start, size_t range_size,
+                             mmu_policy_t policy);
+
 #endif /* KERNEL_MMU_H */
diff --git a/include/kernel/vm.h b/include/kernel/vm.h
index 0999329c..efa51fbf 100644
--- a/include/kernel/vm.h
+++ b/include/kernel/vm.h
@@ -62,6 +62,12 @@ typedef enum vm_flags {
     VM_KERNEL = BIT(3), /*!< Pages should only be accessible from kernel */
     VM_CLEAR = BIT(4),  /*!< Page content should be reset when allocating */
     VM_FIXED = BIT(5),  /*!< Start address in vm_alloc_at() is not a hint */
+
+    /* Caching policies. */
+    VM_CACHE_UC = BIT(6), /*!< Uncacheable. */
+    VM_CACHE_WC = BIT(7), /*!< Write-combining. */
+    VM_CACHE_WT = BIT(8), /*!< Write-through. */
+    VM_CACHE_WB = BIT(9), /*!< Write-back (default). */
 } vm_flags_t;
 
 #define VM_KERNEL_RO (VM_KERNEL | VM_READ)
@@ -72,6 +78,14 @@ typedef enum vm_flags {
 #define VM_USER_WO (VM_WRITE)
 #define VM_USER_RW (VM_READ | VM_WRITE)
 
+/*
+ * Memory-mapped I/O should be mapped as uncacheable since reads and writes
+ * can have side effects and should not be combined or delayed.
+ */
+#define VM_IOMEM (VM_KERNEL_RW | VM_CACHE_UC)
+
+#define VM_CACHE_MASK (VM_CACHE_UC | VM_CACHE_WT | VM_CACHE_WB | VM_CACHE_WC)
+
 /** Segment driver
  *
  * There exists different types of memory segments. A segment driver defines
@@ -136,6 +150,10 @@
     /** Map this segment onto a physical address.
*/ error_t (*vm_map)(struct address_space *, struct vm_segment *, vm_flags_t); + + /** Configure the effective caching policy for a segment. */ + error_t (*vm_set_policy)(struct address_space *, struct vm_segment *, + vm_flags_t policy); }; /** Kernel-only address-space. @@ -233,4 +251,7 @@ struct vm_segment *vm_find(const struct address_space *, void *); */ error_t vm_map(struct address_space *, void *); +/** Change the effective caching policy for address inside a segment. */ +error_t vm_set_policy(struct address_space *, void *, vm_flags_t policy); + #endif /* KERNEL_VM_H */ diff --git a/kernel/arch/i686/build.mk b/kernel/arch/i686/build.mk index 7ee0f3db..b25ab07b 100644 --- a/kernel/arch/i686/build.mk +++ b/kernel/arch/i686/build.mk @@ -5,6 +5,7 @@ QEMU := qemu-system-i386 KERNEL_ARCH_SRCS := \ crt0.S \ + cpu.c \ gdt.S \ gdt.c \ interrupts.c \ diff --git a/kernel/arch/i686/cpu.c b/kernel/arch/i686/cpu.c new file mode 100644 index 00000000..953141ad --- /dev/null +++ b/kernel/arch/i686/cpu.c @@ -0,0 +1,129 @@ +#define LOG_DOMAIN "i686" + +#include +#include +#include + +#include + +struct x86_cpuinfo cpuinfo; + +/* + * + */ +static void cpu_init_caches(void) +{ + u32 val; + + /* + * Enable caching globally. + * + * Caching policies can still be selectively configured using page table + * entries or MTRR registers. + */ + val = read_cr0(); + val &= ~CR0_CD; + val &= ~CR0_NW; + write_cr0(val); +} + +struct x86_cpu_vendor { + const char *vendor; + u32 ebx; + u32 ecx; + u32 edx; +}; + +#define CPU_VENDOR(_vendor, _name) \ + { \ + .vendor = _name, \ + .ebx = signature_##_vendor##_ebx, \ + .ecx = signature_##_vendor##_ecx, \ + .edx = signature_##_vendor##_edx, \ + } + +#define CPU_VENDOR_HV(_vendor, _name) \ + { \ + .vendor = _name, \ + .ebx = signature_##_vendor##_ebx, \ + } + +static struct x86_cpu_vendor cpu_vendors[] = { + CPU_VENDOR(AMD, "AMD"), + CPU_VENDOR(INTEL, "Intel"), + CPU_VENDOR_HV(KVM, "KVM"), + CPU_VENDOR_HV(VMWARE, "VMWare"), + CPU_VENDOR_HV(VIRTUALBOX, "VirtualBox"), + CPU_VENDOR_HV(XEN, "Xen"), + CPU_VENDOR_HV(HYPERV, "Microsoft Hypervisor"), +}; + +static const char *feature_name[32 * X86_FEATURE_WORDS] = { +#define X86_FEATURE_STRING(_name, _word, _bit) [_word * 32 + _bit] = stringify(_name) + X86_FEATURES(X86_FEATURE_STRING) +#undef X86_FEATURE_STRING +}; + +/* + * + */ +static void cpu_dump_info(enum log_level level, const struct x86_cpuinfo *cpu) +{ + log(level, LOG_DOMAIN, "CPU Information"); + log(level, LOG_DOMAIN, "Vendor: %s", cpu->vendor); + + log(level, LOG_DOMAIN, "Features: "); + for (int leaf = 0; leaf < X86_FEATURE_WORDS; ++leaf) { + for (int bit = 0; bit < 32; ++bit) { + if (cpu_test_feature(X86_FEATURE_VAL(leaf, bit))) { + if (feature_name[leaf * 32 + bit]) + printk("%s ", feature_name[leaf * 32 + bit]); + } + } + } + printk("\n"); +} + +/* + * + */ +static void cpu_init_info(struct x86_cpuinfo *cpu) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + cpu->vendor = "unknown"; + + /* Find vendor information */ + cpuid(CPUID_LEAF_GETVENDOR, &eax, &ebx, &ecx, &edx); + for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); ++i) { + if (cpu_vendors[i].ebx != ebx) + continue; + if (cpu_vendors[i].ecx && cpu_vendors[i].ecx != ecx) + continue; + if (cpu_vendors[i].edx && cpu_vendors[i].edx != edx) + continue; + cpu->vendor = cpu_vendors[i].vendor; + break; + } + + cpu->features[0] = cpuid_ecx(CPUID_LEAF_GETFEATURES); + cpu->features[1] = cpuid_edx(CPUID_LEAF_GETFEATURES); + + cpu_dump_info(LOG_LEVEL_INFO, cpu); +} + +/* + * Initialize 
the CPU and configure it in a known state. + */ +error_t cpu_init(void) +{ + cpu_init_info(&cpuinfo); + cpu_init_caches(); + + return E_SUCCESS; +} + +DECLARE_INITCALL(INIT_BOOTSTRAP, cpu_init); diff --git a/kernel/arch/i686/mmu.c b/kernel/arch/i686/mmu.c index 332b8322..5d954625 100644 --- a/kernel/arch/i686/mmu.c +++ b/kernel/arch/i686/mmu.c @@ -190,7 +190,7 @@ void mmu_load(paddr_t page_directory) * [0x0000; 0x00FF] to the virtual range [0xFF00; 0xFFFF] */ static void -mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int prot); +mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int flags); /** @brief Inititialize a new page directory * @return The physical address of the new page_directory, 0 if error. @@ -226,9 +226,110 @@ void mmu_destroy(paddr_t mmu) pmm_free(mmu); } -// TODO: We do not have a way to quickly map and access an arbitrary physical address. -// This prevents us from cloning an arbitrary MMU instance. This is the reason why -// this function currently only takes in the destination MMU as a parameter. +/* + * + */ +static inline mmu_pde_t *mmu_get_active_page_directory(void) +{ + if (unlikely(!paging_enabled)) + return kernel_startup_page_directory; + + return MMU_RECURSIVE_PAGE_DIRECTORY_ADDRESS; +} + +/* + * Configure the caching policy on a page level. The caller must invalidate + * the address' TLB entry after calling this function. + */ +static error_t +__mmu_set_policy(mmu_pde_t *page_directory, vaddr_t vaddr, int policy) +{ + mmu_decode_t address = {.raw = vaddr}; + mmu_pte_t *pte; + bool pat = false; + bool pwt = false; + bool pcd = false; + + /* Sanitize input in case we were called from mmu_map(). */ + policy &= POLICY_UC | POLICY_WT | POLICY_WB | POLICY_WC; + if (!policy) + policy = POLICY_WB; /* Enable caching by default. */ + + if (!page_directory[address.pde].present) + return E_NOENT; + + pte = &MMU_RECURSIVE_PAGE_TABLE_ADDRESS(address.pde)[address.pte]; + switch (policy) { + case POLICY_WB: + break; + case POLICY_WT: + pwt = true; + break; + case POLICY_UC: + pcd = true; + break; + case POLICY_WC: + pat = true; + break; + + default: + WARN("invalid caching policy: %02x\n", policy); + return E_INVAL; + } + + if (pat && !cpu_has_feature(PAT)) { + log_warn("unsupported policy: %02x (requires PAT support)", policy); + return E_NOT_SUPPORTED; + } + + pte->pat = pat; + pte->pcd = pcd; + pte->pwt = pwt; + + return E_SUCCESS; +} + +/* + * + */ +error_t mmu_set_policy(vaddr_t vaddr, mmu_policy_t policy) +{ + mmu_pde_t *page_directory; + error_t err; + + page_directory = mmu_get_active_page_directory(); + err = __mmu_set_policy(page_directory, vaddr, policy); + if (err) + return err; + + mmu_flush_tlb(vaddr); + return E_SUCCESS; +} + +/* + * + */ +error_t mmu_set_policy_range(vaddr_t range_start, size_t range_size, + mmu_policy_t policy) +{ + error_t ret = E_SUCCESS; + + range_size = align_down(range_size, PAGE_SIZE); + for (size_t off = 0; off < range_size; off += PAGE_SIZE) { + /* Keep going when an error happens but return the first error code. */ + error_t err = mmu_set_policy(range_start + off, policy); + if (err && !ret) + ret = err; + } + + return ret; +} + +// TODO: We do not have a way to quickly map and access an arbitrary physical +// address. +// This prevents us from cloning an arbitrary MMU instance. This is the +// reason why this function currently only takes in the destination MMU as +// a parameter. 
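+// A kmap()-style window of reserved virtual pages that could be temporarily
+// pointed at any physical frame would be one way to lift this restriction.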
void mmu_clone(paddr_t destination) { page_directory_t src_page_directory; @@ -269,24 +370,15 @@ void mmu_clone(paddr_t destination) vm_free(&kernel_address_space, dst_page_directory); } -bool mmu_map(vaddr_t virtual, paddr_t pageframe, int prot) +bool mmu_map(vaddr_t virtual, paddr_t pageframe, int flags) { mmu_decode_t address = {.raw = virtual}; - - if (virtual % PAGE_SIZE) - return false; - - // TODO: We hardcode the pde/pte to be un-accessible when in user mode. - // This will also cause an issue when reaching userspace later. - - page_directory_t page_directory; + page_directory_t page_directory = mmu_get_active_page_directory(); page_table_t page_table; bool new_page_table = false; - if (paging_enabled) - page_directory = MMU_RECURSIVE_PAGE_DIRECTORY_ADDRESS; - else - page_directory = kernel_startup_page_directory; + if (virtual % PAGE_SIZE) + return false; if (!page_directory[address.pde].present) { u32 page_table = pmm_allocate(); @@ -321,14 +413,17 @@ bool mmu_map(vaddr_t virtual, paddr_t pageframe, int prot) page_table[address.pte] = (mmu_pte_t){ .present = 1, .page_frame = TO_PFN(pageframe), - .writable = boolean(prot & PROT_WRITE), - .user = !(prot & PROT_KERNEL), + .writable = boolean(flags & PROT_WRITE), + .user = !(flags & PROT_KERNEL), }; + /* No need to flush since the entry has not been cached yet. */ + __mmu_set_policy(page_directory, virtual, flags); + return true; } -bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int prot) +bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int flags) { size_t range; @@ -338,7 +433,7 @@ bool mmu_map_range(vaddr_t virtual, paddr_t physical, size_t size, int prot) } for (range = 0; range < size; range += PAGE_SIZE) { - if (!mmu_map(virtual + range, physical + range, prot)) + if (!mmu_map(virtual + range, physical + range, flags)) break; } @@ -412,16 +507,17 @@ void mmu_unmap_range(vaddr_t start, vaddr_t end) mmu_unmap(start); } -static void mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int prot) +static void +mmu_offset_map(paddr_t start, paddr_t end, int64_t offset, int flags) { for (; start < end; start += PAGE_SIZE) { - mmu_map(start + offset, start, prot); + mmu_map(start + offset, start, flags); } } -void mmu_identity_map(paddr_t start, paddr_t end, int prot) +void mmu_identity_map(paddr_t start, paddr_t end, int flags) { - mmu_offset_map(start, end, 0, prot); + mmu_offset_map(start, end, 0, flags); } paddr_t mmu_find_physical(vaddr_t virtual) @@ -508,23 +604,60 @@ error_t mmu_copy_on_write(vaddr_t addr) return ret; } -bool mmu_init(void) +/* Memory types used to configure the MTRR and PAT tables. */ +enum memory_type { + MEM_UC = 0x00, + MEM_WC = 0x01, + MEM_WT = 0x04, + MEM_WP = 0x05, + MEM_WB = 0x06, + MEM_UC_MINUS = 0x07, /* Valid only for the PAT table. */ +}; + +/* + * Fill the page attribute table. + */ +static void mmu_init_pat(void) { - paddr_t page_directory; - paddr_t page_table; + u64 pat = 0; - if (paging_enabled) { - log_warn("Trying to re-enable paging. Skipping."); - return false; + if (!cpu_has_feature(PAT)) { + log_info("PAT not present on this platform"); + return; } - interrupts_set_handler(PAGE_FAULT, INTERRUPT_HANDLER(page_fault), NULL); +#define PAT(n, val) (((u64)val & 0xff) << (n * 8)) - page_directory = KERNEL_HIGHER_HALF_PHYSICAL(kernel_startup_page_directory); + /* Configure a PAT entry for each caching related bit inside a PTE. 
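+     *
+     * A PTE selects PAT entry ((PAT << 2) | (PCD << 1) | PWT), so entry 0
+     * keeps the default write-back behaviour while entries 1 (PWT), 2 (PCD)
+     * and 4 (PAT) provide WT, UC and WC, matching the bits chosen by
+     * __mmu_set_policy().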
+     */
+    pat |= PAT(0, MEM_WB);
+    pat |= PAT(1, MEM_WT);
+    pat |= PAT(2, MEM_UC);
+    pat |= PAT(4, MEM_WC);
 
-    // Initialize the kernel's page directory
-    kernel_address_space.mmu = page_directory;
+    wrmsr(MSR_PAT, pat);
+
+#undef PAT
+}
+
+/*
+ * Configure the Memory Type Range Registers (MTRRs).
+ */
+static void mmu_init_mtrr(void)
+{
+    if (!cpu_has_feature(MTRR)) {
+        log_info("MTRR not present on this platform");
+        return;
+    }
+
+    /* TODO add support for MTRRs. */
+    not_implemented("MTRR");
+}
+
+/*
+ * Initialize the content of the page directory.
+ */
+static void mmu_init_page_directory(paddr_t page_directory)
+{
     // Mark all PDEs as "absent" (present = 0), and writable
     for (size_t entry = 0; entry < MMU_PDE_COUNT; entry++) {
         kernel_startup_page_directory[entry] = (mmu_pde_t){
@@ -550,25 +683,53 @@
     // (soon hopefully)
     mmu_offset_map(0, KERNEL_HIGHER_HALF_PHYSICAL(KERNEL_CODE_END),
                    KERNEL_HIGHER_HALF_OFFSET, PROT_EXEC | PROT_READ);
+}
+
+bool mmu_init(void)
+{
+    paddr_t page_directory;
+    paddr_t page_table;
+    u32 val;
+
+    if (paging_enabled) {
+        log_warn("Trying to re-enable paging. Skipping.");
+        return false;
+    }
+
+    interrupts_set_handler(PAGE_FAULT, INTERRUPT_HANDLER(page_fault), NULL);
+
+    /* Initialize caching structures. */
+    mmu_init_pat();
+    mmu_init_mtrr();
+
+    page_directory = KERNEL_HIGHER_HALF_PHYSICAL(kernel_startup_page_directory);
+    kernel_address_space.mmu = page_directory;
+
+    mmu_init_page_directory(page_directory);
+
+    /*
+     * Enable 32-bit paging.
+     */
     mmu_load(page_directory);
 
-    // According to 4.3, to activate 32-bit mode paging we must:
-    // 1. set CR4.PAE to 0 (de-activate PAE)
-    u32 cr4 = read_cr4();
-    BIT_CLEAR(cr4, 5); // PAE = bit 6
-    write_cr4(cr4);
+    val = read_cr4();
+    val &= ~CR4_PAE;
+    write_cr4(val);
 
-    // 2. set CR0.PG to 1 (activate paging)
-    u32 cr0 = read_cr0();
-    BIT_SET(cr0, 31); // PG = bit 32
-    write_cr0(cr0);
+    val = read_cr0();
+    val |= CR0_PG;
+    write_cr0(val);
 
     paging_enabled = true;
 
-    // Pre-allocate all shared kernel page table entries
-    // We NEED to allocate them now for them to be present inside the IDLE
-    // task's page table.
+    /*
+     * Pre-allocate all shared kernel page table entries.
+     *
+     * We MUST allocate them now for them to be present inside the IDLE
+     * task's page table.
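+     *
+     * Kernel page-directory entries are copied into an address space when it
+     * is created and are not synchronised afterwards, so a PDE allocated
+     * later would be missing from any page directory cloned before it.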
+ */ + for (size_t i = MMU_PDE_KERNEL_FIRST; i < MMU_PDE_COUNT - 1; i++) { if (kernel_startup_page_directory[i].present) continue; diff --git a/kernel/devices/pci.c b/kernel/devices/pci.c index 3f9d9229..18f18e2f 100644 --- a/kernel/devices/pci.c +++ b/kernel/devices/pci.c @@ -190,7 +190,7 @@ static void pci_device_setup_bars(struct pci_device *device) device->bars[i].phys = bar & PCI_BAR_MEMORY_ADDRESS_MASK; device->bars[i].data = vm_alloc_at( &kernel_address_space, device->bars[i].phys, - align_up(size, PAGE_SIZE), VM_READ | VM_WRITE); + align_up(size, PAGE_SIZE), VM_IOMEM); if (IS_ERR(device->bars[i].data)) log_warn("failed to allocate bar[%d]: %d", i, ERR_FROM_PTR(device->bars[i].data)); diff --git a/kernel/memory/address_space.c b/kernel/memory/address_space.c index 7f3ace84..facb5674 100644 --- a/kernel/memory/address_space.c +++ b/kernel/memory/address_space.c @@ -24,6 +24,10 @@ static_assert((int)VM_EXEC == (int)PROT_EXEC); static_assert((int)VM_READ == (int)PROT_READ); static_assert((int)VM_WRITE == (int)PROT_WRITE); static_assert((int)VM_KERNEL == (int)PROT_KERNEL); +static_assert((int)VM_CACHE_UC == (int)POLICY_UC); +static_assert((int)VM_CACHE_WB == (int)POLICY_WB); +static_assert((int)VM_CACHE_WT == (int)POLICY_WT); +static_assert((int)VM_CACHE_WC == (int)POLICY_WC); static DECLARE_LLIST(kernel_segments); @@ -274,6 +278,33 @@ error_t address_space_fault(struct address_space *as, void *addr, bool is_cow) return segment->driver->vm_fault(as, segment); } +/* + * Check whether the provided flags combination is correct. + */ +static inline bool +vm_flags_validate(struct address_space *as, vm_flags_t *flags) +{ + UNUSED(as); + + switch (*flags & VM_CACHE_MASK) { + case 0: + /* Default caching policy: write-back. */ + *flags |= VM_CACHE_WB; + break; + + case VM_CACHE_UC: + case VM_CACHE_WC: + case VM_CACHE_WT: + case VM_CACHE_WB: + break; + + default: + return false; + } + + return true; +} + void *vm_alloc_start(struct address_space *as, void *addr, size_t size, vm_flags_t flags) { @@ -283,6 +314,9 @@ void *vm_alloc_start(struct address_space *as, void *addr, size_t size, if (size % PAGE_SIZE) return NULL; + if (!vm_flags_validate(as, &flags)) + return NULL; + driver = vm_find_driver(flags); if (!driver) return NULL; @@ -317,6 +351,9 @@ void *vm_alloc_at(struct address_space *as, paddr_t phys, size_t size, if (phys % PAGE_SIZE) return NULL; + if (!vm_flags_validate(as, &flags)) + return NULL; + driver = vm_find_driver(flags); if (!driver) return NULL; @@ -353,6 +390,11 @@ void vm_free(struct address_space *as, void *addr) return; } + /* NOTE: We should not be freeing the whole segment at once. We may want + * to free only part of a segment. This means treating each memory + * segment as a single big 'object', and could also cause issues + * when implementing VMA merging later. + */ vm_segment_remove(as, segment); segment->driver->vm_free(as, segment); } @@ -486,3 +528,39 @@ void *sys_sbrk(intptr_t increment) return vm_brk(current->process->as, current->process->as->brk_end + increment); } + +/* + * + */ +error_t vm_set_policy(struct address_space *as, void *addr, vm_flags_t policy) +{ + struct vm_segment *segment; + error_t ret; + + AS_ASSERT_OWNED(as); + + policy &= VM_CACHE_MASK; + if (!vm_flags_validate(as, &policy)) + return E_INVAL; + + locked_scope (&as->lock) { + segment = vm_find(as, addr); + if (!segment) + return E_NOENT; + + /* NOTE: We should be adding a size parameter if we ever want to change + * the policy for part of a segment only. 
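+         *
+         *       This has the same implications as the partial vm_free()
+         *       problem noted above: the segment would need to be split
+         *       around the affected range.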
+         */
+        ret = segment->driver->vm_set_policy(as, segment, policy);
+        if (ret) {
+            log_warn("failed to set caching policy for [%p-%p]: %pe",
+                     (void *)segment->start, (void *)segment_end(segment),
+                     &ret);
+            return ret;
+        }
+
+        segment->flags &= ~VM_CACHE_MASK;
+        segment->flags |= policy;
+    }
+
+    return E_SUCCESS;
+}
diff --git a/kernel/memory/vm_normal.c b/kernel/memory/vm_normal.c
index 96059653..cc645608 100644
--- a/kernel/memory/vm_normal.c
+++ b/kernel/memory/vm_normal.c
@@ -7,8 +7,9 @@
 #include 
 
-struct vm_segment *vm_normal_alloc(struct address_space *as, vaddr_t addr,
-                                   size_t size, vm_flags_t flags)
+static struct vm_segment *vm_normal_alloc(struct address_space *as,
+                                          vaddr_t addr, size_t size,
+                                          vm_flags_t flags)
 {
     return vmm_allocate(as->vmm, addr, size, flags);
 }
@@ -168,6 +169,17 @@ static error_t vm_normal_resize(struct address_space *as,
     return E_SUCCESS;
 }
 
+/*
+ * Apply the requested caching policy to every page mapped by the segment.
+ */
+static error_t vm_normal_set_policy(struct address_space *as,
                                     struct vm_segment *segment,
                                     vm_flags_t policy)
+{
+    AS_ASSERT_OWNED(as);
+    return mmu_set_policy_range(segment->start, segment->size, (int)policy);
+}
+
 const struct vm_segment_driver vm_normal = {
     .vm_alloc = vm_normal_alloc,
     .vm_alloc_at = vm_normal_alloc_at,
@@ -175,4 +187,5 @@ const struct vm_segment_driver vm_normal = {
     .vm_fault = vm_normal_fault,
     .vm_resize = vm_normal_resize,
     .vm_map = vm_normal_map,
+    .vm_set_policy = vm_normal_set_policy,
 };
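+/*
+ * Example (hypothetical, not part of this patch): map a framebuffer as
+ * uncacheable I/O memory, then relax it to write-combining once PAT support
+ * is known to be available. `fb_phys` and `fb_size` are placeholders.
+ *
+ *     void *fb = vm_alloc_at(&kernel_address_space, fb_phys,
+ *                            align_up(fb_size, PAGE_SIZE), VM_IOMEM);
+ *     if (!IS_ERR(fb) && cpu_has_feature(PAT))
+ *         vm_set_policy(&kernel_address_space, fb, VM_CACHE_WC);
+ */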