From e789001951832b9d46d2583df6a9e1e6de475276 Mon Sep 17 00:00:00 2001 From: evanstella Date: Mon, 28 Feb 2022 12:02:14 -0500 Subject: [PATCH 1/7] pmu stuff --- .../tests/unit_pingshmem/ping.c | 24 ++++++++ src/platform/i386/chal.c | 1 + src/platform/i386/chal/chal_proto.h | 15 ++++- src/platform/i386/chal_cpu.h | 3 +- src/platform/i386/kernel.c | 2 + src/platform/x86_64/chal_pmu.h | 58 +++++++++++++++++++ tools/run.sh | 3 +- 7 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 src/platform/x86_64/chal_pmu.h diff --git a/src/components/implementation/tests/unit_pingshmem/ping.c b/src/components/implementation/tests/unit_pingshmem/ping.c index fe3b3048b..34c94d4ea 100644 --- a/src/components/implementation/tests/unit_pingshmem/ping.c +++ b/src/components/implementation/tests/unit_pingshmem/ping.c @@ -19,6 +19,16 @@ char *pong_test_strings[] = { shm_bm_t shm; +static unsigned long +rdpmc (unsigned long cntr) +{ + unsigned int low, high; + + asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (cntr)); + + return low | ((unsigned long)high) << 32; +} + void ping_test_objread(void) { @@ -296,14 +306,28 @@ main(void) shm_bm_init_testobj(shm); pongshmem_test_map(id); + printc("Counter: %lu\n", rdpmc(0)); + ping_test_objread(); + printc("Counter: %lu\n", rdpmc(0)); + ping_test_bigalloc(); + printc("Counter: %lu\n", rdpmc(0)); + ping_test_objfree(); + printc("Counter: %lu\n", rdpmc(0)); + ping_test_bigfree(); + printc("Counter: %lu\n", rdpmc(0)); + ping_test_refcnt(); + printc("Counter: %lu\n", rdpmc(0)); + ping_bench_syncinv(); ping_bench_msgpassing(); + printc("Counter: %lu\n", rdpmc(0)); + return 0; diff --git a/src/platform/i386/chal.c b/src/platform/i386/chal.c index 854ee7369..fe70c84bb 100644 --- a/src/platform/i386/chal.c +++ b/src/platform/i386/chal.c @@ -5,6 +5,7 @@ #include "chal_cpu.h" u32_t free_thd_id; +asid_t free_asid = 1; char timer_detector[PAGE_SIZE] PAGE_ALIGNED; extern void *cos_kmem, *cos_kmem_base; u32_t chal_msr_mhz = 0; diff --git 
a/src/platform/i386/chal/chal_proto.h b/src/platform/i386/chal/chal_proto.h index 8429b3de9..20dd9dda1 100644 --- a/src/platform/i386/chal/chal_proto.h +++ b/src/platform/i386/chal/chal_proto.h @@ -12,6 +12,11 @@ #define PGTBL_ENTRY_ORDER 9 #define PGTBL_FLAG_MASK 0xf800000000000fff #define PGTBL_FRAME_MASK (~PGTBL_FLAG_MASK) + +#define MAX_ASID_BITS 12 +#define MAX_NUM_ASID (1<pgtbl)); + unsigned long cr3 = (unsigned long)pt->pgtbl | pt->asid; + asm volatile("mov %0, %%cr3" : : "r"(cr3)); } +extern asid_t free_asid; static inline asid_t chal_asid_alloc(void) -{ return 0; } +{ + return 0; + // if (unlikely(free_asid >= MAX_NUM_ASID)) assert(0); + // return cos_faa((int *)&free_asid, 1); +} #endif /* CHAL_PROTO_H */ diff --git a/src/platform/i386/chal_cpu.h b/src/platform/i386/chal_cpu.h index d04ae5765..c80f61431 100644 --- a/src/platform/i386/chal_cpu.h +++ b/src/platform/i386/chal_cpu.h @@ -12,6 +12,7 @@ typedef enum { CR4_PGE = 1 << 7, /* page global bit enabled */ CR4_PCE = 1 << 8, /* user-level access to performance counters enabled (rdpmc) */ CR4_OSFXSR = 1 << 9, /* floating point enabled */ + CR4_PCIDE = 1 << 17, /* Process Context IDentifiers Enable */ CR4_SMEP = 1 << 20, /* Supervisor Mode Execution Protection Enable */ CR4_SMAP = 1 << 21 /* Supervisor Mode Access Protection Enable */ } cr4_flags_t; @@ -120,7 +121,7 @@ chal_cpu_init(void) #if defined(__x86_64__) u32_t low = 0, high = 0; - chal_cpu_cr4_set(cr4 | CR4_PSE | CR4_PGE); + chal_cpu_cr4_set(cr4 | CR4_PSE | CR4_PGE | CR4_PCE); readmsr(MSR_IA32_EFER, &low, &high); writemsr(MSR_IA32_EFER,low | 0x1, high); diff --git a/src/platform/i386/kernel.c b/src/platform/i386/kernel.c index 7a286de74..2b73a17ec 100644 --- a/src/platform/i386/kernel.c +++ b/src/platform/i386/kernel.c @@ -5,6 +5,7 @@ #include "boot_comp.h" #include "mem_layout.h" #include "chal_cpu.h" +#include "chal_pmu.h" #include #include @@ -185,6 +186,7 @@ kmain(unsigned long mboot_addr, unsigned long mboot_magic) smp_init(cores_ready); 
cores_ready[INIT_CORE] = 1; + chal_pmu_init(); kern_boot_upcall(); diff --git a/src/platform/x86_64/chal_pmu.h b/src/platform/x86_64/chal_pmu.h new file mode 100644 index 000000000..422dc4007 --- /dev/null +++ b/src/platform/x86_64/chal_pmu.h @@ -0,0 +1,58 @@ +#ifndef CHAL_PMU +#define CHAL_PMU + +#include "chal_config.h" + + +/* x86 MSR IA32_PERF_GLOBAL_CTRL Programming Bits */ +#define x86_MSR_GLBL_CTRL_EN_PC0 (1 << 0) +#define x86_MSR_GLBL_CTRL_EN_PC1 (1 << 1) +#define x86_MSR_GLBL_CTRL_EN_PC2 (1 << 2) +#define x86_MSR_GLBL_CTRL_EN_PC3 (1 << 3) +#define x86_MSR_GLBL_CTRL_EN_FC0 (1ul << 32) +#define x86_MSR_GLBL_CTRL_EN_FC1 (1ul << 33) +#define x86_MSR_GLBL_CTRL_EN_FC2 (1ul << 34) + + +/* x86 MSR IA32_PERFEVTSELX Programming Bits */ +#define X86_MSR_EVTSEL_EVTMSK_SFT 8 +#define X86_MSR_EVTSEL_CMASK_SFT 24 +#define X86_MSR_EVTSEL_USR (1 << 16) +#define X86_MSR_EVTSEL_OS (1 << 17) +#define X86_MSR_EVTSEL_E (1 << 18) +#define X86_MSR_EVTSEL_INT (1 << 20) +#define X86_MSR_EVTSEL_ANYTHD (1 << 21) +#define X86_MSR_EVTSEL_EN (1 << 22) +#define X86_MSR_EVTSEL_INV (1 << 23) + +/* MSR addresses */ +#define MSR_PERFEVTSEL0 390 +#define MSR_PERFEVTSEL1 391 +#define MSR_PERFEVTSEL2 392 +#define MSR_PERFEVTSEL3 393 +#define MSR_FIXED_CTR_CTRL 909 +#define MSR_PERF_GLOBAL_CTRL 911 +#define MSR_PMC1 193 +#define MSR_PMC2 194 +#define MSR_PMC3 195 +#define MSR_PMC4 196 + +static inline void +chal_pmu_init(void) +{ + unsigned long perf_global_ctrl = x86_MSR_GLBL_CTRL_EN_PC0; + unsigned long perf_fixed_ctr = 1 || (1 << 1); + u32_t perf_evt_sel = X86_MSR_EVTSEL_USR | X86_MSR_EVTSEL_OS | X86_MSR_EVTSEL_EN | X86_MSR_EVTSEL_ANYTHD | 0x0208; + + asm volatile ("wrmsr" : : "a"((u32_t)perf_global_ctrl), "d"((u32_t)(perf_global_ctrl >> 32)), "c"(MSR_PERF_GLOBAL_CTRL)); + asm volatile ("wrmsr" : : "a"((u32_t)perf_fixed_ctr), "d"((u32_t)(perf_fixed_ctr >> 32)), "c"(MSR_FIXED_CTR_CTRL)); + asm volatile ("wrmsr" : : "a"(perf_evt_sel), "d"(0), "c"(MSR_PERFEVTSEL0)); +} + +static inline void 
+chal_pmu_evtset(u8_t evt) +{ + +} + +#endif \ No newline at end of file diff --git a/tools/run.sh b/tools/run.sh index b2c07f5d9..77bc08e7b 100755 --- a/tools/run.sh +++ b/tools/run.sh @@ -44,7 +44,8 @@ fi if [ "${arch}" == "x86_64" ] then - qemu-system-x86_64 ${kvm_flag} -cpu max -smp ${vcpus},cores=${num_cores},threads=${num_threads},sockets=${num_sockets} -m ${mem_size} -cdrom $1 -no-reboot -nographic -s ${debug_flag} + qemu-system-x86_64 ${kvm_flag} -cpu max -smp ${vcpus},cores=${num_cores},threads=${num_threads},sockets=${num_sockets}, -m ${mem_size} -cdrom $1 -no-reboot -nographic -s ${debug_flag} + elif [ "${arch}" == "i386" ] then qemu-system-i386 ${kvm_flag} -cpu max -smp ${vcpus},cores=${num_cores},threads=${num_threads},sockets=${num_sockets} -m ${mem_size} -cdrom $1 -no-reboot -nographic -s ${debug_flag} From 21f7ad6feaa68f7e068b7096d9f9d392f8c4d854 Mon Sep 17 00:00:00 2001 From: evanstella Date: Sat, 5 Mar 2022 10:12:49 -0500 Subject: [PATCH 2/7] pmu implementation --- composition_scripts/unit_pmu.toml | 36 +++++ .../no_interface/llbooter/llbooter.c | 1 + .../implementation/tests/unit_pmu/Makefile | 18 +++ .../implementation/tests/unit_pmu/pmu_test.c | 61 +++++++ .../interface/pongshmem/pongshmem.h | 2 +- src/components/lib/kernel/cos_kernel_api.c | 12 ++ src/components/lib/kernel/cos_kernel_api.h | 3 + src/kernel/capinv.c | 17 ++ src/kernel/include/pmu.h | 47 ++++++ src/kernel/include/shared/cos_types.h | 4 +- src/platform/armv7a/chal/chal_plat.h | 5 + src/platform/i386/chal.c | 7 + src/platform/i386/chal/chal_plat.h | 60 ++++++- src/platform/i386/chal/chal_proto.h | 11 +- src/platform/i386/chal_cpu.h | 2 +- src/platform/i386/kernel.c | 2 - src/platform/x86_64/Makefile | 1 + src/platform/x86_64/chal_pmu.c | 149 ++++++++++++++++++ src/platform/x86_64/chal_pmu.h | 58 ------- 19 files changed, 425 insertions(+), 71 deletions(-) create mode 100644 composition_scripts/unit_pmu.toml create mode 100644 src/components/implementation/tests/unit_pmu/Makefile 
create mode 100644 src/components/implementation/tests/unit_pmu/pmu_test.c create mode 100644 src/kernel/include/pmu.h create mode 100644 src/platform/x86_64/chal_pmu.c delete mode 100644 src/platform/x86_64/chal_pmu.h diff --git a/composition_scripts/unit_pmu.toml b/composition_scripts/unit_pmu.toml new file mode 100644 index 000000000..bb400cf44 --- /dev/null +++ b/composition_scripts/unit_pmu.toml @@ -0,0 +1,36 @@ +[system] +description = "Simplest system with both capability manager and scheduler to test shared memory implementation" + +[[components]] +name = "booter" +img = "no_interface.llbooter" +implements = [{interface = "init"}, {interface = "addr"}] +deps = [{srv = "kernel", interface = "init", variant = "kernel"}] +constructor = "kernel" + +[[components]] +name = "capmgr" +img = "capmgr.simple" +deps = [{srv = "booter", interface = "init"}, {srv = "booter", interface = "addr"}] +implements = [{interface = "capmgr"}, {interface = "init"}, {interface = "memmgr"}, {interface = "capmgr_create"}] +constructor = "booter" + +[[components]] +name = "sched" +img = "sched.root_fprr" +deps = [{srv = "capmgr", interface = "init"}, {srv = "capmgr", interface = "capmgr"}, {srv = "capmgr", interface = "memmgr"}] +implements = [{interface = "sched"}, {interface = "init"}] +constructor = "booter" + +[[components]] +name = "pong" +img = "pong.pingpong" +deps = [{srv = "sched", interface = "init"}, {srv = "capmgr", interface = "capmgr_create"}] +implements = [{interface = "pong"}] +constructor = "booter" + +[[components]] +name = "unit_pmu" +img = "tests.unit_pmu" +deps = [{srv = "sched", interface = "init"}, {srv = "capmgr", interface = "capmgr_create"}, {srv = "capmgr", interface = "memmgr"}, {srv = "pong", interface = "pong"}] +constructor = "booter" \ No newline at end of file diff --git a/src/components/implementation/no_interface/llbooter/llbooter.c b/src/components/implementation/no_interface/llbooter/llbooter.c index c512d53bb..803620826 100644 --- 
a/src/components/implementation/no_interface/llbooter/llbooter.c +++ b/src/components/implementation/no_interface/llbooter/llbooter.c @@ -514,6 +514,7 @@ cos_init(void) { booter_init(); comps_init(); + } void diff --git a/src/components/implementation/tests/unit_pmu/Makefile b/src/components/implementation/tests/unit_pmu/Makefile new file mode 100644 index 000000000..e87700969 --- /dev/null +++ b/src/components/implementation/tests/unit_pmu/Makefile @@ -0,0 +1,18 @@ +# Required variables used to drive the compilation process. It is OK +# for many of these to be empty. +# +# The set of interfaces that this component exports for use by other +# components. This is a list of the interface names. +INTERFACE_EXPORTS = +# The interfaces this component is dependent on for compilation (this +# is a list of directory names in interface/) +INTERFACE_DEPENDENCIES = init pong memmgr +# The library dependencies this component is reliant on for +# compilation/linking (this is a list of directory names in lib/) +LIBRARY_DEPENDENCIES = kernel ps +# Note: Both the interface and library dependencies should be +# *minimal*. That is to say that removing a dependency should cause +# the build to fail. The build system does not validate this +# minimality; that's on you! 
+ +include Makefile.subsubdir diff --git a/src/components/implementation/tests/unit_pmu/pmu_test.c b/src/components/implementation/tests/unit_pmu/pmu_test.c new file mode 100644 index 000000000..5c33534ff --- /dev/null +++ b/src/components/implementation/tests/unit_pmu/pmu_test.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include + +#define NUM_PAGES 100 + +static unsigned long +rdpmc (unsigned long cntr) +{ + unsigned int low, high; + + asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (cntr)); + + return low | ((unsigned long)high) << 32; +} + +int +main(void) +{ + cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0); + cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1); + cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E); + cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1, 0xC5, 0x11); + + unsigned long hw_instructions, core_cycles, dtlb_misses, branch_mispredicts; + char *buf; + int i; + + buf = (char *)memmgr_heap_page_allocn(NUM_PAGES); + + /* write to a bunch of memory */ + for (i = 0; i < NUM_PAGES*PAGE_SIZE; i++) { + buf[i] = (char)(i % 128); + } + + dtlb_misses = rdpmc(0); + branch_mispredicts = rdpmc(1); + /* super poorly documented way to read intel's fixed counters */ + hw_instructions = rdpmc(1<<30); + core_cycles = rdpmc((1<<30)+1); + + /* context switch */ + pong_call(); + + /* write to a bunch of memory */ + for (i = 0; i < NUM_PAGES*PAGE_SIZE; i++) { + buf[i] = -(char)(i % 128); + } + + hw_instructions = rdpmc(1<<30) - hw_instructions; + core_cycles = rdpmc((1<<30)+1) - core_cycles; + dtlb_misses = rdpmc(0) - dtlb_misses; + branch_mispredicts = rdpmc(1) - branch_mispredicts; + + printc("HW Instructions: %lu\n", hw_instructions); + printc("Core Cycles: %lu\n", core_cycles); + printc("DTLB Misses: %lu\n", dtlb_misses); + printc("Branch Mispredicts: %lu\n", branch_mispredicts); + +} diff --git a/src/components/interface/pongshmem/pongshmem.h 
b/src/components/interface/pongshmem/pongshmem.h index 35e2f2d6d..90c1b90b1 100644 --- a/src/components/interface/pongshmem/pongshmem.h +++ b/src/components/interface/pongshmem/pongshmem.h @@ -4,7 +4,7 @@ #include #include -#define BENCH_ITER 2048 +#define BENCH_ITER 1024 struct obj_test { int id; diff --git a/src/components/lib/kernel/cos_kernel_api.c b/src/components/lib/kernel/cos_kernel_api.c index 91d91421e..a6be830d4 100644 --- a/src/components/lib/kernel/cos_kernel_api.c +++ b/src/components/lib/kernel/cos_kernel_api.c @@ -1310,3 +1310,15 @@ cos_hw_map(struct cos_compinfo *ci, hwcap_t hwc, paddr_t pa, unsigned int len) return (void *)va; } + +int +cos_pmu_program_event_counter(hwcap_t hwc, u8_t cntr, u8_t evnt, u8_t umask) +{ + return call_cap_op(hwc, CAPTBL_OP_HW_PMU_PROG_EVT_CNTR, cntr, evnt, umask, 0); +} + +int +cos_pmu_enable_fixed_counter(hwcap_t hwc, u8_t cntr) +{ + return call_cap_op(hwc, CAPTBL_OP_HW_PMU_EN_FIXED_CNTR, cntr, 0, 0, 0); +} diff --git a/src/components/lib/kernel/cos_kernel_api.h b/src/components/lib/kernel/cos_kernel_api.h index 6f9260379..f62f8c9c7 100644 --- a/src/components/lib/kernel/cos_kernel_api.h +++ b/src/components/lib/kernel/cos_kernel_api.h @@ -201,6 +201,9 @@ int cos_hw_tlbstall(hwcap_t hwc); int cos_hw_tlbstall_recount(hwcap_t hwc); void cos_hw_shutdown(hwcap_t hwc); +int cos_pmu_enable_fixed_counter(hwcap_t hwc, u8_t cntr); +int cos_pmu_program_event_counter(hwcap_t hwc, u8_t cntr, u8_t evnt, u8_t umask); + capid_t cos_capid_bump_alloc(struct cos_compinfo *ci, cap_t cap); diff --git a/src/kernel/capinv.c b/src/kernel/capinv.c index ec51bcf1a..50a1806c9 100644 --- a/src/kernel/capinv.c +++ b/src/kernel/capinv.c @@ -15,6 +15,7 @@ #include "include/tcap.h" #include "include/chal/defs.h" #include "include/hw.h" +#include "include/pmu.h" #include "include/chal/chal_proto.h" #define COS_DEFAULT_RET_CAP 0 @@ -1652,6 +1653,22 @@ static int __attribute__((noinline)) composite_syscall_slowpath(struct pt_regs * ret = 
chal_tlbstall_recount(0); break; } + case CAPTBL_OP_HW_PMU_PROG_EVT_CNTR: { + u8_t cntr = __userregs_get1(regs); + u8_t evt = __userregs_get2(regs); + u8_t umask = __userregs_get3(regs); /* third syscall argument; reading get2 again would alias evt */ + + if ((ret = pmu_event_cntr_enable(cntr))) { + goto err; + } + ret = pmu_event_cntr_program(cntr, evt, umask); + break; + } + case CAPTBL_OP_HW_PMU_EN_FIXED_CNTR: { + u8_t cntr = __userregs_get1(regs); + ret = pmu_fixed_cntr_enable(cntr); + break; + } default: goto err; } diff --git a/src/kernel/include/pmu.h b/src/kernel/include/pmu.h new file mode 100644 index 000000000..f540527ba --- /dev/null +++ b/src/kernel/include/pmu.h @@ -0,0 +1,47 @@ +#ifndef PMU_H +#define PMU_H + +#include "chal_config.h" + +/* + * TODO + */ + +int chal_pmu_fixed_cntr_enable(u8_t cntr); +int chal_pmu_fixed_cntr_disable(u8_t cntr); +int chal_pmu_event_cntr_enable(u8_t cntr); + +int chal_pmu_event_cntr_disable(u8_t cntr); +int chal_pmu_event_cntr_program(u8_t cntr, u8_t evt, u8_t umask); + +static int +pmu_fixed_cntr_enable(u8_t cntr) +{ + return chal_pmu_fixed_cntr_enable(cntr); +} + +static int +pmu_fixed_cntr_disable(u8_t cntr) +{ + return chal_pmu_fixed_cntr_disable(cntr); +} + +static int +pmu_event_cntr_enable(u8_t cntr) +{ + return chal_pmu_event_cntr_enable(cntr); +} + +static int +pmu_event_cntr_disable(u8_t cntr) +{ + return chal_pmu_event_cntr_disable(cntr); +} + +static int +pmu_event_cntr_program(u8_t cntr, u8_t evnt, u8_t umask) +{ + return chal_pmu_event_cntr_program(cntr, evnt, umask); +} + +#endif \ No newline at end of file diff --git a/src/kernel/include/shared/cos_types.h b/src/kernel/include/shared/cos_types.h index 215f0fa38..8dce8f9f2 100644 --- a/src/kernel/include/shared/cos_types.h +++ b/src/kernel/include/shared/cos_types.h @@ -142,7 +142,9 @@ typedef enum { CAPTBL_OP_HW_L1FLUSH, CAPTBL_OP_HW_TLBFLUSH, CAPTBL_OP_HW_TLBSTALL, - CAPTBL_OP_HW_TLBSTALL_RECOUNT + CAPTBL_OP_HW_TLBSTALL_RECOUNT, + CAPTBL_OP_HW_PMU_PROG_EVT_CNTR, + CAPTBL_OP_HW_PMU_EN_FIXED_CNTR } syscall_op_t; 
typedef enum { diff --git a/src/platform/armv7a/chal/chal_plat.h b/src/platform/armv7a/chal/chal_plat.h index 28b5f7ce8..ea8e8ebb4 100644 --- a/src/platform/armv7a/chal/chal_plat.h +++ b/src/platform/armv7a/chal/chal_plat.h @@ -22,6 +22,11 @@ chal_flush_tlb(void) { } +static inline void +chal_flush_tlb_asid(asid_t asid) +{ +} + static inline void * chal_pa2va(paddr_t address) { diff --git a/src/platform/i386/chal.c b/src/platform/i386/chal.c index fe70c84bb..f3f49a733 100644 --- a/src/platform/i386/chal.c +++ b/src/platform/i386/chal.c @@ -10,6 +10,8 @@ char timer_detector[PAGE_SIZE] PAGE_ALIGNED; extern void *cos_kmem, *cos_kmem_base; u32_t chal_msr_mhz = 0; +u8_t processor_num_pmc; + paddr_t chal_kernel_mem_pa; void * @@ -132,6 +134,11 @@ chal_init(void) printk("\tCPUID max frequency: %d (* 1Mhz)\n", (b << 16) >> 16); } + /* Get the number of PMCs available */ + chal_cpuid(0x0a, &a, &b, &c, &d); + processor_num_pmc = (a >> 8) & 0xFF; + + /* FIXME: on x86_64, cannot get platform info on qemu */ readmsr(MSR_PLATFORM_INFO, &a, &b); a = (a >> 8) & ((1<<7)-1); diff --git a/src/platform/i386/chal/chal_plat.h b/src/platform/i386/chal/chal_plat.h index 68f2a4152..6286bfdd0 100644 --- a/src/platform/i386/chal/chal_plat.h +++ b/src/platform/i386/chal/chal_plat.h @@ -7,6 +7,46 @@ int chal_tlbstall(void); int chal_tlbstall_recount(int a); int chal_tlbflush(int a); + +#if defined(__x86_64__) +#define CR3_NO_FLUSH (1ul << 63) +#endif + +/* Reference Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 2 */ +#define INVPCID_TYPE_INDIVIDUAL_ADDR 0 /* invalidate all tlb entries for a PCID used to map a vaddr */ +#define INVPCID_TYPE_SINGLE_CONTEXT 1 /* invalidate all tlb entries (not global) for a PCID */ +#define INVPCID_TYPE_ALL_CONTEXT_GLOBAL 2 /* invalidate all tlb entries, including global */ +#define INVPCID_TYPE_ALL_CONTEXT 3 /* invalidate all tlb entries, not including global */ + +static inline void +__invpcid(u64_t pcid, u64_t addr, unsigned long type) +{ 
+ /* + * invpcid takes a 128 bit value from memory as: | address | 000...0 | pcid | + * 127 63 11 0 + * This is the case in both 32 and 64 bit execution modes + */ + struct { u64_t pcid; u64_t addr; } desc = { .pcid = pcid, .addr = addr }; + + asm volatile("invpcid %0, %1" : : "m"(desc), "r"(type) : "memory"); +} + +#endif + +static inline unsigned long +__readcr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3, %0" : "=r"(val)); + return val; +} + +static inline void +__writecr3(unsigned long val) +{ + asm volatile("mov %0, %%cr3" : : "r"(val)); +} + /* This flushes all levels of cache of the current logical CPU. */ static inline void chal_flush_cache(void) @@ -14,17 +54,31 @@ chal_flush_cache(void) asm volatile("wbinvd" : : : "memory"); } +/* This won't flush global TLB (pinned with PGE) entries. */ +static inline void +chal_flush_tlb(void) +{ + /* FIXME: what if no invpcid (pre Intel Haswell and AMD Zen 3) */ + /* faster than cr3 r+w */ + __invpcid(0, 0, INVPCID_TYPE_ALL_CONTEXT); +} + static inline void chal_flush_tlb_global(void) { + /* FIXME: what if no invpcid (pre Intel Haswell and AMD Zen 3) */ + /* faster than cr4 r+w */ + __invpcid(0, 0, INVPCID_TYPE_ALL_CONTEXT_GLOBAL); } + static inline void -chal_remote_tlb_flush(int target_cpu) +chal_flush_tlb_asid(asid_t asid) { + __invpcid((u64_t)asid, 0, INVPCID_TYPE_SINGLE_CONTEXT); } -/* This won't flush global TLB (pinned with PGE) entries. 
*/ + static inline void -chal_flush_tlb(void) +chal_remote_tlb_flush(int target_cpu) { } diff --git a/src/platform/i386/chal/chal_proto.h b/src/platform/i386/chal/chal_proto.h index 20dd9dda1..49fd4295b 100644 --- a/src/platform/i386/chal/chal_proto.h +++ b/src/platform/i386/chal/chal_proto.h @@ -16,7 +16,6 @@ #define MAX_ASID_BITS 12 #define MAX_NUM_ASID (1<pgtbl | pt->asid; - asm volatile("mov %0, %%cr3" : : "r"(cr3)); + __writecr3((unsigned long)pt->pgtbl | pt->asid | CR3_NO_FLUSH); } extern asid_t free_asid; static inline asid_t chal_asid_alloc(void) { +#if defined(__x86_64__) + if (unlikely(free_asid >= MAX_NUM_ASID)) assert(0); + return cos_faa((int *)&free_asid, 1); +#elif defined(__i386__) return 0; - // if (unlikely(free_asid >= MAX_NUM_ASID)) assert(0); - // return cos_faa((int *)&free_asid, 1); +#endif } #endif /* CHAL_PROTO_H */ diff --git a/src/platform/i386/chal_cpu.h b/src/platform/i386/chal_cpu.h index c80f61431..7fcc471e8 100644 --- a/src/platform/i386/chal_cpu.h +++ b/src/platform/i386/chal_cpu.h @@ -121,7 +121,7 @@ chal_cpu_init(void) #if defined(__x86_64__) u32_t low = 0, high = 0; - chal_cpu_cr4_set(cr4 | CR4_PSE | CR4_PGE | CR4_PCE); + chal_cpu_cr4_set(cr4 | CR4_PSE | CR4_PGE | CR4_PCE | CR4_PCIDE); readmsr(MSR_IA32_EFER, &low, &high); writemsr(MSR_IA32_EFER,low | 0x1, high); diff --git a/src/platform/i386/kernel.c b/src/platform/i386/kernel.c index 2b73a17ec..7a286de74 100644 --- a/src/platform/i386/kernel.c +++ b/src/platform/i386/kernel.c @@ -5,7 +5,6 @@ #include "boot_comp.h" #include "mem_layout.h" #include "chal_cpu.h" -#include "chal_pmu.h" #include #include @@ -186,7 +185,6 @@ kmain(unsigned long mboot_addr, unsigned long mboot_magic) smp_init(cores_ready); cores_ready[INIT_CORE] = 1; - chal_pmu_init(); kern_boot_upcall(); diff --git a/src/platform/x86_64/Makefile b/src/platform/x86_64/Makefile index 7c9130fff..ca31d4e15 100644 --- a/src/platform/x86_64/Makefile +++ b/src/platform/x86_64/Makefile @@ -51,6 +51,7 @@ OBJS += vga.o OBJS += 
exception.o OBJS += lapic.o OBJS += chal_pgtbl.o +OBJS += chal_pmu.o COS_OBJ += pgtbl.o COS_OBJ += retype_tbl.o diff --git a/src/platform/x86_64/chal_pmu.c b/src/platform/x86_64/chal_pmu.c new file mode 100644 index 000000000..4752ac64c --- /dev/null +++ b/src/platform/x86_64/chal_pmu.c @@ -0,0 +1,149 @@ +#ifndef CHAL_PMU +#define CHAL_PMU + +#include "shared/cos_types.h" +#include "pmu.h" + +/* + * The code below is for the Intel x86 PMU specifically; + * documentation for programming with this feature is + * found primarily in the Intel 64 and IA-32 Architectures + * Software Developer's Manual, Volume 4. + */ + +/* MSR addresses (decimal: 193 = 0xC1, 390 = 0x186, 909 = 0x38D, 911 = 0x38F) */ +#define MSR_PMC0 193 +#define MSR_PMC1 194 +#define MSR_PMC2 195 +#define MSR_PMC3 196 +#define MSR_PERFEVTSELX_BASE 390 +#define MSR_PERFEVTSEL0 390 +#define MSR_PERFEVTSEL1 391 +#define MSR_PERFEVTSEL2 392 +#define MSR_PERFEVTSEL3 393 +#define MSR_FIXED_CTR_CTRL 909 +#define MSR_PERF_GLOBAL_CTRL 911 + +/* x86 MSR IA32_PERFEVTSELX Programming Bits */ +#define IA32_PERFEVTSELX_UMASK_SHFT 8 +#define IA32_PERFEVTSELX_CMASK_SHFT 24 +#define IA32_PERFEVTSELX_USR (1 << 16) +#define IA32_PERFEVTSELX_OS (1 << 17) +#define IA32_PERFEVTSELX_E (1 << 18) +#define IA32_PERFEVTSELX_INT (1 << 20) +#define IA32_PERFEVTSELX_ANYTHD (1 << 21) /* deprecated on pmu version 4 */ +#define IA32_PERFEVTSELX_EN (1 << 22) +#define IA32_PERFEVTSELX_INV (1 << 23) + +/* IA32_FIXED_CTR_CTRL bits */ +#define IA32_FIXED_CTR_CTRL_USR_EN (1 << 0) +#define IA32_FIXED_CTR_CTRL_OS_EN (1 << 1) + +#define PMU_NUM_FIXED_PMC 3 + +/* set during chal initialization */ +extern u8_t processor_num_pmc; + +static inline void +wrmsr(u32_t msr_addr, u32_t in_lo, u32_t in_hi) +{ + asm volatile ("wrmsr" : : "a"(in_lo), "d"(in_hi), "c"(msr_addr)); +} + +static inline void +rdmsr(u32_t msr_addr, u32_t *out_lo, u32_t *out_hi) +{ + asm volatile ("rdmsr" : "=a"(*out_lo), "=d"(*out_hi) : "c"(msr_addr)); +} + +int +chal_pmu_fixed_cntr_enable(u8_t cntr) +{ + u32_t perf_global_ctrl_lo, 
perf_global_ctrl_hi; + u32_t perf_fixed_ctrl_lo, perf_fixed_ctrl_hi; + + if (cntr >= PMU_NUM_FIXED_PMC) return -EINVAL; + + rdmsr(MSR_PERF_GLOBAL_CTRL, &perf_global_ctrl_lo, &perf_global_ctrl_hi); + rdmsr(MSR_FIXED_CTR_CTRL, &perf_fixed_ctrl_lo, &perf_fixed_ctrl_hi); + + /* fixed counter enable is in the high dword */ + perf_global_ctrl_hi |= 1 << cntr; + /* enable OS and USR mode counting of event, maybe this should be made optional? */ + perf_fixed_ctrl_lo |= (IA32_FIXED_CTR_CTRL_USR_EN | IA32_FIXED_CTR_CTRL_OS_EN) << (cntr * 4); + + wrmsr(MSR_PERF_GLOBAL_CTRL, perf_global_ctrl_lo, perf_global_ctrl_hi); + wrmsr(MSR_FIXED_CTR_CTRL, perf_fixed_ctrl_lo, perf_fixed_ctrl_hi); + + return 0; +} + +int +chal_pmu_fixed_cntr_disable(u8_t cntr) +{ + u32_t perf_global_ctrl_lo, perf_global_ctrl_hi; + u32_t perf_fixed_ctrl_lo, perf_fixed_ctrl_hi; + + if (cntr >= PMU_NUM_FIXED_PMC) return -EINVAL; + + rdmsr(MSR_PERF_GLOBAL_CTRL, &perf_global_ctrl_lo, &perf_global_ctrl_hi); + rdmsr(MSR_FIXED_CTR_CTRL, &perf_fixed_ctrl_lo, &perf_fixed_ctrl_hi); + + /* fixed counter enable is in the high dword; clear this counter's bit */ + perf_global_ctrl_hi &= ~(1 << cntr); + /* clear the OS and USR mode enable bits in this counter's 4-bit control field 
*/ + perf_fixed_ctrl_lo &= ~((IA32_FIXED_CTR_CTRL_USR_EN | IA32_FIXED_CTR_CTRL_OS_EN) << (cntr * 4)); + + wrmsr(MSR_PERF_GLOBAL_CTRL, perf_global_ctrl_lo, perf_global_ctrl_hi); + wrmsr(MSR_FIXED_CTR_CTRL, perf_fixed_ctrl_lo, perf_fixed_ctrl_hi); + + return 0; +} + +int +chal_pmu_event_cntr_enable(u8_t cntr) +{ + u32_t perf_global_ctrl_lo, perf_global_ctrl_hi; + + if (cntr >= processor_num_pmc) return -EINVAL; + + rdmsr(MSR_PERF_GLOBAL_CTRL, &perf_global_ctrl_lo, &perf_global_ctrl_hi); + perf_global_ctrl_lo |= (1ul << cntr); + wrmsr(MSR_PERF_GLOBAL_CTRL, perf_global_ctrl_lo, perf_global_ctrl_hi); + + return 0; +} + +int +chal_pmu_event_cntr_disable(u8_t cntr) +{ + u32_t perf_global_ctrl_lo, perf_global_ctrl_hi; + + if (cntr >= processor_num_pmc) return -EINVAL; + + rdmsr(MSR_PERF_GLOBAL_CTRL, &perf_global_ctrl_lo, &perf_global_ctrl_hi); + perf_global_ctrl_lo &= ~(1ul << cntr); + wrmsr(MSR_PERF_GLOBAL_CTRL, perf_global_ctrl_lo, perf_global_ctrl_hi); + + return 0; +} + +int +chal_pmu_event_cntr_program(u8_t cntr, u8_t evt, u8_t umask) +{ + u32_t perf_evt_sel; + + if (cntr >= processor_num_pmc) return -EINVAL; + + /* enable counter; count USR and OS mode events; again, maybe optional? 
*/ + perf_evt_sel = IA32_PERFEVTSELX_USR | IA32_PERFEVTSELX_OS | IA32_PERFEVTSELX_EN; + + /* set event */ + perf_evt_sel |= evt | (umask << IA32_PERFEVTSELX_UMASK_SHFT); + wrmsr(MSR_PERFEVTSELX_BASE + cntr, perf_evt_sel, 0); + + return 0; +} + + +#endif \ No newline at end of file diff --git a/src/platform/x86_64/chal_pmu.h b/src/platform/x86_64/chal_pmu.h deleted file mode 100644 index 422dc4007..000000000 --- a/src/platform/x86_64/chal_pmu.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef CHAL_PMU -#define CHAL_PMU - -#include "chal_config.h" - - -/* x86 MSR IA32_PERF_GLOBAL_CTRL Programming Bits */ -#define x86_MSR_GLBL_CTRL_EN_PC0 (1 << 0) -#define x86_MSR_GLBL_CTRL_EN_PC1 (1 << 1) -#define x86_MSR_GLBL_CTRL_EN_PC2 (1 << 2) -#define x86_MSR_GLBL_CTRL_EN_PC3 (1 << 3) -#define x86_MSR_GLBL_CTRL_EN_FC0 (1ul << 32) -#define x86_MSR_GLBL_CTRL_EN_FC1 (1ul << 33) -#define x86_MSR_GLBL_CTRL_EN_FC2 (1ul << 34) - - -/* x86 MSR IA32_PERFEVTSELX Programming Bits */ -#define X86_MSR_EVTSEL_EVTMSK_SFT 8 -#define X86_MSR_EVTSEL_CMASK_SFT 24 -#define X86_MSR_EVTSEL_USR (1 << 16) -#define X86_MSR_EVTSEL_OS (1 << 17) -#define X86_MSR_EVTSEL_E (1 << 18) -#define X86_MSR_EVTSEL_INT (1 << 20) -#define X86_MSR_EVTSEL_ANYTHD (1 << 21) -#define X86_MSR_EVTSEL_EN (1 << 22) -#define X86_MSR_EVTSEL_INV (1 << 23) - -/* MSR addresses */ -#define MSR_PERFEVTSEL0 390 -#define MSR_PERFEVTSEL1 391 -#define MSR_PERFEVTSEL2 392 -#define MSR_PERFEVTSEL3 393 -#define MSR_FIXED_CTR_CTRL 909 -#define MSR_PERF_GLOBAL_CTRL 911 -#define MSR_PMC1 193 -#define MSR_PMC2 194 -#define MSR_PMC3 195 -#define MSR_PMC4 196 - -static inline void -chal_pmu_init(void) -{ - unsigned long perf_global_ctrl = x86_MSR_GLBL_CTRL_EN_PC0; - unsigned long perf_fixed_ctr = 1 || (1 << 1); - u32_t perf_evt_sel = X86_MSR_EVTSEL_USR | X86_MSR_EVTSEL_OS | X86_MSR_EVTSEL_EN | X86_MSR_EVTSEL_ANYTHD | 0x0208; - - asm volatile ("wrmsr" : : "a"((u32_t)perf_global_ctrl), "d"((u32_t)(perf_global_ctrl >> 32)), "c"(MSR_PERF_GLOBAL_CTRL)); - asm 
volatile ("wrmsr" : : "a"((u32_t)perf_fixed_ctr), "d"((u32_t)(perf_fixed_ctr >> 32)), "c"(MSR_FIXED_CTR_CTRL)); - asm volatile ("wrmsr" : : "a"(perf_evt_sel), "d"(0), "c"(MSR_PERFEVTSEL0)); -} - -static inline void -chal_pmu_evtset(u8_t evt) -{ - -} - -#endif \ No newline at end of file From dbdeeb283e9c70a937542fc95278a42c6af02181 Mon Sep 17 00:00:00 2001 From: evanstella Date: Mon, 7 Mar 2022 11:48:18 -0500 Subject: [PATCH 3/7] asid pgtbl switch implementation --- .../tests/unit_defcompinfo/unit_defcompinfo.c | 2 +- .../tests/unit_pingshmem/ping.c | 21 ++++------ .../implementation/tests/unit_pmu/pmu_test.c | 4 +- src/components/lib/crt/crt.c | 8 ++-- src/components/lib/kernel/cos_kernel_api.c | 8 ++-- src/components/lib/kernel/cos_kernel_api.h | 4 +- src/kernel/capinv.c | 8 ++-- src/kernel/include/component.h | 2 +- src/kernel/include/pgtbl.h | 6 +-- src/kernel/pgtbl.c | 4 +- src/platform/i386/boot_comp.c | 6 +-- src/platform/i386/chal.c | 4 +- src/platform/i386/chal/chal_plat.h | 2 - src/platform/i386/chal/chal_proto.h | 42 ++++++++++++------- src/platform/i386/chal_pgtbl.c | 15 ++++--- 15 files changed, 79 insertions(+), 57 deletions(-) diff --git a/src/components/implementation/tests/unit_defcompinfo/unit_defcompinfo.c b/src/components/implementation/tests/unit_defcompinfo/unit_defcompinfo.c index e9b14c89e..58216b28a 100644 --- a/src/components/implementation/tests/unit_defcompinfo/unit_defcompinfo.c +++ b/src/components/implementation/tests/unit_defcompinfo/unit_defcompinfo.c @@ -131,7 +131,7 @@ cos_init(void) struct cos_compinfo *child_ci = cos_compinfo_get(&child_defci[id]); printc("\tCreating new %s component [%d]\n", is_sched ? 
"scheduler" : "simple", id); - child_utpt = cos_pgtbl_alloc(ci); + child_utpt = cos_pgtbl_alloc(ci, (asid_t)0); assert(child_utpt); cos_meminfo_init(&(child_ci->mi), BOOT_MEM_KM_BASE, CHILD_UNTYPED_SIZE, child_utpt); diff --git a/src/components/implementation/tests/unit_pingshmem/ping.c b/src/components/implementation/tests/unit_pingshmem/ping.c index 34c94d4ea..301674daa 100644 --- a/src/components/implementation/tests/unit_pingshmem/ping.c +++ b/src/components/implementation/tests/unit_pingshmem/ping.c @@ -276,6 +276,14 @@ ping_bench_msgpassing(void) /* reset memory for test */ shm_bm_init_testobj(shm); + /* + * Counting seems to slowdown execution by a not-significant amount of cycles. + * Not sure if this is a hardware thing of has to do with the virtualization of + * the PMU. + * Comment out this line for a more consistant tsc read. + */ + cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E); + begin = ps_tsc(); for (i = 0; i < BENCH_ITER; i++) { /* allocate an obj from shared mem */ @@ -286,7 +294,7 @@ ping_bench_msgpassing(void) } end = ps_tsc(); bench = (end - begin) / BENCH_ITER; - printc("BENCHMARK Message passing: %llu cycles\n", bench); + printc("BENCHMARK Message passing: %llu cycles, DTLB misses: %lu\n", bench, rdpmc(0)); } int @@ -306,22 +314,11 @@ main(void) shm_bm_init_testobj(shm); pongshmem_test_map(id); - printc("Counter: %lu\n", rdpmc(0)); - ping_test_objread(); - printc("Counter: %lu\n", rdpmc(0)); - ping_test_bigalloc(); - printc("Counter: %lu\n", rdpmc(0)); - ping_test_objfree(); - printc("Counter: %lu\n", rdpmc(0)); - ping_test_bigfree(); - printc("Counter: %lu\n", rdpmc(0)); - ping_test_refcnt(); - printc("Counter: %lu\n", rdpmc(0)); ping_bench_syncinv(); diff --git a/src/components/implementation/tests/unit_pmu/pmu_test.c b/src/components/implementation/tests/unit_pmu/pmu_test.c index 5c33534ff..dfe1343bf 100644 --- a/src/components/implementation/tests/unit_pmu/pmu_test.c +++ 
b/src/components/implementation/tests/unit_pmu/pmu_test.c @@ -3,7 +3,7 @@ #include #include -#define NUM_PAGES 100 +#define NUM_PAGES 1000 static unsigned long rdpmc (unsigned long cntr) @@ -18,8 +18,10 @@ rdpmc (unsigned long cntr) int main(void) { + /* cheaty way to test PMU counters; should figure out a better API */ cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0); cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1); + /* enable architecture specific counter events (reference https://perfmon-events.intel.com/) */ cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E); cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1, 0xC5, 0x11); diff --git a/src/components/lib/crt/crt.c b/src/components/lib/crt/crt.c index 28fdc75fd..df556e8dc 100644 --- a/src/components/lib/crt/crt.c +++ b/src/components/lib/crt/crt.c @@ -266,7 +266,7 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk assert(inv.server->id != chkpt->c->id); } - ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, root_ci); + ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, (asid_t)0, root_ci); assert(!ret); mem = cos_page_bump_allocn(root_ci, chkpt->tot_sz_mem); @@ -312,6 +312,7 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk * * @return: 0 on success, != 0 on error. */ +int next_asid = 1; /* FIXME: This is to test ASID Effectivness. 
Replace with namespace implementation */ int crt_comp_create(struct crt_comp *c, char *name, compid_t id, void *elf_hdr, vaddr_t info) { @@ -333,7 +334,8 @@ crt_comp_create(struct crt_comp *c, char *name, compid_t id, void *elf_hdr, vadd printc("\t\t elf obj: ro [0x%lx, 0x%lx), data [0x%lx, 0x%lx), bss [0x%lx, 0x%lx).\n", c->ro_addr, c->ro_addr + ro_sz, c->rw_addr, c->rw_addr + data_sz, c->rw_addr + data_sz, c->rw_addr + data_sz + bss_sz); - ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, root_ci); + /* FIXME: Replace next_asid with namespace implementation */ + ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, next_asid++, root_ci); assert(!ret); tot_sz = round_up_to_page(round_up_to_page(ro_sz) + data_sz + bss_sz); @@ -1082,7 +1084,7 @@ crt_comp_exec(struct crt_comp *c, struct crt_comp_exec_context *ctxt) if (crt_comp_alias_in(c, c, &compres, CRT_COMP_ALIAS_PGTBL | CRT_COMP_ALIAS_COMP)) BUG(); /* Set up the untyped memory in the new component */ - utpt = cos_pgtbl_alloc(ci); + utpt = cos_pgtbl_alloc(ci, (asid_t)0); assert(utpt); cos_meminfo_init(&(target_ci->mi), BOOT_MEM_KM_BASE, ctxt->memsz, utpt); cos_meminfo_alloc(target_ci, BOOT_MEM_KM_BASE, ctxt->memsz); diff --git a/src/components/lib/kernel/cos_kernel_api.c b/src/components/lib/kernel/cos_kernel_api.c index a6be830d4..8c2982ae0 100644 --- a/src/components/lib/kernel/cos_kernel_api.c +++ b/src/components/lib/kernel/cos_kernel_api.c @@ -802,7 +802,7 @@ cos_captbl_alloc(struct cos_compinfo *ci) } pgtblcap_t -cos_pgtbl_alloc(struct cos_compinfo *ci) +cos_pgtbl_alloc(struct cos_compinfo *ci, asid_t asid) { vaddr_t kmem; capid_t cap; @@ -812,7 +812,7 @@ cos_pgtbl_alloc(struct cos_compinfo *ci) assert(ci); if (__alloc_mem_cap(ci, CAP_PGTBL, &kmem, &cap)) return 0; - if (call_cap_op(ci->captbl_cap, CAPTBL_OP_PGTBLACTIVATE, cap, __compinfo_metacap(ci)->mi.pgtbl_cap, kmem, 0)) + if (call_cap_op(ci->captbl_cap, CAPTBL_OP_PGTBLACTIVATE, cap, 
__compinfo_metacap(ci)->mi.pgtbl_cap, kmem, asid << 16)) BUG(); return cap; @@ -845,7 +845,7 @@ cos_comp_alloc(struct cos_compinfo *ci, captblcap_t ctc, pgtblcap_t ptc, vaddr_t int cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry, - struct cos_compinfo *ci_resources) + asid_t asid, struct cos_compinfo *ci_resources) { pgtblcap_t ptc; captblcap_t ctc; @@ -854,7 +854,7 @@ cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_fronti printd("cos_compinfo_alloc\n"); - ptc = cos_pgtbl_alloc(ci_resources); + ptc = cos_pgtbl_alloc(ci_resources, asid); assert(ptc); ctc = cos_captbl_alloc(ci_resources); assert(ctc); diff --git a/src/components/lib/kernel/cos_kernel_api.h b/src/components/lib/kernel/cos_kernel_api.h index f62f8c9c7..0702d966e 100644 --- a/src/components/lib/kernel/cos_kernel_api.h +++ b/src/components/lib/kernel/cos_kernel_api.h @@ -107,10 +107,10 @@ int cos_pgtbl_intern_expandwith(struct cos_compinfo *ci, pgtblcap_t intern, vadd * This uses the next three functions to allocate a new component and * correctly populate ci (allocating all resources from ci_resources). 
*/ -int cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry, +int cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry, asid_t asid, struct cos_compinfo *ci_resources); captblcap_t cos_captbl_alloc(struct cos_compinfo *ci); -pgtblcap_t cos_pgtbl_alloc(struct cos_compinfo *ci); +pgtblcap_t cos_pgtbl_alloc(struct cos_compinfo *ci, asid_t asid); compcap_t cos_comp_alloc(struct cos_compinfo *ci, captblcap_t ctc, pgtblcap_t ptc, vaddr_t entry); void cos_comp_capfrontier_update(struct cos_compinfo *ci, capid_t cap_frontier); diff --git a/src/kernel/capinv.c b/src/kernel/capinv.c index 50a1806c9..1892f936d 100644 --- a/src/kernel/capinv.c +++ b/src/kernel/capinv.c @@ -1123,9 +1123,10 @@ static int __attribute__((noinline)) composite_syscall_slowpath(struct pt_regs * capid_t pt_entry = __userregs_get1(regs); capid_t pgtbl_cap = __userregs_get2(regs); vaddr_t kmem_cap = __userregs_get3(regs); - capid_t pgtbl_lvl = __userregs_get4(regs); - /* FIXME: change lvl to order */ - ret = chal_pgtbl_pgtblactivate(ct, cap, pt_entry, pgtbl_cap, kmem_cap, pgtbl_lvl); + capid_t pgtbl_lvl = __userregs_get4(regs) & 0xFFFF; + asid_t asid = __userregs_get4(regs) >> 16; + + ret = chal_pgtbl_pgtblactivate(ct, cap, pt_entry, pgtbl_cap, kmem_cap, pgtbl_lvl, asid); break; } @@ -1666,6 +1667,7 @@ static int __attribute__((noinline)) composite_syscall_slowpath(struct pt_regs * } case CAPTBL_OP_HW_PMU_EN_FIXED_CNTR: { u8_t cntr = __userregs_get1(regs); + ret = pmu_fixed_cntr_enable(cntr); break; } diff --git a/src/kernel/include/component.h b/src/kernel/include/component.h index ef335bc94..e04b31d1d 100644 --- a/src/kernel/include/component.h +++ b/src/kernel/include/component.h @@ -59,7 +59,7 @@ comp_activate(struct captbl *t, capid_t cap, capid_t capin, capid_t captbl_cap, compc->entry_addr = entry_addr; compc->info.pgtblinfo.pgtbl = ptc->pgtbl; - compc->info.pgtblinfo.asid = chal_asid_alloc(); + 
compc->info.pgtblinfo.asid = ptc->asid; compc->info.captbl = ctc->captbl; compc->pgd = ptc; compc->ct_top = ctc; diff --git a/src/kernel/include/pgtbl.h b/src/kernel/include/pgtbl.h index 7709ba029..9520a4590 100644 --- a/src/kernel/include/pgtbl.h +++ b/src/kernel/include/pgtbl.h @@ -66,7 +66,7 @@ unsigned long *pgtbl_lkup_pgd(pgtbl_t pt, vaddr_t addr, word_t *flags); int pgtbl_get_cosframe(pgtbl_t pt, vaddr_t frame_addr, paddr_t *cosframe, vaddr_t *order); vaddr_t pgtbl_translate(pgtbl_t pt, vaddr_t addr, word_t *flags); pgtbl_t pgtbl_create(void *page, void *curr_pgtbl); -int pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl); +int pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid); int pgtbl_deactivate(struct captbl *t, struct cap_captbl *dest_ct_cap, unsigned long capin, livenessid_t lid, capid_t pgtbl_cap, capid_t cosframe_addr, const int root); int pgtbl_mapping_scan(struct cap_pgtbl *pt); @@ -105,7 +105,7 @@ unsigned long chal_pgtbl_flag(unsigned long input); int chal_pgtbl_kmem_act(pgtbl_t pt, vaddr_t addr, unsigned long *kern_addr, unsigned long **pte_ret); int chal_tlb_quiescence_check(u64_t timestamp); int chal_cap_memactivate(struct captbl *ct, struct cap_pgtbl *pt, capid_t frame_cap, capid_t dest_pt, vaddr_t vaddr, vaddr_t order); -int chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl); +int chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid); int chal_pgtbl_deactivate(struct captbl *t, struct cap_captbl *dest_ct_cap, unsigned long capin, livenessid_t lid, capid_t pgtbl_cap, capid_t cosframe_addr, const int root); @@ -127,7 +127,7 @@ int chal_pgtbl_quie_check(u32_t orig_v); void chal_pgtbl_init_pte(void *pte); /* Creation of the table object - not to be confused with activation of cap */ -int chal_pgtbl_pgtblactivate(struct 
captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl); +int chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl, asid_t asid); /* Deactivate */ int chal_pgtbl_deact_pre(struct cap_header *ch, u32_t pa); /* Page mapping */ diff --git a/src/kernel/pgtbl.c b/src/kernel/pgtbl.c index 3849da5fe..143342171 100644 --- a/src/kernel/pgtbl.c +++ b/src/kernel/pgtbl.c @@ -25,9 +25,9 @@ cap_memactivate(struct captbl *ct, struct cap_pgtbl *pt, capid_t frame_cap, capi } int -pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl) +pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid) { - return chal_pgtbl_activate(t, cap, capin, pgtbl, lvl); + return chal_pgtbl_activate(t, cap, capin, pgtbl, lvl, asid); } int diff --git a/src/platform/i386/boot_comp.c b/src/platform/i386/boot_comp.c index 7ce078de7..2bd0493f0 100644 --- a/src/platform/i386/boot_comp.c +++ b/src/platform/i386/boot_comp.c @@ -110,7 +110,7 @@ boot_pgtbl_expand(struct captbl *ct, capid_t pgdcap, capid_t ptecap, const char unsigned int nptes = 0, lvl, i; struct cap_pgtbl *pte_cap, *pgd_cap; - if (pgtbl_activate(ct, BOOT_CAPTBL_SELF_CT, ptecap, NULL, 1)) assert(0); + if (pgtbl_activate(ct, BOOT_CAPTBL_SELF_CT, ptecap, NULL, 1, (asid_t)0)) assert(0); for (lvl = 1; lvl < PGTBL_DEPTH; lvl++) { nptes = boot_nptes(user_vaddr, range, lvl); ptes = mem_boot_alloc(nptes); @@ -292,7 +292,7 @@ kern_boot_comp(const cpuid_t cpu_id) assert(boot_vm_pgd); memcpy((void *)boot_vm_pgd + KERNEL_PGD_REGION_OFFSET, (void *)(&boot_comp_pgd) + KERNEL_PGD_REGION_OFFSET, KERNEL_PGD_REGION_SIZE); - if (pgtbl_activate(glb_boot_ct, BOOT_CAPTBL_SELF_CT, BOOT_CAPTBL_SELF_PT, (pgtbl_t)chal_va2pa(boot_vm_pgd), 0)) assert(0); + if (pgtbl_activate(glb_boot_ct, BOOT_CAPTBL_SELF_CT, BOOT_CAPTBL_SELF_PT, 
(pgtbl_t)chal_va2pa(boot_vm_pgd), 0, (asid_t)0)) assert(0); /* Map in the virtual memory */ ret = boot_elf_process(glb_boot_ct, BOOT_CAPTBL_SELF_PT, BOOT_CAPTBL_BOOTVM_PTE, "booter VM", @@ -312,7 +312,7 @@ kern_boot_comp(const cpuid_t cpu_id) * Need to account for the pages that will be allocated as * PTEs */ - if (pgtbl_activate(glb_boot_ct, BOOT_CAPTBL_SELF_CT, BOOT_CAPTBL_SELF_UNTYPED_PT, pgtbl, 0)) assert(0); + if (pgtbl_activate(glb_boot_ct, BOOT_CAPTBL_SELF_CT, BOOT_CAPTBL_SELF_UNTYPED_PT, pgtbl, 0, (asid_t)0)) assert(0); boot_pgtbl_expand(glb_boot_ct, BOOT_CAPTBL_SELF_UNTYPED_PT, BOOT_CAPTBL_KM_PTE, "untyped memory", BOOT_MEM_KM_BASE, mem_utmem_end() - mem_boot_end()); diff --git a/src/platform/i386/chal.c b/src/platform/i386/chal.c index f3f49a733..f986710a9 100644 --- a/src/platform/i386/chal.c +++ b/src/platform/i386/chal.c @@ -9,11 +9,13 @@ asid_t free_asid = 1; char timer_detector[PAGE_SIZE] PAGE_ALIGNED; extern void *cos_kmem, *cos_kmem_base; u32_t chal_msr_mhz = 0; - u8_t processor_num_pmc; paddr_t chal_kernel_mem_pa; +/* maps asids with the pgtbl they are identifying in the tlb */ +pgtbl_t tlb_asid_active[NUM_CPU][NUM_ASID_MAX]; + void * chal_alloc_kern_mem(int order) { diff --git a/src/platform/i386/chal/chal_plat.h b/src/platform/i386/chal/chal_plat.h index 6286bfdd0..fb0e54703 100644 --- a/src/platform/i386/chal/chal_plat.h +++ b/src/platform/i386/chal/chal_plat.h @@ -31,8 +31,6 @@ __invpcid(u64_t pcid, u64_t addr, unsigned long type) asm volatile("invpcid %0, %1" : : "m"(desc), "r"(type) : "memory"); } -#endif - static inline unsigned long __readcr3(void) { diff --git a/src/platform/i386/chal/chal_proto.h b/src/platform/i386/chal/chal_proto.h index 49fd4295b..323c70d06 100644 --- a/src/platform/i386/chal/chal_proto.h +++ b/src/platform/i386/chal/chal_proto.h @@ -13,8 +13,8 @@ #define PGTBL_FLAG_MASK 0xf800000000000fff #define PGTBL_FRAME_MASK (~PGTBL_FLAG_MASK) -#define MAX_ASID_BITS 12 -#define MAX_NUM_ASID (1<pgtbl | pt->asid | CR3_NO_FLUSH); 
+{ + pgtbl_t *curr_cached = &tlb_asid_active[get_cpuid()][pt->asid]; + + /* currently cached pgtbl is this pgtbl */ + if (*curr_cached == pt->pgtbl) { + __writecr3((unsigned long)pt->pgtbl | pt->asid | CR3_NO_FLUSH); + return; + } + + /* no pgtbl cached for this asid */ + if (*curr_cached == 0) { + __writecr3((unsigned long)pt->pgtbl | pt->asid | CR3_NO_FLUSH); + } + /* different pgtbl cached for this asid, need to invalidate asid */ + else { + __writecr3((unsigned long)pt->pgtbl | pt->asid); + } + + *curr_cached = pt->pgtbl; } -extern asid_t free_asid; static inline asid_t chal_asid_alloc(void) { -#if defined(__x86_64__) - if (unlikely(free_asid >= MAX_NUM_ASID)) assert(0); - return cos_faa((int *)&free_asid, 1); -#elif defined(__i386__) return 0; -#endif } #endif /* CHAL_PROTO_H */ diff --git a/src/platform/i386/chal_pgtbl.c b/src/platform/i386/chal_pgtbl.c index 5eee6badf..9a691a482 100644 --- a/src/platform/i386/chal_pgtbl.c +++ b/src/platform/i386/chal_pgtbl.c @@ -228,7 +228,7 @@ chal_cap_memactivate(struct captbl *ct, struct cap_pgtbl *pt, capid_t frame_cap, } int -chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl) +chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid) { struct cap_pgtbl *pt; int ret; @@ -236,6 +236,7 @@ chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pg pt = (struct cap_pgtbl *)__cap_capactivate_pre(t, cap, capin, CAP_PGTBL, &ret); if (unlikely(!pt)) return ret; pt->pgtbl = pgtbl; + pt->asid = asid; pt->refcnt_flags = 1; pt->parent = NULL; /* new cap has no parent. only copied cap has. 
*/ @@ -650,7 +651,7 @@ chal_pgtbl_init_pte(void *pte) } int -chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl) +chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl, asid_t asid) { pgtbl_t new_pt, curr_pt; vaddr_t kmem_addr = 0; @@ -660,7 +661,11 @@ chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid ret = cap_kmem_activate(ct, pgtbl_cap, kmem_cap, (unsigned long *)&kmem_addr, &pte); if (unlikely(ret)) return ret; assert(kmem_addr && pte); - +#if defined(__i386__) + /* x86-32 does not support PCID :( */ + if (asid != 0) return -EINVAL; +#endif + if (pgtbl_lvl == 0) { /* PGD */ struct cap_pgtbl *cap_pt = (struct cap_pgtbl *)captbl_lkup(ct, pgtbl_cap); @@ -670,14 +675,14 @@ chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid assert(curr_pt); new_pt = pgtbl_create((void *)kmem_addr, curr_pt); - ret = pgtbl_activate(ct, cap, pt_entry, new_pt, 0); + ret = pgtbl_activate(ct, cap, pt_entry, new_pt, 0, asid); } else if (pgtbl_lvl < 0 || pgtbl_lvl > 3 ) { /* Not supported yet. */ printk("cos: warning - PGTBL level greater than 4 not supported yet. 
\n"); ret = -1; } else { pgtbl_init_pte((void *)kmem_addr); - ret = pgtbl_activate(ct, cap, pt_entry, (pgtbl_t)kmem_addr, pgtbl_lvl); + ret = pgtbl_activate(ct, cap, pt_entry, (pgtbl_t)kmem_addr, pgtbl_lvl, asid); } if (ret) kmem_unalloc(pte); From 8ebb5219a222243af69400bcd4a55c86638b8bdd Mon Sep 17 00:00:00 2001 From: evanstella Date: Mon, 7 Mar 2022 14:00:47 -0500 Subject: [PATCH 4/7] add asid to cosdefkernel --- src/components/lib/kernel/cos_defkernel_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/lib/kernel/cos_defkernel_api.c b/src/components/lib/kernel/cos_defkernel_api.c index cfb7597e9..5df3c0a2e 100644 --- a/src/components/lib/kernel/cos_defkernel_api.c +++ b/src/components/lib/kernel/cos_defkernel_api.c @@ -135,7 +135,7 @@ cos_defcompinfo_child_alloc(struct cos_defcompinfo *child_defci, vaddr_t entry, struct cos_aep_info *child_aep = cos_sched_aep_get(child_defci); assert(curr_defci_init_status == INITIALIZED); - ret = cos_compinfo_alloc(child_ci, heap_ptr, cap_frontier, entry, ci); + ret = cos_compinfo_alloc(child_ci, heap_ptr, cap_frontier, entry, (asid_t)0, ci); if (ret) return ret; ret = cos_aep_alloc_intern(child_aep, child_defci, 0, is_sched ? 
sched_aep : NULL, NULL, NULL, 0); From 4db069078fe6b9aab0ec5b850aef86ddf418a296 Mon Sep 17 00:00:00 2001 From: evanstella Date: Tue, 8 Mar 2022 13:18:34 -0500 Subject: [PATCH 5/7] formatting --- src/components/implementation/tests/unit_pingshmem/ping.c | 4 ++-- src/components/interface/pongshmem/pongshmem.h | 2 +- src/components/lib/crt/crt.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/implementation/tests/unit_pingshmem/ping.c b/src/components/implementation/tests/unit_pingshmem/ping.c index 301674daa..a07681001 100644 --- a/src/components/implementation/tests/unit_pingshmem/ping.c +++ b/src/components/implementation/tests/unit_pingshmem/ping.c @@ -277,10 +277,10 @@ ping_bench_msgpassing(void) shm_bm_init_testobj(shm); /* - * Counting seems to slowdown execution by a not-significant amount of cycles. + * Counting seems to slowdown execution by a non-zero amount of cycles. * Not sure if this is a hardware thing of has to do with the virtualization of * the PMU. - * Comment out this line for a more consistant tsc read. + * Comment out this line for a more consistent tsc read. */ cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E); diff --git a/src/components/interface/pongshmem/pongshmem.h b/src/components/interface/pongshmem/pongshmem.h index 90c1b90b1..35e2f2d6d 100644 --- a/src/components/interface/pongshmem/pongshmem.h +++ b/src/components/interface/pongshmem/pongshmem.h @@ -4,7 +4,7 @@ #include #include -#define BENCH_ITER 1024 +#define BENCH_ITER 2048 struct obj_test { int id; diff --git a/src/components/lib/crt/crt.c b/src/components/lib/crt/crt.c index df556e8dc..8fb742cfe 100644 --- a/src/components/lib/crt/crt.c +++ b/src/components/lib/crt/crt.c @@ -293,6 +293,7 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk return 0; } +int next_asid = 1; /* FIXME: This is to test ASID effectiveness. 
Replace with namespace implementation */ /** * Create the component from the elf object including all the resource * tables, and memory. @@ -312,7 +313,6 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk * * @return: 0 on success, != 0 on error. */ -int next_asid = 1; /* FIXME: This is to test ASID Effectivness. Replace with namespace implementation */ int crt_comp_create(struct crt_comp *c, char *name, compid_t id, void *elf_hdr, vaddr_t info) { From 094767662f6179bc6496b82b7953fdb308a9bee3 Mon Sep 17 00:00:00 2001 From: evanstella Date: Tue, 8 Mar 2022 13:21:31 -0500 Subject: [PATCH 6/7] remove erroneous printc --- src/components/implementation/tests/unit_pingshmem/ping.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/implementation/tests/unit_pingshmem/ping.c b/src/components/implementation/tests/unit_pingshmem/ping.c index a07681001..f105f4f24 100644 --- a/src/components/implementation/tests/unit_pingshmem/ping.c +++ b/src/components/implementation/tests/unit_pingshmem/ping.c @@ -323,7 +323,6 @@ main(void) ping_bench_syncinv(); ping_bench_msgpassing(); - printc("Counter: %lu\n", rdpmc(0)); From e75a549ab89bf4e76365ecfea969996648f32bb9 Mon Sep 17 00:00:00 2001 From: evanstella Date: Tue, 8 Mar 2022 13:29:34 -0500 Subject: [PATCH 7/7] remove tlb miss counting from benchmark --- .../implementation/tests/unit_pingshmem/ping.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/components/implementation/tests/unit_pingshmem/ping.c b/src/components/implementation/tests/unit_pingshmem/ping.c index f105f4f24..ba90e07ea 100644 --- a/src/components/implementation/tests/unit_pingshmem/ping.c +++ b/src/components/implementation/tests/unit_pingshmem/ping.c @@ -276,14 +276,6 @@ ping_bench_msgpassing(void) /* reset memory for test */ shm_bm_init_testobj(shm); - /* - * Counting seems to slowdown execution by a non-zero amount of cycles. 
- * Not sure if this is a hardware thing of has to do with the virtualization of - * the PMU. - * Comment out this line for a more consistent tsc read. - */ - cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E); - begin = ps_tsc(); for (i = 0; i < BENCH_ITER; i++) { /* allocate an obj from shared mem */ @@ -294,7 +286,7 @@ ping_bench_msgpassing(void) } end = ps_tsc(); bench = (end - begin) / BENCH_ITER; - printc("BENCHMARK Message passing: %llu cycles, DTLB misses: %lu\n", bench, rdpmc(0)); + printc("BENCHMARK Message passing: %llu cycles\n", bench); } int