Skip to content

Commit 56ba347

Browse files
committed
Fix long-standing mips delay slot issue
1 parent 8a28463 commit 56ba347

File tree

6 files changed

+140
-106
lines changed

6 files changed

+140
-106
lines changed

qemu/include/exec/gen-icount.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,25 @@ static inline void gen_tb_start(TCGContext *tcg_ctx, TranslationBlock *tb)
4141
tcg_gen_ld_i32(tcg_ctx, count, tcg_ctx->cpu_env,
4242
offsetof(ArchCPU, neg.icount_decr.u32) -
4343
offsetof(ArchCPU, env));
44-
44+
// Unicorn:
45+
// We CANT'T use brcondi_i32 here or we will fail liveness analysis
46+
// because it marks the end of BB
47+
if (tcg_ctx->delay_slot_flag != NULL) {
48+
TCGv_i32 tmp = tcg_const_i32(tcg_ctx, 0);
49+
// dest = (c1 cond c2 ? v1 : v2)
50+
tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GT, count, tcg_ctx->delay_slot_flag, tmp, tcg_ctx->delay_slot_flag, count);
51+
tcg_temp_free_i32(tcg_ctx, tmp);
52+
}
4553
tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
46-
4754
tcg_temp_free_i32(tcg_ctx, count);
4855
}
4956

5057
static inline void gen_tb_end(TCGContext *tcg_ctx, TranslationBlock *tb, int num_insns)
5158
{
59+
if (tcg_ctx->delay_slot_flag != NULL){
60+
tcg_temp_free_i32(tcg_ctx, tcg_ctx->delay_slot_flag);
61+
}
62+
tcg_ctx->delay_slot_flag = NULL;
5263
if (tb_cflags(tb) & CF_USE_ICOUNT) {
5364
/* Update the num_insn immediate parameter now that we know
5465
* the actual insn count. */

qemu/include/tcg/tcg.h

+1
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ struct TCGContext {
664664
struct TCGLabelPoolData *pool_labels;
665665
#endif
666666

667+
TCGv_i32 delay_slot_flag;
667668
TCGLabel *exitreq_label;
668669

669670
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];

qemu/target/mips/translate.c

+18-10
Original file line numberDiff line numberDiff line change
@@ -6006,6 +6006,10 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
60066006
{
60076007
TCGContext *tcg_ctx = ctx->uc->tcg_ctx;
60086008
if (use_goto_tb(ctx, dest)) {
6009+
// Unicorn: Force save pc for delay slot instructions, this should be
6010+
// harmless either goto_tb is taken or not but will give correct
6011+
// pc after execution stops within the delay slot.
6012+
gen_save_pc(tcg_ctx, dest);
60096013
tcg_gen_goto_tb(tcg_ctx, n);
60106014
gen_save_pc(tcg_ctx, dest);
60116015
tcg_gen_exit_tb(tcg_ctx, ctx->base.tb, n);
@@ -30888,6 +30892,9 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
3088830892

3088930893
static void mips_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
3089030894
{
30895+
DisasContext *ctx = container_of(dcbase, DisasContext, base);
30896+
TCGContext *tcg_ctx = ctx->uc->tcg_ctx;
30897+
tcg_ctx->delay_slot_flag = tcg_temp_local_new_i32(tcg_ctx);
3089130898
}
3089230899

3089330900
static void mips_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
@@ -30897,6 +30904,7 @@ static void mips_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
3089730904

3089830905
tcg_gen_insn_start(tcg_ctx, ctx->base.pc_next, ctx->hflags & MIPS_HFLAG_BMASK,
3089930906
ctx->btarget);
30907+
tcg_gen_movi_i32(tcg_ctx, tcg_ctx->delay_slot_flag, 0);
3090030908
}
3090130909

3090230910
static bool mips_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
@@ -30928,6 +30936,7 @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
3092830936
int insn_bytes;
3092930937
int is_slot;
3093030938
bool hook_insn = false;
30939+
TCGv_i32 dyn_is_slot = NULL;
3093130940

3093230941
is_slot = ctx->hflags & MIPS_HFLAG_BMASK;
3093330942

@@ -30939,6 +30948,10 @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
3093930948
return;
3094030949
}
3094130950

30951+
dyn_is_slot = tcg_const_i32(tcg_ctx, 0);
30952+
slot_op = tcg_last_op(tcg_ctx);
30953+
tcg_gen_mov_i32(tcg_ctx, tcg_ctx->delay_slot_flag, dyn_is_slot);
30954+
3094230955
// Unicorn: trace this instruction on request
3094330956
if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_CODE, ctx->base.pc_next)) {
3094430957

@@ -30949,17 +30962,7 @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
3094930962
prev_op = tcg_last_op(tcg_ctx);
3095030963
hook_insn = true;
3095130964
gen_uc_tracecode(tcg_ctx, 4, UC_HOOK_CODE_IDX, uc, ctx->base.pc_next);
30952-
30953-
// TODO: Memory hooks, maybe use icount_decr.low?
30954-
TCGLabel *skip_label = gen_new_label(tcg_ctx);
30955-
TCGv_i32 dyn_is_slot = tcg_const_i32(tcg_ctx, 0);
30956-
slot_op = tcg_last_op(tcg_ctx);
30957-
// if slot, skip exit_request
30958-
// tcg is smart enough to optimize this branch away
30959-
tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_GT, dyn_is_slot, 0, skip_label);
30960-
tcg_temp_free_i32(tcg_ctx, dyn_is_slot);
3096130965
check_exit_request(tcg_ctx);
30962-
gen_set_label(tcg_ctx, skip_label);
3096330966
}
3096430967

3096530968
if (ctx->insn_flags & ISA_NANOMIPS32) {
@@ -30978,6 +30981,7 @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
3097830981
} else {
3097930982
generate_exception_end(ctx, EXCP_RI);
3098030983
g_assert(ctx->base.is_jmp == DISAS_NORETURN);
30984+
slot_op->args[1] = is_slot;
3098130985
return;
3098230986
}
3098330987

@@ -31072,6 +31076,10 @@ static void mips_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
3107231076
g_assert_not_reached();
3107331077
}
3107431078
}
31079+
if (tcg_ctx->delay_slot_flag) {
31080+
tcg_temp_free_i32(tcg_ctx, tcg_ctx->delay_slot_flag);
31081+
}
31082+
tcg_ctx->delay_slot_flag = NULL;
3107531083
}
3107631084

3107731085
static void mips_sync_pc(DisasContextBase *db, CPUState *cpu)

qemu/tcg/tcg-op.c

+10-2
Original file line numberDiff line numberDiff line change
@@ -2866,9 +2866,17 @@ void check_exit_request(TCGContext *tcg_ctx)
28662866
tcg_gen_ld_i32(tcg_ctx, count, tcg_ctx->cpu_env,
28672867
offsetof(ArchCPU, neg.icount_decr.u32) -
28682868
offsetof(ArchCPU, env));
2869-
2869+
// Unicorn:
2870+
// We CANT'T use brcondi_i32 here or we will fail liveness analysis
2871+
// because it marks the end of BB
2872+
if (tcg_ctx->delay_slot_flag != NULL) {
2873+
TCGv_i32 tmp = tcg_const_i32(tcg_ctx, 0);
2874+
// dest = (c1 cond c2 ? v1 : v2)
2875+
tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GT, count, tcg_ctx->delay_slot_flag, tmp, tcg_ctx->delay_slot_flag, count);
2876+
tcg_temp_free_i32(tcg_ctx, tmp);
2877+
}
2878+
28702879
tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
2871-
28722880
tcg_temp_free_i32(tcg_ctx, count);
28732881
}
28742882

tests/regress/tcg_liveness_analysis_bug_issue-287.py

+82-83
Original file line numberDiff line numberDiff line change
@@ -2,89 +2,88 @@
22
from unicorn import *
33
from unicorn.arm_const import *
44

5-
# issue #287
6-
# Initial Register States: R0=3, R1=24, R2=16, R3=0
7-
#
8-
# ----- code start -----
9-
# CMP R0,R1,LSR#3
10-
# SUBCS R0,R0,R1,LSR#3 # CPU flags got changed in these two instructions, and *REMEMBERED*, now NF == VF == 0
11-
# CMP R0,#1 # CPU flags changed again, now NF == 1, VF == 0, but they are not properly *REMEMBERED*
12-
# MOV R1,R1,LSR#4
13-
# SUBGES R2,R2,#4 # according to the result of CMP, we should skip this op
14-
#
15-
# MOVGE R3,#100 # since changed flags are not *REMEMBERED* in CMP, now NF == VF == 0, which result in wrong branch
16-
# # at the end of this code block, should R3 == 0
17-
# ----- code end ------
18-
#
19-
# TCG ops are correct, plain op translation is done correctly,
20-
# but there're In-Memory bits invisible from ops that control the host code generation.
21-
# all these codes are in one TCG translation-block, so wrong things could happen.
22-
# detail explanation is given on the right side.
23-
# remember, both set_label and brcond are point to refresh the dead_temps and mem_temps states in TCG
24-
#
25-
# ----- TCG ops ------
26-
# ld_i32 tmp5,env,$0xfffffffffffffff4
27-
# movi_i32 tmp6,$0x0
28-
# brcond_i32 tmp5,tmp6,ne,$0x0
29-
# mov_i32 tmp5,r1 -------------------------
30-
# movi_i32 tmp6,$0x3 |
31-
# shr_i32 tmp5,r1,tmp6 |
32-
# mov_i32 tmp6,r0 |
33-
# sub_i32 NF,r0,tmp5 |
34-
# mov_i32 ZF,NF |
35-
# setcond_i32 CF,r0,tmp5,geu | # This part is "CMP R0,R1,LSR#3"
36-
# xor_i32 VF,NF,r0 |-----> # and "SUBCS R0,R0,R1,LSR#3"
37-
# xor_i32 tmp7,r0,tmp5 | # the last op in this block, set_label get a chance to refresh the TCG globals memory states,
38-
# and_i32 VF,VF,tmp7 | # so things get back to normal states
39-
# mov_i32 tmp6,NF | # these codes are not affected by the bug. Let's called this Part-D
40-
# movi_i32 tmp5,$0x0 |
41-
# brcond_i32 CF,tmp5,eq,$0x1 |
42-
# mov_i32 tmp5,r1 |
43-
# movi_i32 tmp6,$0x3 |
44-
# shr_i32 tmp5,r1,tmp6 |
45-
# mov_i32 tmp6,r0 |
46-
# sub_i32 tmp6,r0,tmp5 |
47-
# mov_i32 r0,tmp6 |
48-
# set_label $0x1 -------------------------
49-
# movi_i32 tmp5,$0x1 ----------------- # Let's called this Part-C
50-
# mov_i32 tmp6,r0 | # NF is used as output operand again!
51-
# sub_i32 NF,r0,tmp5 ----------------|-----> # but it is stated as Not-In-Memory,
52-
# mov_i32 ZF,NF | # no need to sync it after calculation.
53-
# setcond_i32 CF,r0,tmp5,geu | # the generated host code does not write NF
54-
# xor_i32 VF,NF,r0 | # back to its memory location, hence forgot. And the CPU flags after this calculation is not changed.
55-
# xor_i32 tmp7,r0,tmp5 | # Caution: the following SUBGES's condition check is right, even though the generated host code does not *REMEMBER* NF, it will cache the calculated result and serve SUBGES correctly
56-
# and_i32 VF,VF,tmp7 |
57-
# mov_i32 tmp6,NF |
58-
# mov_i32 tmp5,r1 | # this part is "CMP R0,#1"
59-
# movi_i32 tmp6,$0x4 | # and "MOV R1,R1,LSR#4"
60-
# shr_i32 tmp5,r1,tmp6 | # and "SUBGES R2,R2,#4"
61-
# mov_i32 r1,tmp5 |-----> # This is the part where problem start to arise
62-
# xor_i32 tmp5,VF,NF |
63-
# movi_i32 tmp6,$0x0 |
64-
# brcond_i32 tmp5,tmp6,lt,$0x2 --------|-----> # QEMU will refresh the InMemory bit for TCG globals here, but Unicorn won't
65-
# movi_i32 tmp5,$0x4 |
66-
# mov_i32 tmp6,r2 | # this is the 1st bug-related op get analyzed.
67-
# sub_i32 NF,r2,tmp5 ----------------|-----> # here, NF is an output operand, it's flagged dead
68-
# mov_i32 ZF,NF | # and the InMemory bit is clear, tell the previous(above) ops
69-
# setcond_i32 CF,r2,tmp5,geu | # if it is used as output operand again, do not sync it
70-
# xor_i32 VF,NF,r2 | # so the generated host-code for previous ops will not write it back to Memory
71-
# xor_i32 tmp7,r2,tmp5 | # Caution: the CPU flags after this calculation is also right, because the set_label is a point of refresh, make them *REMEMBERED*
72-
# and_i32 VF,VF,tmp7 | # Let's call this Part-B
73-
# mov_i32 tmp6,NF |
74-
# mov_i32 r2,ZF |
75-
# set_label $0x2 -----------------
76-
# xor_i32 tmp5,VF,NF -----------------
77-
# movi_i32 tmp6,$0x0 |
78-
# brcond_i32 tmp5,tmp6,lt,$0x3 | # Let's call this Part-A
79-
# movi_i32 tmp5,$0x64 | # if Part-B is not skipped, this part won't go wrong, because we'll check the CPU flags as the result of Part-B, it's *REMEMBERED*
80-
# movi_i32 r3,$0x64 |-----> # but if Part-B is skipped,
81-
# set_label $0x3 | # what should we expected? we will check the condition based on the result of Part-D!!!
82-
# call wfi,$0x0,$0,env | # because result of Part-C is lost. this is why things go wrong.
83-
# set_label $0x0 |
84-
# exit_tb $0x7f6401714013 -----------------
85-
# ###########
86-
# ----- TCG ends ------
87-
5+
'''
6+
issue #287
7+
Initial Register States: R0=3, R1=24, R2=16, R3=0
8+
----- code start -----
9+
CMP R0,R1,LSR#3
10+
SUBCS R0,R0,R1,LSR#3 # CPU flags got changed in these two instructions, and *REMEMBERED*, now NF == VF == 0
11+
CMP R0,#1 # CPU flags changed again, now NF == 1, VF == 0, but they are not properly *REMEMBERED*
12+
MOV R1,R1,LSR#4
13+
SUBGES R2,R2,#4 # according to the result of CMP, we should skip this op
14+
15+
MOVGE R3,#100 # since changed flags are not *REMEMBERED* in CMP, now NF == VF == 0, which result in wrong branch
16+
# at the end of this code block, should R3 == 0
17+
----- code end ------
18+
19+
# TCG ops are correct, plain op translation is done correctly,
20+
# but there're In-Memory bits invisible from ops that control the host code generation.
21+
# all these codes are in one TCG translation-block, so wrong things could happen.
22+
# detail explanation is given on the right side.
23+
# remember, both set_label and brcond are point to refresh the dead_temps and mem_temps states in TCG
24+
----- TCG ops ------
25+
ld_i32 tmp5,env,$0xfffffffffffffff4
26+
movi_i32 tmp6,$0x0
27+
brcond_i32 tmp5,tmp6,ne,$0x0
28+
mov_i32 tmp5,r1 -------------------------
29+
movi_i32 tmp6,$0x3 |
30+
shr_i32 tmp5,r1,tmp6 |
31+
mov_i32 tmp6,r0 |
32+
sub_i32 NF,r0,tmp5 |
33+
mov_i32 ZF,NF |
34+
setcond_i32 CF,r0,tmp5,geu | # This part is "CMP R0,R1,LSR#3"
35+
xor_i32 VF,NF,r0 |-----> # and "SUBCS R0,R0,R1,LSR#3"
36+
xor_i32 tmp7,r0,tmp5 | # the last op in this block, set_label get a chance to refresh the TCG globals memory states,
37+
and_i32 VF,VF,tmp7 | # so things get back to normal states
38+
mov_i32 tmp6,NF | # these codes are not affected by the bug. Let's called this Part-D
39+
movi_i32 tmp5,$0x0 |
40+
brcond_i32 CF,tmp5,eq,$0x1 |
41+
mov_i32 tmp5,r1 |
42+
movi_i32 tmp6,$0x3 |
43+
shr_i32 tmp5,r1,tmp6 |
44+
mov_i32 tmp6,r0 |
45+
sub_i32 tmp6,r0,tmp5 |
46+
mov_i32 r0,tmp6 |
47+
set_label $0x1 -------------------------
48+
movi_i32 tmp5,$0x1 ----------------- # Let's called this Part-C
49+
mov_i32 tmp6,r0 | # NF is used as output operand again!
50+
sub_i32 NF,r0,tmp5 ----------------|-----> # but it is stated as Not-In-Memory,
51+
mov_i32 ZF,NF | # no need to sync it after calculation.
52+
setcond_i32 CF,r0,tmp5,geu | # the generated host code does not write NF
53+
xor_i32 VF,NF,r0 | # back to its memory location, hence forgot. And the CPU flags after this calculation is not changed.
54+
xor_i32 tmp7,r0,tmp5 | # Caution: the following SUBGES's condition check is right, even though the generated host code does not *REMEMBER* NF, it will cache the calculated result and serve SUBGES correctly
55+
and_i32 VF,VF,tmp7 |
56+
mov_i32 tmp6,NF |
57+
mov_i32 tmp5,r1 | # this part is "CMP R0,#1"
58+
movi_i32 tmp6,$0x4 | # and "MOV R1,R1,LSR#4"
59+
shr_i32 tmp5,r1,tmp6 | # and "SUBGES R2,R2,#4"
60+
mov_i32 r1,tmp5 |-----> # This is the part where problem start to arise
61+
xor_i32 tmp5,VF,NF |
62+
movi_i32 tmp6,$0x0 |
63+
brcond_i32 tmp5,tmp6,lt,$0x2 --------|-----> # QEMU will refresh the InMemory bit for TCG globals here, but Unicorn won't
64+
movi_i32 tmp5,$0x4 |
65+
mov_i32 tmp6,r2 | # this is the 1st bug-related op get analyzed.
66+
sub_i32 NF,r2,tmp5 ----------------|-----> # here, NF is an output operand, it's flagged dead
67+
mov_i32 ZF,NF | # and the InMemory bit is clear, tell the previous(above) ops
68+
setcond_i32 CF,r2,tmp5,geu | # if it is used as output operand again, do not sync it
69+
xor_i32 VF,NF,r2 | # so the generated host-code for previous ops will not write it back to Memory
70+
xor_i32 tmp7,r2,tmp5 | # Caution: the CPU flags after this calculation is also right, because the set_label is a point of refresh, make them *REMEMBERED*
71+
and_i32 VF,VF,tmp7 | # Let's call this Part-B
72+
mov_i32 tmp6,NF |
73+
mov_i32 r2,ZF |
74+
set_label $0x2 -----------------
75+
xor_i32 tmp5,VF,NF -----------------
76+
movi_i32 tmp6,$0x0 |
77+
brcond_i32 tmp5,tmp6,lt,$0x3 | # Let's call this Part-A
78+
movi_i32 tmp5,$0x64 | # if Part-B is not skipped, this part won't go wrong, because we'll check the CPU flags as the result of Part-B, it's *REMEMBERED*
79+
movi_i32 r3,$0x64 |-----> # but if Part-B is skipped,
80+
set_label $0x3 | # what should we expected? we will check the condition based on the result of Part-D!!!
81+
call wfi,$0x0,$0,env | # because result of Part-C is lost. this is why things go wrong.
82+
set_label $0x0 |
83+
exit_tb $0x7f6401714013 -----------------
84+
###########
85+
----- TCG ends ------
86+
'''
8887

8988
CODE = (
9089
b'\xa1\x01\x50\xe1' # cmp r0, r1, lsr #3

tests/unit/test_mips.c

+16-9
Original file line numberDiff line numberDiff line change
@@ -101,22 +101,29 @@ static void test_mips_stop_delay_slot_from_qiling(void)
101101
{
102102
uc_engine *uc;
103103
// 24 06 00 03 addiu $a2, $zero, 3
104-
// 10 a6 00 79 beq $a1, $a2, 0x47c8da4
104+
// 10 a6 00 79 beq $a1, $a2, 0x1e8
105105
// 30 42 00 fc andi $v0, $v0, 0xfc
106+
// 10 40 00 32 beqz $v0, 0x47c8c90
107+
// 24 ab ff da addiu $t3, $a1, -0x26
108+
// 2d 62 00 02 sltiu $v0, $t3, 2
109+
// 10 40 00 32 beqz $v0, 0x47c8c9c
110+
// 00 00 00 00 nop
106111
char code[] =
107-
"\x24\x06\x00\x03\x10\xa6\x00\x79\x30\x42\x00\xfc";
112+
"\x24\x06\x00\x03\x10\xa6\x00\x79\x30\x42\x00\xfc\x10\x40\x00\x32\x24\xab\xff\xda\x2d\x62\x00\x02\x10\x40\x00\x32\x00\x00\x00\x00";
108113
uint32_t r_pc = 0x0;
109-
uint32_t r_a2 = 1;
110-
114+
uint32_t r_v0 = 0xff;
115+
uint32_t r_a1 = 0x3;
116+
111117
uc_common_setup(&uc, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN,
112118
code, sizeof(code) - 1);
113-
114-
OK(uc_reg_write(uc, UC_MIPS_REG_A2, &r_a2));
115-
116-
OK(uc_emu_start(uc, code_start, code_start + sizeof(code) - 1, 0, 2));
119+
OK(uc_reg_write(uc, UC_MIPS_REG_V0, &r_v0));
120+
OK(uc_reg_write(uc, UC_MIPS_REG_A1, &r_a1));
121+
OK(uc_emu_start(uc, code_start, code_start + sizeof(code) + 16, 0, 2));
117122

118123
OK(uc_reg_read(uc, UC_MIPS_REG_PC, &r_pc));
119-
TEST_CHECK(r_pc == code_start + 12);
124+
OK(uc_reg_read(uc, UC_MIPS_REG_V0, &r_v0));
125+
TEST_CHECK(r_pc == code_start + 4 + 0x1e8);
126+
TEST_CHECK(r_v0 == 0xfc);
120127

121128
OK(uc_close(uc));
122129
}

0 commit comments

Comments
 (0)