* [Qemu-devel] [PULL 1/6] target-sh4: add flags markups for FP helpers
2015-09-13 21:18 [Qemu-devel] [PULL 0/6] sh4-next queue Aurelien Jarno
@ 2015-09-13 21:18 ` Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 2/6] target-sh4: use deposit in swap.b instruction Aurelien Jarno
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2015-09-13 21:18 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Most floating point helpers can trigger an exception, but don't change
the globals. Mark these helpers as TCG_CALL_NO_WG.
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/helper.h | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index c9bc407..dce859c 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -18,28 +18,28 @@ DEF_HELPER_2(ld_fpscr, void, env, i32)
DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32)
DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64)
-DEF_HELPER_3(fadd_FT, f32, env, f32, f32)
-DEF_HELPER_3(fadd_DT, f64, env, f64, f64)
-DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32)
-DEF_HELPER_2(fcnvds_DT_FT, f32, env, f64)
+DEF_HELPER_FLAGS_3(fadd_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32)
+DEF_HELPER_FLAGS_2(fcnvds_DT_FT, TCG_CALL_NO_WG, f32, env, f64)
DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32)
DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64)
DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32)
DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64)
-DEF_HELPER_3(fdiv_FT, f32, env, f32, f32)
-DEF_HELPER_3(fdiv_DT, f64, env, f64, f64)
-DEF_HELPER_2(float_FT, f32, env, i32)
-DEF_HELPER_2(float_DT, f64, env, i32)
-DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32)
-DEF_HELPER_3(fmul_FT, f32, env, f32, f32)
-DEF_HELPER_3(fmul_DT, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fdiv_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fdiv_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_2(float_FT, TCG_CALL_NO_WG, f32, env, i32)
+DEF_HELPER_FLAGS_2(float_DT, TCG_CALL_NO_WG, f64, env, i32)
+DEF_HELPER_FLAGS_4(fmac_FT, TCG_CALL_NO_WG, f32, env, f32, f32, f32)
+DEF_HELPER_FLAGS_3(fmul_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fmul_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32)
-DEF_HELPER_3(fsub_FT, f32, env, f32, f32)
-DEF_HELPER_3(fsub_DT, f64, env, f64, f64)
-DEF_HELPER_2(fsqrt_FT, f32, env, f32)
-DEF_HELPER_2(fsqrt_DT, f64, env, f64)
-DEF_HELPER_2(ftrc_FT, i32, env, f32)
-DEF_HELPER_2(ftrc_DT, i32, env, f64)
+DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32)
+DEF_HELPER_FLAGS_2(fsqrt_DT, TCG_CALL_NO_WG, f64, env, f64)
+DEF_HELPER_FLAGS_2(ftrc_FT, TCG_CALL_NO_WG, i32, env, f32)
+DEF_HELPER_FLAGS_2(ftrc_DT, TCG_CALL_NO_WG, i32, env, f64)
DEF_HELPER_3(fipr, void, env, i32, i32)
DEF_HELPER_2(ftrv, void, env, i32)
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PULL 3/6] target-sh4: improve cmp/str instruction
2015-09-13 21:18 [Qemu-devel] [PULL 0/6] sh4-next queue Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 1/6] target-sh4: add flags markups for FP helpers Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 2/6] target-sh4: use deposit in swap.b instruction Aurelien Jarno
@ 2015-09-13 21:18 ` Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 4/6] target-sh4: improve shld instruction Aurelien Jarno
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2015-09-13 21:18 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Instead of testing bytes one by one, we can use the following trick
from https://graphics.stanford.edu/~seander/bithacks.html:
haszero(v) = (v - 0x01010101) & ~v & 0x80808080
The sub-expression v - 0x01010101 evaluates to a high bit set in any
byte whenever the corresponding byte in v is zero or greater than 0x80.
The sub-expression ~v & 0x80808080 evaluates to high bits set in bytes
where the byte of v doesn't have its high bit set (so the byte was less
than 0x80). Finally, by ANDing these two sub-expressions the result is
the high bits set where the bytes in v were zero, since the high bits
set due to a value greater than 0x80 in the first sub-expression are
masked off by the second.
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 50043cf..ca6ef5a 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -688,18 +688,11 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv cmp1 = tcg_temp_new();
TCGv cmp2 = tcg_temp_new();
- tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8));
- tcg_gen_andi_i32(cmp2, cmp1, 0xff000000);
- tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0);
- tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000);
- tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
- tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00);
- tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
- tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff);
- tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
+ tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8));
+ tcg_gen_subi_i32(cmp1, cmp2, 0x01010101);
+ tcg_gen_andc_i32(cmp1, cmp1, cmp2);
+ tcg_gen_andi_i32(cmp1, cmp1, 0x80808080);
+ tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0);
tcg_temp_free(cmp2);
tcg_temp_free(cmp1);
}
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PULL 4/6] target-sh4: improve shld instruction
2015-09-13 21:18 [Qemu-devel] [PULL 0/6] sh4-next queue Aurelien Jarno
` (2 preceding siblings ...)
2015-09-13 21:18 ` [Qemu-devel] [PULL 3/6] target-sh4: improve cmp/str instruction Aurelien Jarno
@ 2015-09-13 21:18 ` Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 5/6] target-sh4: improve shad instruction Aurelien Jarno
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2015-09-13 21:18 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
The SH4 shld instruction can shift in both directions, depending on the
sign of the shift amount. This is currently implemented using branches,
which is not really efficient and prevents the optimizer from doing its
job. In practice it is often used with a constant loaded in a register
just before.
Simplify the implementation by computing both the value shifted to the
left and the value shifted to the right, and then selecting the correct
one with a movcond. Because a negative value can yield a shift amount of
up to 32, which is undefined, the right shift is done in two steps.
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 48 ++++++++++++++++++++++--------------------------
1 file changed, 22 insertions(+), 26 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index ca6ef5a..c8dd3a7 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -867,32 +867,28 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x400d: /* shld Rm,Rn */
{
- TCGLabel *label1 = gen_new_label();
- TCGLabel *label2 = gen_new_label();
- TCGLabel *label3 = gen_new_label();
- TCGv shift;
- tcg_gen_brcondi_i32(TCG_COND_LT, REG(B7_4), 0, label1);
- /* Rm positive, shift to the left */
- shift = tcg_temp_new();
- tcg_gen_andi_i32(shift, REG(B7_4), 0x1f);
- tcg_gen_shl_i32(REG(B11_8), REG(B11_8), shift);
- tcg_temp_free(shift);
- tcg_gen_br(label3);
- /* Rm negative, shift to the right */
- gen_set_label(label1);
- shift = tcg_temp_new();
- tcg_gen_andi_i32(shift, REG(B7_4), 0x1f);
- tcg_gen_brcondi_i32(TCG_COND_EQ, shift, 0, label2);
- tcg_gen_not_i32(shift, REG(B7_4));
- tcg_gen_andi_i32(shift, shift, 0x1f);
- tcg_gen_addi_i32(shift, shift, 1);
- tcg_gen_shr_i32(REG(B11_8), REG(B11_8), shift);
- tcg_temp_free(shift);
- tcg_gen_br(label3);
- /* Rm = -32 */
- gen_set_label(label2);
- tcg_gen_movi_i32(REG(B11_8), 0);
- gen_set_label(label3);
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);
+
+ /* positive case: shift to the left */
+ tcg_gen_shl_i32(t1, REG(B11_8), t0);
+
+ /* negative case: shift to the right in two steps to
+ correctly handle the -32 case */
+ tcg_gen_xori_i32(t0, t0, 0x1f);
+ tcg_gen_shr_i32(t2, REG(B11_8), t0);
+ tcg_gen_shri_i32(t2, t2, 1);
+
+ /* select between the two cases */
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
}
return;
case 0x3008: /* sub Rm,Rn */
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PULL 5/6] target-sh4: improve shad instruction
2015-09-13 21:18 [Qemu-devel] [PULL 0/6] sh4-next queue Aurelien Jarno
` (3 preceding siblings ...)
2015-09-13 21:18 ` [Qemu-devel] [PULL 4/6] target-sh4: improve shld instruction Aurelien Jarno
@ 2015-09-13 21:18 ` Aurelien Jarno
2015-09-13 21:18 ` [Qemu-devel] [PULL 6/6] sh4: Fix initramfs initialization for endiannes-mismatched targets Aurelien Jarno
2015-09-14 13:05 ` [Qemu-devel] [PULL 0/6] sh4-next queue Peter Maydell
6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2015-09-13 21:18 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
The SH4 shad instruction can shift in both directions, depending on the
sign of the shift amount. This is currently implemented using branches,
which is not really efficient and prevents the optimizer from doing its
job. In practice it is often used with a constant loaded in a register
just before.
Simplify the implementation by computing both the value shifted to the
left and the value shifted to the right, and then selecting the correct
one with a movcond. Because a negative value can yield a shift amount of
up to 32, which is undefined, the right shift is done in two steps.
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 53 +++++++++++++++++++++-----------------------------
1 file changed, 22 insertions(+), 31 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index c8dd3a7..724c0e7 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -832,37 +832,28 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x400c: /* shad Rm,Rn */
{
- TCGLabel *label1 = gen_new_label();
- TCGLabel *label2 = gen_new_label();
- TCGLabel *label3 = gen_new_label();
- TCGLabel *label4 = gen_new_label();
- TCGv shift;
- tcg_gen_brcondi_i32(TCG_COND_LT, REG(B7_4), 0, label1);
- /* Rm positive, shift to the left */
- shift = tcg_temp_new();
- tcg_gen_andi_i32(shift, REG(B7_4), 0x1f);
- tcg_gen_shl_i32(REG(B11_8), REG(B11_8), shift);
- tcg_temp_free(shift);
- tcg_gen_br(label4);
- /* Rm negative, shift to the right */
- gen_set_label(label1);
- shift = tcg_temp_new();
- tcg_gen_andi_i32(shift, REG(B7_4), 0x1f);
- tcg_gen_brcondi_i32(TCG_COND_EQ, shift, 0, label2);
- tcg_gen_not_i32(shift, REG(B7_4));
- tcg_gen_andi_i32(shift, shift, 0x1f);
- tcg_gen_addi_i32(shift, shift, 1);
- tcg_gen_sar_i32(REG(B11_8), REG(B11_8), shift);
- tcg_temp_free(shift);
- tcg_gen_br(label4);
- /* Rm = -32 */
- gen_set_label(label2);
- tcg_gen_brcondi_i32(TCG_COND_LT, REG(B11_8), 0, label3);
- tcg_gen_movi_i32(REG(B11_8), 0);
- tcg_gen_br(label4);
- gen_set_label(label3);
- tcg_gen_movi_i32(REG(B11_8), 0xffffffff);
- gen_set_label(label4);
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);
+
+ /* positive case: shift to the left */
+ tcg_gen_shl_i32(t1, REG(B11_8), t0);
+
+ /* negative case: shift to the right in two steps to
+ correctly handle the -32 case */
+ tcg_gen_xori_i32(t0, t0, 0x1f);
+ tcg_gen_sar_i32(t2, REG(B11_8), t0);
+ tcg_gen_sari_i32(t2, t2, 1);
+
+ /* select between the two cases */
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
}
return;
case 0x400d: /* shld Rm,Rn */
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread