QEMU-Devel Archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs
@ 2015-09-14 10:34 James Hogan
  2015-09-14 16:55 ` Richard Henderson
  2015-09-17 15:52 ` Aurelien Jarno
  0 siblings, 2 replies; 3+ messages in thread
From: James Hogan @ 2015-09-14 10:34 UTC (permalink / raw
  To: Aurelien Jarno; +Cc: James Hogan, qemu-devel, Richard Henderson

The MIPS TCG backend implements qemu_ld with 64-bit targets using the v0
register (base) as a temporary to load the upper half of the QEMU TLB
comparator (see line 5 below), however this happens before the input
address is used (line 8 to mask off the low bits for the TLB
comparison, and line 12 to add the host-guest offset). If the input
address (addrl) also happens to have been placed in v0 (as in the second
column below), it gets clobbered before it is used.

     addrl in t2              addrl in v0

 1 srl     a0,t2,0x7        srl     a0,v0,0x7
 2 andi    a0,a0,0x1fe0     andi    a0,a0,0x1fe0
 3 addu    a0,a0,s0         addu    a0,a0,s0
 4 lw      at,9136(a0)      lw      at,9136(a0)      set TCG_TMP0 (at)
 5 lw      v0,9140(a0)      lw      v0,9140(a0)      set base (v0)
 6 li      t9,-4093         li      t9,-4093
 7 lw      a0,9160(a0)      lw      a0,9160(a0)      set addend (a0)
 8 and     t9,t9,t2         and     t9,t9,v0         use addrl
 9 bne     at,t9,0x836d8c8  bne     at,t9,0x836d838  use TCG_TMP0
10  nop                      nop
11 bne     v0,t8,0x836d8c8  bne     v0,a1,0x836d838  use base
12  addu   v0,a0,t2          addu   v0,a0,v0         use addrl, addend
13 lw      t0,0(v0)         lw      t0,0(v0)

Fix by using TCG_TMP0 (at) as the temporary instead of v0 (base),
pushing the load on line 5 forward into the delay slot of the low
comparison (line 10). The early load of the addend on line 7 also needs
pushing even further for 64-bit targets, or it will clobber a0 before
we're done with it. The output for 32-bit targets is unaffected.

 srl     a0,v0,0x7
 andi    a0,a0,0x1fe0
 addu    a0,a0,s0
 lw      at,9136(a0)
-lw      v0,9140(a0)      load high comparator
 li      t9,-4093
-lw      a0,9160(a0)      load addend
 and     t9,t9,v0
 bne     at,t9,0x836d838
- nop
+ lw     at,9140(a0)      load high comparator
+lw      a0,9160(a0)      load addend
-bne     v0,a1,0x836d838
+bne     at,a1,0x836d838
  addu   v0,a0,v0
 lw      t0,0(v0)

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
---
Tested with mips64el target (as in output diff above) and mipsel
target (confirmed that the case was hit at least once during guest
kernel boot and continued to work fine).
---
 tcg/mips/tcg-target.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c0ce520228a7..38c968220b9a 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -962,30 +962,34 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
         add_off -= 0x7ff0;
     }
 
-    /* Load the tlb comparator.  */
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF);
-        tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF);
-    } else {
-        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off);
-    }
+    /* Load the (low half) tlb comparator.  */
+    tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
+                    cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
 
     /* Mask the page bits, keeping the alignment bits to compare against.
-       In between, load the tlb addend for the fast path.  */
+       In between on 32-bit targets, load the tlb addend for the fast path.  */
     tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
                  TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+    }
     tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
 
     label_ptr[0] = s->code_ptr;
     tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
 
+    /* Load and test the high half tlb comparator.  */
     if (TARGET_LONG_BITS == 64) {
         /* delay slot */
-        tcg_out_nop(s);
+        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
+
+        /* Load the tlb addend for the fast path. We can't do it earlier with
+           64-bit targets or we'll clobber a0 before reading the high half tlb
+           comparator.  */
+        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
 
         label_ptr[1] = s->code_ptr;
-        tcg_out_opc_br(s, OPC_BNE, addrh, base);
+        tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
     }
 
     /* delay slot */
-- 
2.4.6

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs
  2015-09-14 10:34 [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs James Hogan
@ 2015-09-14 16:55 ` Richard Henderson
  2015-09-17 15:52 ` Aurelien Jarno
  1 sibling, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2015-09-14 16:55 UTC (permalink / raw
  To: James Hogan, Aurelien Jarno; +Cc: qemu-devel

On 09/14/2015 03:34 AM, James Hogan wrote:
> The MIPS TCG backend implements qemu_ld with 64-bit targets using the v0
> register (base) as a temporary to load the upper half of the QEMU TLB
> comparator (see line 5 below), however this happens before the input
> address is used (line 8 to mask off the low bits for the TLB
> comparison, and line 12 to add the host-guest offset). If the input
> address (addrl) also happens to have been placed in v0 (as in the second
> column below), it gets clobbered before it is used.
> 
>      addrl in t2              addrl in v0
> 
>  1 srl     a0,t2,0x7        srl     a0,v0,0x7
>  2 andi    a0,a0,0x1fe0     andi    a0,a0,0x1fe0
>  3 addu    a0,a0,s0         addu    a0,a0,s0
>  4 lw      at,9136(a0)      lw      at,9136(a0)      set TCG_TMP0 (at)
>  5 lw      v0,9140(a0)      lw      v0,9140(a0)      set base (v0)
>  6 li      t9,-4093         li      t9,-4093
>  7 lw      a0,9160(a0)      lw      a0,9160(a0)      set addend (a0)
>  8 and     t9,t9,t2         and     t9,t9,v0         use addrl
>  9 bne     at,t9,0x836d8c8  bne     at,t9,0x836d838  use TCG_TMP0
> 10  nop                      nop
> 11 bne     v0,t8,0x836d8c8  bne     v0,a1,0x836d838  use base
> 12  addu   v0,a0,t2          addu   v0,a0,v0         use addrl, addend
> 13 lw      t0,0(v0)         lw      t0,0(v0)
> 
> Fix by using TCG_TMP0 (at) as the temporary instead of v0 (base),
> pushing the load on line 5 forward into the delay slot of the low
> comparison (line 10). The early load of the addend on line 7 also needs
> pushing even further for 64-bit targets, or it will clobber a0 before
> we're done with it. The output for 32-bit targets is unaffected.
> 
>  srl     a0,v0,0x7
>  andi    a0,a0,0x1fe0
>  addu    a0,a0,s0
>  lw      at,9136(a0)
> -lw      v0,9140(a0)      load high comparator
>  li      t9,-4093
> -lw      a0,9160(a0)      load addend
>  and     t9,t9,v0
>  bne     at,t9,0x836d838
> - nop
> + lw     at,9140(a0)      load high comparator
> +lw      a0,9160(a0)      load addend
> -bne     v0,a1,0x836d838
> +bne     at,a1,0x836d838
>   addu   v0,a0,v0
>  lw      t0,0(v0)
> 
> Suggested-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: James Hogan <james.hogan@imgtec.com>
> Cc: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs
  2015-09-14 10:34 [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs James Hogan
  2015-09-14 16:55 ` Richard Henderson
@ 2015-09-17 15:52 ` Aurelien Jarno
  1 sibling, 0 replies; 3+ messages in thread
From: Aurelien Jarno @ 2015-09-17 15:52 UTC (permalink / raw
  To: James Hogan; +Cc: qemu-devel, Richard Henderson

On 2015-09-14 11:34, James Hogan wrote:
> The MIPS TCG backend implements qemu_ld with 64-bit targets using the v0
> register (base) as a temporary to load the upper half of the QEMU TLB
> comparator (see line 5 below), however this happens before the input
> address is used (line 8 to mask off the low bits for the TLB
> comparison, and line 12 to add the host-guest offset). If the input
> address (addrl) also happens to have been placed in v0 (as in the second
> column below), it gets clobbered before it is used.
> 
>      addrl in t2              addrl in v0
> 
>  1 srl     a0,t2,0x7        srl     a0,v0,0x7
>  2 andi    a0,a0,0x1fe0     andi    a0,a0,0x1fe0
>  3 addu    a0,a0,s0         addu    a0,a0,s0
>  4 lw      at,9136(a0)      lw      at,9136(a0)      set TCG_TMP0 (at)
>  5 lw      v0,9140(a0)      lw      v0,9140(a0)      set base (v0)
>  6 li      t9,-4093         li      t9,-4093
>  7 lw      a0,9160(a0)      lw      a0,9160(a0)      set addend (a0)
>  8 and     t9,t9,t2         and     t9,t9,v0         use addrl
>  9 bne     at,t9,0x836d8c8  bne     at,t9,0x836d838  use TCG_TMP0
> 10  nop                      nop
> 11 bne     v0,t8,0x836d8c8  bne     v0,a1,0x836d838  use base
> 12  addu   v0,a0,t2          addu   v0,a0,v0         use addrl, addend
> 13 lw      t0,0(v0)         lw      t0,0(v0)
> 
> Fix by using TCG_TMP0 (at) as the temporary instead of v0 (base),
> pushing the load on line 5 forward into the delay slot of the low
> comparison (line 10). The early load of the addend on line 7 also needs
> pushing even further for 64-bit targets, or it will clobber a0 before
> we're done with it. The output for 32-bit targets is unaffected.
> 
>  srl     a0,v0,0x7
>  andi    a0,a0,0x1fe0
>  addu    a0,a0,s0
>  lw      at,9136(a0)
> -lw      v0,9140(a0)      load high comparator
>  li      t9,-4093
> -lw      a0,9160(a0)      load addend
>  and     t9,t9,v0
>  bne     at,t9,0x836d838
> - nop
> + lw     at,9140(a0)      load high comparator
> +lw      a0,9160(a0)      load addend
> -bne     v0,a1,0x836d838
> +bne     at,a1,0x836d838
>   addu   v0,a0,v0
>  lw      t0,0(v0)
> 
> Suggested-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: James Hogan <james.hogan@imgtec.com>
> Cc: Aurelien Jarno <aurelien@aurel32.net>
> ---
> Tested with mips64el target (as in output diff above) and mipsel
> target (confirmed that the case was hit at least once during guest
> kernel boot and continued to work fine).
> ---
>  tcg/mips/tcg-target.c | 26 +++++++++++++++-----------
>  1 file changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
> index c0ce520228a7..38c968220b9a 100644
> --- a/tcg/mips/tcg-target.c
> +++ b/tcg/mips/tcg-target.c
> @@ -962,30 +962,34 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
>          add_off -= 0x7ff0;
>      }
>  
> -    /* Load the tlb comparator.  */
> -    if (TARGET_LONG_BITS == 64) {
> -        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF);
> -        tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF);
> -    } else {
> -        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off);
> -    }
> +    /* Load the (low half) tlb comparator.  */
> +    tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
> +                    cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
>  
>      /* Mask the page bits, keeping the alignment bits to compare against.
> -       In between, load the tlb addend for the fast path.  */
> +       In between on 32-bit targets, load the tlb addend for the fast path.  */
>      tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
>                   TARGET_PAGE_MASK | ((1 << s_bits) - 1));
> -    tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
> +    if (TARGET_LONG_BITS == 32) {
> +        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
> +    }
>      tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
>  
>      label_ptr[0] = s->code_ptr;
>      tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
>  
> +    /* Load and test the high half tlb comparator.  */
>      if (TARGET_LONG_BITS == 64) {
>          /* delay slot */
> -        tcg_out_nop(s);
> +        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
> +
> +        /* Load the tlb addend for the fast path. We can't do it earlier with
> +           64-bit targets or we'll clobber a0 before reading the high half tlb
> +           comparator.  */
> +        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
>  
>          label_ptr[1] = s->code_ptr;
> -        tcg_out_opc_br(s, OPC_BNE, addrh, base);
> +        tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
>      }
>  
>      /* delay slot */

Thanks for fixing this. I guess we also want this patch in the stable
releases, so I'll add a Cc to stable in the pull request.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-09-17 15:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-09-14 10:34 [Qemu-devel] [PATCH] tcg/mips: Fix clobbering of qemu_ld inputs James Hogan
2015-09-14 16:55 ` Richard Henderson
2015-09-17 15:52 ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).