* [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
@ 2015-06-25 0:04 Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Mario Smarduch @ 2015-06-25 0:04 UTC (permalink / raw
To: linux-arm-kernel
Currently we save/restore fp/simd on each exit. First patch optimizes arm64
save/restore, we only do so on Guest access. hackbench and
several lmbench tests show anywhere from 30% to above 50% optimization
achieved.
In second patch 32-bit handler is updated to keep exit handling consistent
with 64-bit code.
Changes since v1:
- Addressed Marc's comments
- Verified optimization improvements with lmbench and hackbench, updated
commit message
Changes since v2:
- only for patch 2/2
- Reworked trapping to vfp access handler
Changes since v3:
- Only for patch 2/2
- Removed load_vcpu in switch_to_guest_vfp per Marc's comment
- Got another chance to replace an unreferenced label with a comment
Mario Smarduch (2):
Optimize arm64 skip 30-50% vfp/simd save/restore on exits
keep arm vfp/simd exit handling consistent with arm64
arch/arm/kvm/interrupts.S | 14 +++++++-----
arch/arm64/include/asm/kvm_arm.h | 5 ++++-
arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
3 files changed, 55 insertions(+), 10 deletions(-)
--
1.7.9.5
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-06-25 0:04 [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
@ 2015-06-25 0:04 ` Mario Smarduch
2015-07-01 13:46 ` Christoffer Dall
2015-06-25 0:04 ` [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
2015-07-01 9:49 ` [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
2 siblings, 1 reply; 11+ messages in thread
From: Mario Smarduch @ 2015-06-25 0:04 UTC (permalink / raw
To: linux-arm-kernel
This patch only saves and restores FP/SIMD registers on Guest access. To do
this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
context is not saved/restored
Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
arch/arm64/include/asm/kvm_arm.h | 5 ++++-
arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ac6fafb..7605e09 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -171,10 +171,13 @@
#define HSTR_EL2_TTEE (1 << 16)
#define HSTR_EL2_T(x) (1 << x)
+/* Hyp Coproccessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
-#define CPTR_EL2_TFP (1 << 10)
+#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_TDRA (1 << 11)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 5befd01..de0788f 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -673,6 +673,15 @@
tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
.endm
+/*
+ * Check cptr VFP/SIMD accessed bit, if set VFP/SIMD not accessed by guest.
+ */
+.macro skip_fpsimd_state tmp, target
+ mrs \tmp, cptr_el2
+ tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
+.endm
+
+
.macro compute_debug_state target
// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
// is set, we do a full save/restore cycle and disable trapping.
@@ -763,6 +772,7 @@
ldr x2, [x0, #VCPU_HCR_EL2]
msr hcr_el2, x2
mov x2, #CPTR_EL2_TTA
+ orr x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
mov x2, #(1 << 15) // Trap CP15 Cr=15
@@ -785,7 +795,6 @@
.macro deactivate_traps
mov x2, #HCR_RW
msr hcr_el2, x2
- msr cptr_el2, xzr
msr hstr_el2, xzr
mrs x2, mdcr_el2
@@ -912,6 +921,28 @@ __restore_fpsimd:
restore_fpsimd
ret
+switch_to_guest_fpsimd:
+ push x4, lr
+
+ mrs x2, cptr_el2
+ bic x2, x2, #CPTR_EL2_TFP
+ msr cptr_el2, x2
+
+ mrs x0, tpidr_el2
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+ bl __save_fpsimd
+
+ add x2, x0, #VCPU_CONTEXT
+ bl __restore_fpsimd
+
+ pop x4, lr
+ pop x2, x3
+ pop x0, x1
+
+ eret
+
/*
* u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
*
@@ -932,7 +963,6 @@ ENTRY(__kvm_vcpu_run)
kern_hyp_va x2
save_host_regs
- bl __save_fpsimd
bl __save_sysregs
compute_debug_state 1f
@@ -948,7 +978,6 @@ ENTRY(__kvm_vcpu_run)
add x2, x0, #VCPU_CONTEXT
bl __restore_sysregs
- bl __restore_fpsimd
skip_debug_state x3, 1f
bl __restore_debug
@@ -967,7 +996,9 @@ __kvm_vcpu_return:
add x2, x0, #VCPU_CONTEXT
save_guest_regs
+ skip_fpsimd_state x3, 1f
bl __save_fpsimd
+1:
bl __save_sysregs
skip_debug_state x3, 1f
@@ -986,7 +1017,11 @@ __kvm_vcpu_return:
kern_hyp_va x2
bl __restore_sysregs
+ skip_fpsimd_state x3, 1f
bl __restore_fpsimd
+1:
+ /* Clear FPSIMD and Trace trapping */
+ msr cptr_el2, xzr
skip_debug_state x3, 1f
// Clear the dirty flag for the next run, as all the state has
@@ -1201,6 +1236,11 @@ el1_trap:
* x1: ESR
* x2: ESR_EC
*/
+
+ /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ cmp x2, #ESR_ELx_EC_FP_ASIMD
+ b.eq switch_to_guest_fpsimd
+
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64
2015-06-25 0:04 [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
@ 2015-06-25 0:04 ` Mario Smarduch
2015-07-01 13:50 ` Christoffer Dall
2015-07-01 9:49 ` [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
2 siblings, 1 reply; 11+ messages in thread
From: Mario Smarduch @ 2015-06-25 0:04 UTC (permalink / raw
To: linux-arm-kernel
After enhancing arm64 FP/SIMD exit handling, ARMv7 VFP exit branch is moved
to guest trap handling. This allows us to keep exit handling flow between both
architectures consistent.
Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
arch/arm/kvm/interrupts.S | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79..b245b4e 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -363,10 +363,6 @@ hyp_hvc:
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
- cmp r0, #HSR_EC_CP_0_13
- beq switch_to_guest_vfp
-#endif
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
@@ -380,7 +376,10 @@ hyp_hvc:
cmp r2, #0
bne guest_trap @ Guest called HVC
-host_switch_to_hyp:
+ /*
+ * Getting here means host called HVC, we shift parameters and branch
+ * to Hyp function.
+ */
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
@@ -411,6 +410,10 @@ guest_trap:
@ Check if we need the fault information
lsr r1, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+ cmp r1, #HSR_EC_CP_0_13
+ beq switch_to_guest_vfp
+#endif
cmp r1, #HSR_EC_IABT
mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
beq 2f
@@ -479,7 +482,6 @@ guest_trap:
*/
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
- load_vcpu @ Load VCPU pointer to r0
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
2015-06-25 0:04 [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
@ 2015-07-01 9:49 ` Christoffer Dall
2015-07-02 17:49 ` Mario Smarduch
2 siblings, 1 reply; 11+ messages in thread
From: Christoffer Dall @ 2015-07-01 9:49 UTC (permalink / raw
To: linux-arm-kernel
On Wed, Jun 24, 2015 at 05:04:10PM -0700, Mario Smarduch wrote:
> Currently we save/restore fp/simd on each exit. Fist patch optimizes arm64
> save/restore, we only do so on Guest access. hackbench and
> several lmbench tests show anywhere from 30% to above 50% optimzation
> achieved.
>
> In second patch 32-bit handler is updated to keep exit handling consistent
> with 64-bit code.
30-50% of what? The overhead or overall performance?
>
> Changes since v1:
> - Addressed Marcs comments
> - Verified optimization improvements with lmbench and hackbench, updated
> commit message
>
> Changes since v2:
> - only for patch 2/2
> - Reworked trapping to vfp access handler
>
> Changes since v3:
> - Only for patch 2/2
> - Removed load_vcpu in switch_to_guest_vfp per Marcs comment
> - Got another chance to replace an unreferenced label with a comment
>
>
> Mario Smarduch (2):
> Optimize arm64 skip 30-50% vfp/simd save/restore on exits
> keep arm vfp/simd exit handling consistent with arm64
>
> arch/arm/kvm/interrupts.S | 14 +++++++-----
> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
> 3 files changed, 55 insertions(+), 10 deletions(-)
>
> --
> 1.7.9.5
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-06-25 0:04 ` [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
@ 2015-07-01 13:46 ` Christoffer Dall
2015-07-02 21:51 ` Mario Smarduch
0 siblings, 1 reply; 11+ messages in thread
From: Christoffer Dall @ 2015-07-01 13:46 UTC (permalink / raw
To: linux-arm-kernel
On Wed, Jun 24, 2015 at 05:04:11PM -0700, Mario Smarduch wrote:
> This patch only saves and restores FP/SIMD registers on Guest access. To do
> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> context is not saved/restored
>
> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> ---
> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
> 2 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index ac6fafb..7605e09 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -171,10 +171,13 @@
> #define HSTR_EL2_TTEE (1 << 16)
> #define HSTR_EL2_T(x) (1 << x)
>
> +/* Hyp Coproccessor Trap Register Shifts */
> +#define CPTR_EL2_TFP_SHIFT 10
> +
> /* Hyp Coprocessor Trap Register */
> #define CPTR_EL2_TCPAC (1 << 31)
> #define CPTR_EL2_TTA (1 << 20)
> -#define CPTR_EL2_TFP (1 << 10)
> +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
>
> /* Hyp Debug Configuration Register bits */
> #define MDCR_EL2_TDRA (1 << 11)
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 5befd01..de0788f 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -673,6 +673,15 @@
> tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
> .endm
>
> +/*
> + * Check cptr VFP/SIMD accessed bit, if set VFP/SIMD not accessed by guest.
This comment doesn't really help me understand the function, may I
suggest:
Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
> + */
> +.macro skip_fpsimd_state tmp, target
> + mrs \tmp, cptr_el2
> + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
> +.endm
> +
> +
> .macro compute_debug_state target
> // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
> // is set, we do a full save/restore cycle and disable trapping.
> @@ -763,6 +772,7 @@
> ldr x2, [x0, #VCPU_HCR_EL2]
> msr hcr_el2, x2
> mov x2, #CPTR_EL2_TTA
> + orr x2, x2, #CPTR_EL2_TFP
> msr cptr_el2, x2
>
> mov x2, #(1 << 15) // Trap CP15 Cr=15
> @@ -785,7 +795,6 @@
> .macro deactivate_traps
> mov x2, #HCR_RW
> msr hcr_el2, x2
> - msr cptr_el2, xzr
> msr hstr_el2, xzr
>
> mrs x2, mdcr_el2
> @@ -912,6 +921,28 @@ __restore_fpsimd:
> restore_fpsimd
> ret
>
> +switch_to_guest_fpsimd:
> + push x4, lr
> +
> + mrs x2, cptr_el2
> + bic x2, x2, #CPTR_EL2_TFP
> + msr cptr_el2, x2
> +
> + mrs x0, tpidr_el2
> +
> + ldr x2, [x0, #VCPU_HOST_CONTEXT]
> + kern_hyp_va x2
> + bl __save_fpsimd
> +
> + add x2, x0, #VCPU_CONTEXT
> + bl __restore_fpsimd
> +
> + pop x4, lr
> + pop x2, x3
> + pop x0, x1
> +
> + eret
> +
> /*
> * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
> *
> @@ -932,7 +963,6 @@ ENTRY(__kvm_vcpu_run)
> kern_hyp_va x2
>
> save_host_regs
> - bl __save_fpsimd
> bl __save_sysregs
>
> compute_debug_state 1f
> @@ -948,7 +978,6 @@ ENTRY(__kvm_vcpu_run)
> add x2, x0, #VCPU_CONTEXT
>
> bl __restore_sysregs
> - bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> bl __restore_debug
> @@ -967,7 +996,9 @@ __kvm_vcpu_return:
> add x2, x0, #VCPU_CONTEXT
>
> save_guest_regs
> + skip_fpsimd_state x3, 1f
> bl __save_fpsimd
> +1:
> bl __save_sysregs
>
> skip_debug_state x3, 1f
> @@ -986,7 +1017,11 @@ __kvm_vcpu_return:
> kern_hyp_va x2
>
> bl __restore_sysregs
> + skip_fpsimd_state x3, 1f
> bl __restore_fpsimd
> +1:
> + /* Clear FPSIMD and Trace trapping */
> + msr cptr_el2, xzr
why not simply move the deactivate_traps down here instead?
>
> skip_debug_state x3, 1f
> // Clear the dirty flag for the next run, as all the state has
> @@ -1201,6 +1236,11 @@ el1_trap:
> * x1: ESR
> * x2: ESR_EC
> */
> +
> + /* Guest accessed VFP/SIMD registers, save host, restore Guest */
> + cmp x2, #ESR_ELx_EC_FP_ASIMD
> + b.eq switch_to_guest_fpsimd
> +
> cmp x2, #ESR_ELx_EC_DABT_LOW
> mov x0, #ESR_ELx_EC_IABT_LOW
> ccmp x2, x0, #4, ne
> --
> 1.7.9.5
>
Otherwise looks good,
-Christoffer
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64
2015-06-25 0:04 ` [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
@ 2015-07-01 13:50 ` Christoffer Dall
0 siblings, 0 replies; 11+ messages in thread
From: Christoffer Dall @ 2015-07-01 13:50 UTC (permalink / raw
To: linux-arm-kernel
On Wed, Jun 24, 2015 at 05:04:12PM -0700, Mario Smarduch wrote:
> After enhancing arm64 FP/SIMD exit handling, ARMv7 VFP exit branch is moved
> to guest trap handling. This allows us to keep exit handling flow between both
> architectures consistent.
>
> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> ---
> arch/arm/kvm/interrupts.S | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index 79caf79..b245b4e 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -363,10 +363,6 @@ hyp_hvc:
> @ Check syndrome register
> mrc p15, 4, r1, c5, c2, 0 @ HSR
> lsr r0, r1, #HSR_EC_SHIFT
> -#ifdef CONFIG_VFPv3
> - cmp r0, #HSR_EC_CP_0_13
> - beq switch_to_guest_vfp
> -#endif
> cmp r0, #HSR_EC_HVC
> bne guest_trap @ Not HVC instr.
>
> @@ -380,7 +376,10 @@ hyp_hvc:
> cmp r2, #0
> bne guest_trap @ Guest called HVC
>
> -host_switch_to_hyp:
> + /*
> + * Getting here means host called HVC, we shift parameters and branch
> + * to Hyp function.
> + */
> pop {r0, r1, r2}
>
> /* Check for __hyp_get_vectors */
> @@ -411,6 +410,10 @@ guest_trap:
>
> @ Check if we need the fault information
> lsr r1, r1, #HSR_EC_SHIFT
> +#ifdef CONFIG_VFPv3
> + cmp r1, #HSR_EC_CP_0_13
> + beq switch_to_guest_vfp
> +#endif
> cmp r1, #HSR_EC_IABT
> mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
> beq 2f
> @@ -479,7 +482,6 @@ guest_trap:
> */
> #ifdef CONFIG_VFPv3
> switch_to_guest_vfp:
> - load_vcpu @ Load VCPU pointer to r0
> push {r3-r7}
>
> @ NEON/VFP used. Turn on VFP access.
> --
> 1.7.9.5
>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
2015-07-01 9:49 ` [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
@ 2015-07-02 17:49 ` Mario Smarduch
2015-07-02 20:58 ` Christoffer Dall
0 siblings, 1 reply; 11+ messages in thread
From: Mario Smarduch @ 2015-07-02 17:49 UTC (permalink / raw
To: linux-arm-kernel
On 07/01/2015 02:49 AM, Christoffer Dall wrote:
> On Wed, Jun 24, 2015 at 05:04:10PM -0700, Mario Smarduch wrote:
>> Currently we save/restore fp/simd on each exit. Fist patch optimizes arm64
>> save/restore, we only do so on Guest access. hackbench and
>> several lmbench tests show anywhere from 30% to above 50% optimzation
>> achieved.
>>
>> In second patch 32-bit handler is updated to keep exit handling consistent
>> with 64-bit code.
>
> 30-50% of what? The overhead or overall performance?
Yes, so considering all exits to Host KVM anywhere from 30 to 50%
didn't require an fp/simd switch.
Anything else you like to see added here?
>
>>
>> Changes since v1:
>> - Addressed Marcs comments
>> - Verified optimization improvements with lmbench and hackbench, updated
>> commit message
>>
>> Changes since v2:
>> - only for patch 2/2
>> - Reworked trapping to vfp access handler
>>
>> Changes since v3:
>> - Only for patch 2/2
>> - Removed load_vcpu in switch_to_guest_vfp per Marcs comment
>> - Got another chance to replace an unreferenced label with a comment
>>
>>
>> Mario Smarduch (2):
>> Optimize arm64 skip 30-50% vfp/simd save/restore on exits
>> keep arm vfp/simd exit handling consistent with arm64
>>
>> arch/arm/kvm/interrupts.S | 14 +++++++-----
>> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
>> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
>> 3 files changed, 55 insertions(+), 10 deletions(-)
>>
>> --
>> 1.7.9.5
>>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
2015-07-02 17:49 ` Mario Smarduch
@ 2015-07-02 20:58 ` Christoffer Dall
0 siblings, 0 replies; 11+ messages in thread
From: Christoffer Dall @ 2015-07-02 20:58 UTC (permalink / raw
To: linux-arm-kernel
On Thu, Jul 02, 2015 at 10:49:03AM -0700, Mario Smarduch wrote:
> On 07/01/2015 02:49 AM, Christoffer Dall wrote:
> > On Wed, Jun 24, 2015 at 05:04:10PM -0700, Mario Smarduch wrote:
> >> Currently we save/restore fp/simd on each exit. Fist patch optimizes arm64
> >> save/restore, we only do so on Guest access. hackbench and
> >> several lmbench tests show anywhere from 30% to above 50% optimzation
> >> achieved.
> >>
> >> In second patch 32-bit handler is updated to keep exit handling consistent
> >> with 64-bit code.
> >
> > 30-50% of what? The overhead or overall performance?
>
> Yes, so considering all exits to Host KVM anywhere from 30 to 50%
> didn't require an fp/simd switch.
>
> Anything else you like to see added here?
No, I'm good with them. Marc is handling the tree these days so I'll
leave it up to him if we want to adjust patch 1 or what to do.
Thanks!
-Christoffer
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-07-01 13:46 ` Christoffer Dall
@ 2015-07-02 21:51 ` Mario Smarduch
2015-07-03 11:53 ` Christoffer Dall
0 siblings, 1 reply; 11+ messages in thread
From: Mario Smarduch @ 2015-07-02 21:51 UTC (permalink / raw
To: linux-arm-kernel
On 07/01/2015 06:46 AM, Christoffer Dall wrote:
> On Wed, Jun 24, 2015 at 05:04:11PM -0700, Mario Smarduch wrote:
>> This patch only saves and restores FP/SIMD registers on Guest access. To do
>> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
>> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
>> context is not saved/restored
>>
>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>> ---
>> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
>> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
>> 2 files changed, 47 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
>> index ac6fafb..7605e09 100644
>> --- a/arch/arm64/include/asm/kvm_arm.h
>> +++ b/arch/arm64/include/asm/kvm_arm.h
>> @@ -171,10 +171,13 @@
>> #define HSTR_EL2_TTEE (1 << 16)
>> #define HSTR_EL2_T(x) (1 << x)
>>
>> +/* Hyp Coproccessor Trap Register Shifts */
>> +#define CPTR_EL2_TFP_SHIFT 10
>> +
>> /* Hyp Coprocessor Trap Register */
>> #define CPTR_EL2_TCPAC (1 << 31)
>> #define CPTR_EL2_TTA (1 << 20)
>> -#define CPTR_EL2_TFP (1 << 10)
>> +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
>>
>> /* Hyp Debug Configuration Register bits */
>> #define MDCR_EL2_TDRA (1 << 11)
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index 5befd01..de0788f 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -673,6 +673,15 @@
>> tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
>> .endm
>>
>> +/*
>> + * Check cptr VFP/SIMD accessed bit, if set VFP/SIMD not accessed by guest.
>
> This comment doesn't really help me understand the function, may I
> suggest:
>
> Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
Yes actually describes what it does.
>
>> + */
>> +.macro skip_fpsimd_state tmp, target
>> + mrs \tmp, cptr_el2
>> + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
>> +.endm
>> +
>> +
>> .macro compute_debug_state target
>> // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
>> // is set, we do a full save/restore cycle and disable trapping.
>> @@ -763,6 +772,7 @@
>> ldr x2, [x0, #VCPU_HCR_EL2]
>> msr hcr_el2, x2
>> mov x2, #CPTR_EL2_TTA
>> + orr x2, x2, #CPTR_EL2_TFP
>> msr cptr_el2, x2
>>
>> mov x2, #(1 << 15) // Trap CP15 Cr=15
>> @@ -785,7 +795,6 @@
>> .macro deactivate_traps
>> mov x2, #HCR_RW
>> msr hcr_el2, x2
>> - msr cptr_el2, xzr
>> msr hstr_el2, xzr
>>
>> mrs x2, mdcr_el2
>> @@ -912,6 +921,28 @@ __restore_fpsimd:
>> restore_fpsimd
>> ret
>>
>> +switch_to_guest_fpsimd:
>> + push x4, lr
>> +
>> + mrs x2, cptr_el2
>> + bic x2, x2, #CPTR_EL2_TFP
>> + msr cptr_el2, x2
>> +
>> + mrs x0, tpidr_el2
>> +
>> + ldr x2, [x0, #VCPU_HOST_CONTEXT]
>> + kern_hyp_va x2
>> + bl __save_fpsimd
>> +
>> + add x2, x0, #VCPU_CONTEXT
>> + bl __restore_fpsimd
>> +
>> + pop x4, lr
>> + pop x2, x3
>> + pop x0, x1
>> +
>> + eret
>> +
>> /*
>> * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>> *
>> @@ -932,7 +963,6 @@ ENTRY(__kvm_vcpu_run)
>> kern_hyp_va x2
>>
>> save_host_regs
>> - bl __save_fpsimd
>> bl __save_sysregs
>>
>> compute_debug_state 1f
>> @@ -948,7 +978,6 @@ ENTRY(__kvm_vcpu_run)
>> add x2, x0, #VCPU_CONTEXT
>>
>> bl __restore_sysregs
>> - bl __restore_fpsimd
>>
>> skip_debug_state x3, 1f
>> bl __restore_debug
>> @@ -967,7 +996,9 @@ __kvm_vcpu_return:
>> add x2, x0, #VCPU_CONTEXT
>>
>> save_guest_regs
>> + skip_fpsimd_state x3, 1f
>> bl __save_fpsimd
>> +1:
>> bl __save_sysregs
>>
>> skip_debug_state x3, 1f
>> @@ -986,7 +1017,11 @@ __kvm_vcpu_return:
>> kern_hyp_va x2
>>
>> bl __restore_sysregs
>> + skip_fpsimd_state x3, 1f
>> bl __restore_fpsimd
>> +1:
>> + /* Clear FPSIMD and Trace trapping */
>> + msr cptr_el2, xzr
>
> why not simply move the deactivate_traps down here instead?
Putting deactivate_traps there trashes x2, setup earlier
to restore debug, host registers from host context
Do we want deactivate_traps to use another register and
move the macro there? Or leave as is?
- Mario
>
>>
>> skip_debug_state x3, 1f
>> // Clear the dirty flag for the next run, as all the state has
>> @@ -1201,6 +1236,11 @@ el1_trap:
>> * x1: ESR
>> * x2: ESR_EC
>> */
>> +
>> + /* Guest accessed VFP/SIMD registers, save host, restore Guest */
>> + cmp x2, #ESR_ELx_EC_FP_ASIMD
>> + b.eq switch_to_guest_fpsimd
>> +
>> cmp x2, #ESR_ELx_EC_DABT_LOW
>> mov x0, #ESR_ELx_EC_IABT_LOW
>> ccmp x2, x0, #4, ne
>> --
>> 1.7.9.5
>>
>
> Otherwise looks good,
> -Christoffer
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-07-02 21:51 ` Mario Smarduch
@ 2015-07-03 11:53 ` Christoffer Dall
2015-07-03 18:56 ` Mario Smarduch
0 siblings, 1 reply; 11+ messages in thread
From: Christoffer Dall @ 2015-07-03 11:53 UTC (permalink / raw
To: linux-arm-kernel
On Thu, Jul 02, 2015 at 02:51:57PM -0700, Mario Smarduch wrote:
> On 07/01/2015 06:46 AM, Christoffer Dall wrote:
> > On Wed, Jun 24, 2015 at 05:04:11PM -0700, Mario Smarduch wrote:
> >> This patch only saves and restores FP/SIMD registers on Guest access. To do
> >> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> >> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> >> context is not saved/restored
> >>
> >> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> >> ---
> >> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
> >> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
> >> 2 files changed, 47 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> >> index ac6fafb..7605e09 100644
> >> --- a/arch/arm64/include/asm/kvm_arm.h
> >> +++ b/arch/arm64/include/asm/kvm_arm.h
> >> @@ -171,10 +171,13 @@
> >> #define HSTR_EL2_TTEE (1 << 16)
> >> #define HSTR_EL2_T(x) (1 << x)
> >>
> >> +/* Hyp Coproccessor Trap Register Shifts */
> >> +#define CPTR_EL2_TFP_SHIFT 10
> >> +
> >> /* Hyp Coprocessor Trap Register */
> >> #define CPTR_EL2_TCPAC (1 << 31)
> >> #define CPTR_EL2_TTA (1 << 20)
> >> -#define CPTR_EL2_TFP (1 << 10)
> >> +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
> >>
> >> /* Hyp Debug Configuration Register bits */
> >> #define MDCR_EL2_TDRA (1 << 11)
> >> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> >> index 5befd01..de0788f 100644
> >> --- a/arch/arm64/kvm/hyp.S
> >> +++ b/arch/arm64/kvm/hyp.S
> >> @@ -673,6 +673,15 @@
> >> tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
> >> .endm
> >>
> >> +/*
> >> + * Check cptr VFP/SIMD accessed bit, if set VFP/SIMD not accessed by guest.
> >
> > This comment doesn't really help me understand the function, may I
> > suggest:
> >
> > Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
>
> Yes actually describes what it does.
>
> >
> >> + */
> >> +.macro skip_fpsimd_state tmp, target
> >> + mrs \tmp, cptr_el2
> >> + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
> >> +.endm
> >> +
> >> +
> >> .macro compute_debug_state target
> >> // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
> >> // is set, we do a full save/restore cycle and disable trapping.
> >> @@ -763,6 +772,7 @@
> >> ldr x2, [x0, #VCPU_HCR_EL2]
> >> msr hcr_el2, x2
> >> mov x2, #CPTR_EL2_TTA
> >> + orr x2, x2, #CPTR_EL2_TFP
> >> msr cptr_el2, x2
> >>
> >> mov x2, #(1 << 15) // Trap CP15 Cr=15
> >> @@ -785,7 +795,6 @@
> >> .macro deactivate_traps
> >> mov x2, #HCR_RW
> >> msr hcr_el2, x2
> >> - msr cptr_el2, xzr
> >> msr hstr_el2, xzr
> >>
> >> mrs x2, mdcr_el2
> >> @@ -912,6 +921,28 @@ __restore_fpsimd:
> >> restore_fpsimd
> >> ret
> >>
> >> +switch_to_guest_fpsimd:
> >> + push x4, lr
> >> +
> >> + mrs x2, cptr_el2
> >> + bic x2, x2, #CPTR_EL2_TFP
> >> + msr cptr_el2, x2
> >> +
> >> + mrs x0, tpidr_el2
> >> +
> >> + ldr x2, [x0, #VCPU_HOST_CONTEXT]
> >> + kern_hyp_va x2
> >> + bl __save_fpsimd
> >> +
> >> + add x2, x0, #VCPU_CONTEXT
> >> + bl __restore_fpsimd
> >> +
> >> + pop x4, lr
> >> + pop x2, x3
> >> + pop x0, x1
> >> +
> >> + eret
> >> +
> >> /*
> >> * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
> >> *
> >> @@ -932,7 +963,6 @@ ENTRY(__kvm_vcpu_run)
> >> kern_hyp_va x2
> >>
> >> save_host_regs
> >> - bl __save_fpsimd
> >> bl __save_sysregs
> >>
> >> compute_debug_state 1f
> >> @@ -948,7 +978,6 @@ ENTRY(__kvm_vcpu_run)
> >> add x2, x0, #VCPU_CONTEXT
> >>
> >> bl __restore_sysregs
> >> - bl __restore_fpsimd
> >>
> >> skip_debug_state x3, 1f
> >> bl __restore_debug
> >> @@ -967,7 +996,9 @@ __kvm_vcpu_return:
> >> add x2, x0, #VCPU_CONTEXT
> >>
> >> save_guest_regs
> >> + skip_fpsimd_state x3, 1f
> >> bl __save_fpsimd
> >> +1:
> >> bl __save_sysregs
> >>
> >> skip_debug_state x3, 1f
> >> @@ -986,7 +1017,11 @@ __kvm_vcpu_return:
> >> kern_hyp_va x2
> >>
> >> bl __restore_sysregs
> >> + skip_fpsimd_state x3, 1f
> >> bl __restore_fpsimd
> >> +1:
> >> + /* Clear FPSIMD and Trace trapping */
> >> + msr cptr_el2, xzr
> >
> > why not simply move the deactivate_traps down here instead?
>
> Putting deactivate_traps there trashes x2, setup earlier
> to restore debug, host registers from host context
>
> Do we want deactivate_traps to use another register and
> move the macro there? Or leave as is?
>
There was some clean symmetry in the code by using deactivate traps, but
given this, I don't care strongly which way we end up doing it.
Thanks,
-Christoffer
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-07-03 11:53 ` Christoffer Dall
@ 2015-07-03 18:56 ` Mario Smarduch
0 siblings, 0 replies; 11+ messages in thread
From: Mario Smarduch @ 2015-07-03 18:56 UTC (permalink / raw
To: linux-arm-kernel
On 07/03/2015 04:53 AM, Christoffer Dall wrote:
> On Thu, Jul 02, 2015 at 02:51:57PM -0700, Mario Smarduch wrote:
>> On 07/01/2015 06:46 AM, Christoffer Dall wrote:
>>> On Wed, Jun 24, 2015 at 05:04:11PM -0700, Mario Smarduch wrote:
>>>> This patch only saves and restores FP/SIMD registers on Guest access. To do
>>>> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
>>>> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
>>>> context is not saved/restored
>>>>
>>>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>>>> ---
>>>> arch/arm64/include/asm/kvm_arm.h | 5 ++++-
>>>> arch/arm64/kvm/hyp.S | 46 +++++++++++++++++++++++++++++++++++---
>>>> 2 files changed, 47 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
>>>> index ac6fafb..7605e09 100644
>>>> --- a/arch/arm64/include/asm/kvm_arm.h
>>>> +++ b/arch/arm64/include/asm/kvm_arm.h
>>>> @@ -171,10 +171,13 @@
>>>> #define HSTR_EL2_TTEE (1 << 16)
>>>> #define HSTR_EL2_T(x) (1 << x)
>>>>
>>>> +/* Hyp Coproccessor Trap Register Shifts */
>>>> +#define CPTR_EL2_TFP_SHIFT 10
>>>> +
>>>> /* Hyp Coprocessor Trap Register */
>>>> #define CPTR_EL2_TCPAC (1 << 31)
>>>> #define CPTR_EL2_TTA (1 << 20)
>>>> -#define CPTR_EL2_TFP (1 << 10)
>>>> +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
>>>>
>>>> /* Hyp Debug Configuration Register bits */
>>>> #define MDCR_EL2_TDRA (1 << 11)
>>>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>>>> index 5befd01..de0788f 100644
>>>> --- a/arch/arm64/kvm/hyp.S
>>>> +++ b/arch/arm64/kvm/hyp.S
>>>> @@ -673,6 +673,15 @@
>>>> tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
>>>> .endm
>>>>
>>>> +/*
>>>> + * Check cptr VFP/SIMD accessed bit, if set VFP/SIMD not accessed by guest.
>>>
>>> This comment doesn't really help me understand the function, may I
>>> suggest:
>>>
>>> Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
>>
>> Yes actually describes what it does.
>>
>>>
>>>> + */
>>>> +.macro skip_fpsimd_state tmp, target
>>>> + mrs \tmp, cptr_el2
>>>> + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
>>>> +.endm
>>>> +
>>>> +
>>>> .macro compute_debug_state target
>>>> // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
>>>> // is set, we do a full save/restore cycle and disable trapping.
>>>> @@ -763,6 +772,7 @@
>>>> ldr x2, [x0, #VCPU_HCR_EL2]
>>>> msr hcr_el2, x2
>>>> mov x2, #CPTR_EL2_TTA
>>>> + orr x2, x2, #CPTR_EL2_TFP
>>>> msr cptr_el2, x2
>>>>
>>>> mov x2, #(1 << 15) // Trap CP15 Cr=15
>>>> @@ -785,7 +795,6 @@
>>>> .macro deactivate_traps
>>>> mov x2, #HCR_RW
>>>> msr hcr_el2, x2
>>>> - msr cptr_el2, xzr
>>>> msr hstr_el2, xzr
>>>>
>>>> mrs x2, mdcr_el2
>>>> @@ -912,6 +921,28 @@ __restore_fpsimd:
>>>> restore_fpsimd
>>>> ret
>>>>
>>>> +switch_to_guest_fpsimd:
>>>> + push x4, lr
>>>> +
>>>> + mrs x2, cptr_el2
>>>> + bic x2, x2, #CPTR_EL2_TFP
>>>> + msr cptr_el2, x2
>>>> +
>>>> + mrs x0, tpidr_el2
>>>> +
>>>> + ldr x2, [x0, #VCPU_HOST_CONTEXT]
>>>> + kern_hyp_va x2
>>>> + bl __save_fpsimd
>>>> +
>>>> + add x2, x0, #VCPU_CONTEXT
>>>> + bl __restore_fpsimd
>>>> +
>>>> + pop x4, lr
>>>> + pop x2, x3
>>>> + pop x0, x1
>>>> +
>>>> + eret
>>>> +
>>>> /*
>>>> * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>>>> *
>>>> @@ -932,7 +963,6 @@ ENTRY(__kvm_vcpu_run)
>>>> kern_hyp_va x2
>>>>
>>>> save_host_regs
>>>> - bl __save_fpsimd
>>>> bl __save_sysregs
>>>>
>>>> compute_debug_state 1f
>>>> @@ -948,7 +978,6 @@ ENTRY(__kvm_vcpu_run)
>>>> add x2, x0, #VCPU_CONTEXT
>>>>
>>>> bl __restore_sysregs
>>>> - bl __restore_fpsimd
>>>>
>>>> skip_debug_state x3, 1f
>>>> bl __restore_debug
>>>> @@ -967,7 +996,9 @@ __kvm_vcpu_return:
>>>> add x2, x0, #VCPU_CONTEXT
>>>>
>>>> save_guest_regs
>>>> + skip_fpsimd_state x3, 1f
>>>> bl __save_fpsimd
>>>> +1:
>>>> bl __save_sysregs
>>>>
>>>> skip_debug_state x3, 1f
>>>> @@ -986,7 +1017,11 @@ __kvm_vcpu_return:
>>>> kern_hyp_va x2
>>>>
>>>> bl __restore_sysregs
>>>> + skip_fpsimd_state x3, 1f
>>>> bl __restore_fpsimd
>>>> +1:
>>>> + /* Clear FPSIMD and Trace trapping */
>>>> + msr cptr_el2, xzr
>>>
>>> why not simply move the deactivate_traps down here instead?
>>
>> Putting deactivate_traps there trashes x2, setup earlier
>> to restore debug, host registers from host context
>>
>> Do we want deactivate_traps to use another register and
>> move the macro there? Or leave as is?
>>
> There was some clean symmetry in the code by using deactivate traps, but
> given this, I don't care strongly which way we end up doing it.
I agree. Also it probably makes more sense to keep the trap disable
code together to match the trap enables, without objection from
Marc I'll rework the patch, repost it after Monday.
Thanks,
- Mario
>
> Thanks,
> -Christoffer
>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2015-07-03 18:56 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-06-25 0:04 [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
2015-07-01 13:46 ` Christoffer Dall
2015-07-02 21:51 ` Mario Smarduch
2015-07-03 11:53 ` Christoffer Dall
2015-07-03 18:56 ` Mario Smarduch
2015-06-25 0:04 ` [PATCH v3 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
2015-07-01 13:50 ` Christoffer Dall
2015-07-01 9:49 ` [PATCH v3 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
2015-07-02 17:49 ` Mario Smarduch
2015-07-02 20:58 ` Christoffer Dall
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).