* [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead
@ 2022-08-09 17:00 Andrew Cooper
  2022-08-09 17:00 ` [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Andrew Cooper @ 2022-08-09 17:00 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper

Xen happens to be not vulnerable to PBRSB, but it turns out we can improve the
performance on all eIBRS systems.

Andrew Cooper (2):
  x86/spec-ctrl: Enumeration for PBRSB_NO
  x86/spec-ctrl: Reduce HVM RSB overhead where possible

 xen/arch/x86/hvm/vmx/entry.S           |   1 +
 xen/arch/x86/hvm/vmx/vmx.c             |  20 ++++++-
 xen/arch/x86/include/asm/cpufeatures.h |   1 +
 xen/arch/x86/include/asm/msr-index.h   |   1 +
 xen/arch/x86/msr.c                     |   5 +-
 xen/arch/x86/spec_ctrl.c               | 106 +++++++++++++++++++++++++++++++--
 6 files changed, 126 insertions(+), 8 deletions(-)

-- 
2.11.0




* [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO
  2022-08-09 17:00 [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead Andrew Cooper
@ 2022-08-09 17:00 ` Andrew Cooper
  2022-08-10  7:10   ` Jan Beulich
  2022-08-09 17:00 ` [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible Andrew Cooper
  2022-08-11 10:55 ` [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead Andrew Cooper
  2 siblings, 1 reply; 9+ messages in thread
From: Andrew Cooper @ 2022-08-09 17:00 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

The PBRSB_NO bit indicates that the CPU is not vulnerable to the Post-Barrier
RSB speculative vulnerability.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/include/asm/msr-index.h | 1 +
 xen/arch/x86/msr.c                   | 5 +++--
 xen/arch/x86/spec_ctrl.c             | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index 1a928ea6af2f..0a8852f3c246 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -84,6 +84,7 @@
 #define  ARCH_CAPS_FB_CLEAR_CTRL            (_AC(1, ULL) << 18)
 #define  ARCH_CAPS_RRSBA                    (_AC(1, ULL) << 19)
 #define  ARCH_CAPS_BHI_NO                   (_AC(1, ULL) << 20)
+#define  ARCH_CAPS_PBRSB_NO                 (_AC(1, ULL) << 24)
 
 #define MSR_FLUSH_CMD                       0x0000010b
 #define  FLUSH_CMD_L1D                      (_AC(1, ULL) <<  0)
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 170f04179347..d2e2dc2a6b91 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -74,7 +74,8 @@ static void __init calculate_host_policy(void)
          ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO |
          ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO |
          ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO |
-         ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO);
+         ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO |
+         ARCH_CAPS_PBRSB_NO);
 }
 
 static void __init calculate_pv_max_policy(void)
@@ -166,7 +167,7 @@ int init_domain_msr_policy(struct domain *d)
              ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
              ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
              ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
-             ARCH_CAPS_BHI_NO);
+             ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO);
     }
 
     d->arch.msr = mp;
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index d2cd5459739f..160cc68086c6 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -419,7 +419,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
      * Hardware read-only information, stating immunity to certain issues, or
      * suggestions of which mitigation to use.
      */
-    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
            (caps & ARCH_CAPS_RDCL_NO)                        ? " RDCL_NO"        : "",
            (caps & ARCH_CAPS_IBRS_ALL)                       ? " IBRS_ALL"       : "",
            (caps & ARCH_CAPS_RSBA)                           ? " RSBA"           : "",
@@ -431,6 +431,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
            (caps & ARCH_CAPS_SBDR_SSDP_NO)                   ? " SBDR_SSDP_NO"   : "",
            (caps & ARCH_CAPS_FBSDP_NO)                       ? " FBSDP_NO"       : "",
            (caps & ARCH_CAPS_PSDP_NO)                        ? " PSDP_NO"        : "",
+           (caps & ARCH_CAPS_PBRSB_NO)                       ? " PBRSB_NO"       : "",
            (e8b  & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS))    ? " IBRS_ALWAYS"    : "",
            (e8b  & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS))   ? " STIBP_ALWAYS"   : "",
            (e8b  & cpufeat_mask(X86_FEATURE_IBRS_FAST))      ? " IBRS_FAST"      : "",
-- 
2.11.0




* [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible
  2022-08-09 17:00 [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead Andrew Cooper
  2022-08-09 17:00 ` [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO Andrew Cooper
@ 2022-08-09 17:00 ` Andrew Cooper
  2022-08-09 20:20   ` Jason Andryuk
  2022-08-10  8:00   ` Jan Beulich
  2022-08-11 10:55 ` [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead Andrew Cooper
  2 siblings, 2 replies; 9+ messages in thread
From: Andrew Cooper @ 2022-08-09 17:00 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

The documentation for eIBRS has finally been clarified to state that it is
intended to flush the RSB on VMExit.  So in principle, we shouldn't have been
using opt_rsb_hvm on eIBRS hardware.

However, dropping the 32 entry RSB stuff makes us vulnerable to Post-Barrier
RSB speculation on affected Intel CPUs.

Introduce hvm_rsb_calculations() which selects between a 32-entry stuff, a
PBRSB specific workaround, or nothing, based on hardware details.

To mitigate PBRSB, put an LFENCE at the top of vmx_vmexit_handler().  This
forces the necessary safety property, without having to do a 1-entry RSB stuff
and fix up the stack(s) afterwards.

Update opt_rsb_hvm to be tristate.  On eIBRS-capable CPUs not susceptible to
PBRSB, this disables HVM RSB software protections entirely.  On eIBRS-capable
CPUs susceptible to PBRSB, this reduces a 32-entry RSB stuff down to just
one LFENCE.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/hvm/vmx/entry.S           |   1 +
 xen/arch/x86/hvm/vmx/vmx.c             |  20 ++++++-
 xen/arch/x86/include/asm/cpufeatures.h |   1 +
 xen/arch/x86/spec_ctrl.c               | 103 ++++++++++++++++++++++++++++++++-
 4 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 5f5de45a1309..222495aed19f 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -44,6 +44,7 @@ ENTRY(vmx_asm_vmexit_handler)
         .endm
         ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+        /* On PBRSB-vulenrable hardware, `ret` not safe before the start of vmx_vmexit_handler() */
 
         /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
         .macro restore_lbr
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 17e103188a53..8a6a5cf20525 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3934,8 +3934,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
 {
     unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
     unsigned int vector = 0, mode;
-    struct vcpu *v = current;
-    struct domain *currd = v->domain;
+    struct vcpu *v;
+    struct domain *currd;
+
+    /*
+     * To mitigate Post-Barrier RSB speculation, we must force one CALL
+     * instruction to retire before letting a RET instruction execute.
+     *
+     * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+     * before this point.
+     *
+     * Defer any non-trivial variable initialisation to avoid problems if the
+     * compiler decides to out-of-line any helpers.  This depends on
+     * alternative() being a full compiler barrier too.
+     */
+    alternative("", "lfence", X86_BUG_PBRSB);
+
+    v = current;
+    currd = v->domain;
 
     __vmread(GUEST_RIP,    &regs->rip);
     __vmread(GUEST_RSP,    &regs->rsp);
diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
index 672c9ee22ba2..fdb9bff833c1 100644
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -49,6 +49,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for
 #define X86_BUG_FPU_PTRS          X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */
 #define X86_BUG_NULL_SEG          X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */
 #define X86_BUG_CLFLUSH_MFENCE    X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */
+#define X86_BUG_PBRSB             X86_BUG( 3) /* CPU suffers from Post-Barrier RSB speculation */
 
 /* Total number of capability words, inc synth and bug words. */
 #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 160cc68086c6..ffad202200ad 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -35,7 +35,7 @@
 static bool __initdata opt_msr_sc_pv = true;
 static bool __initdata opt_msr_sc_hvm = true;
 static int8_t __initdata opt_rsb_pv = -1;
-static bool __initdata opt_rsb_hvm = true;
+static int8_t __initdata opt_rsb_hvm = -1;
 static int8_t __ro_after_init opt_md_clear_pv = -1;
 static int8_t __ro_after_init opt_md_clear_hvm = -1;
 
@@ -515,7 +515,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
             opt_eager_fpu || opt_md_clear_hvm)       ? ""               : " None",
            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
-           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "",
+           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           :
+           boot_cpu_has(X86_BUG_PBRSB)               ? " PBRSB"         : "",
            opt_eager_fpu                             ? " EAGER_FPU"     : "",
            opt_md_clear_hvm                          ? " MD_CLEAR"      : "",
            boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM)  ? " IBPB-entry"    : "");
@@ -718,6 +719,77 @@ static bool __init rsb_is_full_width(void)
     return true;
 }
 
+/*
+ * HVM guests can create arbitrary RSB entries, including ones which point at
+ * Xen supervisor mappings.
+ *
+ * Traditionally, the RSB is not isolated on vmexit, so Xen needs to take
+ * safety precautions to prevent RSB speculation from consuming guest values.
+ *
+ * Intel eIBRS specifies that the RSB is flushed:
+ *   1) on VMExit when IBRS=1, or
+ *   2) shortly thereafter when Xen restores the host IBRS=1 setting.
+ * However, a subset of eIBRS-capable parts also suffer PBRSB and need
+ * software assistance to maintain RSB safety.
+ */
+static __init enum hvm_rsb {
+    hvm_rsb_none,
+    hvm_rsb_pbrsb,
+    hvm_rsb_stuff32,
+} hvm_rsb_calculations(uint64_t caps)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+         boot_cpu_data.x86 != 6 )
+        return hvm_rsb_stuff32;
+
+    if ( !(caps & ARCH_CAPS_IBRS_ALL) )
+        return hvm_rsb_stuff32;
+
+    if ( caps & ARCH_CAPS_PBRSB_NO )
+        return hvm_rsb_none;
+
+    /*
+     * We're choosing between the eIBRS-capable models which don't enumerate
+     * PBRSB_NO.  Earlier steppings of some models don't enumerate eIBRS and
+     * are excluded above.
+     */
+    switch ( boot_cpu_data.x86_model )
+    {
+        /*
+         * Core (inc Hybrid) CPUs to date (August 2022) are vulenrable.
+         */
+    case 0x55: /* Skylake X */
+    case 0x6a: /* Ice Lake SP */
+    case 0x6c: /* Ice Lake D */
+    case 0x7e: /* Ice Lake client */
+    case 0x8a: /* Lakefield (SNC/TMT) */
+    case 0x8c: /* Tiger Lake U */
+    case 0x8d: /* Tiger Lake H */
+    case 0x8e: /* Skylake-L */
+    case 0x97: /* Alder Lake S */
+    case 0x9a: /* Alder Lake H/P/U */
+    case 0x9e: /* Skylake */
+    case 0xa5: /* Comet Lake */
+    case 0xa6: /* Comet Lake U62 */
+    case 0xa7: /* Rocket Lake */
+        return hvm_rsb_pbrsb;
+
+        /*
+         * Atom CPUs are not vulnerable.
+         */
+    case 0x7a: /* Gemini Lake */
+    case 0x86: /* Snow Ridge (Tremont) */
+    case 0x96: /* Elkhart Lake (Tremont) */
+    case 0x9c: /* Jasper Lake (Tremont) */
+        return hvm_rsb_none;
+
+    default:
+        printk("Unrecognised CPU model %#x - using software HVM RSB mitigations\n",
+               boot_cpu_data.x86_model);
+        return hvm_rsb_stuff32;
+    }
+}
+
 /* Calculate whether this CPU speculates past #NM */
 static bool __init should_use_eager_fpu(void)
 {
@@ -1110,6 +1182,7 @@ void spec_ctrl_init_domain(struct domain *d)
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
+    enum hvm_rsb hvm_rsb;
     bool has_spec_ctrl, ibrs = false, hw_smt_enabled;
     bool cpu_has_bug_taa;
     uint64_t caps = 0;
@@ -1327,9 +1400,33 @@ void __init init_speculation_mitigations(void)
      * HVM guests can always poison the RSB to point at Xen supervisor
      * mappings.
      */
+    hvm_rsb = hvm_rsb_calculations(caps);
+    if ( opt_rsb_hvm == -1 )
+        opt_rsb_hvm = hvm_rsb != hvm_rsb_none;
+
     if ( opt_rsb_hvm )
     {
-        setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
+        switch ( hvm_rsb )
+        {
+        case hvm_rsb_pbrsb:
+            setup_force_cpu_cap(X86_BUG_PBRSB);
+            break;
+
+        case hvm_rsb_none:
+            /*
+             * Somewhat arbitrary.  If something is wrong and the user has
+             * forced HVM RSB protections on a system where we think nothing
+             * is necessary, they they possibly know something we dont.
+             *
+             * Use stuff32 in this case, which is the most protection we can
+             * muster.
+             */
+            fallthrough;
+
+        case hvm_rsb_stuff32:
+            setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
+            break;
+        }
 
         /*
          * For SVM, Xen's RSB safety actions are performed before STGI, so
-- 
2.11.0




* Re: [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible
  2022-08-09 17:00 ` [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible Andrew Cooper
@ 2022-08-09 20:20   ` Jason Andryuk
  2022-08-11 17:05     ` Andrew Cooper
  2022-08-10  8:00   ` Jan Beulich
  1 sibling, 1 reply; 9+ messages in thread
From: Jason Andryuk @ 2022-08-09 20:20 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Jan Beulich, Roger Pau Monné, Wei Liu

On Tue, Aug 9, 2022 at 1:01 PM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> The documentation for eIBRS has finally been clarified to state that it is
> intended to flush the RSB on VMExit.  So in principle, we shouldn't have been
> using opt_rsb_hvm on eIBRS hardware.
>
> However, dropping the 32 entry RSB stuff makes us vulnerable to Post-Barrier
> RSB speculation on affected Intel CPUs.
>
> Introduce hvm_rsb_calculations() which selects between a 32-entry stuff, a
> PBRSB specific workaround, or nothing, based on hardware details.
>
> To mitigate PBRSB, put an LFENCE at the top of vmx_vmexit_handler().  This
> forces the necessary safety property, without having to do a 1-entry RSB stuff
> and fix up the stack(s) afterwards.
>
> Update opt_rsb_hvm to be tristate.  On eIBRS-capable CPUs not susceptible to
> PBRSB, this disables HVM RSB software protections entirely.  On eIBRS-capable
> CPUs susceptible to PBRSB, this reduces a 32-entry RSB stuff down to just
> one LFENCE.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> ---
>  xen/arch/x86/hvm/vmx/entry.S           |   1 +
>  xen/arch/x86/hvm/vmx/vmx.c             |  20 ++++++-
>  xen/arch/x86/include/asm/cpufeatures.h |   1 +
>  xen/arch/x86/spec_ctrl.c               | 103 ++++++++++++++++++++++++++++++++-
>  4 files changed, 120 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
> index 5f5de45a1309..222495aed19f 100644
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -44,6 +44,7 @@ ENTRY(vmx_asm_vmexit_handler)
>          .endm
>          ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
> +        /* On PBRSB-vulenrable hardware, `ret` not safe before the start of vmx_vmexit_handler() */

vulnerable

>
>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
>          .macro restore_lbr
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 17e103188a53..8a6a5cf20525 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3934,8 +3934,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>  {
>      unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
>      unsigned int vector = 0, mode;
> -    struct vcpu *v = current;
> -    struct domain *currd = v->domain;
> +    struct vcpu *v;
> +    struct domain *currd;
> +
> +    /*
> +     * To mitigate Post-Barrier RSB speculation, we must force one CALL
> +     * instruction to retire before letting a RET instruction execute.

I think it would be clearer if this comment mentioned LFENCE like the
commit message does.  Looking at this change without the commit
message the connection is not obvious to me at least.  Maybe "we must
force one CALL instruction to retire (with LFENCE) before letting a
RET instruction execute"?
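
Something along these lines, as a rough sketch only (exact wording is of
course up to you):

    /*
     * To mitigate Post-Barrier RSB speculation, we must force one CALL
     * instruction to retire (via the LFENCE below) before letting a RET
     * instruction execute.
     */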

> +     *
> +     * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
> +     * before this point.
> +     *
> +     * Defer any non-trivial variable initialisation to avoid problems if the
> +     * compiler decides to out-of-line any helpers.  This depends on
> +     * alternative() being a full compiler barrier too.
> +     */
> +    alternative("", "lfence", X86_BUG_PBRSB);
> +
> +    v = current;
> +    currd = v->domain;
>
>      __vmread(GUEST_RIP,    &regs->rip);
>      __vmread(GUEST_RSP,    &regs->rsp);


> +    /*
> +     * We're choosing between the eIBRS-capable models which don't enumerate
> +     * PBRSB_NO.  Earlier steppings of some models don't enumerate eIBRS and
> +     * are excluded above.
> +     */
> +    switch ( boot_cpu_data.x86_model )
> +    {
> +        /*
> +         * Core (inc Hybrid) CPUs to date (August 2022) are vulenrable.

vulnerable

> +        case hvm_rsb_none:
> +            /*
> +             * Somewhat arbitrary.  If something is wrong and the user has
> +             * forced HVM RSB protections on a system where we think nothing
> +             * is necessary, they they possibly know something we dont.

"then they" and "don't"

Regards,
Jason



* Re: [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO
  2022-08-09 17:00 ` [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO Andrew Cooper
@ 2022-08-10  7:10   ` Jan Beulich
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2022-08-10  7:10 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 09.08.2022 19:00, Andrew Cooper wrote:
> The PBRSB_NO bit indicates that the CPU is not vulnerable to the Post-Barrier
> RSB speculative vulnerability.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>




* Re: [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible
  2022-08-09 17:00 ` [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible Andrew Cooper
  2022-08-09 20:20   ` Jason Andryuk
@ 2022-08-10  8:00   ` Jan Beulich
  2022-08-11 18:06     ` Andrew Cooper
  1 sibling, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2022-08-10  8:00 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 09.08.2022 19:00, Andrew Cooper wrote:
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -44,6 +44,7 @@ ENTRY(vmx_asm_vmexit_handler)
>          .endm
>          ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
> +        /* On PBRSB-vulenrable hardware, `ret` not safe before the start of vmx_vmexit_handler() */

Besides the spelling issue mentioned by Jason I think this line also
wants wrapping. Maybe the two comments also want combining to just
one, such that "WARNING!" clearly applies to both parts.
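
For instance (a sketch only, not meant to be prescriptive):

        /*
         * WARNING! `ret`, `call *`, `jmp *` not safe before this point.
         * On PBRSB-vulnerable hardware, `ret` is additionally not safe
         * until the LFENCE at the start of vmx_vmexit_handler().
         */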

> @@ -515,7 +515,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
>              boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
>              opt_eager_fpu || opt_md_clear_hvm)       ? ""               : " None",
>             boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
> -           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "",
> +           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           :
> +           boot_cpu_has(X86_BUG_PBRSB)               ? " PBRSB"         : "",
>             opt_eager_fpu                             ? " EAGER_FPU"     : "",
>             opt_md_clear_hvm                          ? " MD_CLEAR"      : "",
>             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM)  ? " IBPB-entry"    : "");

Along the lines of half of what fdbf8bdfebc2 ("x86/spec-ctrl:
correct per-guest-type reporting of MD_CLEAR") did, I think you also want
to extend the other (earlier) conditional in this function invocation.

I also wonder whether it wouldn't be helpful to parenthesize the new
construct, such that it'll be more obvious that this is a double
conditional operator determining a single function argument.
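
I.e. roughly the following (a sketch; the opening lines of the conditional
aren't visible in the hunk and are assumed here):

            (boot_cpu_has(X86_FEATURE_SC_MSR_HVM)     ||
             boot_cpu_has(X86_FEATURE_SC_RSB_HVM)     ||
             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
             boot_cpu_has(X86_BUG_PBRSB)              ||
             opt_eager_fpu || opt_md_clear_hvm)       ? ""               : " None",
            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
            (boot_cpu_has(X86_FEATURE_SC_RSB_HVM)     ? " RSB"           :
             boot_cpu_has(X86_BUG_PBRSB)              ? " PBRSB"         : ""),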

> @@ -718,6 +719,77 @@ static bool __init rsb_is_full_width(void)
>      return true;
>  }
>  
> +/*
> + * HVM guests can create arbitrary RSB entries, including ones which point at
> + * Xen supervisor mappings.
> + *
> + * Traditionally, the RSB is not isolated on vmexit, so Xen needs to take
> + * safety precautions to prevent RSB speculation from consuming guest values.
> + *
> + * Intel eIBRS specifies that the RSB is flushed:
> + *   1) on VMExit when IBRS=1, or
> + *   2) shortly thereafter when Xen restores the host IBRS=1 setting.
> + * However, a subset of eIBRS-capable parts also suffer PBRSB and need
> + * software assistance to maintain RSB safety.
> + */
> +static __init enum hvm_rsb {
> +    hvm_rsb_none,
> +    hvm_rsb_pbrsb,
> +    hvm_rsb_stuff32,
> +} hvm_rsb_calculations(uint64_t caps)
> +{
> +    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
> +         boot_cpu_data.x86 != 6 )
> +        return hvm_rsb_stuff32;
> +
> +    if ( !(caps & ARCH_CAPS_IBRS_ALL) )
> +        return hvm_rsb_stuff32;
> +
> +    if ( caps & ARCH_CAPS_PBRSB_NO )
> +        return hvm_rsb_none;
> +
> +    /*
> +     * We're choosing between the eIBRS-capable models which don't enumerate
> +     * PBRSB_NO.  Earlier steppings of some models don't enumerate eIBRS and
> +     * are excluded above.
> +     */
> +    switch ( boot_cpu_data.x86_model )
> +    {
> +        /*
> +         * Core (inc Hybrid) CPUs to date (August 2022) are vulenrable.
> +         */
> +    case 0x55: /* Skylake X */
> +    case 0x6a: /* Ice Lake SP */
> +    case 0x6c: /* Ice Lake D */
> +    case 0x7e: /* Ice Lake client */
> +    case 0x8a: /* Lakefield (SNC/TMT) */
> +    case 0x8c: /* Tiger Lake U */
> +    case 0x8d: /* Tiger Lake H */
> +    case 0x8e: /* Skylake-L */

Hmm, is SDM Vol 4's initial table wrong then in stating Kaby Lake /
Coffee Lake for this and ...

> +    case 0x97: /* Alder Lake S */
> +    case 0x9a: /* Alder Lake H/P/U */
> +    case 0x9e: /* Skylake */

... this? Otoh I notice that intel-family.h also says Skylake in
respective comments, despite the constants themselves being named
differently. Yet again ...

> +    case 0xa5: /* Comet Lake */
> +    case 0xa6: /* Comet Lake U62 */

... you call these Comet Lake when intel-family.h says Skylake also for
these (and names the latter's variable COMETLAKE_L).

What is in the comments here is of course not of primary concern for
getting this patch in, but the named anomalies will stand out when all
of this is switched over to use intel-family.h's constants.
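
For example, assuming Xen's intel-family.h follows the Linux-derived naming,
the two entries above would end up as something like

    case INTEL_FAM6_COMETLAKE:      /* 0xa5 */
    case INTEL_FAM6_COMETLAKE_L:    /* 0xa6 */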

> @@ -1327,9 +1400,33 @@ void __init init_speculation_mitigations(void)
>       * HVM guests can always poison the RSB to point at Xen supervisor
>       * mappings.
>       */
> +    hvm_rsb = hvm_rsb_calculations(caps);
> +    if ( opt_rsb_hvm == -1 )
> +        opt_rsb_hvm = hvm_rsb != hvm_rsb_none;
> +
>      if ( opt_rsb_hvm )
>      {
> -        setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
> +        switch ( hvm_rsb )
> +        {
> +        case hvm_rsb_pbrsb:
> +            setup_force_cpu_cap(X86_BUG_PBRSB);
> +            break;
> +
> +        case hvm_rsb_none:
> +            /*
> +             * Somewhat arbitrary.  If something is wrong and the user has
> +             * forced HVM RSB protections on a system where we think nothing
> +             * is necessary, they they possibly know something we dont.
> +             *
> +             * Use stuff32 in this case, which is the most protection we can
> +             * muster.
> +             */
> +            fallthrough;
> +
> +        case hvm_rsb_stuff32:
> +            setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
> +            break;
> +        }
>  
>          /*
>           * For SVM, Xen's RSB safety actions are performed before STGI, so

For people using e.g. "spec-ctrl=no-ibrs" but leaving RSB stuffing enabled
(or force-enabling it) we'd need to have an LFENCE somewhere as well. Since
putting one in the RSB stuffing macro would require a runtime conditional
(for its use for alternatives patching), can't we leverage the one
controlled by this logic? That'll be a slight abuse of the name of
X86_BUG_PBRSB, but probably acceptable with a suitable comment. In the end
it'll simply be another fall-through, I guess:

        switch ( hvm_rsb )
        {
        case hvm_rsb_none:
            /* ... */
            fallthrough;

        case hvm_rsb_stuff32:
            setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);

            /* ... */
            if ( boot_cpu_has(X86_FEATURE_SC_MSR_HVM) )
                break;
            fallthrough;
        case hvm_rsb_pbrsb:
            setup_force_cpu_cap(X86_BUG_PBRSB);
            break;
        }

That way, aiui, it also wouldn't get in the way of print_details(), which
checks X86_FEATURE_SC_RSB_HVM first.

Otoh I can see reasons why for the stuffing the LFENCE would really need
to live inside the macro, and in particular ahead of the RET there. Since
we don't want to have a runtime conditional there, I guess we should then,
as a fallback, extend the text in the command line doc to warn about the
inter-dependency. After all people "knowing what they are doing" doesn't
imply them knowing implementation details of Xen. But then I'd still be
a little concerned of the "they possibly know something we don't" case.

Jan



* Re: [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead
  2022-08-09 17:00 [PATCH 0/2] x86/spec-ctrl: Reduce HVM RSB overhead Andrew Cooper
  2022-08-09 17:00 ` [PATCH 1/2] x86/spec-ctrl: Enumeration for PBRSB_NO Andrew Cooper
  2022-08-09 17:00 ` [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible Andrew Cooper
@ 2022-08-11 10:55 ` Andrew Cooper
  2 siblings, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2022-08-11 10:55 UTC (permalink / raw)
  To: xen-devel

On 09/08/2022 18:00, Andrew Cooper wrote:
> Xen happens to be not vulnerable to PBRSB, but it turns out we can improve the
> performance on all eIBRS systems.
>
> Andrew Cooper (2):
>   x86/spec-ctrl: Enumeration for PBRSB_NO
>   x86/spec-ctrl: Reduce HVM RSB overhead where possible

Network perf testing on IceLake Server says this turns into a 2% guest
bandwidth improvement.

~Andrew


* Re: [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible
  2022-08-09 20:20   ` Jason Andryuk
@ 2022-08-11 17:05     ` Andrew Cooper
  0 siblings, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2022-08-11 17:05 UTC (permalink / raw)
  To: Jason Andryuk; +Cc: Xen-devel, Jan Beulich, Roger Pau Monne, Wei Liu

On 09/08/2022 21:20, Jason Andryuk wrote:
> On Tue, Aug 9, 2022 at 1:01 PM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>> index 17e103188a53..8a6a5cf20525 100644
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -3934,8 +3934,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>  {
>>      unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
>>      unsigned int vector = 0, mode;
>> -    struct vcpu *v = current;
>> -    struct domain *currd = v->domain;
>> +    struct vcpu *v;
>> +    struct domain *currd;
>> +
>> +    /*
>> +     * To mitigate Post-Barrier RSB speculation, we must force one CALL
>> +     * instruction to retire before letting a RET instruction execute.
> I think it would be clearer if this comment mentioned LFENCE like the
> commit message does.  Looking at this change without the commit
> message the connection is not obvious to me at least.  Maybe "we must
> force one CALL instruction to retire (with LFENCE) before letting a
> RET instruction execute"?

While I'm sympathetic to trying to make this easier to follow, throwing
extra LFENCEs around isn't the right way forward IMO.

LFENCE *is* the basis of a lot of software mitigations, because it has
been specified by Intel and AMD to also be a dispatch barrier.

This has been covered in multiple whitepapers from both vendors, and has
been in the main manuals for 4 years or so now.

~Andrew


* Re: [PATCH 2/2] x86/spec-ctrl: Reduce HVM RSB overhead where possible
  2022-08-10  8:00   ` Jan Beulich
@ 2022-08-11 18:06     ` Andrew Cooper
  0 siblings, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2022-08-11 18:06 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monne, Wei Liu, Xen-devel

On 10/08/2022 09:00, Jan Beulich wrote:
> On 09.08.2022 19:00, Andrew Cooper wrote:
>> --- a/xen/arch/x86/hvm/vmx/entry.S
>> +++ b/xen/arch/x86/hvm/vmx/entry.S
>> @@ -44,6 +44,7 @@ ENTRY(vmx_asm_vmexit_handler)
>>          .endm
>>          ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>> +        /* On PBRSB-vulenrable hardware, `ret` not safe before the start of vmx_vmexit_handler() */
> Besides the spelling issue mentioned by Jason I think this line also
> wants wrapping. Maybe the two comments also want combining to just
> one, such that "WARNING!" clearly applies to both parts.
>
>> @@ -515,7 +515,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
>>              boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
>>              opt_eager_fpu || opt_md_clear_hvm)       ? ""               : " None",
>>             boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
>> -           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "",
>> +           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           :
>> +           boot_cpu_has(X86_BUG_PBRSB)               ? " PBRSB"         : "",
>>             opt_eager_fpu                             ? " EAGER_FPU"     : "",
>>             opt_md_clear_hvm                          ? " MD_CLEAR"      : "",
>>             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM)  ? " IBPB-entry"    : "");
> Along the lines of half of what fdbf8bdfebc2 ("x86/spec-ctrl:
> correct per-guest-type reporting of MD_CLEAR") did, I think you also want
> to extend the other (earlier) conditional in this function invocation.

Oh yes, good point.

> I also wonder whether it wouldn't be helpful to parenthesize the new
> construct, such that it'll be more obvious that this is a double
> conditional operator determining a single function argument.

I haven't done that elsewhere.  Personally, I find it easier to follow
the commas on the RHS.

>
>> @@ -718,6 +719,77 @@ static bool __init rsb_is_full_width(void)
>>      return true;
>>  }
>>  
>> +/*
>> + * HVM guests can create arbitrary RSB entries, including ones which point at
>> + * Xen supervisor mappings.
>> + *
>> + * Traditionally, the RSB is not isolated on vmexit, so Xen needs to take
>> + * safety precautions to prevent RSB speculation from consuming guest values.
>> + *
>> + * Intel eIBRS specifies that the RSB is flushed:
>> + *   1) on VMExit when IBRS=1, or
>> + *   2) shortly thereafter when Xen restores the host IBRS=1 setting.
>> + * However, a subset of eIBRS-capable parts also suffer PBRSB and need
>> + * software assistance to maintain RSB safety.
>> + */
>> +static __init enum hvm_rsb {
>> +    hvm_rsb_none,
>> +    hvm_rsb_pbrsb,
>> +    hvm_rsb_stuff32,
>> +} hvm_rsb_calculations(uint64_t caps)
>> +{
>> +    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
>> +         boot_cpu_data.x86 != 6 )
>> +        return hvm_rsb_stuff32;
>> +
>> +    if ( !(caps & ARCH_CAPS_IBRS_ALL) )
>> +        return hvm_rsb_stuff32;
>> +
>> +    if ( caps & ARCH_CAPS_PBRSB_NO )
>> +        return hvm_rsb_none;
>> +
>> +    /*
>> +     * We're choosing between the eIBRS-capable models which don't enumerate
>> +     * PBRSB_NO.  Earlier steppings of some models don't enumerate eIBRS and
>> +     * are excluded above.
>> +     */
>> +    switch ( boot_cpu_data.x86_model )
>> +    {
>> +        /*
>> +         * Core (inc Hybrid) CPUs to date (August 2022) are vulenrable.
>> +         */
>> +    case 0x55: /* Skylake X */
>> +    case 0x6a: /* Ice Lake SP */
>> +    case 0x6c: /* Ice Lake D */
>> +    case 0x7e: /* Ice Lake client */
>> +    case 0x8a: /* Lakefield (SNC/TMT) */
>> +    case 0x8c: /* Tiger Lake U */
>> +    case 0x8d: /* Tiger Lake H */
>> +    case 0x8e: /* Skylake-L */
> Hmm, is SDM Vol 4's initial table wrong then in stating Kaby Lake /
> Coffee Lake for this and ...
>
>> +    case 0x97: /* Alder Lake S */
>> +    case 0x9a: /* Alder Lake H/P/U */
>> +    case 0x9e: /* Skylake */
> ... this? Otoh I notice that intel-family.h also says Skylake in
> respective comments, despite the constants themselves being named
> differently. Yet again ...
>
>> +    case 0xa5: /* Comet Lake */
>> +    case 0xa6: /* Comet Lake U62 */
> ... you call these Comet Lake when intel-family.h says Skylake also for
> these (and names the latter's variable COMETLAKE_L).
>
> What is in the comments here is of course not of primary concern for
> getting this patch in, but the named anomalies will stand out when all
> of this is switched over to use intel-family.h's constants.

Naming in Skylake-uarch is a total mess.  Half is core codenames, and
half is marketing attempting to cover the fact that nothing much changed
in the 10's of steppings for 0x8e/0x9e.

But yes, I do need to clean up a few details here.  I'm still waiting
for some corrections to be made in official docs.

>
>> @@ -1327,9 +1400,33 @@ void __init init_speculation_mitigations(void)
>>       * HVM guests can always poison the RSB to point at Xen supervisor
>>       * mappings.
>>       */
>> +    hvm_rsb = hvm_rsb_calculations(caps);
>> +    if ( opt_rsb_hvm == -1 )
>> +        opt_rsb_hvm = hvm_rsb != hvm_rsb_none;
>> +
>>      if ( opt_rsb_hvm )
>>      {
>> -        setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
>> +        switch ( hvm_rsb )
>> +        {
>> +        case hvm_rsb_pbrsb:
>> +            setup_force_cpu_cap(X86_BUG_PBRSB);
>> +            break;
>> +
>> +        case hvm_rsb_none:
>> +            /*
>> +             * Somewhat arbitrary.  If something is wrong and the user has
>> +             * forced HVM RSB protections on a system where we think nothing
>> +             * is necessary, they they possibly know something we dont.
>> +             *
>> +             * Use stuff32 in this case, which is the most protection we can
>> +             * muster.
>> +             */
>> +            fallthrough;
>> +
>> +        case hvm_rsb_stuff32:
>> +            setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
>> +            break;
>> +        }
>>  
>>          /*
>>           * For SVM, Xen's RSB safety actions are performed before STGI, so
> For people using e.g. "spec-ctrl=no-ibrs" but leaving RSB stuffing enabled
> (or force-enabling it) we'd need to have an LFENCE somewhere as well.

We don't, but it's subtle.

Attempting to exploit PBRSB is a sub-case of trying to exploit general
RSB speculation on other processors which don't flush the RSB on vmexit.

Xen doesn't architecturally execute more RETs than CALLs (unlike other
open source hypervisors which do have a problem here), so an attacker
first needs to control speculation to find a non-architectural path with
excess RETs.

This already makes it a lack-of-defence-in-depth type problem,
because if the attacker could control speculation like that, they'd not
care about chaining it like this to a more complicated exploit.

An attacker has to find enough rets to unwind all the CALLs Xen has done
thus far (3 in this example.  2 from the first RSB loop, and the call up
into the vmexit handler), and then one extra to consume the bad RSB
entry.  i.e. they need to find an unexpected code sequence in Xen with 4
excess RETs, assuming they can find a gadget in vmx_vmexit_handler()
only which they can control speculation with.

All the HVM funcs are altcalls now, which would have been the obvious
place to try and attack, but can't be attacked any more.  We do have
some indirect branches, and other mechanisms in place to try and protect
them.

But... an attacker has to do all of this, in the speculative shadow of
the mispredicted loop exit, taking it firmly from "theoretically" into
"impossible" territory.

~Andrew

