All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Paul McKenney <paulmckrcu@gmail.com>
To: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	 Linus Torvalds <torvalds@linux-foundation.org>,
	 Daniel Sneddon <daniel.sneddon@linux.intel.com>,
	 Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	 Alexandre Chartre <alexandre.chartre@oracle.com>,
	 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Peter Zijlstra <peterz@infradead.org>,
	 Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Sean Christopherson <seanjc@google.com>,
	 Andrew Cooper <andrew.cooper3@citrix.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	 Nikolay Borisov <nik.borisov@suse.com>,
	KP Singh <kpsingh@kernel.org>,  Waiman Long <longman@redhat.com>,
	Borislav Petkov <bp@alien8.de>, Ingo Molnar <mingo@kernel.org>
Subject: Re: [PATCH v4 3/5] x86/syscall: Mark exit[_group] syscall handlers __noreturn
Date: Sun, 21 Apr 2024 14:47:20 -0700	[thread overview]
Message-ID: <CAJzB8QFx344hSSYy4jigtmQX+KfSpFOn+18WAfZAeym5LUMoKg@mail.gmail.com> (raw)
In-Reply-To: <CAJzB8QF_+51+rrJmq3iXkaAbmbbyKYVf0m_LpQCRSLS_FgHUMQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 14430 bytes --]

And this definitely helped, thank you!

However, this one still remains:

vmlinux.o: warning: objtool: ia32_sys_call+0x29b6:
__ia32_sys_exit_group() is missing a __noreturn annotation

Please see below for my diffs against next-20240419, in case I messed
something up.

I attached a copy as well, given that I am away from mutt, hence using
gmail directly.

                                                Thanx, Paul

-----------------------------------------------------

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6de50b80702e6..9810ba2857a5c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,10 @@ static __always_inline bool do_syscall_x64(struct
pt_regs *regs, int nr)

  if (likely(unr < NR_syscalls)) {
  unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = x64_sys_call(regs, unr);
+ if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+ regs->ax = sys_call_table[unr](regs);
+ else
+ regs->ax = x64_sys_call(regs, unr);
  return true;
  }
  return false;
@@ -66,7 +69,10 @@ static __always_inline bool do_syscall_x32(struct
pt_regs *regs, int nr)

  if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
  xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call(regs, xnr);
+ if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+ regs->ax = x32_sys_call_table[xnr](regs);
+ else
+ regs->ax = x32_sys_call(regs, xnr);
  return true;
  }
  return false;
@@ -162,7 +168,10 @@ static __always_inline void
do_syscall_32_irqs_on(struct pt_regs *regs, int nr)

  if (likely(unr < IA32_NR_syscalls)) {
  unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call(regs, unr);
+ if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+ regs->ax = ia32_sys_call_table[unr](regs);
+ else
+ regs->ax = ia32_sys_call(regs, unr);
  } else if (nr != -1) {
  regs->ax = __ia32_sys_ni_syscall(regs);
  }
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index c2235bae17ef6..aab31760b4e3e 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -14,25 +14,16 @@
 #endif

 #define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
-
 #include <asm/syscalls_32.h>
 #undef __SYSCALL

-/*
- * The sys_call_table[] is no longer used for system calls, but
- * kernel/trace/trace_syscalls.c still wants to know the system
- * call address.
- */
-#ifdef CONFIG_X86_32
 #define __SYSCALL(nr, sym) __ia32_##sym,
-const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t ia32_sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
 #undef __SYSCALL
-#endif

 #define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
-
 long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
  switch (nr) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 33b3f09e6f151..ff36a993a07e0 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,14 +8,13 @@
 #include <asm/syscall.h>

 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn
__x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL

-/*
- * The sys_call_table[] is no longer used for system calls, but
- * kernel/trace/trace_syscalls.c still wants to know the system
- * call address.
- */
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
+
 #define __SYSCALL(nr, sym) __x64_##sym,
 const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
@@ -23,7 +22,6 @@ const sys_call_ptr_t sys_call_table[] = {
 #undef __SYSCALL

 #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-
 long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
  switch (nr) {
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 03de4a9321318..4221ecce6e689 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,11 +8,20 @@
 #include <asm/syscall.h>

 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn
__x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL

-#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
+
+#define __SYSCALL(nr, sym) __x64_##sym,
+const sys_call_ptr_t x32_sys_call_table[] = {
+#include <asm/syscalls_x32.h>
+};
+#undef __SYSCALL

+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
 long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
  switch (nr) {
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl
b/arch/x86/entry/syscalls/syscall_64.tbl
index a396f6e6ab5bf..7ec68d94eb593 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -2,7 +2,7 @@
 # 64-bit system call numbers and entry vectors
 #
 # The format is:
-# <number> <abi> <name> <entry point>
+# <number> <abi> <name> <entry point> [0 noreturn]
 #
 # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
 #
@@ -68,7 +68,7 @@
 57 common fork sys_fork
 58 common vfork sys_vfork
 59 64 execve sys_execve
-60 common exit sys_exit
+60 common exit sys_exit 0 noreturn
 61 common wait4 sys_wait4
 62 common kill sys_kill
 63 common uname sys_newuname
@@ -239,7 +239,7 @@
 228 common clock_gettime sys_clock_gettime
 229 common clock_getres sys_clock_getres
 230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
+231 common exit_group sys_exit_group 0 noreturn
 232 common epoll_wait sys_epoll_wait
 233 common epoll_ctl sys_epoll_ctl
 234 common tgkill sys_tgkill
diff --git a/arch/x86/include/asm/cpufeatures.h
b/arch/x86/include/asm/cpufeatures.h
index 3c7434329661c..d64b0a5291f10 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -470,6 +470,7 @@
 #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
 #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW
control enabled */
 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear
branch history at vmexit using SW loop */
+#define X86_FEATURE_INDIRECT_BRANCH_OK (21*32+ 5) /* "" It's OK to
use indirect branches */

 /*
  * BUG word(s)
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2fc7bc3863ff6..dfb59521244c2 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,14 +16,20 @@
 #include <asm/thread_info.h> /* for TS_COMPAT */
 #include <asm/unistd.h>

-/* This is used purely for kernel/trace/trace_syscalls.c */
 typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];

+#if defined(CONFIG_X86_32)
+#define ia32_sys_call_table sys_call_table
+#else
 /*
  * These may not exist, but still put the prototypes in so we
  * can use IS_ENABLED().
  */
+extern const sys_call_ptr_t ia32_sys_call_table[];
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
 extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ab18185894dfd..5fca46c78daf2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1665,6 +1665,12 @@ static void __init bhi_select_mitigation(void)
  if (!IS_ENABLED(CONFIG_X86_64))
  return;

+ /*
+ * There's no HW mitigation in place.  Mark indirect branches as
+ * "not OK".
+ */
+ setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+
  /* Mitigate KVM by default */
  setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
  pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
@@ -1679,6 +1685,28 @@ static void __init spectre_v2_select_mitigation(void)
  enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
  enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;

+ /*
+ * X86_FEATURE_INDIRECT_BRANCH_OK indicates that indirect calls are
+ * "OK" to use due to (at least) one of the following being true:
+ *
+ *   - the CPU isn't vulnerable to Spectre v2, BHI, etc;
+ *
+ *   - a HW mitigation is in place (e.g., IBRS, eIBRS+BHI_DIS_S); or
+ *
+ *   - the user disabled mitigations.
+ *
+ * Clearing the bit enables certain indirect branch "easy targets" [*]
+ * to be converted to a series of direct branches.
+ *
+ * Assume innocence until proven guilty: set it now and clear it later
+ * if/when needed.
+ *
+ * [*] The closer the indirect branch is to kernel entry, and the more
+ *     user-controlled registers there are, the easier target it may be
+ *     for future Spectre v2 variants.
+ */
+ setup_force_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+
  /*
  * If the CPU is not affected and the command line mode is NONE or AUTO
  * then nothing to do.
@@ -1765,11 +1793,16 @@ static void __init spectre_v2_select_mitigation(void)
  break;

  case SPECTRE_V2_LFENCE:
+ setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+ fallthrough;
  case SPECTRE_V2_EIBRS_LFENCE:
  setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
- fallthrough;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ break;

  case SPECTRE_V2_RETPOLINE:
+ setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+ fallthrough;
  case SPECTRE_V2_EIBRS_RETPOLINE:
  setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
  break;
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 89df5d89d6640..c7d4bf955d2ba 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,6 +24,7 @@
 #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)

 #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long,
unsigned long, unsigned long, unsigned long, unsigned long, unsigned
long);
+#define __SYSCALL_NORETURN __SYSCALL
 #include <asm/syscalls_32.h>

 #undef __SYSCALL
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b0b4cfd2308c8..4760c40ae5cd0 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -19,6 +19,7 @@
 #define sys_ioperm sys_ni_syscall

 #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long,
unsigned long, unsigned long, unsigned long, unsigned long, unsigned
long);
+#define __SYSCALL_NORETURN __SYSCALL
 #include <asm/syscalls_64.h>

 #undef __SYSCALL
diff --git a/scripts/syscalltbl.sh b/scripts/syscalltbl.sh
index 6abe143889ef6..16487d47e06a3 100755
--- a/scripts/syscalltbl.sh
+++ b/scripts/syscalltbl.sh
@@ -54,7 +54,7 @@ nxt=0

 grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {

- while read nr abi name native compat ; do
+ while read nr abi name native compat noreturn; do

  if [ $nxt -gt $nr ]; then
  echo "error: $infile: syscall table is not sorted or duplicates the
same syscall number" >&2
@@ -66,7 +66,9 @@ grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {
  nxt=$((nxt + 1))
  done

- if [ -n "$compat" ]; then
+ if [ -n "$noreturn" ]; then
+ echo "__SYSCALL_NORETURN($nr, $native)"
+ elif [ -n "$compat" ]; then
  echo "__SYSCALL_WITH_COMPAT($nr, $native, $compat)"
  elif [ -n "$native" ]; then
  echo "__SYSCALL($nr, $native)"
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 7ebf29c911849..1e8141ef1b15d 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -7,12 +7,16 @@
  * Yes, this is unfortunate.  A better solution is in the works.
  */
 NORETURN(__fortify_panic)
+NORETURN(__ia32_sys_exit)
+NORETURN(__ia32_sys_exit_group)
 NORETURN(__kunit_abort)
 NORETURN(__module_put_and_kthread_exit)
 NORETURN(__reiserfs_panic)
 NORETURN(__stack_chk_fail)
 NORETURN(__tdx_hypercall_failed)
 NORETURN(__ubsan_handle_builtin_unreachable)
+NORETURN(__x64_sys_exit)
+NORETURN(__x64_sys_exit_group)
 NORETURN(arch_cpu_idle_dead)
 NORETURN(bch2_trans_in_restart_error)
 NORETURN(bch2_trans_restart_error)

On Sun, Apr 21, 2024 at 1:40 PM Paul McKenney <paulmckrcu@gmail.com> wrote:
>
> They apply fine as is, so I have started tests with that pair of patches.
>
>                                               Thanx, Paul
>
> On Sat, Apr 20, 2024 at 10:25 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > On Sat, Apr 20, 2024 at 06:58:58AM -0700, Paul E. McKenney wrote:
> > > On Fri, Apr 19, 2024 at 02:09:49PM -0700, Josh Poimboeuf wrote:
> > > > The direct-call syscall dispatch functions don't know that the exit()
> > > > and exit_group() syscall handlers don't return.  As a result the call
> > > > sites aren't optimized accordingly.
> > > >
> > > > Fix that by marking those exit syscall declarations as __noreturn.
> > > >
> > > > Fixes the following warnings:
> > > >
> > > >   vmlinux.o: warning: objtool: x64_sys_call+0x2804: __x64_sys_exit() is missing a __noreturn annotation
> > > >   vmlinux.o: warning: objtool: ia32_sys_call+0x29b6: __ia32_sys_exit_group() is missing a __noreturn annotation
> > > >
> > > > Fixes: 7390db8aea0d ("x86/bhi: Add support for clearing branch history at syscall entry")
> > > > Reported-by: "Paul E. McKenney" <paulmck@kernel.org>
> > > > Closes: https://lkml.kernel.org/lkml/6dba9b32-db2c-4e6d-9500-7a08852f17a3@paulmck-laptop
> > > > Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> > >
> > > Looks good, but it does not apply on top of current -next and I don't
> > > trust myself to hand-apply it (something about having just got off of
> > > a flight across the big pond).
> > >
> > > Could you please let me know what else do I need to pull in to be able
> > > to cleanly apply this one?
> >
> > This patch has a dependency on an earlier patch in the set:
> >
> >   https://lkml.kernel.org/lkml/982d05a2f669140f26500bee643011896d661094.1713559768.git.jpoimboe@kernel.org
> >
> > Though I think it's not a hard dependency and I could reverse the order
> > of the patches if needed.
> >
> > --
> > Josh

[-- Attachment #2: diffs --]
[-- Type: application/octet-stream, Size: 11722 bytes --]

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6de50b80702e6..9810ba2857a5c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,10 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
 
 	if (likely(unr < NR_syscalls)) {
 		unr = array_index_nospec(unr, NR_syscalls);
-		regs->ax = x64_sys_call(regs, unr);
+		if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+			regs->ax = sys_call_table[unr](regs);
+		else
+			regs->ax = x64_sys_call(regs, unr);
 		return true;
 	}
 	return false;
@@ -66,7 +69,10 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 
 	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
 		xnr = array_index_nospec(xnr, X32_NR_syscalls);
-		regs->ax = x32_sys_call(regs, xnr);
+		if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+			regs->ax = x32_sys_call_table[xnr](regs);
+		else
+			regs->ax = x32_sys_call(regs, xnr);
 		return true;
 	}
 	return false;
@@ -162,7 +168,10 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 
 	if (likely(unr < IA32_NR_syscalls)) {
 		unr = array_index_nospec(unr, IA32_NR_syscalls);
-		regs->ax = ia32_sys_call(regs, unr);
+		if (likely(cpu_feature_enabled(X86_FEATURE_INDIRECT_BRANCH_OK)))
+			regs->ax = ia32_sys_call_table[unr](regs);
+		else
+			regs->ax = ia32_sys_call(regs, unr);
 	} else if (nr != -1) {
 		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index c2235bae17ef6..aab31760b4e3e 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -14,25 +14,16 @@
 #endif
 
 #define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
-
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
-/*
- * The sys_call_table[] is no longer used for system calls, but
- * kernel/trace/trace_syscalls.c still wants to know the system
- * call address.
- */
-#ifdef CONFIG_X86_32
 #define __SYSCALL(nr, sym) __ia32_##sym,
-const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t ia32_sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
 #undef __SYSCALL
-#endif
 
 #define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
-
 long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
 	switch (nr) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 33b3f09e6f151..ff36a993a07e0 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,14 +8,13 @@
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
-/*
- * The sys_call_table[] is no longer used for system calls, but
- * kernel/trace/trace_syscalls.c still wants to know the system
- * call address.
- */
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
+
 #define __SYSCALL(nr, sym) __x64_##sym,
 const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
@@ -23,7 +22,6 @@ const sys_call_ptr_t sys_call_table[] = {
 #undef __SYSCALL
 
 #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-
 long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
 	switch (nr) {
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 03de4a9321318..4221ecce6e689 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,11 +8,20 @@
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
+
+#define __SYSCALL(nr, sym) __x64_##sym,
+const sys_call_ptr_t x32_sys_call_table[] = {
+#include <asm/syscalls_x32.h>
+};
+#undef __SYSCALL
 
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
 long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
 {
 	switch (nr) {
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index a396f6e6ab5bf..7ec68d94eb593 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -2,7 +2,7 @@
 # 64-bit system call numbers and entry vectors
 #
 # The format is:
-# <number> <abi> <name> <entry point>
+# <number> <abi> <name> <entry point> [0 noreturn]
 #
 # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
 #
@@ -68,7 +68,7 @@
 57	common	fork			sys_fork
 58	common	vfork			sys_vfork
 59	64	execve			sys_execve
-60	common	exit			sys_exit
+60	common	exit			sys_exit		0	noreturn
 61	common	wait4			sys_wait4
 62	common	kill			sys_kill
 63	common	uname			sys_newuname
@@ -239,7 +239,7 @@
 228	common	clock_gettime		sys_clock_gettime
 229	common	clock_getres		sys_clock_getres
 230	common	clock_nanosleep		sys_clock_nanosleep
-231	common	exit_group		sys_exit_group
+231	common	exit_group		sys_exit_group		0	noreturn
 232	common	epoll_wait		sys_epoll_wait
 233	common	epoll_ctl		sys_epoll_ctl
 234	common	tgkill			sys_tgkill
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3c7434329661c..d64b0a5291f10 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -470,6 +470,7 @@
 #define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* "" BHI_DIS_S HW control available */
 #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* "" BHI_DIS_S HW control enabled */
 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_INDIRECT_BRANCH_OK	(21*32+ 5) /* "" It's OK to use indirect branches */
 
 /*
  * BUG word(s)
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2fc7bc3863ff6..dfb59521244c2 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,14 +16,20 @@
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>
 
-/* This is used purely for kernel/trace/trace_syscalls.c */
 typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
+#if defined(CONFIG_X86_32)
+#define ia32_sys_call_table sys_call_table
+#else
 /*
  * These may not exist, but still put the prototypes in so we
  * can use IS_ENABLED().
  */
+extern const sys_call_ptr_t ia32_sys_call_table[];
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
 extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ab18185894dfd..5fca46c78daf2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1665,6 +1665,12 @@ static void __init bhi_select_mitigation(void)
 	if (!IS_ENABLED(CONFIG_X86_64))
 		return;
 
+	/*
+	 * There's no HW mitigation in place.  Mark indirect branches as
+	 * "not OK".
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+
 	/* Mitigate KVM by default */
 	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
 	pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
@@ -1679,6 +1685,28 @@ static void __init spectre_v2_select_mitigation(void)
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
 	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
 
+	/*
+	 * X86_FEATURE_INDIRECT_BRANCH_OK indicates that indirect calls are
+	 * "OK" to use due to (at least) one of the following being true:
+	 *
+	 *   - the CPU isn't vulnerable to Spectre v2, BHI, etc;
+	 *
+	 *   - a HW mitigation is in place (e.g., IBRS, eIBRS+BHI_DIS_S); or
+	 *
+	 *   - the user disabled mitigations.
+	 *
+	 * Clearing the bit enables certain indirect branch "easy targets" [*]
+	 * to be converted to a series of direct branches.
+	 *
+	 * Assume innocence until proven guilty: set it now and clear it later
+	 * if/when needed.
+	 *
+	 * [*] The closer the indirect branch is to kernel entry, and the more
+	 *     user-controlled registers there are, the easier target it may be
+	 *     for future Spectre v2 variants.
+	 */
+	setup_force_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+
 	/*
 	 * If the CPU is not affected and the command line mode is NONE or AUTO
 	 * then nothing to do.
@@ -1765,11 +1793,16 @@ static void __init spectre_v2_select_mitigation(void)
 		break;
 
 	case SPECTRE_V2_LFENCE:
+		setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+		fallthrough;
 	case SPECTRE_V2_EIBRS_LFENCE:
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
-		fallthrough;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+		break;
 
 	case SPECTRE_V2_RETPOLINE:
+		setup_clear_cpu_cap(X86_FEATURE_INDIRECT_BRANCH_OK);
+		fallthrough;
 	case SPECTRE_V2_EIBRS_RETPOLINE:
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
 		break;
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 89df5d89d6640..c7d4bf955d2ba 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,6 +24,7 @@
 #define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
 
 #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_NORETURN __SYSCALL
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b0b4cfd2308c8..4760c40ae5cd0 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -19,6 +19,7 @@
 #define sys_ioperm sys_ni_syscall
 
 #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_NORETURN __SYSCALL
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL
diff --git a/scripts/syscalltbl.sh b/scripts/syscalltbl.sh
index 6abe143889ef6..16487d47e06a3 100755
--- a/scripts/syscalltbl.sh
+++ b/scripts/syscalltbl.sh
@@ -54,7 +54,7 @@ nxt=0
 
 grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {
 
-	while read nr abi name native compat ; do
+	while read nr abi name native compat noreturn; do
 
 		if [ $nxt -gt $nr ]; then
 			echo "error: $infile: syscall table is not sorted or duplicates the same syscall number" >&2
@@ -66,7 +66,9 @@ grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {
 			nxt=$((nxt + 1))
 		done
 
-		if [ -n "$compat" ]; then
+		if [ -n "$noreturn" ]; then
+			echo "__SYSCALL_NORETURN($nr, $native)"
+		elif [ -n "$compat" ]; then
 			echo "__SYSCALL_WITH_COMPAT($nr, $native, $compat)"
 		elif [ -n "$native" ]; then
 			echo "__SYSCALL($nr, $native)"
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 7ebf29c911849..1e8141ef1b15d 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -7,12 +7,16 @@
  * Yes, this is unfortunate.  A better solution is in the works.
  */
 NORETURN(__fortify_panic)
+NORETURN(__ia32_sys_exit)
+NORETURN(__ia32_sys_exit_group)
 NORETURN(__kunit_abort)
 NORETURN(__module_put_and_kthread_exit)
 NORETURN(__reiserfs_panic)
 NORETURN(__stack_chk_fail)
 NORETURN(__tdx_hypercall_failed)
 NORETURN(__ubsan_handle_builtin_unreachable)
+NORETURN(__x64_sys_exit)
+NORETURN(__x64_sys_exit_group)
 NORETURN(arch_cpu_idle_dead)
 NORETURN(bch2_trans_in_restart_error)
 NORETURN(bch2_trans_restart_error)

  reply	other threads:[~2024-04-21 21:47 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-19 21:09 [PATCH 0/5] x86/bugs: more BHI fixes Josh Poimboeuf
2024-04-19 21:09 ` [PATCH v4 1/5] x86/bugs: Only harden syscalls when needed Josh Poimboeuf
2024-04-22  8:09   ` Yujie Liu
2024-05-07  5:17     ` Josh Poimboeuf
2024-04-19 21:09 ` [PATCH v4 2/5] cpu/speculation: Fix CPU mitigation defaults for !x86 Josh Poimboeuf
2024-04-20  0:09   ` Sean Christopherson
2024-04-23 14:10     ` Sean Christopherson
2024-04-24  5:35       ` Josh Poimboeuf
2024-04-19 21:09 ` [PATCH v4 3/5] x86/syscall: Mark exit[_group] syscall handlers __noreturn Josh Poimboeuf
2024-04-20 13:58   ` Paul E. McKenney
2024-04-21  5:25     ` Josh Poimboeuf
2024-04-21 20:40       ` Paul McKenney
2024-04-21 21:47         ` Paul McKenney [this message]
2024-05-02 23:48           ` Paul McKenney
2024-05-03 15:38             ` Paul E. McKenney
2024-05-03 19:56             ` Josh Poimboeuf
2024-05-03 20:44               ` Josh Poimboeuf
2024-05-03 23:33                 ` Paul E. McKenney
2024-05-03 23:48                   ` Josh Poimboeuf
2024-05-04 16:48                     ` Paul E. McKenney
2024-04-19 21:09 ` [PATCH v4 4/5] x86/bugs: Remove duplicate Spectre cmdline option descriptions Josh Poimboeuf
2024-04-19 21:09 ` [PATCH v4 5/5] x86/bugs: Add 'spectre_bhi=vmexit' cmdline option Josh Poimboeuf
2024-04-19 21:46   ` Josh Poimboeuf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAJzB8QFx344hSSYy4jigtmQX+KfSpFOn+18WAfZAeym5LUMoKg@mail.gmail.com \
    --to=paulmckrcu@gmail.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=daniel.sneddon@linux.intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jpoimboe@kernel.org \
    --cc=konrad.wilk@oracle.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@kernel.org \
    --cc=nik.borisov@suse.com \
    --cc=paulmck@kernel.org \
    --cc=pawan.kumar.gupta@linux.intel.com \
    --cc=peterz@infradead.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.