LKML Archive mirror
 help / color / mirror / Atom feed
From: Denys Vlasenko <dvlasenk@redhat.com>
To: Ingo Molnar <mingo@kernel.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>, Thomas Graf <tgraf@suug.ch>,
	Peter Zijlstra <peterz@infradead.org>,
	David Rientjes <rientjes@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH] force inlining of some byteswap operations
Date: Thu,  4 Feb 2016 20:45:35 +0100	[thread overview]
Message-ID: <1454615136-32308-2-git-send-email-dvlasenk@redhat.com> (raw)
In-Reply-To: <1454615136-32308-1-git-send-email-dvlasenk@redhat.com>

Sometimes gcc mysteriously doesn't inline
very small functions we expect to be inlined. See
    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122

With this .config:
    http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os
the following functions get deinlined many times.
Examples of disassembly:

<get_unaligned_be16> (12 copies, 51 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be32> (12 copies, 135 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be64> (2 copies, 20 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq

<__swab16p> (16 copies, 146 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       86 e0                   xchg   %ah,%al
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32p> (43 copies, ~560 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       0f c8                   bswap  %eax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab64p> (21 copies, 119 calls):
       55                      push   %rbp
       48 89 f8                mov    %rdi,%rax
       48 0f c8                bswap  %rax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32s> (6 copies, 47 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       89 07                   mov    %eax,(%rdi)
       5d                      pop    %rbp
       c3                      retq

This patch fixes this via s/inline/__always_inline/.
Code size decrease after the patch is ~4.5k:

    text     data      bss       dec     hex filename
92202377 20826112 36417536 149446025 8e85d89 vmlinux
92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
---
 include/uapi/linux/byteorder/big_endian.h    | 24 ++++++++++++------------
 include/uapi/linux/byteorder/little_endian.h | 24 ++++++++++++------------
 include/uapi/linux/swab.h                    | 10 +++++-----
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h
index 6723744..cdab17a 100644
--- a/include/uapi/linux/byteorder/big_endian.h
+++ b/include/uapi/linux/byteorder/big_endian.h
@@ -40,51 +40,51 @@
 #define __cpu_to_be16(x) ((__force __be16)(__u16)(x))
 #define __be16_to_cpu(x) ((__force __u16)(__be16)(x))
 
-static inline __le64 __cpu_to_le64p(const __u64 *p)
+static __always_inline __le64 __cpu_to_le64p(const __u64 *p)
 {
 	return (__force __le64)__swab64p(p);
 }
-static inline __u64 __le64_to_cpup(const __le64 *p)
+static __always_inline __u64 __le64_to_cpup(const __le64 *p)
 {
 	return __swab64p((__u64 *)p);
 }
-static inline __le32 __cpu_to_le32p(const __u32 *p)
+static __always_inline __le32 __cpu_to_le32p(const __u32 *p)
 {
 	return (__force __le32)__swab32p(p);
 }
-static inline __u32 __le32_to_cpup(const __le32 *p)
+static __always_inline __u32 __le32_to_cpup(const __le32 *p)
 {
 	return __swab32p((__u32 *)p);
 }
-static inline __le16 __cpu_to_le16p(const __u16 *p)
+static __always_inline __le16 __cpu_to_le16p(const __u16 *p)
 {
 	return (__force __le16)__swab16p(p);
 }
-static inline __u16 __le16_to_cpup(const __le16 *p)
+static __always_inline __u16 __le16_to_cpup(const __le16 *p)
 {
 	return __swab16p((__u16 *)p);
 }
-static inline __be64 __cpu_to_be64p(const __u64 *p)
+static __always_inline __be64 __cpu_to_be64p(const __u64 *p)
 {
 	return (__force __be64)*p;
 }
-static inline __u64 __be64_to_cpup(const __be64 *p)
+static __always_inline __u64 __be64_to_cpup(const __be64 *p)
 {
 	return (__force __u64)*p;
 }
-static inline __be32 __cpu_to_be32p(const __u32 *p)
+static __always_inline __be32 __cpu_to_be32p(const __u32 *p)
 {
 	return (__force __be32)*p;
 }
-static inline __u32 __be32_to_cpup(const __be32 *p)
+static __always_inline __u32 __be32_to_cpup(const __be32 *p)
 {
 	return (__force __u32)*p;
 }
-static inline __be16 __cpu_to_be16p(const __u16 *p)
+static __always_inline __be16 __cpu_to_be16p(const __u16 *p)
 {
 	return (__force __be16)*p;
 }
-static inline __u16 __be16_to_cpup(const __be16 *p)
+static __always_inline __u16 __be16_to_cpup(const __be16 *p)
 {
 	return (__force __u16)*p;
 }
diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h
index d876736..4b93f2b 100644
--- a/include/uapi/linux/byteorder/little_endian.h
+++ b/include/uapi/linux/byteorder/little_endian.h
@@ -40,51 +40,51 @@
 #define __cpu_to_be16(x) ((__force __be16)__swab16((x)))
 #define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x))
 
-static inline __le64 __cpu_to_le64p(const __u64 *p)
+static __always_inline __le64 __cpu_to_le64p(const __u64 *p)
 {
 	return (__force __le64)*p;
 }
-static inline __u64 __le64_to_cpup(const __le64 *p)
+static __always_inline __u64 __le64_to_cpup(const __le64 *p)
 {
 	return (__force __u64)*p;
 }
-static inline __le32 __cpu_to_le32p(const __u32 *p)
+static __always_inline __le32 __cpu_to_le32p(const __u32 *p)
 {
 	return (__force __le32)*p;
 }
-static inline __u32 __le32_to_cpup(const __le32 *p)
+static __always_inline __u32 __le32_to_cpup(const __le32 *p)
 {
 	return (__force __u32)*p;
 }
-static inline __le16 __cpu_to_le16p(const __u16 *p)
+static __always_inline __le16 __cpu_to_le16p(const __u16 *p)
 {
 	return (__force __le16)*p;
 }
-static inline __u16 __le16_to_cpup(const __le16 *p)
+static __always_inline __u16 __le16_to_cpup(const __le16 *p)
 {
 	return (__force __u16)*p;
 }
-static inline __be64 __cpu_to_be64p(const __u64 *p)
+static __always_inline __be64 __cpu_to_be64p(const __u64 *p)
 {
 	return (__force __be64)__swab64p(p);
 }
-static inline __u64 __be64_to_cpup(const __be64 *p)
+static __always_inline __u64 __be64_to_cpup(const __be64 *p)
 {
 	return __swab64p((__u64 *)p);
 }
-static inline __be32 __cpu_to_be32p(const __u32 *p)
+static __always_inline __be32 __cpu_to_be32p(const __u32 *p)
 {
 	return (__force __be32)__swab32p(p);
 }
-static inline __u32 __be32_to_cpup(const __be32 *p)
+static __always_inline __u32 __be32_to_cpup(const __be32 *p)
 {
 	return __swab32p((__u32 *)p);
 }
-static inline __be16 __cpu_to_be16p(const __u16 *p)
+static __always_inline __be16 __cpu_to_be16p(const __u16 *p)
 {
 	return (__force __be16)__swab16p(p);
 }
-static inline __u16 __be16_to_cpup(const __be16 *p)
+static __always_inline __u16 __be16_to_cpup(const __be16 *p)
 {
 	return __swab16p((__u16 *)p);
 }
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index 0e011eb..3f10e53 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -151,7 +151,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val)
  * __swab16p - return a byteswapped 16-bit value from a pointer
  * @p: pointer to a naturally-aligned 16-bit value
  */
-static inline __u16 __swab16p(const __u16 *p)
+static __always_inline __u16 __swab16p(const __u16 *p)
 {
 #ifdef __arch_swab16p
 	return __arch_swab16p(p);
@@ -164,7 +164,7 @@ static inline __u16 __swab16p(const __u16 *p)
  * __swab32p - return a byteswapped 32-bit value from a pointer
  * @p: pointer to a naturally-aligned 32-bit value
  */
-static inline __u32 __swab32p(const __u32 *p)
+static __always_inline __u32 __swab32p(const __u32 *p)
 {
 #ifdef __arch_swab32p
 	return __arch_swab32p(p);
@@ -177,7 +177,7 @@ static inline __u32 __swab32p(const __u32 *p)
  * __swab64p - return a byteswapped 64-bit value from a pointer
  * @p: pointer to a naturally-aligned 64-bit value
  */
-static inline __u64 __swab64p(const __u64 *p)
+static __always_inline __u64 __swab64p(const __u64 *p)
 {
 #ifdef __arch_swab64p
 	return __arch_swab64p(p);
@@ -232,7 +232,7 @@ static inline void __swab16s(__u16 *p)
  * __swab32s - byteswap a 32-bit value in-place
  * @p: pointer to a naturally-aligned 32-bit value
  */
-static inline void __swab32s(__u32 *p)
+static __always_inline void __swab32s(__u32 *p)
 {
 #ifdef __arch_swab32s
 	__arch_swab32s(p);
@@ -245,7 +245,7 @@ static inline void __swab32s(__u32 *p)
  * __swab64s - byteswap a 64-bit value in-place
  * @p: pointer to a naturally-aligned 64-bit value
  */
-static inline void __swab64s(__u64 *p)
+static __always_inline void __swab64s(__u64 *p)
 {
 #ifdef __arch_swab64s
 	__arch_swab64s(p);
-- 
1.8.1.4

  reply	other threads:[~2016-02-04 19:45 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-04 19:45 [PATCH] asm-generic: force inlining of some atomic_long operations Denys Vlasenko
2016-02-04 19:45 ` Denys Vlasenko [this message]
2016-02-05  7:28   ` [PATCH] force inlining of some byteswap operations Ingo Molnar
2016-04-13  3:36   ` This patch triggers a bad gcc bug (was Re: [PATCH] force inlining of some byteswap operations) Josh Poimboeuf
2016-04-13 12:12     ` Denys Vlasenko
2016-04-13 12:36       ` Josh Poimboeuf
2016-04-13 15:15         ` Josh Poimboeuf
2016-04-13 16:55           ` James Bottomley
2016-04-13 17:10             ` Josh Poimboeuf
2016-04-14 15:29               ` Denys Vlasenko
2016-04-14 15:57                 ` Josh Poimboeuf
2016-04-14 17:09                   ` Denys Vlasenko
2016-04-15  5:45                     ` Ingo Molnar
2016-04-15 13:47                       ` Josh Poimboeuf
2016-04-15 22:20                         ` Josh Poimboeuf
2016-04-16  9:03                           ` Ingo Molnar
2016-04-18 13:39                             ` Josh Poimboeuf
2016-04-18 14:07                               ` Arnd Bergmann
2016-04-18 14:12                                 ` Josh Poimboeuf
2016-04-18 14:21                                   ` Arnd Bergmann
2016-04-19  8:52                               ` Ingo Molnar
2016-04-19 13:56                                 ` [PATCH] scsi: fc: force inlining of wwn conversion functions Josh Poimboeuf
2016-04-22 23:17                                   ` Quinn Tran
2016-04-25 16:07                                   ` Josh Poimboeuf
2016-04-26  2:40                                     ` Martin K. Petersen
2016-04-26  3:37                                       ` James Bottomley
2016-04-26  7:22                                         ` Arnd Bergmann
2016-04-26  8:35                                           ` Christoph Hellwig
2016-04-26 10:05                                             ` Arnd Bergmann
2016-04-26 13:06                                           ` Martin K. Petersen
2016-04-26 15:58                                             ` Arnd Bergmann
2016-04-26 22:36                                               ` James Bottomley
2016-04-27  0:44                                                 ` Martin K. Petersen
2016-04-27 11:05                                               ` Martin Jambor
2016-04-27 21:34                                                 ` Arnd Bergmann
2016-04-28 14:58                                                   ` Chris Metcalf
2016-04-28 15:23                                                     ` Arnd Bergmann
2016-04-28 15:48                                                       ` Chris Metcalf
2016-04-27 22:00                                                 ` [PATCH, RFT] byteswap: try to avoid __builtin_constant_p gcc bug Arnd Bergmann
2016-04-27 22:11                                                   ` Josh Poimboeuf
2016-04-28 16:27                                                     ` Quinn Tran
2016-04-16  7:42                       ` This patch triggers a bad gcc bug (was Re: [PATCH] force inlining of some byteswap operations) Arnd Bergmann
2016-04-18 13:22                         ` Josh Poimboeuf
2016-02-04 19:45 ` [PATCH] force inlining of unaligned byteswap operations Denys Vlasenko
2016-02-05  7:28   ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1454615136-32308-2-git-send-email-dvlasenk@redhat.com \
    --to=dvlasenk@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rientjes@google.com \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).