From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751534AbcBEGlO (ORCPT ); Fri, 5 Feb 2016 01:41:14 -0500 Received: from mail4.hitachi.co.jp ([133.145.228.5]:43877 "EHLO mail4.hitachi.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751449AbcBEGlL (ORCPT ); Fri, 5 Feb 2016 01:41:11 -0500 From: =?utf-8?B?5bmz5p2+6ZuF5bezIC8gSElSQU1BVFXvvIxNQVNBTUk=?= To: "'Rusty Russell'" , "linux-kernel@vger.kernel.org" CC: Weilong Chen , "\"PDate:Wed\"@ozlabs.org" <"PDate:Wed"@ozlabs.org>, "3@ozlabs.org" <3@ozlabs.org>, "Feb@ozlabs.org" , "2016@ozlabs.org" <2016@ozlabs.org>, "\"16:55:26\"@ozlabs.org" <"16:55:26"@ozlabs.org>, "+1030@ozlabs.org" <+1030@ozlabs.org>, "stable@kernel.org" Subject: RE: [PATCH 3/3] modules: fix longstanding /proc/kallsyms vs module insertion race. Thread-Topic: [!][PATCH 3/3] modules: fix longstanding /proc/kallsyms vs module insertion race. Thread-Index: AQHRX6ozruhIxDUtLESX3nNFOIXaMZ8cqpHw Date: Fri, 5 Feb 2016 06:41:06 +0000 Message-ID: <50399556C9727B4D88A595C8584AAB37B4DDD5AF@GSjpTKYDCembx31.service.hitachi.net> References: <1454631253-14379-1-git-send-email-rusty@rustcorp.com.au> <1454631253-14379-4-git-send-email-rusty@rustcorp.com.au> In-Reply-To: <1454631253-14379-4-git-send-email-rusty@rustcorp.com.au> Accept-Language: ja-JP, en-US Content-Language: ja-JP X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.198.220.63] Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: 8bit X-MIME-Autoconverted: from base64 to 8bit by mail.home.local id u156fRqx012711 From: Rusty Russell [mailto:rusty@rustcorp.com.au] > >For CONFIG_KALLSYMS, we keep two symbol tables and two string tables. >There's one full copy, marked SHF_ALLOC and laid out at the end of the >module's init section. There's also a cut-down version that only >contains core symbols and strings, and lives in the module's core >section. > >After module init (and before we free the module memory), we switch >the mod->symtab, mod->num_symtab and mod->strtab to point to the core >versions. We do this under the module_mutex. > >However, kallsyms doesn't take the module_mutex: it uses >preempt_disable() and rcu tricks to walk through the modules, because >it's used in the oops path. It's also used in /proc/kallsyms. >There's nothing atomic about the change of these variables, so we can >get the old (larger!) num_symtab and the new symtab pointer; in fact >this is what I saw when trying to reproduce. > >By grouping these variables together, we can use a >carefully-dereferenced pointer to ensure we always get one or the >other (the free of the module init section is already done in an RCU >callback, so that's safe). We allocate the init one at the end of the >module init section, and keep the core one inside the struct module >itself (it could also have been allocated at the end of the module >core, but that's probably overkill). > >Reported-by: Weilong Chen >Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541 >Cc: stable@kernel.org >Signed-off-by: Rusty Russell Looks good to me. Reviewed-by: Masami Hiramatsu Thanks! >--- > include/linux/module.h | 19 +++++---- > kernel/module.c | 112 ++++++++++++++++++++++++++++++------------------- > 2 files changed, 79 insertions(+), 52 deletions(-) > >diff --git a/include/linux/module.h b/include/linux/module.h >index 4560d8f1545d..2bb0c3085706 100644 >--- a/include/linux/module.h >+++ b/include/linux/module.h >@@ -324,6 +324,12 @@ struct module_layout { > #define __module_layout_align > #endif > >+struct mod_kallsyms { >+ Elf_Sym *symtab; >+ unsigned int num_symtab; >+ char *strtab; >+}; >+ > struct module { > enum module_state state; > >@@ -405,15 +411,10 @@ struct module { > #endif > > #ifdef CONFIG_KALLSYMS >- /* >- * We keep the symbol and string tables for kallsyms. >- * The core_* fields below are temporary, loader-only (they >- * could really be discarded after module init). >- */ >- Elf_Sym *symtab, *core_symtab; >- unsigned int num_symtab, core_num_syms; >- char *strtab, *core_strtab; >- >+ /* Protected by RCU and/or module_mutex: use rcu_dereference() */ >+ struct mod_kallsyms *kallsyms; >+ struct mod_kallsyms core_kallsyms; >+ > /* Section attributes */ > struct module_sect_attrs *sect_attrs; > >diff --git a/kernel/module.c b/kernel/module.c >index 1e79d8157712..9537da37ce87 100644 >--- a/kernel/module.c >+++ b/kernel/module.c >@@ -303,6 +303,9 @@ struct load_info { > struct _ddebug *debug; > unsigned int num_debug; > bool sig_ok; >+#ifdef CONFIG_KALLSYMS >+ unsigned long mod_kallsyms_init_off; >+#endif > struct { > unsigned int sym, str, mod, vers, info, pcpu; > } index; >@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info) > strsect->sh_flags |= SHF_ALLOC; > strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect, > info->index.str) | INIT_OFFSET_MASK; >- mod->init_layout.size = debug_align(mod->init_layout.size); > pr_debug("\t%s\n", info->secstrings + strsect->sh_name); >+ >+ /* We'll tack temporary mod_kallsyms on the end. */ >+ mod->init_layout.size = ALIGN(mod->init_layout.size, >+ __alignof__(struct mod_kallsyms)); >+ info->mod_kallsyms_init_off = mod->init_layout.size; >+ mod->init_layout.size += sizeof(struct mod_kallsyms); >+ mod->init_layout.size = debug_align(mod->init_layout.size); > } > >+/* >+ * We use the full symtab and strtab which layout_symtab arranged to >+ * be appended to the init section. Later we switch to the cut-down >+ * core-only ones. >+ */ > static void add_kallsyms(struct module *mod, const struct load_info *info) > { > unsigned int i, ndst; >@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) > char *s; > Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; > >- mod->symtab = (void *)symsec->sh_addr; >- mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); >+ /* Set up to point into init section. */ >+ mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off; >+ >+ mod->kallsyms->symtab = (void *)symsec->sh_addr; >+ mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); > /* Make sure we get permanent strtab: don't use info->strtab. */ >- mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr; >+ mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; > > /* Set types up while we still have access to sections. */ >- for (i = 0; i < mod->num_symtab; i++) >- mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); >- >- mod->core_symtab = dst = mod->core_layout.base + info->symoffs; >- mod->core_strtab = s = mod->core_layout.base + info->stroffs; >- src = mod->symtab; >- for (ndst = i = 0; i < mod->num_symtab; i++) { >+ for (i = 0; i < mod->kallsyms->num_symtab; i++) >+ mod->kallsyms->symtab[i].st_info >+ = elf_type(&mod->kallsyms->symtab[i], info); >+ >+ /* Now populate the cut down core kallsyms for after init. */ >+ mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs; >+ mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; >+ src = mod->kallsyms->symtab; >+ for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { > if (i == 0 || > is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, > info->index.pcpu)) { > dst[ndst] = src[i]; >- dst[ndst++].st_name = s - mod->core_strtab; >- s += strlcpy(s, &mod->strtab[src[i].st_name], >+ dst[ndst++].st_name = s - mod->core_kallsyms.strtab; >+ s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name], > KSYM_NAME_LEN) + 1; > } > } >- mod->core_num_syms = ndst; >+ mod->core_kallsyms.num_symtab = ndst; > } > #else > static inline void layout_symtab(struct module *mod, struct load_info *info) >@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod) > module_put(mod); > trim_init_extable(mod); > #ifdef CONFIG_KALLSYMS >- mod->num_symtab = mod->core_num_syms; >- mod->symtab = mod->core_symtab; >- mod->strtab = mod->core_strtab; >+ /* Switch to core kallsyms now init is done: kallsyms may be walking! */ >+ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); > #endif > mod_tree_remove_init(mod); > disable_ro_nx(&mod->init_layout); >@@ -3627,9 +3645,9 @@ static inline int is_arm_mapping_symbol(const char *str) > && (str[2] == '\0' || str[2] == '.'); > } > >-static const char *symname(struct module *mod, unsigned int symnum) >+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum) > { >- return mod->strtab + mod->symtab[symnum].st_name; >+ return kallsyms->strtab + kallsyms->symtab[symnum].st_name; > } > > static const char *get_ksymbol(struct module *mod, >@@ -3639,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod, > { > unsigned int i, best = 0; > unsigned long nextval; >+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); > > /* At worse, next value is at end of module */ > if (within_module_init(addr, mod)) >@@ -3648,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod, > > /* Scan for closest preceding symbol, and next symbol. (ELF > starts real symbols at 1). */ >- for (i = 1; i < mod->num_symtab; i++) { >- if (mod->symtab[i].st_shndx == SHN_UNDEF) >+ for (i = 1; i < kallsyms->num_symtab; i++) { >+ if (kallsyms->symtab[i].st_shndx == SHN_UNDEF) > continue; > > /* We ignore unnamed symbols: they're uninformative > * and inserted at a whim. */ >- if (*symname(mod, i) == '\0' >- || is_arm_mapping_symbol(symname(mod, i))) >+ if (*symname(kallsyms, i) == '\0' >+ || is_arm_mapping_symbol(symname(kallsyms, i))) > continue; > >- if (mod->symtab[i].st_value <= addr >- && mod->symtab[i].st_value > mod->symtab[best].st_value) >+ if (kallsyms->symtab[i].st_value <= addr >+ && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value) > best = i; >- if (mod->symtab[i].st_value > addr >- && mod->symtab[i].st_value < nextval) >- nextval = mod->symtab[i].st_value; >+ if (kallsyms->symtab[i].st_value > addr >+ && kallsyms->symtab[i].st_value < nextval) >+ nextval = kallsyms->symtab[i].st_value; > } > > if (!best) > return NULL; > > if (size) >- *size = nextval - mod->symtab[best].st_value; >+ *size = nextval - kallsyms->symtab[best].st_value; > if (offset) >- *offset = addr - mod->symtab[best].st_value; >- return symname(mod, best); >+ *offset = addr - kallsyms->symtab[best].st_value; >+ return symname(kallsyms, best); > } > > /* For kallsyms to ask for address resolution. NULL means not found. Careful >@@ -3763,18 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, > > preempt_disable(); > list_for_each_entry_rcu(mod, &modules, list) { >+ struct mod_kallsyms *kallsyms; >+ > if (mod->state == MODULE_STATE_UNFORMED) > continue; >- if (symnum < mod->num_symtab) { >- *value = mod->symtab[symnum].st_value; >- *type = mod->symtab[symnum].st_info; >- strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN); >+ kallsyms = rcu_dereference_sched(mod->kallsyms); >+ if (symnum < kallsyms->num_symtab) { >+ *value = kallsyms->symtab[symnum].st_value; >+ *type = kallsyms->symtab[symnum].st_info; >+ strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN); > strlcpy(module_name, mod->name, MODULE_NAME_LEN); > *exported = is_exported(name, *value, mod); > preempt_enable(); > return 0; > } >- symnum -= mod->num_symtab; >+ symnum -= kallsyms->num_symtab; > } > preempt_enable(); > return -ERANGE; >@@ -3783,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, > static unsigned long mod_find_symname(struct module *mod, const char *name) > { > unsigned int i; >+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); > >- for (i = 0; i < mod->num_symtab; i++) >- if (strcmp(name, symname(mod, i)) == 0 && >- mod->symtab[i].st_info != 'U') >- return mod->symtab[i].st_value; >+ for (i = 0; i < kallsyms->num_symtab; i++) >+ if (strcmp(name, symname(kallsyms, i)) == 0 && >+ kallsyms->symtab[i].st_info != 'U') >+ return kallsyms->symtab[i].st_value; > return 0; > } > >@@ -3826,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, > module_assert_mutex(); > > list_for_each_entry(mod, &modules, list) { >+ /* We hold module_mutex: no need for rcu_dereference_sched */ >+ struct mod_kallsyms *kallsyms = mod->kallsyms; >+ > if (mod->state == MODULE_STATE_UNFORMED) > continue; >- for (i = 0; i < mod->num_symtab; i++) { >- ret = fn(data, symname(mod, i), >- mod, mod->symtab[i].st_value); >+ for (i = 0; i < kallsyms->num_symtab; i++) { >+ ret = fn(data, symname(kallsyms, i), >+ mod, kallsyms->symtab[i].st_value); > if (ret != 0) > return ret; > } >-- >2.5.0