All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf-next] netfilter: nf_conntrack: restrict conntrack_buckets value
@ 2019-04-08 10:42 Taehee Yoo
  2019-04-08 21:11 ` Florian Westphal
  0 siblings, 1 reply; 3+ messages in thread
From: Taehee Yoo @ 2019-04-08 10:42 UTC (permalink / raw
  To: pablo, fw, netfilter-devel; +Cc: ap420073

In order to avoid wastefull memory allocation, conntrack bucket size
should be lower than conntrack_max size.
When a conntrack_max is changed, a conntrack_buckets will be changed to be
under a conntrack_max value.
But, a conntrack_buckets can be over than a conntrack_max only when
a conntrack_max is lower than minimum of a conntrack_buckets.

TEST
   sysctl net.netfilter.nf_conntrack_max=100000 -w
   sysctl net.netfilter.nf_conntrack_buckets=200000 -w
second command will be failed because of this patch.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 include/net/netfilter/nf_conntrack.h    |  4 +--
 net/netfilter/nf_conntrack_core.c       | 34 ++++++++++++++++++++-----
 net/netfilter/nf_conntrack_expect.c     |  2 +-
 net/netfilter/nf_conntrack_helper.c     |  2 +-
 net/netfilter/nf_conntrack_standalone.c | 29 ++++++++++++++++++---
 net/netfilter/nf_nat_core.c             |  2 +-
 6 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5ee7b30b4917..39565339f218 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -179,7 +179,7 @@ void nf_ct_netns_put(struct net *net, u8 nfproto);
  * Allocate a hashtable of hlist_head (if nulls == 0),
  * or hlist_nulls_head (if nulls == 1)
  */
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls, bool roundup);
 
 int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
@@ -287,7 +287,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
-int nf_conntrack_hash_resize(unsigned int hashsize);
+int nf_conntrack_hash_resize(unsigned int hashsize, bool roundup);
 
 extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 82bfbeef46af..7215e535615b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2210,7 +2210,12 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 	}
 }
 
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
+static unsigned int nf_ct_hashtable_min(void)
+{
+	return roundup(1, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+}
+
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls, bool roundup)
 {
 	struct hlist_nulls_head *hash;
 	unsigned int nr_slots, i;
@@ -2219,7 +2224,17 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 		return NULL;
 
 	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
-	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+	if (roundup) {
+		*sizep = roundup(*sizep, PAGE_SIZE /
+					 sizeof(struct hlist_nulls_head));
+	} else {
+		*sizep = rounddown(*sizep, PAGE_SIZE /
+					   sizeof(struct hlist_nulls_head));
+		if (!*sizep)
+			*sizep = nf_ct_hashtable_min();
+	}
+
+	nr_slots = *sizep;
 
 	hash = kvmalloc_array(nr_slots, sizeof(struct hlist_nulls_head),
 			      GFP_KERNEL | __GFP_ZERO);
@@ -2232,7 +2247,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 }
 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
-int nf_conntrack_hash_resize(unsigned int hashsize)
+int nf_conntrack_hash_resize(unsigned int hashsize, bool roundup)
 {
 	int i, bucket;
 	unsigned int old_size;
@@ -2243,7 +2258,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = nf_ct_alloc_hashtable(&hashsize, 1);
+	hash = nf_ct_alloc_hashtable(&hashsize, 1, roundup);
 	if (!hash)
 		return -ENOMEM;
 
@@ -2253,6 +2268,12 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 		return 0;
 	}
 
+	if (hashsize != nf_ct_hashtable_min() &&
+	    hashsize > nf_conntrack_max) {
+		kvfree(hash);
+		return -E2BIG;
+	}
+
 	local_bh_disable();
 	nf_conntrack_all_lock();
 	write_seqcount_begin(&nf_conntrack_generation);
@@ -2305,7 +2326,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 	if (rc)
 		return rc;
 
-	return nf_conntrack_hash_resize(hashsize);
+	return nf_conntrack_hash_resize(hashsize, true);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 
@@ -2377,7 +2398,8 @@ int nf_conntrack_init_start(void)
 		max_factor = 4;
 	}
 
-	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1,
+						  true);
 	if (!nf_conntrack_hash)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 334d6e5b7762..82fca1480455 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -698,7 +698,7 @@ int nf_conntrack_expect_init(void)
 	if (!nf_ct_expect_cachep)
 		return -ENOMEM;
 
-	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
+	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0, true);
 	if (!nf_ct_expect_hash) {
 		kmem_cache_destroy(nf_ct_expect_cachep);
 		return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 274baf1dab87..3d22b80fe9d7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -483,7 +483,7 @@ int nf_conntrack_helper_init(void)
 	int ret;
 	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
 	nf_ct_helper_hash =
-		nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
+		nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0, true);
 	if (!nf_ct_helper_hash)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index c2ae14c720b4..d9a6480178e5 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -515,6 +515,29 @@ static int log_invalid_proto_max __read_mostly = 255;
 /* size the user *wants to set */
 static unsigned int nf_conntrack_htable_size_user __read_mostly;
 
+static int
+nf_conntrack_max_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret < 0 || !write)
+		return ret;
+
+	if (net_eq(&init_net, current->nsproxy->net_ns) &&
+	    nf_conntrack_max < nf_conntrack_htable_size) {
+		ret = nf_conntrack_hash_resize(nf_conntrack_max ?
+					       nf_conntrack_max : 1, false);
+		if (ret < 0)
+			return ret;
+
+		nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+	}
+
+	return ret;
+}
+
 static int
 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -526,7 +549,7 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
 		return ret;
 
 	/* update ret, we might not be able to satisfy request */
-	ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
+	ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user, true);
 
 	/* update it to the actual value used by conntrack */
 	nf_conntrack_htable_size_user = nf_conntrack_htable_size;
@@ -605,7 +628,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.data		= &nf_conntrack_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= nf_conntrack_max_sysctl,
 	},
 	[NF_SYSCTL_CT_COUNT] = {
 		.procname	= "nf_conntrack_count",
@@ -913,7 +936,7 @@ static struct ctl_table nf_ct_netfilter_table[] = {
 		.data		= &nf_conntrack_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= nf_conntrack_max_sysctl,
 	},
 	{ }
 };
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index af7dc6537758..d48f43328342 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1156,7 +1156,7 @@ static int __init nf_nat_init(void)
 	if (nf_nat_htable_size < CONNTRACK_LOCKS)
 		nf_nat_htable_size = CONNTRACK_LOCKS;
 
-	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0, true);
 	if (!nf_nat_bysource)
 		return -ENOMEM;
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_conntrack: restrict conntrack_buckets value
  2019-04-08 10:42 [PATCH nf-next] netfilter: nf_conntrack: restrict conntrack_buckets value Taehee Yoo
@ 2019-04-08 21:11 ` Florian Westphal
  2019-04-08 23:15   ` Taehee Yoo
  0 siblings, 1 reply; 3+ messages in thread
From: Florian Westphal @ 2019-04-08 21:11 UTC (permalink / raw
  To: Taehee Yoo; +Cc: pablo, fw, netfilter-devel

Taehee Yoo <ap420073@gmail.com> wrote:
> In order to avoid wastefull memory allocation, conntrack bucket size
> should be lower than conntrack_max size.
> When a conntrack_max is changed, a conntrack_buckets will be changed to be
> under a conntrack_max value.
> But, a conntrack_buckets can be over than a conntrack_max only when
> a conntrack_max is lower than minimum of a conntrack_buckets.
> 
> TEST
>    sysctl net.netfilter.nf_conntrack_max=100000 -w
>    sysctl net.netfilter.nf_conntrack_buckets=200000 -w
> second command will be failed because of this patch.

Are you sure this is correct?

IIRC nf_conntrack_buckets is a global value, whereas nf_conntrack_max
is per netns.

So, with 100 netns nf_conntrack_buckets should be set to a much larger
value.

Also, we hash and insert each conntrack entry twice, once for original
and once for the reverse direction.

So, setting buckets to twice the max count is fine even for the 'init
netns only' case.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_conntrack: restrict conntrack_buckets value
  2019-04-08 21:11 ` Florian Westphal
@ 2019-04-08 23:15   ` Taehee Yoo
  0 siblings, 0 replies; 3+ messages in thread
From: Taehee Yoo @ 2019-04-08 23:15 UTC (permalink / raw
  To: Florian Westphal; +Cc: Pablo Neira Ayuso, Netfilter Developer Mailing List

On Tue, 9 Apr 2019 at 06:11, Florian Westphal <fw@strlen.de> wrote:
>

Hi Florian,

> Taehee Yoo <ap420073@gmail.com> wrote:
> > In order to avoid wastefull memory allocation, conntrack bucket size
> > should be lower than conntrack_max size.
> > When a conntrack_max is changed, a conntrack_buckets will be changed to be
> > under a conntrack_max value.
> > But, a conntrack_buckets can be over than a conntrack_max only when
> > a conntrack_max is lower than minimum of a conntrack_buckets.
> >
> > TEST
> >    sysctl net.netfilter.nf_conntrack_max=100000 -w
> >    sysctl net.netfilter.nf_conntrack_buckets=200000 -w
> > second command will be failed because of this patch.
>
> Are you sure this is correct?
>
> IIRC nf_conntrack_buckets is a global value, whereas nf_conntrack_max
> is per netns.
>
> So, with 100 netns nf_conntrack_buckets should be set to a much larger
> value.
>
> Also, we hash and insert each conntrack entry twice, once for original
> and once for the reverse direction.
>
> So, setting buckets to twice the max count is fine even for the 'init
> netns only' case.
>

Thank you for review!
I checked about conntrack_max and conntrack_buckets.
Your review is right.
conntrack_max is global variable but session count is pernet.
So, in netns condition, large bucket would be needed.

So, this patch is not correct.

Thank you!

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-04-08 23:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-04-08 10:42 [PATCH nf-next] netfilter: nf_conntrack: restrict conntrack_buckets value Taehee Yoo
2019-04-08 21:11 ` Florian Westphal
2019-04-08 23:15   ` Taehee Yoo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.