* [patch added to the 3.12 stable tree] mtip32xx: dynamically allocate buffer in debugfs functions
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable; +Cc: David Milburn, Jens Axboe, Jiri Slaby

From: David Milburn <dmilburn@redhat.com>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit c8afd0dcbd14e2352258f2e2d359b36d0edd459f upstream.

Dynamically allocate buf to prevent warnings:

drivers/block/mtip32xx/mtip32xx.c: In function ‘mtip_hw_read_device_status’:
drivers/block/mtip32xx/mtip32xx.c:2823: warning: the frame size of 1056 bytes is larger than 1024 bytes
drivers/block/mtip32xx/mtip32xx.c: In function ‘mtip_hw_read_registers’:
drivers/block/mtip32xx/mtip32xx.c:2894: warning: the frame size of 1056 bytes is larger than 1024 bytes
drivers/block/mtip32xx/mtip32xx.c: In function ‘mtip_hw_read_flags’:
drivers/block/mtip32xx/mtip32xx.c:2917: warning: the frame size of 1056 bytes is larger than 1024 bytes
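
For context, the general shape of the fix is to move the debugfs read
buffer from the stack to the heap. A minimal sketch of that pattern
(an illustration only, with a made-up function name; the actual changes
are in the diff below):

  static ssize_t example_debugfs_read(struct file *f, char __user *ubuf,
                                      size_t len, loff_t *offset)
  {
          char *buf;
          int size, rv = 0;

          if (!len || *offset)
                  return 0;

          /* heap allocation keeps the stack frame small */
          buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
          if (!buf)
                  return -ENOMEM;

          size = scnprintf(buf, MTIP_DFS_MAX_BUF_SIZE, "example output\n");

          *offset = size <= len ? size : len;
          if (copy_to_user(ubuf, buf, *offset))
                  rv = -EFAULT;

          kfree(buf);             /* freed on every return path */
          return rv ? rv : *offset;
  }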

Signed-off-by: David Milburn <dmilburn@redhat.com>
Acked-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 drivers/block/mtip32xx/mtip32xx.c | 47 ++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 560227b817fe..01c7270b5e84 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2810,34 +2810,51 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf)
 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
 						size_t len, loff_t *offset)
 {
+	struct driver_data *dd =  (struct driver_data *)f->private_data;
 	int size = *offset;
-	char buf[MTIP_DFS_MAX_BUF_SIZE];
+	char *buf;
+	int rv = 0;
 
 	if (!len || *offset)
 		return 0;
 
+	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
+	if (!buf) {
+		dev_err(&dd->pdev->dev,
+			"Memory allocation: status buffer\n");
+		return -ENOMEM;
+	}
+
 	size += show_device_status(NULL, buf);
 
 	*offset = size <= len ? size : len;
 	size = copy_to_user(ubuf, buf, *offset);
 	if (size)
-		return -EFAULT;
+		rv = -EFAULT;
 
-	return *offset;
+	kfree(buf);
+	return rv ? rv : *offset;
 }
 
 static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
 				  size_t len, loff_t *offset)
 {
 	struct driver_data *dd =  (struct driver_data *)f->private_data;
-	char buf[MTIP_DFS_MAX_BUF_SIZE];
+	char *buf;
 	u32 group_allocated;
 	int size = *offset;
-	int n;
+	int n, rv = 0;
 
 	if (!len || size)
 		return 0;
 
+	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
+	if (!buf) {
+		dev_err(&dd->pdev->dev,
+			"Memory allocation: register buffer\n");
+		return -ENOMEM;
+	}
+
 	size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");
 
 	for (n = dd->slot_groups-1; n >= 0; n--)
@@ -2892,21 +2909,30 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
 	*offset = size <= len ? size : len;
 	size = copy_to_user(ubuf, buf, *offset);
 	if (size)
-		return -EFAULT;
+		rv = -EFAULT;
 
-	return *offset;
+	kfree(buf);
+	return rv ? rv : *offset;
 }
 
 static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
 				  size_t len, loff_t *offset)
 {
 	struct driver_data *dd =  (struct driver_data *)f->private_data;
-	char buf[MTIP_DFS_MAX_BUF_SIZE];
+	char *buf;
 	int size = *offset;
+	int rv = 0;
 
 	if (!len || size)
 		return 0;
 
+	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
+	if (!buf) {
+		dev_err(&dd->pdev->dev,
+			"Memory allocation: flag buffer\n");
+		return -ENOMEM;
+	}
+
 	size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
 							dd->port->flags);
 	size += sprintf(&buf[size], "Flag-dd   : [ %08lX ]\n",
@@ -2915,9 +2941,10 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
 	*offset = size <= len ? size : len;
 	size = copy_to_user(ubuf, buf, *offset);
 	if (size)
-		return -EFAULT;
+		rv = -EFAULT;
 
-	return *offset;
+	kfree(buf);
+	return rv ? rv : *offset;
 }
 
 static const struct file_operations mtip_device_status_fops = {
-- 
2.5.2



* [patch added to the 3.12 stable tree] cifs: Send a logoff request before removing a smb session
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable; +Cc: Shirish Pargaonkar, Steve French, Jiri Slaby

From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit 7f48558e6489d032b1584b0cc9ac4bb11072c034 upstream.

Send a smb session logoff request before removing the smb session from the list.
On a signed smb session, removing a session from the list before sending
a logoff request results in the server returning an error for lack of a
smb signature.

I have never seen an error during smb logoff, so per MS-SMB2 3.2.5.1
I am not sure how an error during logoff should be retried. For now,
if a server returns an error to a logoff request, log the error and
remove the session from the list anyway.
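
The ordering the patch establishes can be summarised in a simplified
sketch (illustrative only, condensed from the hunks below): mark the
session as exiting while it is still on the list, send the logoff while
the session can still sign the request, and only then unlink and free it.

  spin_lock(&cifs_tcp_ses_lock);
  if (--ses->ses_count > 0) {
          spin_unlock(&cifs_tcp_ses_lock);
          return;
  }
  if (ses->status == CifsGood)
          ses->status = CifsExiting;      /* lookups now skip this session */
  spin_unlock(&cifs_tcp_ses_lock);

  if (ses->status == CifsExiting && server->ops->logoff) {
          rc = server->ops->logoff(xid, ses);     /* sent while still listed */
          if (rc)
                  cifs_dbg(VFS, "Session Logoff failure rc=%d\n", rc);
  }

  spin_lock(&cifs_tcp_ses_lock);
  list_del_init(&ses->smb_ses_list);      /* unlink only after the logoff */
  spin_unlock(&cifs_tcp_ses_lock);
  sesInfoFree(ses);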

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 fs/cifs/connect.c       | 25 ++++++++++++++++++++-----
 fs/cifs/smb2transport.c | 10 ++++++++--
 fs/cifs/transport.c     | 11 +++++++++--
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 89b5519085c2..ebad721656f3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2245,6 +2245,8 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+		if (ses->status == CifsExiting)
+			continue;
 		if (!match_session(ses, vol))
 			continue;
 		++ses->ses_count;
@@ -2258,24 +2260,37 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
 static void
 cifs_put_smb_ses(struct cifs_ses *ses)
 {
-	unsigned int xid;
+	unsigned int rc, xid;
 	struct TCP_Server_Info *server = ses->server;
 
 	cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
+
 	spin_lock(&cifs_tcp_ses_lock);
+	if (ses->status == CifsExiting) {
+		spin_unlock(&cifs_tcp_ses_lock);
+		return;
+	}
 	if (--ses->ses_count > 0) {
 		spin_unlock(&cifs_tcp_ses_lock);
 		return;
 	}
-
-	list_del_init(&ses->smb_ses_list);
+	if (ses->status == CifsGood)
+		ses->status = CifsExiting;
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	if (ses->status == CifsGood && server->ops->logoff) {
+	if (ses->status == CifsExiting && server->ops->logoff) {
 		xid = get_xid();
-		server->ops->logoff(xid, ses);
+		rc = server->ops->logoff(xid, ses);
+		if (rc)
+			cifs_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
+				__func__, rc);
 		_free_xid(xid);
 	}
+
+	spin_lock(&cifs_tcp_ses_lock);
+	list_del_init(&ses->smb_ses_list);
+	spin_unlock(&cifs_tcp_ses_lock);
+
 	sesInfoFree(ses);
 	cifs_put_tcp_session(server);
 }
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 340abca3aa52..ee1963b2e5a7 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -516,13 +516,19 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf,
 		return -EAGAIN;
 	}
 
-	if (ses->status != CifsGood) {
-		/* check if SMB2 session is bad because we are setting it up */
+	if (ses->status == CifsNew) {
 		if ((buf->Command != SMB2_SESSION_SETUP) &&
 		    (buf->Command != SMB2_NEGOTIATE))
 			return -EAGAIN;
 		/* else ok - we are setting up session */
 	}
+
+	if (ses->status == CifsExiting) {
+		if (buf->Command != SMB2_LOGOFF)
+			return -EAGAIN;
+		/* else ok - we are shutting down the session */
+	}
+
 	*mid = smb2_mid_entry_alloc(buf, ses->server);
 	if (*mid == NULL)
 		return -ENOMEM;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 800b938e4061..ebb46e311e0b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -431,13 +431,20 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
 		return -EAGAIN;
 	}
 
-	if (ses->status != CifsGood) {
-		/* check if SMB session is bad because we are setting it up */
+	if (ses->status == CifsNew) {
 		if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
 			(in_buf->Command != SMB_COM_NEGOTIATE))
 			return -EAGAIN;
 		/* else ok - we are setting up session */
 	}
+
+	if (ses->status == CifsExiting) {
+		/* check if SMB session is bad because we are setting it up */
+		if (in_buf->Command != SMB_COM_LOGOFF_ANDX)
+			return -EAGAIN;
+		/* else ok - we are shutting down session */
+	}
+
 	*ppmidQ = AllocMidQEntry(in_buf, ses->server);
 	if (*ppmidQ == NULL)
 		return -ENOMEM;
-- 
2.5.2



* [patch added to the 3.12 stable tree] lpfc: Fix scsi prep dma buf error.
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable; +Cc: James Smart, Dick Kennedy, James Bottomley, Jiri Slaby

From: James Smart <james.smart@avagotech.com>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit 5116fbf136ea21b8678a85eee5c03508736ada9f upstream.

The code didn't check for a return value less than or equal to zero, so we
may later call scsi_dma_unmap() even though we don't have valid mappings.
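
scsi_dma_map() returns the number of mapped segments, 0 when there is no
data to map, or a negative value if the mapping fails, so the check has
to reject everything that is not strictly positive. A minimal sketch of
the intended pattern (illustrative, mirroring the one-line change below):

  nseg = scsi_dma_map(scsi_cmnd);
  if (unlikely(nseg <= 0))        /* 0: nothing mapped, <0: mapping failed */
          return 1;               /* so scsi_dma_unmap() must not run later */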

Signed-off-by: Dick Kennedy <dick.kennedy@avagotech.com>
Signed-off-by: James Smart <james.smart@avagotech.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 drivers/scsi/lpfc/lpfc_scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index c913e8cc3b26..ed7759980c47 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3423,7 +3423,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 		 */
 
 		nseg = scsi_dma_map(scsi_cmnd);
-		if (unlikely(!nseg))
+		if (unlikely(nseg <= 0))
 			return 1;
 		sgl += 1;
 		/* clear the last flag in the fcp_rsp map entry */
-- 
2.5.2



* [patch added to the 3.12 stable tree] bio: fix argument of __bio_add_page() for max_sectors > 0xffff
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable; +Cc: Akinobu Mita, Jens Axboe, Alexander Viro, Jiri Slaby

From: Akinobu Mita <akinobu.mita@gmail.com>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit 34f2fd8dfe6185b0eaaf7d661281713a6170b077 upstream.

The data type of max_sectors and max_hw_sectors in the queue settings is
unsigned int, but these values are passed to __bio_add_page() through an
argument whose data type is unsigned short.  In the worst case, such as
when max_sectors is 0x10000, the value is truncated to zero, so
bio_add_page() can't add a page and I/Os can't proceed.
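
The truncation is easy to reproduce in isolation; a small, self-contained
user-space illustration (not kernel code):

  #include <stdio.h>

  int main(void)
  {
          unsigned int max_sectors = 0x10000;     /* 65536, a valid queue limit */
          unsigned short narrowed = max_sectors;  /* the implicit conversion */

          /* prints "65536 -> 0": the limit collapses to zero sectors */
          printf("%u -> %u\n", max_sectors, (unsigned int)narrowed);
          return 0;
  }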

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 fs/bio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bio.c b/fs/bio.c
index ea5035da4d9a..e7fb3f82f5f5 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
 
 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 			  *page, unsigned int len, unsigned int offset,
-			  unsigned short max_sectors)
+			  unsigned int max_sectors)
 {
 	int retried_segments = 0;
 	struct bio_vec *bvec;
-- 
2.5.2



* [patch added to the 3.12 stable tree] dm cache mq: fix memory allocation failure for large cache devices
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable; +Cc: Heinz Mauelshagen, Mike Snitzer, Jiri Slaby

From: Heinz Mauelshagen <heinzm@redhat.com>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit 14f398ca2f26a2ed6236aec54395e0fa06ec8a82 upstream.

The memory allocated for the multiqueue policy's hash table doesn't need
to be physically contiguous.  Use vzalloc() instead of kzalloc().
Fedora has been carrying this fix since 10/10/2013.

A failure was seen during creation of a 10TB cached device with a
2048-sector block size and a 411GB cache size:

 dmsetup: page allocation failure: order:9, mode:0x10c0d0
 CPU: 11 PID: 29235 Comm: dmsetup Not tainted 3.10.4 #3
 Hardware name: Supermicro X8DTL/X8DTL, BIOS 2.1a       12/30/2011
  000000000010c0d0 ffff880090941898 ffffffff81387ab4 ffff880090941928
  ffffffff810bb26f 0000000000000009 000000000010c0d0 ffff880090941928
  ffffffff81385dbc ffffffff815f3840 ffffffff00000000 000002000010c0d0
 Call Trace:
  [<ffffffff81387ab4>] dump_stack+0x19/0x1b
  [<ffffffff810bb26f>] warn_alloc_failed+0x110/0x124
  [<ffffffff81385dbc>] ? __alloc_pages_direct_compact+0x17c/0x18e
  [<ffffffff810bda2e>] __alloc_pages_nodemask+0x6c7/0x75e
  [<ffffffff810bdad7>] __get_free_pages+0x12/0x3f
  [<ffffffff810ea148>] kmalloc_order_trace+0x29/0x88
  [<ffffffff810ec1fd>] __kmalloc+0x36/0x11b
  [<ffffffffa031eeed>] ? mq_create+0x1dc/0x2cf [dm_cache_mq]
  [<ffffffffa031efc0>] mq_create+0x2af/0x2cf [dm_cache_mq]
  [<ffffffffa0314605>] dm_cache_policy_create+0xa7/0xd2 [dm_cache]
  [<ffffffffa0312530>] ? cache_ctr+0x245/0xa13 [dm_cache]
  [<ffffffffa031263e>] cache_ctr+0x353/0xa13 [dm_cache]
  [<ffffffffa012b916>] dm_table_add_target+0x227/0x2ce [dm_mod]
  [<ffffffffa012e8e4>] table_load+0x286/0x2ac [dm_mod]
  [<ffffffffa012e65e>] ? dev_wait+0x8a/0x8a [dm_mod]
  [<ffffffffa012e324>] ctl_ioctl+0x39a/0x3c2 [dm_mod]
  [<ffffffffa012e35a>] dm_ctl_ioctl+0xe/0x12 [dm_mod]
  [<ffffffff81101181>] vfs_ioctl+0x21/0x34
  [<ffffffff811019d3>] do_vfs_ioctl+0x3b1/0x3f4
  [<ffffffff810f4d2e>] ? ____fput+0x9/0xb
  [<ffffffff81050b6c>] ? task_work_run+0x7e/0x92
  [<ffffffff81101a68>] SyS_ioctl+0x52/0x82
  [<ffffffff81391d92>] system_call_fastpath+0x16/0x1b
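
As a rough cross-check of the order:9 failure above (assuming 8-byte
hash-bucket heads, which is my assumption, and 1 MiB cache blocks as
implied by the 2048-sector block size): 411 GB of cache gives roughly
400,000 cache blocks, so the hash table gets next_power(400,000 / 2, 16)
= 262,144 buckets, i.e. 262,144 * 8 bytes = 2 MiB. As a kzalloc() that
is 512 physically contiguous 4 KiB pages, exactly the order-9 allocation
that fails; vzalloc() only needs virtually contiguous memory, so it
avoids the problem.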

Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 drivers/md/dm-cache-policy-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 4296155090b2..afcd18428945 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -855,7 +855,7 @@ static void mq_destroy(struct dm_cache_policy *p)
 	struct mq_policy *mq = to_mq_policy(p);
 
 	free_bitset(mq->allocation_bitset);
-	kfree(mq->table);
+	vfree(mq->table);
 	free_entries(mq);
 	kfree(mq);
 }
@@ -1106,7 +1106,7 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 
 	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
 	mq->hash_bits = ffs(mq->nr_buckets) - 1;
-	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
+	mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets);
 	if (!mq->table)
 		goto bad_alloc_table;
 
-- 
2.5.2



* [patch added to the 3.12 stable tree] aio: fix reqs_available handling
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable
  Cc: Benjamin LaHaise, Kent Overstreet, Mateusz Guzik, Petr Matousek,
	Linus Torvalds, Jiri Slaby

From: Benjamin LaHaise <bcrl@kvack.org>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit d856f32a86b2b015ab180ab7a55e455ed8d3ccc5 upstream.

As reported by Dan Aloni, commit f8567a3845ac ("aio: fix aio request
leak when events are reaped by userspace") introduces a regression when
user code attempts to perform io_submit() with more events than are
available in the ring buffer.  Reverting that commit would reintroduce a
regression when user space event reaping is used.

Fixing this bug is a bit more involved than the previous attempts to fix
this regression.  Since we do not have a single point at which we can
count events as being reaped by user space and io_getevents(), we have
to track event completion by looking at the number of events left in the
event ring.  So long as there are as many events in the ring buffer as
there have been completion events generated, we cannot call
put_reqs_available().  The code to check for this is now placed in
refill_reqs_available().
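
The counting itself reduces to ring-index arithmetic. A hedged,
stand-alone sketch of the idea (simplified from the helpers added below,
not the exact fs/aio.c code):

  /* How many completion events are still sitting in a ring of
   * nr_events slots, given head/tail indices that wrap around.
   */
  static unsigned events_in_ring(unsigned head, unsigned tail,
                                 unsigned nr_events)
  {
          head %= nr_events;      /* clamp: userland can write to head */
          if (head <= tail)
                  return tail - head;
          return nr_events - (head - tail);
  }

  /* Only completions that have already left the ring may be handed
   * back to reqs_available.
   */
  static unsigned reapable(unsigned completed, unsigned in_ring)
  {
          return completed > in_ring ? completed - in_ring : 0;
  }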

A test program from Dan, modified by me for verifying this bug, is available
at http://www.kvack.org/~bcrl/20140824-aio_bug.c .

Reported-by: Dan Aloni <dan@kernelim.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Dan Aloni <dan@kernelim.com>
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 fs/aio.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 329e6c1f3a43..31a5cb74ae1f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -146,6 +146,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -899,6 +900,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring.  This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the we're out of events case).  It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+                                  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out of space in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay since whether we read the old version
+		 * or the new version, and either will be valid.  The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock.  Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do the
+		 * safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
@@ -907,8 +970,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
@@ -967,8 +1033,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned tail, pos, head;
 	unsigned long	flags;
-	unsigned tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -1029,10 +1095,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1047,7 +1117,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
-- 
2.5.2



* [patch added to the 3.12 stable tree] netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable
  Cc: Andrey Vagin, Eric Dumazet, Florian Westphal, Pablo Neira Ayuso,
	Patrick McHardy, Jozsef Kadlecsik, David S. Miller,
	Cyrill Gorcunov, Jiri Slaby

From: Andrey Vagin <avagin@openvz.org>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

[ Upstream commit c6825c0976fa7893692e0e43b09740b419b23c09 ]

Lets look at destroy_conntrack:

hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
...
nf_conntrack_free(ct)
	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);

net->ct.nf_conntrack_cachep is created with SLAB_DESTROY_BY_RCU.

The hash is protected by RCU, so readers look up conntracks without
locks.
A conntrack is removed from the hash, but at that moment a few readers
can still be using it. Then this conntrack is released and another
thread creates a conntrack at the same address with an equal tuple.
After this a reader starts to validate the conntrack:
* It's not dying, because a new conntrack was created
* nf_ct_tuple_equal() returns true.

But this conntrack is not initialized yet, so it cannot be used by two
threads concurrently. In this case a BUG_ON may be triggered from
nf_nat_setup_info().

Florian Westphal suggested checking the confirmed bit too. I think that
is right.

task 1			task 2			task 3
			nf_conntrack_find_get
			 ____nf_conntrack_find
destroy_conntrack
 hlist_nulls_del_rcu
 nf_conntrack_free
 kmem_cache_free
						__nf_conntrack_alloc
						 kmem_cache_alloc
						 memset(&ct->tuplehash[IP_CT_DIR_MAX],
			 if (nf_ct_is_dying(ct))
			 if (!nf_ct_tuple_equal()
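
The standard discipline for SLAB_DESTROY_BY_RCU lookups is to pin the
object with a reference first and only then re-validate its keys; with
this patch the validation also insists on the confirmed bit, so a
half-initialized replacement object is never handed out. A simplified
sketch of the lookup side (condensed from the patched
nf_conntrack_find_get()):

  rcu_read_lock();
  begin:
          h = ____nf_conntrack_find(net, zone, tuple);
          if (h) {
                  ct = nf_ct_tuplehash_to_ctrack(h);
                  if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) {
                          h = NULL;       /* object is being freed */
                  } else if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
                          /* the slab slot was recycled under us: tuple,
                           * zone or confirmed bit no longer match, so
                           * drop the reference and redo the lookup
                           */
                          nf_ct_put(ct);
                          goto begin;
                  }
          }
          rcu_read_unlock();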

I'm not sure that I have ever seen this race condition in real life.
Currently we are investigating a bug which is reproduced on a few nodes.
In our case one conntrack is initialized by a few tasks concurrently;
we don't have any other explanation for this.

<2>[46267.083061] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:322!
...
<4>[46267.083951] RIP: 0010:[<ffffffffa01e00a4>]  [<ffffffffa01e00a4>] nf_nat_setup_info+0x564/0x590 [nf_nat]
...
<4>[46267.085549] Call Trace:
<4>[46267.085622]  [<ffffffffa023421b>] alloc_null_binding+0x5b/0xa0 [iptable_nat]
<4>[46267.085697]  [<ffffffffa02342bc>] nf_nat_rule_find+0x5c/0x80 [iptable_nat]
<4>[46267.085770]  [<ffffffffa0234521>] nf_nat_fn+0x111/0x260 [iptable_nat]
<4>[46267.085843]  [<ffffffffa0234798>] nf_nat_out+0x48/0xd0 [iptable_nat]
<4>[46267.085919]  [<ffffffff814841b9>] nf_iterate+0x69/0xb0
<4>[46267.085991]  [<ffffffff81494e70>] ? ip_finish_output+0x0/0x2f0
<4>[46267.086063]  [<ffffffff81484374>] nf_hook_slow+0x74/0x110
<4>[46267.086133]  [<ffffffff81494e70>] ? ip_finish_output+0x0/0x2f0
<4>[46267.086207]  [<ffffffff814b5890>] ? dst_output+0x0/0x20
<4>[46267.086277]  [<ffffffff81495204>] ip_output+0xa4/0xc0
<4>[46267.086346]  [<ffffffff814b65a4>] raw_sendmsg+0x8b4/0x910
<4>[46267.086419]  [<ffffffff814c10fa>] inet_sendmsg+0x4a/0xb0
<4>[46267.086491]  [<ffffffff814459aa>] ? sock_update_classid+0x3a/0x50
<4>[46267.086562]  [<ffffffff81444d67>] sock_sendmsg+0x117/0x140
<4>[46267.086638]  [<ffffffff8151997b>] ? _spin_unlock_bh+0x1b/0x20
<4>[46267.086712]  [<ffffffff8109d370>] ? autoremove_wake_function+0x0/0x40
<4>[46267.086785]  [<ffffffff81495e80>] ? do_ip_setsockopt+0x90/0xd80
<4>[46267.086858]  [<ffffffff8100be0e>] ? call_function_interrupt+0xe/0x20
<4>[46267.086936]  [<ffffffff8118cb10>] ? ub_slab_ptr+0x20/0x90
<4>[46267.087006]  [<ffffffff8118cb10>] ? ub_slab_ptr+0x20/0x90
<4>[46267.087081]  [<ffffffff8118f2e8>] ? kmem_cache_alloc+0xd8/0x1e0
<4>[46267.087151]  [<ffffffff81445599>] sys_sendto+0x139/0x190
<4>[46267.087229]  [<ffffffff81448c0d>] ? sock_setsockopt+0x16d/0x6f0
<4>[46267.087303]  [<ffffffff810efa47>] ? audit_syscall_entry+0x1d7/0x200
<4>[46267.087378]  [<ffffffff810ef795>] ? __audit_syscall_exit+0x265/0x290
<4>[46267.087454]  [<ffffffff81474885>] ? compat_sys_setsockopt+0x75/0x210
<4>[46267.087531]  [<ffffffff81474b5f>] compat_sys_socketcall+0x13f/0x210
<4>[46267.087607]  [<ffffffff8104dea3>] ia32_sysret+0x0/0x5
<4>[46267.087676] Code: 91 20 e2 01 75 29 48 89 de 4c 89 f7 e8 56 fa ff ff 85 c0 0f 84 68 fc ff ff 0f b6 4d c6 41 8b 45 00 e9 4d fb ff ff e8 7c 19 e9 e0 <0f> 0b eb fe f6 05 17 91 20 e2 80 74 ce 80 3d 5f 2e 00 00 00 74
<1>[46267.088023] RIP  [<ffffffffa01e00a4>] nf_nat_setup_info+0x564/0x590

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 net/netfilter/nf_conntrack_core.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5d892febd64c..da541b78b88e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -318,6 +318,21 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
 }
 
+static inline bool
+nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
+			const struct nf_conntrack_tuple *tuple,
+			u16 zone)
+{
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+	/* A conntrack can be recreated with the equal tuple,
+	 * so we need to check that the conntrack is confirmed
+	 */
+	return nf_ct_tuple_equal(tuple, &h->tuple) &&
+		nf_ct_zone(ct) == zone &&
+		nf_ct_is_confirmed(ct);
+}
+
 /*
  * Warning :
  * - Caller must take a reference on returned object
@@ -339,8 +354,7 @@ ____nf_conntrack_find(struct net *net, u16 zone,
 	local_bh_disable();
 begin:
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
-		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
-		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
+		if (nf_ct_key_equal(h, tuple, zone)) {
 			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
@@ -387,8 +401,7 @@ begin:
 			     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
-				     nf_ct_zone(ct) != zone)) {
+			if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
 				nf_ct_put(ct);
 				goto begin;
 			}
-- 
2.5.2



* [patch added to the 3.12 stable tree] netfilter: nf_conntrack: don't release a conntrack with non-zero refcnt
From: Jiri Slaby @ 2015-09-14 14:31 UTC
  To: stable
  Cc: Pablo Neira Ayuso, Eric Dumazet, Andrew Vagin, Florian Westphal,
	Jiri Slaby

From: Pablo Neira Ayuso <pablo@netfilter.org>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

[ Upstream commit e53376bef2cd97d3e3f61fdc677fb8da7d03d0da ]

With this patch, the conntrack refcount is initially set to zero and
is bumped once the conntrack is added to any of the lists, so we fulfill
Eric's golden rule, which is that all released objects always have a
refcount of zero.

Andrey Vagin reports that nf_conntrack_free() can't be called for a
conntrack with a non-zero refcount, because it can race with
nf_conntrack_find_get().

The conntrack slab is created with SLAB_DESTROY_BY_RCU. A non-zero
refcount says that the conntrack is in use, so when we release a
conntrack with a non-zero refcount, we break this assumption.

CPU1                                    CPU2
____nf_conntrack_find()
                                        nf_ct_put()
                                         destroy_conntrack()
                                        ...
                                        init_conntrack
                                         __nf_conntrack_alloc (set use = 1)
atomic_inc_not_zero(&ct->use) (use = 2)
                                         if (!l4proto->new(ct, skb, dataoff, timeouts))
                                          nf_conntrack_free(ct); (use = 2 !!!)
                                        ...
                                        __nf_conntrack_alloc (set use = 1)
 if (!nf_ct_key_equal(h, tuple, zone))
  nf_ct_put(ct); (use = 0)
   destroy_conntrack()
                                        /* continue to work with CT */

After applying the patch "[PATCH] netfilter: nf_conntrack: fix RCU
race in nf_conntrack_find_get" another bug was triggered in
destroy_conntrack():

<4>[67096.759334] ------------[ cut here ]------------
<2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211!
...
<4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G         C ---------------    2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB
<4>[67096.759932] RIP: 0010:[<ffffffffa03d99ac>]  [<ffffffffa03d99ac>] destroy_conntrack+0x15c/0x190 [nf_conntrack]
<4>[67096.760255] Call Trace:
<4>[67096.760255]  [<ffffffff814844a7>] nf_conntrack_destroy+0x17/0x30
<4>[67096.760255]  [<ffffffffa03d9bb5>] nf_conntrack_find_get+0x85/0x130 [nf_conntrack]
<4>[67096.760255]  [<ffffffffa03d9fb2>] nf_conntrack_in+0x352/0xb60 [nf_conntrack]
<4>[67096.760255]  [<ffffffffa048c771>] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4]
<4>[67096.760255]  [<ffffffff81484419>] nf_iterate+0x69/0xb0
<4>[67096.760255]  [<ffffffff814b5b00>] ? dst_output+0x0/0x20
<4>[67096.760255]  [<ffffffff814845d4>] nf_hook_slow+0x74/0x110
<4>[67096.760255]  [<ffffffff814b5b00>] ? dst_output+0x0/0x20
<4>[67096.760255]  [<ffffffff814b66d5>] raw_sendmsg+0x775/0x910
<4>[67096.760255]  [<ffffffff8104c5a8>] ? flush_tlb_others_ipi+0x128/0x130
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff814c136a>] inet_sendmsg+0x4a/0xb0
<4>[67096.760255]  [<ffffffff81444e93>] ? sock_sendmsg+0x13/0x140
<4>[67096.760255]  [<ffffffff81444f97>] sock_sendmsg+0x117/0x140
<4>[67096.760255]  [<ffffffff8102e299>] ? native_smp_send_reschedule+0x49/0x60
<4>[67096.760255]  [<ffffffff81519beb>] ? _spin_unlock_bh+0x1b/0x20
<4>[67096.760255]  [<ffffffff8109d930>] ? autoremove_wake_function+0x0/0x40
<4>[67096.760255]  [<ffffffff814960f0>] ? do_ip_setsockopt+0x90/0xd80
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff814457c9>] sys_sendto+0x139/0x190
<4>[67096.760255]  [<ffffffff810efa77>] ? audit_syscall_entry+0x1d7/0x200
<4>[67096.760255]  [<ffffffff810ef7c5>] ? __audit_syscall_exit+0x265/0x290
<4>[67096.760255]  [<ffffffff81474daf>] compat_sys_socketcall+0x13f/0x210
<4>[67096.760255]  [<ffffffff8104dea3>] ia32_sysret+0x0/0x5

I have reused the original title of the RFC patch that Andrey posted, as
well as most of the original patch description.
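
In other words, the lifetime rule after this patch is: allocate with a
refcount of zero, take the first reference only when the conntrack is
published on a list, and assert that anything being freed is back at
zero. A compressed sketch of that rule (illustrative only, condensed
from the hunks below):

  /* allocation: nobody can see the object yet, so use stays at 0 */
  atomic_set(&ct->ct_general.use, 0);

  /* publication: inserting into a list is what hands out a reference */
  nf_conntrack_get(&ct->ct_general);              /* use: 0 -> 1 */
  hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
                           &net->ct.unconfirmed);

  /* release: only objects nobody references any more may be freed */
  NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
  kmem_cache_free(net->ct.nf_conntrack_cachep, ct);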

Cc: Eric Dumazet <edumazet@google.com>
Cc: Andrew Vagin <avagin@parallels.com>
Cc: Florian Westphal <fw@strlen.de>
Reported-by: Andrew Vagin <avagin@parallels.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Andrew Vagin <avagin@parallels.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/net/netfilter/nf_conntrack.h |  2 ++
 net/netfilter/nf_conntrack_core.c    | 34 +++++++++++++++++++++++++++++-----
 net/netfilter/nf_synproxy_core.c     |  5 ++---
 net/netfilter/xt_CT.c                |  7 +------
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0c1288a50e8b..a68a061882f4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -293,6 +293,8 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl);
+
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index da541b78b88e..cf9bfc5ddb34 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -463,7 +463,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 			goto out;
 
 	add_timer(&ct->timeout);
-	nf_conntrack_get(&ct->ct_general);
+	smp_wmb();
+	/* The caller holds a reference to this object */
+	atomic_set(&ct->ct_general.use, 2);
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 	NF_CT_STAT_INC(net, insert);
 	spin_unlock_bh(&nf_conntrack_lock);
@@ -477,6 +479,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
+/* deletion from this larval template list happens via nf_ct_put() */
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
+{
+	__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
+	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+	nf_conntrack_get(&tmpl->ct_general);
+
+	spin_lock_bh(&nf_conntrack_lock);
+	/* Overload tuple linked list to put us in template list. */
+	hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+				 &net->ct.tmpl);
+	spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -748,11 +765,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 		nf_ct_zone->id = zone;
 	}
 #endif
-	/*
-	 * changes to lookup keys must be done before setting refcnt to 1
+	/* Because we use RCU lookups, we set ct_general.use to zero before
+	 * this is inserted in any list.
 	 */
-	smp_wmb();
-	atomic_set(&ct->ct_general.use, 1);
+	atomic_set(&ct->ct_general.use, 0);
 	return ct;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -776,6 +792,11 @@ void nf_conntrack_free(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
+	/* A freed object has refcnt == 0, that's
+	 * the golden rule for SLAB_DESTROY_BY_RCU
+	 */
+	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+
 	nf_ct_ext_destroy(ct);
 	atomic_dec(&net->ct.count);
 	nf_ct_ext_free(ct);
@@ -870,6 +891,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		NF_CT_STAT_INC(net, new);
 	}
 
+	/* Now it is inserted into the unconfirmed list, bump refcount */
+	nf_conntrack_get(&ct->ct_general);
+
 	/* Overload tuple linked list to put us in unconfirmed list. */
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 		       &net->ct.unconfirmed);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index cdf4567ba9b3..bf6e9a144dac 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -362,9 +362,8 @@ static int __net_init synproxy_net_init(struct net *net)
 		goto err2;
 	if (!nfct_synproxy_ext_add(ct))
 		goto err2;
-	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
-	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
 
+	nf_conntrack_tmpl_insert(net, ct);
 	snet->tmpl = ct;
 
 	snet->stats = alloc_percpu(struct synproxy_stats);
@@ -389,7 +388,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
 {
 	struct synproxy_net *snet = synproxy_pernet(net);
 
-	nf_conntrack_free(snet->tmpl);
+	nf_ct_put(snet->tmpl);
 	synproxy_proc_exit(net);
 	free_percpu(snet->stats);
 }
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index da35ac06a975..889960193544 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -226,12 +226,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 			goto err3;
 	}
 
-	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
-	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
-
-	/* Overload tuple linked list to put us in template list. */
-	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-				 &par->net->ct.tmpl);
+	nf_conntrack_tmpl_insert(par->net, ct);
 out:
 	info->ct = ct;
 	return 0;
-- 
2.5.2


