From: "Xie, Huawei"
Subject: Re: [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring
Date: Sun, 31 May 2015 09:10:50 +0000
To: "Ouyang, Changchun", "dev@dpdk.org"

virtio_dev_rx, virtio_dev_scatter_rx and virtio_dev_merge_rx should be
merged, and the code could be much simpler, unless there is a special
performance consideration.
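To illustrate (a rough, untested sketch, not the actual patch): if the
descriptor-copy loop always walks the mbuf chain, a linear mbuf is just
a chain of one segment and the scatter case falls out of the normal
path. copy_seg_to_vring() below is a hypothetical stand-in for the
existing descriptor-copy logic.

static inline void
copy_mbuf_to_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint16_t res_idx, struct rte_mbuf *m)
{
    struct rte_mbuf *seg;

    /*
     * A non-scattered mbuf simply has seg->next == NULL after the
     * first segment, so scattered and linear mbufs share one loop.
     */
    for (seg = m; seg != NULL; seg = seg->next)
        copy_seg_to_vring(dev, vq, res_idx,
            rte_pktmbuf_mtod(seg, void *),
            seg->data_len);    /* hypothetical helper */
}

With something like this, check_scatter() and a separate
virtio_dev_scatter_rx() entry point become unnecessary.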
On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> Add support for copying scattered mbufs to the vring, which is done by
> dev_scatter_rx, and check the 'next' pointer in the mbuf on the fly to
> select the suitable function to rx packets.
>
> Signed-off-by: Changchun Ouyang
> ---
>  lib/librte_vhost/vhost_rxtx.c | 116 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 115 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index bb56ae1..3086bb4 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -46,7 +46,8 @@
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>   * be received from the physical port or from another virtio device. A packet
>   * count is returned to indicate the number of packets that are successfully
> - * added to the RX queue. This function works when mergeable is disabled.
> + * added to the RX queue. This function works when mergeable is disabled and
> + * the mbuf is not scattered.
>   */
>  static inline uint32_t __attribute__((always_inline))
>  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> @@ -447,6 +448,103 @@ fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
>  }
>
>  /*
> + * This function works for scatter-gather RX.
> + */
> +static inline uint32_t __attribute__((always_inline))
> +virtio_dev_scatter_rx(struct virtio_net *dev, uint16_t queue_id,
> +    struct rte_mbuf **pkts, uint32_t count)
> +{
> +    struct vhost_virtqueue *vq;
> +    uint32_t pkt_idx = 0, entry_success = 0;
> +    uint16_t avail_idx;
> +    uint16_t res_base_idx, res_end_idx;
> +    uint8_t success = 0;
> +
> +    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_scatter_rx()\n",
> +        dev->device_fh);

use __func__ here instead of hard-coding the function name.

> +    if (unlikely(queue_id != VIRTIO_RXQ))
> +        LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> +
> +    vq = dev->virtqueue[VIRTIO_RXQ];
> +    count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> +
> +    if (count == 0)
> +        return 0;
> +
> +    for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> +        uint32_t secure_len = 0;
> +        uint32_t vec_idx = 0;
> +        uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
> +
> +        do {
> +            /*
> +             * As many data cores may want access to available
> +             * buffers, they need to be reserved.
> +             */
> +            res_base_idx = vq->last_used_idx_res;
> +            avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> +
> +            if (unlikely(res_base_idx == avail_idx)) {
> +                LOG_DEBUG(VHOST_DATA,
> +                    "(%"PRIu64") Failed "
> +                    "to get enough desc from "
> +                    "vring\n",
> +                    dev->device_fh);
> +                return pkt_idx;
> +            } else {
> +                uint16_t wrapped_idx =
> +                    (res_base_idx) & (vq->size - 1);
> +                uint32_t idx = vq->avail->ring[wrapped_idx];
> +
> +                update_secure_len(vq, idx, &secure_len);
> +            }
> +
> +            if (pkt_len > secure_len) {
> +                LOG_DEBUG(VHOST_DATA,
> +                    "(%"PRIu64") Failed "
> +                    "to get enough desc from "
> +                    "vring\n",
> +                    dev->device_fh);
> +                return pkt_idx;
> +            }

The behavior of virtio_dev_rx and virtio_dev_merge_rx is totally
different here. I think all the RX paths should behave in the same way:
virtio_dev_rx updates used->len to zero for a packet it cannot fit,
while this one returns immediately.

Besides, with this implementation, if the caller retransmits the same
mbuf (whose pkt_len is larger than secure_len), it will enter an
endless loop.
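A minimal, untested sketch of the drop behavior I would expect instead,
reusing the ring fields from the patch (whether the reservation should
also advance here is an open question):

static inline void
drop_oversized_pkt(struct vhost_virtqueue *vq, uint16_t res_idx)
{
    uint16_t wrapped_idx = res_idx & (vq->size - 1);

    /*
     * Hand the descriptor back through the used ring with len 0
     * ("consumed, nothing written"), the way virtio_dev_rx does,
     * so a caller that retries the oversized mbuf cannot spin
     * forever on the same packet.
     */
    vq->used->ring[wrapped_idx].id = vq->avail->ring[wrapped_idx];
    vq->used->ring[wrapped_idx].len = 0;
}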
> +
> +            /* vq->last_used_idx_res is atomically updated. */
> +            success = rte_atomic16_cmpset(&vq->last_used_idx_res,
> +                    res_base_idx,
> +                    res_base_idx + 1);
> +        } while (success == 0);

Here the behavior becomes different again from virtio_dev_merge_rx in
reserving vring entries: this reserves one entry at a time per packet,
while merge RX reserves the whole range at once (see the sketch at the
end of this mail).

> +
> +        fill_buf_vec(vq, res_base_idx, &vec_idx);
> +
> +        res_end_idx = res_base_idx + 1;
> +
> +        entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
> +                res_end_idx, pkts[pkt_idx]);
> +
> +        rte_compiler_barrier();
> +
> +        /*
> +         * Wait until it's our turn to add our buffer
> +         * to the used ring.
> +         */
> +        while (unlikely(vq->last_used_idx != res_base_idx))
> +            rte_pause();
> +
> +        *(volatile uint16_t *)&vq->used->idx += entry_success;
> +        vq->last_used_idx = res_end_idx;
> +
> +        /* flush used->idx update before we read avail->flags. */
> +        rte_mb();
> +
> +        /* Kick the guest if necessary. */
> +        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> +            eventfd_write((int)vq->callfd, 1);
> +    }
> +
> +    return count;
> +}
> +
> +/*
>   * This function works for mergeable RX.
>   */
>  static inline uint32_t __attribute__((always_inline))
> @@ -545,12 +643,28 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>      return count;
>  }
>
> +/*
> + * Return 1 if any mbuf is scattered, otherwise return 0.
> + */
> +static inline uint32_t __attribute__((always_inline))
> +check_scatter(struct rte_mbuf **pkts, uint16_t count)
> +{
> +    uint32_t i;
> +    for (i = 0; i < count; i++) {
> +        if (pkts[i]->next != NULL)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
>  uint16_t
>  rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>      struct rte_mbuf **pkts, uint16_t count)
>  {
>      if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
>          return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> +    else if (unlikely(check_scatter(pkts, count) == 1))
> +        return virtio_dev_scatter_rx(dev, queue_id, pkts, count);
>      else
>          return virtio_dev_rx(dev, queue_id, pkts, count);
>  }
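For reference, an untested sketch of reserving entries the way the
mergeable path does, reusing the identifiers from the patch
(res_cur_idx is a new local I introduce here): keep accumulating avail
entries until the reserved chain covers pkt_len, then reserve the whole
range with a single cmpset.

do {
    res_base_idx = vq->last_used_idx_res;
    res_cur_idx = res_base_idx;
    secure_len = 0;

    do {
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);
        /* No descriptors left to cover this packet. */
        if (unlikely(res_cur_idx == avail_idx))
            return pkt_idx;

        update_secure_len(vq,
            vq->avail->ring[res_cur_idx & (vq->size - 1)],
            &secure_len);
        res_cur_idx++;
    } while (pkt_len > secure_len);

    /* Reserve the whole range [res_base_idx, res_cur_idx) at once. */
    success = rte_atomic16_cmpset(&vq->last_used_idx_res,
            res_base_idx, res_cur_idx);
} while (success == 0);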