All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
To: dev@dpdk.org
Cc: Felix Marti <felix@chelsio.com>,
	Kumar Sanghvi <kumaras@chelsio.com>,
	Nirranjan Kirubaharan <nirranjan@chelsio.com>
Subject: [PATCH v3 3/9] cxgbe: add device configuration and RX support for cxgbe PMD.
Date: Thu, 18 Jun 2015 17:47:05 +0530	[thread overview]
Message-ID: <1221eb246c993a64b88350542ac9198be69876d9.1434628361.git.rahul.lakkireddy@chelsio.com> (raw)
In-Reply-To: <cover.1434628361.git.rahul.lakkireddy@chelsio.com>

Adds RX support for the cxgbe poll mode driver.  This patch:

1. Adds rx queue related eth_dev_ops.
2. Adds RSS support.
3. Adds dev_configure() and dev_infos_get() eth_dev_ops.
4. Adds rx_pkt_burst for receiving packets.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
---
v3:
- No changes.

v2:
- This patch is a subset of patch 2/5 submitted in v1.
- Cleanup some RX related macros and code.

 drivers/net/cxgbe/cxgbe.h        |   6 +
 drivers/net/cxgbe/cxgbe_ethdev.c | 183 ++++++++
 drivers/net/cxgbe/cxgbe_main.c   | 350 +++++++++++++++
 drivers/net/cxgbe/sge.c          | 915 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 1454 insertions(+)

diff --git a/drivers/net/cxgbe/cxgbe.h b/drivers/net/cxgbe/cxgbe.h
index 44d48dc..90d1db0 100644
--- a/drivers/net/cxgbe/cxgbe.h
+++ b/drivers/net/cxgbe/cxgbe.h
@@ -44,5 +44,11 @@
 #define CXGBE_DEFAULT_RX_DESC_SIZE    1024 /* Default RX ring size */
 
 int cxgbe_probe(struct adapter *adapter);
+void init_rspq(struct adapter *adap, struct sge_rspq *q, unsigned int us,
+	       unsigned int cnt, unsigned int size, unsigned int iqe_size);
+int setup_sge_fwevtq(struct adapter *adapter);
+void cfg_queues(struct rte_eth_dev *eth_dev);
+int cfg_queue_count(struct rte_eth_dev *eth_dev);
+int setup_rss(struct port_info *pi);
 
 #endif /* _CXGBE_H_ */
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index 30d39b4..1c69973 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -85,7 +85,189 @@
  */
 #include "t4_pci_id_tbl.h"
 
+static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+				uint16_t nb_pkts)
+{
+	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)rx_queue;
+	unsigned int work_done;
+
+	CXGBE_DEBUG_RX(rxq->rspq.adapter,
+		       "%s: rxq->rspq.cntxt_id = %u; nb_pkts = %d\n",
+		       __func__, rxq->rspq.cntxt_id, nb_pkts);
+
+	if (cxgbe_poll(&rxq->rspq, rx_pkts, (unsigned int)nb_pkts, &work_done))
+		dev_err(rxq->rspq.adapter, "error in cxgbe poll\n");
+
+	CXGBE_DEBUG_RX(rxq->rspq.adapter, "%s: work_done = %u\n",
+		       __func__, work_done);
+	return work_done;
+}
+
+static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev,
+			       struct rte_eth_dev_info *device_info)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adapter = pi->adapter;
+	int max_queues = adapter->sge.max_ethqsets / adapter->params.nports;
+
+	device_info->min_rx_bufsize = 68; /* XXX: Smallest pkt size */
+	device_info->max_rx_pktlen = 1500; /* XXX: For now we support mtu */
+	device_info->max_rx_queues = max_queues;
+	device_info->max_tx_queues = max_queues;
+	device_info->max_mac_addrs = 1;
+	/* XXX: For now we support one MAC/port */
+	device_info->max_vfs = adapter->params.arch.vfcount;
+	device_info->max_vmdq_pools = 0; /* XXX: For now no support for VMDQ */
+
+	device_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
+				       DEV_RX_OFFLOAD_IPV4_CKSUM |
+				       DEV_RX_OFFLOAD_UDP_CKSUM |
+				       DEV_RX_OFFLOAD_TCP_CKSUM;
+
+	device_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
+				       DEV_TX_OFFLOAD_IPV4_CKSUM |
+				       DEV_TX_OFFLOAD_UDP_CKSUM |
+				       DEV_TX_OFFLOAD_TCP_CKSUM |
+				       DEV_TX_OFFLOAD_TCP_TSO;
+
+	device_info->reta_size = pi->rss_size;
+}
+
+static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
+				    uint16_t tx_queue_id);
+static void cxgbe_dev_rx_queue_release(void *q);
+
+static int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adapter = pi->adapter;
+	int err;
+
+	CXGBE_FUNC_TRACE();
+
+	if (!(adapter->flags & FW_QUEUE_BOUND)) {
+		err = setup_sge_fwevtq(adapter);
+		if (err)
+			return err;
+		adapter->flags |= FW_QUEUE_BOUND;
+	}
+
+	err = cfg_queue_count(eth_dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
+				    uint16_t rx_queue_id)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adap = pi->adapter;
+	struct sge_rspq *q;
+
+	dev_debug(adap, "%s: pi->port_id = %d; rx_queue_id = %d\n",
+		  __func__, pi->port_id, rx_queue_id);
+
+	q = eth_dev->data->rx_queues[rx_queue_id];
+	return t4_sge_eth_rxq_start(adap, q);
+}
+
+static int cxgbe_dev_rx_queue_stop(struct rte_eth_dev *eth_dev,
+				   uint16_t rx_queue_id)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adap = pi->adapter;
+	struct sge_rspq *q;
+
+	dev_debug(adap, "%s: pi->port_id = %d; rx_queue_id = %d\n",
+		  __func__, pi->port_id, rx_queue_id);
+
+	q = eth_dev->data->rx_queues[rx_queue_id];
+	return t4_sge_eth_rxq_stop(adap, q);
+}
+
+static int cxgbe_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
+				    uint16_t queue_idx,	uint16_t nb_desc,
+				    unsigned int socket_id,
+				    const struct rte_eth_rxconf *rx_conf,
+				    struct rte_mempool *mp)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adapter = pi->adapter;
+	struct sge *s = &adapter->sge;
+	struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset + queue_idx];
+	int err = 0;
+	int msi_idx = 0;
+	unsigned int temp_nb_desc;
+
+	RTE_SET_USED(rx_conf);
+
+	dev_debug(adapter, "%s: eth_dev->data->nb_rx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; mp = %p\n",
+		  __func__, eth_dev->data->nb_rx_queues, queue_idx, nb_desc,
+		  socket_id, mp);
+
+	/*  Free up the existing queue  */
+	if (eth_dev->data->rx_queues[queue_idx]) {
+		cxgbe_dev_rx_queue_release(eth_dev->data->rx_queues[queue_idx]);
+		eth_dev->data->rx_queues[queue_idx] = NULL;
+	}
+
+	eth_dev->data->rx_queues[queue_idx] = (void *)rxq;
+
+	/* Sanity Checking
+	 *
+	 * nb_desc should be > 0 and <= CXGBE_MAX_RING_DESC_SIZE
+	 */
+	temp_nb_desc = nb_desc;
+	if (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {
+		dev_warn(adapter, "%s: number of descriptors must be >= %d. Using default [%d]\n",
+			 __func__, CXGBE_MIN_RING_DESC_SIZE,
+			 CXGBE_DEFAULT_RX_DESC_SIZE);
+		temp_nb_desc = CXGBE_DEFAULT_RX_DESC_SIZE;
+	} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {
+		dev_err(adapter, "%s: number of descriptors must be between %d and %d inclusive. Default [%d]\n",
+			__func__, CXGBE_MIN_RING_DESC_SIZE,
+			CXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_RX_DESC_SIZE);
+		return -(EINVAL);
+	}
+
+	rxq->rspq.size = temp_nb_desc;
+	/* fl is embedded in rxq, so its address can never be NULL */
+	rxq->fl.size = temp_nb_desc;
+
+	err = t4_sge_alloc_rxq(adapter, &rxq->rspq, false, eth_dev, msi_idx,
+			       &rxq->fl, t4_ethrx_handler,
+			       t4_get_mps_bg_map(adapter, pi->tx_chan), mp,
+			       queue_idx, socket_id);
+
+	dev_debug(adapter, "%s: err = %d; port_id = %d; cntxt_id = %u\n",
+		  __func__, err, pi->port_id, rxq->rspq.cntxt_id);
+	return err;
+}
+
+static void cxgbe_dev_rx_queue_release(void *q)
+{
+	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)q;
+	struct sge_rspq *rq = &rxq->rspq;
+
+	if (rq) {
+		struct port_info *pi = (struct port_info *)
+				       (rq->eth_dev->data->dev_private);
+		struct adapter *adap = pi->adapter;
+
+		dev_debug(adap, "%s: pi->port_id = %d; rx_queue_id = %d\n",
+			  __func__, pi->port_id, rxq->rspq.cntxt_id);
+
+		t4_sge_eth_rxq_release(adap, rxq);
+	}
+}
+
 static struct eth_dev_ops cxgbe_eth_dev_ops = {
+	.dev_configure		= cxgbe_dev_configure,
+	.dev_infos_get		= cxgbe_dev_info_get,
+	.rx_queue_setup         = cxgbe_dev_rx_queue_setup,
+	.rx_queue_start		= cxgbe_dev_rx_queue_start,
+	.rx_queue_stop		= cxgbe_dev_rx_queue_stop,
+	.rx_queue_release	= cxgbe_dev_rx_queue_release,
 };
 
 /*
@@ -103,6 +285,7 @@ static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev)
 	CXGBE_FUNC_TRACE();
 
 	eth_dev->dev_ops = &cxgbe_eth_dev_ops;
+	eth_dev->rx_pkt_burst = &cxgbe_recv_pkts;
 
 	/* for secondary processes, we don't initialise any further as primary
 	 * has already done this work.
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index 6f3a6db..abcef6b 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -67,6 +67,249 @@
 #include "t4_msg.h"
 #include "cxgbe.h"
 
+/*
+ * Response queue handler for the FW event queue.
+ */
+static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
+			  __rte_unused const struct pkt_gl *gl)
+{
+	u8 opcode = ((const struct rss_header *)rsp)->opcode;
+
+	rsp++;                                          /* skip RSS header */
+
+	/*
+	 * FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
+	 */
+	if (unlikely(opcode == CPL_FW4_MSG &&
+		     ((const struct cpl_fw4_msg *)rsp)->type ==
+		      FW_TYPE_RSSCPL)) {
+		rsp++;
+		opcode = ((const struct rss_header *)rsp)->opcode;
+		rsp++;
+		if (opcode != CPL_SGE_EGR_UPDATE) {
+			dev_err(q->adapter, "unexpected FW4/CPL %#x on FW event queue\n",
+				opcode);
+			goto out;
+		}
+	}
+
+	if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
+		/* do nothing */
+	} else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
+		const struct cpl_fw6_msg *msg = (const void *)rsp;
+
+		t4_handle_fw_rpl(q->adapter, msg->data);
+	} else {
+		dev_err(q->adapter, "unexpected CPL %#x on FW event queue\n",
+			opcode);
+	}
+out:
+	return 0;
+}
+
+int setup_sge_fwevtq(struct adapter *adapter)
+{
+	struct sge *s = &adapter->sge;
+	int err = 0;
+	int msi_idx = 0;
+
+	err = t4_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->eth_dev,
+			       msi_idx, NULL, fwevtq_handler, -1, NULL, 0,
+			       rte_socket_id());
+	return err;
+}
+
+static int closest_timer(const struct sge *s, int time)
+{
+	unsigned int i, match = 0;
+	int delta, min_delta = INT_MAX;
+
+	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
+		delta = time - s->timer_val[i];
+		if (delta < 0)
+			delta = -delta;
+		if (delta < min_delta) {
+			min_delta = delta;
+			match = i;
+		}
+	}
+	return match;
+}
+
+static int closest_thres(const struct sge *s, int thres)
+{
+	unsigned int i, match = 0;
+	int delta, min_delta = INT_MAX;
+
+	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
+		delta = thres - s->counter_val[i];
+		if (delta < 0)
+			delta = -delta;
+		if (delta < min_delta) {
+			min_delta = delta;
+			match = i;
+		}
+	}
+	return match;
+}
+
+/**
+ * cxgb4_set_rspq_intr_params - set a queue's interrupt holdoff parameters
+ * @q: the Rx queue
+ * @us: the hold-off time in us, or 0 to disable timer
+ * @cnt: the hold-off packet count, or 0 to disable counter
+ *
+ * Sets an Rx queue's interrupt hold-off time and packet count.  At least
+ * one of the two needs to be enabled for the queue to generate interrupts.
+ */
+int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
+			       unsigned int cnt)
+{
+	struct adapter *adap = q->adapter;
+	unsigned int timer_val;
+
+	if (cnt) {
+		int err;
+		u32 v, new_idx;
+
+		new_idx = closest_thres(&adap->sge, cnt);
+		if (q->desc && q->pktcnt_idx != new_idx) {
+			/* the queue has already been created, update it */
+			v = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
+			    V_FW_PARAMS_PARAM_X(
+			    FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
+			    V_FW_PARAMS_PARAM_YZ(q->cntxt_id);
+			err = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
+					    &v, &new_idx);
+			if (err)
+				return err;
+		}
+		q->pktcnt_idx = new_idx;
+	}
+
+	timer_val = (us == 0) ? X_TIMERREG_RESTART_COUNTER :
+				closest_timer(&adap->sge, us);
+
+	if ((us | cnt) == 0)
+		q->intr_params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
+	else
+		q->intr_params = V_QINTR_TIMER_IDX(timer_val) |
+				 V_QINTR_CNT_EN(cnt > 0);
+	return 0;
+}
+
+static inline bool is_x_1g_port(const struct link_config *lc)
+{
+	return ((lc->supported & FW_PORT_CAP_SPEED_1G) != 0);
+}
+
+static inline bool is_x_10g_port(const struct link_config *lc)
+{
+	return ((lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
+		(lc->supported & FW_PORT_CAP_SPEED_40G) != 0 ||
+		(lc->supported & FW_PORT_CAP_SPEED_100G) != 0);
+}
+
+inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+		      unsigned int us, unsigned int cnt,
+		      unsigned int size, unsigned int iqe_size)
+{
+	q->adapter = adap;
+	cxgb4_set_rspq_intr_params(q, us, cnt);
+	q->iqe_len = iqe_size;
+	q->size = size;
+}
+
+int cfg_queue_count(struct rte_eth_dev *eth_dev)
+{
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adap = pi->adapter;
+	struct sge *s = &adap->sge;
+	unsigned int max_queues = s->max_ethqsets / adap->params.nports;
+
+	if ((eth_dev->data->nb_rx_queues < 1) ||
+	    (eth_dev->data->nb_tx_queues < 1))
+		return -EINVAL;
+
+	if ((eth_dev->data->nb_rx_queues > max_queues) ||
+	    (eth_dev->data->nb_tx_queues > max_queues))
+		return -EINVAL;
+
+	if (eth_dev->data->nb_rx_queues > pi->rss_size)
+		return -EINVAL;
+
+	/* We must configure RSS, since config has changed*/
+	pi->flags &= ~PORT_RSS_DONE;
+
+	pi->n_rx_qsets = eth_dev->data->nb_rx_queues;
+	pi->n_tx_qsets = eth_dev->data->nb_tx_queues;
+
+	return 0;
+}
+
+void cfg_queues(struct rte_eth_dev *eth_dev)
+{
+	struct rte_config *config = rte_eal_get_configuration();
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	struct adapter *adap = pi->adapter;
+	struct sge *s = &adap->sge;
+	unsigned int i, nb_ports = 0, qidx = 0;
+	unsigned int q_per_port = 0;
+
+	if (!(adap->flags & CFG_QUEUES)) {
+		for_each_port(adap, i) {
+			struct port_info *tpi = adap2pinfo(adap, i);
+
+			nb_ports += (is_x_10g_port(&tpi->link_cfg)) ||
+				     is_x_1g_port(&tpi->link_cfg) ? 1 : 0;
+		}
+
+		/*
+		 * We default up to # of cores queues per 1G/10G port.
+		 */
+		if (nb_ports)
+			q_per_port = (MAX_ETH_QSETS -
+				     (adap->params.nports - nb_ports)) /
+				     nb_ports;
+
+		if (q_per_port > config->lcore_count)
+			q_per_port = config->lcore_count;
+
+		for_each_port(adap, i) {
+			struct port_info *pi = adap2pinfo(adap, i);
+
+			pi->first_qset = qidx;
+
+			/* Initially n_rx_qsets == n_tx_qsets */
+			pi->n_rx_qsets = (is_x_10g_port(&pi->link_cfg) ||
+					  is_x_1g_port(&pi->link_cfg)) ?
+					  q_per_port : 1;
+			pi->n_tx_qsets = pi->n_rx_qsets;
+
+			if (pi->n_rx_qsets > pi->rss_size)
+				pi->n_rx_qsets = pi->rss_size;
+
+			qidx += pi->n_rx_qsets;
+		}
+
+		s->max_ethqsets = qidx;
+
+		for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
+			struct sge_eth_rxq *r = &s->ethrxq[i];
+
+			init_rspq(adap, &r->rspq, 0, 0, 1024, 64);
+			r->usembufs = 1;
+			r->fl.size = (r->usembufs ? 1024 : 72);
+		}
+
+		for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
+			s->ethtxq[i].q.size = 1024;
+
+		init_rspq(adap, &adap->sge.fw_evtq, 0, 0, 1024, 64);
+		adap->flags |= CFG_QUEUES;
+	}
+}
+
 static void setup_memwin(struct adapter *adap)
 {
 	u32 mem_win0_base;
@@ -89,6 +332,25 @@ static void setup_memwin(struct adapter *adap)
 					MEMWIN_NIC));
 }
 
+static int init_rss(struct adapter *adap)
+{
+	unsigned int i;
+	int err;
+
+	err = t4_init_rss_mode(adap, adap->mbox);
+	if (err)
+		return err;
+
+	for_each_port(adap, i) {
+		struct port_info *pi = adap2pinfo(adap, i);
+
+		pi->rss = rte_zmalloc(NULL, pi->rss_size, 0);
+		if (!pi->rss)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 static void print_port_info(struct adapter *adap)
 {
 	int i;
@@ -564,6 +826,87 @@ void t4_os_portmod_changed(const struct adapter *adap, int port_id)
 			 pi->port_id, pi->mod_type);
 }
 
+/**
+ * cxgb4_write_rss - write the RSS table for a given port
+ * @pi: the port
+ * @queues: array of queue indices for RSS
+ *
+ * Sets up the portion of the HW RSS table for the port's VI to distribute
+ * packets to the Rx queues in @queues.
+ */
+int cxgb4_write_rss(const struct port_info *pi, const u16 *queues)
+{
+	u16 *rss;
+	int i, err;
+	struct adapter *adapter = pi->adapter;
+	const struct sge_eth_rxq *rxq;
+
+	/*  Should never be called before setting up sge eth rx queues */
+	BUG_ON(!(adapter->flags & FULL_INIT_DONE));
+
+	rxq = &adapter->sge.ethrxq[pi->first_qset];
+	rss = rte_zmalloc(NULL, pi->rss_size * sizeof(u16), 0);
+	if (!rss)
+		return -ENOMEM;
+
+	/* map the queue indices to queue ids */
+	for (i = 0; i < pi->rss_size; i++, queues++)
+		rss[i] = rxq[*queues].rspq.abs_id;
+
+	err = t4_config_rss_range(adapter, adapter->pf, pi->viid, 0,
+				  pi->rss_size, rss, pi->rss_size);
+	/*
+	 * If Tunnel All Lookup isn't specified in the global RSS
+	 * Configuration, then we need to specify a default Ingress
+	 * Queue for any ingress packets which aren't hashed.  We'll
+	 * use our first ingress queue ...
+	 */
+	if (!err)
+		err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid,
+				       F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
+				       F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
+				       F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
+				       F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN |
+				       F_FW_RSS_VI_CONFIG_CMD_UDPEN,
+				       rss[0]);
+	rte_free(rss);
+	return err;
+}
+
+/**
+ * setup_rss - configure RSS
+ * @adapter: the adapter
+ *
+ * Sets up RSS to distribute packets to multiple receive queues.  We
+ * configure the RSS CPU lookup table to distribute to the number of HW
+ * receive queues, and the response queue lookup table to narrow that
+ * down to the response queues actually configured for each port.
+ * We always configure the RSS mapping for all ports since the mapping
+ * table has plenty of entries.
+ */
+int setup_rss(struct port_info *pi)
+{
+	int j, err;
+	struct adapter *adapter = pi->adapter;
+
+	dev_debug(adapter, "%s:  pi->rss_size = %u; pi->n_rx_qsets = %u\n",
+		  __func__, pi->rss_size, pi->n_rx_qsets);
+
+	/* "!" binds tighter than "&": "!pi->flags & PORT_RSS_DONE" tested
+	 * (!pi->flags) & PORT_RSS_DONE, which is false once any flag is set.
+	 */
+	if (!(pi->flags & PORT_RSS_DONE)) {
+		if (adapter->flags & FULL_INIT_DONE) {
+			/* Fill default values with equal distribution */
+			for (j = 0; j < pi->rss_size; j++)
+				pi->rss[j] = j % pi->n_rx_qsets;
+
+			err = cxgb4_write_rss(pi, pi->rss);
+			if (err)
+				return err;
+			pi->flags |= PORT_RSS_DONE;
+		}
+	}
+	return 0;
+}
+
 int cxgbe_probe(struct adapter *adapter)
 {
 	struct port_info *pi;
@@ -662,6 +1005,7 @@ allocate_mac:
 		pi->eth_dev->data->dev_private = pi;
 		pi->eth_dev->driver = adapter->eth_dev->driver;
 		pi->eth_dev->dev_ops = adapter->eth_dev->dev_ops;
+		pi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst;
 		TAILQ_INIT(&pi->eth_dev->link_intr_cbs);
 
 		pi->eth_dev->data->mac_addrs = rte_zmalloc(name,
@@ -683,8 +1027,14 @@ allocate_mac:
 		}
 	}
 
+	cfg_queues(adapter->eth_dev);
+
 	print_port_info(adapter);
 
+	err = init_rss(adapter);
+	if (err)
+		goto out_free;
+
 	return 0;
 
 out_free:
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 20da0fd..ff71be2 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -69,6 +69,13 @@
 #include "cxgbe.h"
 
 /*
+ * Max number of Rx buffers we replenish at a time.
+ */
+#define MAX_RX_REFILL 16U
+
+#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
+
+/*
  * Rx buffer sizes for "usembufs" Free List buffers (one ingress packet
  * per mbuf buffer).  We currently only support two sizes for 1500- and
  * 9000-byte MTUs. We could easily support more but there doesn't seem to be
@@ -118,6 +125,914 @@ enum {
 };
 
 /**
+ * fl_cap - return the capacity of a free-buffer list
+ * @fl: the FL
+ *
+ * Returns the capacity of a free-buffer list.  The capacity is less than
+ * the size because one descriptor needs to be left unpopulated, otherwise
+ * HW will think the FL is empty.
+ */
+static inline unsigned int fl_cap(const struct sge_fl *fl)
+{
+	return fl->size - 8;   /* 1 descriptor = 8 buffers */
+}
+
+/**
+ * fl_starving - return whether a Free List is starving.
+ * @adapter: pointer to the adapter
+ * @fl: the Free List
+ *
+ * Tests specified Free List to see whether the number of buffers
+ * available to the hardware has fallen below our "starvation"
+ * threshold.
+ */
+static inline bool fl_starving(const struct adapter *adapter,
+			       const struct sge_fl *fl)
+{
+	const struct sge *s = &adapter->sge;
+
+	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
+}
+
+static inline unsigned int get_buf_size(struct adapter *adapter,
+					const struct rx_sw_desc *d)
+{
+	struct sge *s = &adapter->sge;
+	unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
+	unsigned int buf_size;
+
+	switch (rx_buf_size_idx) {
+	case RX_SMALL_PG_BUF:
+		buf_size = PAGE_SIZE;
+		break;
+
+	case RX_LARGE_PG_BUF:
+		buf_size = PAGE_SIZE << s->fl_pg_order;
+		break;
+
+	case RX_SMALL_MTU_BUF:
+		buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
+		break;
+
+	case RX_LARGE_MTU_BUF:
+		buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
+		break;
+
+	default:
+		BUG_ON(1);
+		buf_size = 0; /* deal with bogus compiler warnings */
+		/* NOTREACHED */
+	}
+
+	return buf_size;
+}
+
+/**
+ * free_rx_bufs - free the Rx buffers on an SGE free list
+ * @q: the SGE free list to free buffers from
+ * @n: how many buffers to free
+ *
+ * Release the next @n buffers on an SGE free-buffer Rx queue.   The
+ * buffers must be made inaccessible to HW before calling this function.
+ */
+static void free_rx_bufs(struct sge_fl *q, int n)
+{
+	unsigned int cidx = q->cidx;
+	struct rx_sw_desc *d;
+
+	d = &q->sdesc[cidx];
+	while (n--) {
+		if (d->buf) {
+			rte_pktmbuf_free(d->buf);
+			d->buf = NULL;
+		}
+		++d;
+		if (++cidx == q->size) {
+			cidx = 0;
+			d = q->sdesc;
+		}
+		q->avail--;
+	}
+	q->cidx = cidx;
+}
+
+/**
+ * unmap_rx_buf - unmap the current Rx buffer on an SGE free list
+ * @q: the SGE free list
+ *
+ * Unmap the current buffer on an SGE free-buffer Rx queue.   The
+ * buffer must be made inaccessible to HW before calling this function.
+ *
+ * This is similar to @free_rx_bufs above but does not free the buffer.
+ * Do note that the FL still loses any further access to the buffer.
+ */
+static void unmap_rx_buf(struct sge_fl *q)
+{
+	if (++q->cidx == q->size)
+		q->cidx = 0;
+	q->avail--;
+}
+
+static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
+{
+	if (q->pend_cred >= 8) {
+		u32 val = adap->params.arch.sge_fl_db;
+
+		if (is_t4(adap->params.chip))
+			val |= V_PIDX(q->pend_cred / 8);
+		else
+			val |= V_PIDX_T5(q->pend_cred / 8);
+
+		/*
+		 * Make sure all memory writes to the Free List queue are
+		 * committed before we tell the hardware about them.
+		 */
+		wmb();
+
+		/*
+		 * If we don't have access to the new User Doorbell (T5+), use
+		 * the old doorbell mechanism; otherwise use the new BAR2
+		 * mechanism.
+		 */
+		if (unlikely(!q->bar2_addr)) {
+			t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+				     val | V_QID(q->cntxt_id));
+		} else {
+			writel(val | V_QID(q->bar2_qid),
+			       (void *)((uintptr_t)q->bar2_addr +
+			       SGE_UDB_KDOORBELL));
+
+			/*
+			 * This Write memory Barrier will force the write to
+			 * the User Doorbell area to be flushed.
+			 */
+			wmb();
+		}
+		q->pend_cred &= 7;
+	}
+}
+
+static inline struct rte_mbuf *cxgbe_rxmbuf_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *m;
+
+	m = __rte_mbuf_raw_alloc(mp);
+	__rte_mbuf_sanity_check_raw(m, 0);
+	return m;
+}
+
+static inline void set_rx_sw_desc(struct rx_sw_desc *sd, void *buf,
+				  dma_addr_t mapping)
+{
+	sd->buf = buf;
+	sd->dma_addr = mapping;      /* includes size low bits */
+}
+
+/**
+ * refill_fl_usembufs - refill an SGE Rx buffer ring with mbufs
+ * @adap: the adapter
+ * @q: the ring to refill
+ * @n: the number of new buffers to allocate
+ *
+ * (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
+ * allocated with the supplied gfp flags.  The caller must assure that
+ * @n does not exceed the queue's capacity.  If afterwards the queue is
+ * found critically low mark it as starving in the bitmap of starving FLs.
+ *
+ * Returns the number of buffers allocated.
+ */
+static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
+				       int n)
+{
+	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, fl);
+	unsigned int cred = q->avail;
+	__be64 *d = &q->desc[q->pidx];
+	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
+	unsigned int buf_size_idx = RX_SMALL_MTU_BUF;
+
+	while (n--) {
+		struct rte_mbuf *mbuf = cxgbe_rxmbuf_alloc(rxq->rspq.mb_pool);
+		dma_addr_t mapping;
+
+		if (!mbuf) {
+			dev_debug(adap, "%s: mbuf alloc failed\n", __func__);
+			q->alloc_failed++;
+			rxq->rspq.eth_dev->data->rx_mbuf_alloc_failed++;
+			goto out;
+		}
+
+		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
+		mbuf->next = NULL;
+
+		mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off);
+
+		mapping |= buf_size_idx;
+		*d++ = cpu_to_be64(mapping);
+		set_rx_sw_desc(sd, mbuf, mapping);
+		sd++;
+
+		q->avail++;
+		if (++q->pidx == q->size) {
+			q->pidx = 0;
+			sd = q->sdesc;
+			d = q->desc;
+		}
+	}
+
+out:    cred = q->avail - cred;
+	q->pend_cred += cred;
+	ring_fl_db(adap, q);
+
+	if (unlikely(fl_starving(adap, q))) {
+		/*
+		 * Make sure data has been written to free list
+		 */
+		wmb();
+		q->low++;
+	}
+
+	return cred;
+}
+
+/**
+ * refill_fl - refill an SGE Rx buffer ring with mbufs
+ * @adap: the adapter
+ * @q: the ring to refill
+ * @n: the number of new buffers to allocate
+ *
+ * (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
+ * allocated with the supplied gfp flags.  The caller must assure that
+ * @n does not exceed the queue's capacity.  Returns the number of buffers
+ * allocated.
+ */
+static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n)
+{
+	return refill_fl_usembufs(adap, q, n);
+}
+
+static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
+{
+	refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail));
+}
+
+/**
+ * alloc_ring - allocate resources for an SGE descriptor ring
+ * @dev: the PCI device's core device
+ * @nelem: the number of descriptors
+ * @elem_size: the size of each descriptor
+ * @sw_size: the size of the SW state associated with each ring element
+ * @phys: the physical address of the allocated ring
+ * @metadata: address of the array holding the SW state for the ring
+ * @stat_size: extra space in HW ring for status information
+ * @node: preferred node for memory allocations
+ *
+ * Allocates resources for an SGE descriptor ring, such as Tx queues,
+ * free buffer lists, or response queues.  Each SGE ring requires
+ * space for its HW descriptors plus, optionally, space for the SW state
+ * associated with each HW entry (the metadata).  The function returns
+ * three values: the virtual address for the HW ring (the return value
+ * of the function), the bus address of the HW ring, and the address
+ * of the SW ring.
+ */
+static void *alloc_ring(size_t nelem, size_t elem_size,
+			size_t sw_size, dma_addr_t *phys, void *metadata,
+			size_t stat_size, __rte_unused uint16_t queue_id,
+			int socket_id, const char *z_name,
+			const char *z_name_sw)
+{
+	size_t len = CXGBE_MAX_RING_DESC_SIZE * elem_size + stat_size;
+	const struct rte_memzone *tz;
+	void *s = NULL;
+
+	dev_debug(adapter, "%s: nelem = %lu; elem_size = %lu; sw_size = %lu; "
+		  "stat_size = %lu; queue_id = %u; socket_id = %d; z_name = %s;"
+		  " z_name_sw = %s\n", __func__, nelem, elem_size, sw_size,
+		  stat_size, queue_id, socket_id, z_name, z_name_sw);
+
+	tz = rte_memzone_lookup(z_name);
+	if (tz) {
+		dev_debug(adapter, "%s: tz exists...returning existing..\n",
+			  __func__);
+		goto alloc_sw_ring;
+	}
+
+	/*
+	 * Allocate TX/RX ring hardware descriptors. A memzone large enough to
+	 * handle the maximum ring size is allocated in order to allow for
+	 * resizing in later calls to the queue setup function.
+	 */
+	tz = rte_memzone_reserve_aligned(z_name, len, socket_id, 0, 4096);
+	if (!tz)
+		return NULL;
+
+alloc_sw_ring:
+	memset(tz->addr, 0, len);
+	if (sw_size) {
+		s = rte_zmalloc_socket(z_name_sw, nelem * sw_size,
+				       RTE_CACHE_LINE_SIZE, socket_id);
+
+		if (!s) {
+			dev_err(adapter, "%s: failed to get sw_ring memory\n",
+				__func__);
+			return NULL;
+		}
+	}
+	if (metadata)
+		*(void **)metadata = s;
+
+	*phys = (uint64_t)tz->phys_addr;
+	return tz->addr;
+}
+
+/**
+ * t4_pktgl_to_mbuf_usembufs - build an mbuf from a packet gather list
+ * @gl: the gather list
+ *
+ * Builds an mbuf from the given packet gather list.  Returns the mbuf or
+ * %NULL if mbuf allocation failed.
+ */
+static struct rte_mbuf *t4_pktgl_to_mbuf_usembufs(const struct pkt_gl *gl)
+{
+	/*
+	 * If there's only one mbuf fragment, just return that.
+	 */
+	if (likely(gl->nfrags == 1))
+		return gl->mbufs[0];
+
+	return NULL;
+}
+
+/**
+ * t4_pktgl_to_mbuf - build an mbuf from a packet gather list
+ * @gl: the gather list
+ *
+ * Builds an mbuf from the given packet gather list.  Returns the mbuf or
+ * %NULL if mbuf allocation failed.
+ */
+static struct rte_mbuf *t4_pktgl_to_mbuf(const struct pkt_gl *gl)
+{
+	return t4_pktgl_to_mbuf_usembufs(gl);
+}
+
+#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
+	((dma_addr_t) ((mb)->buf_physaddr + (mb)->data_off))
+
+/**
+ * t4_ethrx_handler - process an ingress ethernet packet
+ * @q: the response queue that received the packet
+ * @rsp: the response queue descriptor holding the RX_PKT message
+ * @si: the gather list of packet fragments
+ *
+ * Process an ingress ethernet packet and deliver it to the stack.
+ */
+int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
+		     const struct pkt_gl *si)
+{
+	struct rte_mbuf *mbuf;
+	const struct cpl_rx_pkt *pkt;
+	const struct rss_header *rss_hdr;
+	bool csum_ok;
+	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
+
+	rss_hdr = (const void *)rsp;
+	pkt = (const void *)&rsp[1];
+	csum_ok = pkt->csum_calc && !pkt->err_vec;
+
+	mbuf = t4_pktgl_to_mbuf(si);
+	if (unlikely(!mbuf)) {
+		rxq->stats.rx_drops++;
+		return 0;
+	}
+
+	mbuf->port = pkt->iff;
+	if (pkt->l2info & htonl(F_RXF_IP)) {
+		mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+		if (unlikely(!csum_ok))
+			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+		if ((pkt->l2info & htonl(F_RXF_UDP | F_RXF_TCP)) && !csum_ok)
+			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+	} else if (pkt->l2info & htonl(F_RXF_IP6)) {
+		mbuf->ol_flags |= PKT_RX_IPV6_HDR;
+	}
+
+	mbuf->port = pkt->iff;
+
+	if (!rss_hdr->filter_tid && rss_hdr->hash_type) {
+		mbuf->ol_flags |= PKT_RX_RSS_HASH;
+		mbuf->hash.rss = ntohl(rss_hdr->hash_val);
+	}
+
+	if (pkt->vlan_ex) {
+		mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+		mbuf->vlan_tci = ntohs(pkt->vlan);
+	}
+	rxq->stats.pkts++;
+	rxq->stats.rx_bytes += mbuf->pkt_len;
+
+	return 0;
+}
+
+/**
+ * restore_rx_bufs - put back a packet's Rx buffers
+ * @q: the SGE free list
+ * @frags: number of FL buffers to restore
+ *
+ * Puts back on an FL the Rx buffers.  The buffers have already been
+ * unmapped and are left unmapped, we mark them so to prevent further
+ * unmapping attempts.
+ *
+ * This function undoes a series of @unmap_rx_buf calls when we find out
+ * that the current packet can't be processed right away afterall and we
+ * need to come back to it later.  This is a very rare event and there's
+ * no effort to make this particularly efficient.
+ */
+static void restore_rx_bufs(struct sge_fl *q, int frags)
+{
+	while (frags--) {
+		if (q->cidx == 0)
+			q->cidx = q->size - 1;
+		else
+			q->cidx--;
+		q->avail++;
+	}
+}
+
+/**
+ * is_new_response - check if a response is newly written
+ * @r: the response descriptor
+ * @q: the response queue
+ *
+ * A descriptor holds an unprocessed response when its generation bit
+ * matches the queue's current generation.
+ */
+static inline bool is_new_response(const struct rsp_ctrl *r,
+				   const struct sge_rspq *q)
+{
+	unsigned int desc_gen = r->u.type_gen >> S_RSPD_GEN;
+
+	return desc_gen == q->gen;
+}
+
+#define CXGB4_MSG_AN ((void *)1)
+
+/**
+ * rspq_next - advance to the next entry in a response queue
+ * @q: the queue
+ *
+ * Bumps the current descriptor pointer and consumer index, wrapping
+ * back to the start of the ring (and flipping the generation bit) when
+ * the end of the queue is reached.
+ */
+static inline void rspq_next(struct sge_rspq *q)
+{
+	const char *next = (const char *)q->cur_desc + q->iqe_len;
+
+	q->cur_desc = (const __be64 *)next;
+	q->cidx++;
+	if (unlikely(q->cidx == q->size)) {
+		q->cidx = 0;
+		q->gen ^= 1;
+		q->cur_desc = q->desc;
+	}
+}
+
+/**
+ * process_responses - process responses from an SGE response queue
+ * @q: the ingress queue to process
+ * @budget: how many responses can be processed in this round
+ * @rx_pkts: mbuf array to store the received packets in
+ *
+ * Process responses from an SGE response queue up to the supplied budget.
+ * Responses include received packets as well as control messages from FW
+ * or HW.
+ *
+ * Additionally choose the interrupt holdoff time for the next interrupt
+ * on this queue.  If the system is under memory shortage use a fairly
+ * long delay to help recovery.
+ *
+ * Returns the number of responses consumed (at most @budget).
+ */
+static int process_responses(struct sge_rspq *q, int budget,
+			     struct rte_mbuf **rx_pkts)
+{
+	int ret = 0, rsp_type;
+	int budget_left = budget;
+	const struct rsp_ctrl *rc;
+	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
+	struct adapter *adapter = q->adapter;
+
+	while (likely(budget_left)) {
+		/* The rsp_ctrl trailer lives at the end of each iq entry. */
+		rc = (const struct rsp_ctrl *)
+		     ((const char *)q->cur_desc + (q->iqe_len - sizeof(*rc)));
+
+		if (!is_new_response(rc, q))
+			break;
+
+		/*
+		 * Ensure response has been read
+		 */
+		rmb();
+		rsp_type = G_RSPD_TYPE(rc->u.type_gen);
+
+		if (likely(rsp_type == X_RSPD_TYPE_FLBUF)) {
+			struct pkt_gl si;
+			const struct rx_sw_desc *rsd;
+			struct rte_mbuf *pkt = NULL;
+			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
+
+			si.usembufs = rxq->usembufs;
+			/*
+			 * In "use mbufs" mode, we don't pack multiple
+			 * ingress packets per buffer (mbuf) so we
+			 * should _always_ get a "New Buffer" flags
+			 * from the SGE.  Also, since we hand the
+			 * mbuf's up to the host stack for it to
+			 * eventually free, we don't release the mbuf's
+			 * in the driver (in contrast to the "packed
+			 * page" mode where the driver needs to
+			 * release its reference on the page buffers).
+			 */
+			BUG_ON(!(len & F_RSPD_NEWBUF));
+			len = G_RSPD_LEN(len);
+			si.tot_len = len;
+
+			/* gather packet fragments */
+			for (frags = 0; len; frags++) {
+				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
+				bufsz = min(get_buf_size(adapter, rsd),	len);
+				pkt = rsd->buf;
+				pkt->data_len = bufsz;
+				pkt->pkt_len = bufsz;
+				si.mbufs[frags] = pkt;
+				len -= bufsz;
+				unmap_rx_buf(&rxq->fl);
+			}
+
+			/* Prefetch the start of the packet payload. */
+			si.va = RTE_PTR_ADD(si.mbufs[0]->buf_addr,
+					    si.mbufs[0]->data_off);
+			rte_prefetch1(si.va);
+
+			/*
+			 * For the "use mbuf" case here, we can end up
+			 * chewing through our Free List very rapidly
+			 * with one entry per Ingress packet getting
+			 * consumed.  So if the handler() successfully
+			 * consumed the mbuf, check to see if we can
+			 * refill the Free List incrementally in the
+			 * loop ...
+			 */
+			si.nfrags = frags;
+			ret = q->handler(q, q->cur_desc, &si);
+
+			if (unlikely(ret != 0)) {
+				/* Handler refused it: rewind the FL state. */
+				restore_rx_bufs(&rxq->fl, frags);
+			} else {
+				/*
+				 * NOTE(review): for a multi-fragment packet,
+				 * 'pkt' is the LAST fragment gathered above;
+				 * presumably the handler chains the fragments
+				 * so this mbuf represents the whole packet --
+				 * confirm against the rspq handler.
+				 */
+				rx_pkts[budget - budget_left] = pkt;
+				if (fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
+					__refill_fl(q->adapter, &rxq->fl);
+			}
+
+		} else if (likely(rsp_type == X_RSPD_TYPE_CPL)) {
+			ret = q->handler(q, q->cur_desc, NULL);
+		} else {
+			/* Asynchronous message, e.g. a FW notification. */
+			ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN);
+		}
+
+		if (unlikely(ret)) {
+			/* couldn't process descriptor, back off for recovery */
+			q->next_intr_params = V_QINTR_TIMER_IDX(NOMEM_TMR_IDX);
+			break;
+		}
+
+		rspq_next(q);
+		budget_left--;
+	}
+
+	/*
+	 * If this is a Response Queue with an associated Free List and
+	 * there's room for another chunk of new Free List buffer pointers,
+	 * refill the Free List.
+	 */
+
+	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
+		__refill_fl(q->adapter, &rxq->fl);
+
+	return budget - budget_left;
+}
+
+/**
+ * cxgbe_poll - poll an ingress queue for received packets
+ * @q: the response queue to poll
+ * @rx_pkts: mbuf array to store the received packets in
+ * @budget: maximum number of responses to process
+ * @work_done: set to the number of responses actually processed
+ *
+ * Processes up to @budget responses, then re-arms the queue by pushing
+ * the consumer-index update through the GTS doorbell (legacy register
+ * or BAR2, whichever is available).  Always returns 0.
+ */
+int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts,
+	       unsigned int budget, unsigned int *work_done)
+{
+	unsigned int params;
+	u32 val;
+	int err = 0;
+
+	*work_done = process_responses(q, budget, rx_pkts);
+	/* Re-arm with an immediate CIDX update, no holdoff timer. */
+	params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
+	q->next_intr_params = params;
+	val = V_CIDXINC(*work_done) | V_SEINTARM(params);
+
+	if (*work_done) {
+		/*
+		 * If we don't have access to the new User GTS (T5+),
+		 * use the old doorbell mechanism; otherwise use the new
+		 * BAR2 mechanism.
+		 */
+		if (unlikely(!q->bar2_addr))
+			t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
+				     val | V_INGRESSQID((u32)q->cntxt_id));
+		else {
+			writel(val | V_INGRESSQID(q->bar2_qid),
+			       (void *)((uintptr_t)q->bar2_addr +
+			       SGE_UDB_GTS));
+			/*
+			 * This Write memory Barrier will force the write to
+			 * the User Doorbell area to be flushed.
+			 */
+			wmb();
+		}
+	}
+
+	return err;
+}
+
+/**
+ * bar2_address - return the BAR2 address for an SGE Queue's Registers
+ * @adapter: the adapter
+ * @qid: the SGE Queue ID
+ * @qtype: the SGE Queue Type (Egress or Ingress)
+ * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
+ *
+ * Returns the BAR2 address for the SGE Queue Registers associated with
+ * @qid, or NULL when BAR2 SGE Registers aren't available.  Also fills
+ * in the BAR2 Queue ID to be used with writes to the BAR2 SGE Queue
+ * Registers.  A BAR2 Queue ID of 0 means "Inferred Queue ID" Registers
+ * are supported (e.g. the Write Combining Doorbell Buffer).
+ */
+static void __iomem *bar2_address(struct adapter *adapter, unsigned int qid,
+				  enum t4_bar2_qtype qtype,
+				  unsigned int *pbar2_qid)
+{
+	u64 qoffset;
+
+	if (t4_bar2_sge_qregs(adapter, qid, qtype, &qoffset, pbar2_qid) != 0)
+		return NULL;
+
+	return adapter->bar2 + qoffset;
+}
+
+/* Ask firmware to start the ingress queue (and its FL, if any). */
+int t4_sge_eth_rxq_start(struct adapter *adap, struct sge_rspq *rq)
+{
+	struct sge_eth_rxq *rxq = container_of(rq, struct sge_eth_rxq, rspq);
+	unsigned int fl_id = 0xffff;
+
+	if (rxq->fl.size)
+		fl_id = rxq->fl.cntxt_id;
+
+	return t4_iq_start_stop(adap, adap->mbox, true, adap->pf, 0,
+				rq->cntxt_id, fl_id, 0xffff);
+}
+
+/* Ask firmware to stop the ingress queue (and its FL, if any). */
+int t4_sge_eth_rxq_stop(struct adapter *adap, struct sge_rspq *rq)
+{
+	struct sge_eth_rxq *rxq = container_of(rq, struct sge_eth_rxq, rspq);
+	unsigned int fl_id = 0xffff;
+
+	if (rxq->fl.size)
+		fl_id = rxq->fl.cntxt_id;
+
+	return t4_iq_start_stop(adap, adap->mbox, false, adap->pf, 0,
+				rq->cntxt_id, fl_id, 0xffff);
+}
+
+/**
+ * t4_sge_alloc_rxq - allocate and initialize an SGE ingress queue
+ * @adap: the adapter
+ * @iq: the ingress queue to set up
+ * @fwevtq: true when allocating the firmware event queue
+ * @eth_dev: the ethernet device the queue belongs to
+ * @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
+ * @fl: the free list to pair with @iq, or NULL for an iq without FL
+ * @hnd: the handler invoked for each response on @iq
+ * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
+ * @mp: mempool the RX buffers are allocated from
+ * @queue_id: queue index, used to name the DMA rings
+ * @socket_id: NUMA node to allocate the rings on
+ *
+ * Allocates the descriptor rings, issues the FW_IQ_CMD to firmware and
+ * initializes the software state of the queue.  Returns 0 on success
+ * or a negative errno on failure.
+ */
+int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
+		     struct rte_eth_dev *eth_dev, int intr_idx,
+		     struct sge_fl *fl, rspq_handler_t hnd, int cong,
+		     struct rte_mempool *mp, int queue_id, int socket_id)
+{
+	int ret, flsz = 0;
+	struct fw_iq_cmd c;
+	struct sge *s = &adap->sge;
+	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+	char z_name[RTE_MEMZONE_NAMESIZE];
+	char z_name_sw[RTE_MEMZONE_NAMESIZE];
+	unsigned int nb_refill;
+
+	/* Size needs to be multiple of 16, including status entry. */
+	iq->size = roundup(iq->size, 16);
+
+	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
+		 eth_dev->driver->pci_drv.name, fwevtq ? "fwq_ring" : "rx_ring",
+		 eth_dev->data->port_id, queue_id);
+	snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
+
+	iq->desc = alloc_ring(iq->size, iq->iqe_len, 0, &iq->phys_addr, NULL, 0,
+			      queue_id, socket_id, z_name, z_name_sw);
+	if (!iq->desc)
+		return -ENOMEM;
+
+	/* Build the FW_IQ_CMD that allocates and starts the ingress queue. */
+	memset(&c, 0, sizeof(c));
+	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
+			    F_FW_CMD_WRITE | F_FW_CMD_EXEC |
+			    V_FW_IQ_CMD_PFN(adap->pf) | V_FW_IQ_CMD_VFN(0));
+	c.alloc_to_len16 = htonl(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
+				 (sizeof(c) / 16));
+	c.type_to_iqandstindex =
+		htonl(V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
+		      V_FW_IQ_CMD_IQASYNCH(fwevtq) |
+		      V_FW_IQ_CMD_VIID(pi->viid) |
+		      V_FW_IQ_CMD_IQANDST(intr_idx < 0) |
+		      V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT) |
+		      V_FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx :
+							       -intr_idx - 1));
+	c.iqdroprss_to_iqesize =
+		htons(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
+		      F_FW_IQ_CMD_IQGTSMODE |
+		      V_FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) |
+		      V_FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4));
+	c.iqsize = htons(iq->size);
+	c.iqaddr = cpu_to_be64(iq->phys_addr);
+	if (cong >= 0)
+		c.iqns_to_fl0congen = htonl(F_FW_IQ_CMD_IQFLINTCONGEN);
+
+	if (fl) {
+		struct sge_eth_rxq *rxq = container_of(fl, struct sge_eth_rxq,
+						       fl);
+		enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+
+		/*
+		 * Allocate the ring for the hardware free list (with space
+		 * for its status page) along with the associated software
+		 * descriptor ring.  The free list size needs to be a multiple
+		 * of the Egress Queue Unit and at least 2 Egress Units larger
+		 * than the SGE's Egress Congestion Threshold
+		 * (fl_starve_thres - 1).
+		 */
+		if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
+			fl->size = s->fl_starve_thres - 1 + 2 * 8;
+		fl->size = roundup(fl->size, 8);
+
+		snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
+			 eth_dev->driver->pci_drv.name,
+			 fwevtq ? "fwq_ring" : "fl_ring",
+			 eth_dev->data->port_id, queue_id);
+		snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
+
+		fl->desc = alloc_ring(fl->size, sizeof(__be64),
+				      sizeof(struct rx_sw_desc),
+				      &fl->addr, &fl->sdesc, s->stat_len,
+				      queue_id, socket_id, z_name, z_name_sw);
+
+		if (!fl->desc)
+			goto fl_nomem;
+
+		/* FL length in Egress Queue Units (8 descriptors each). */
+		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
+		c.iqns_to_fl0congen |=
+			htonl(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
+			      (unlikely(rxq->usembufs) ?
+			       0 : F_FW_IQ_CMD_FL0PACKEN) |
+			      F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
+			      F_FW_IQ_CMD_FL0PADEN);
+		if (cong >= 0)
+			c.iqns_to_fl0congen |=
+				htonl(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
+				      F_FW_IQ_CMD_FL0CONGCIF |
+				      F_FW_IQ_CMD_FL0CONGEN);
+
+		/* In T6, for egress queue type FL there is internal overhead
+		 * of 16B for header going into FLM module.
+		 * Hence maximum allowed burst size will be 448 bytes.
+		 */
+		c.fl0dcaen_to_fl0cidxfthresh =
+			htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
+			      V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ?
+			      X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
+		c.fl0size = htons(flsz);
+		c.fl0addr = cpu_to_be64(fl->addr);
+	}
+
+	/* Hand the command to firmware; the reply fills in the queue IDs. */
+	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
+	if (ret)
+		goto err;
+
+	iq->cur_desc = iq->desc;
+	iq->cidx = 0;
+	iq->gen = 1;
+	iq->next_intr_params = iq->intr_params;
+	iq->cntxt_id = ntohs(c.iqid);
+	iq->abs_id = ntohs(c.physiqid);
+	iq->bar2_addr = bar2_address(adap, iq->cntxt_id, T4_BAR2_QTYPE_INGRESS,
+				     &iq->bar2_qid);
+	iq->size--;                           /* subtract status entry */
+	iq->eth_dev = eth_dev;
+	iq->handler = hnd;
+	iq->mb_pool = mp;
+
+	/* set offset to -1 to distinguish ingress queues without FL */
+	iq->offset = fl ? 0 : -1;
+
+	if (fl) {
+		fl->cntxt_id = ntohs(c.fl0id);
+		fl->avail = 0;
+		fl->pend_cred = 0;
+		fl->pidx = 0;
+		fl->cidx = 0;
+		fl->alloc_failed = 0;
+
+		/*
+		 * Note, we must initialize the BAR2 Free List User Doorbell
+		 * information before refilling the Free List!
+		 */
+		fl->bar2_addr = bar2_address(adap, fl->cntxt_id,
+					     T4_BAR2_QTYPE_EGRESS,
+					     &fl->bar2_qid);
+
+		nb_refill = refill_fl(adap, fl, fl_cap(fl));
+		if (nb_refill != fl_cap(fl)) {
+			ret = -ENOMEM;
+			dev_err(adap, "%s: mbuf alloc failed with error: %d\n",
+				__func__, ret);
+			goto refill_fl_err;
+		}
+	}
+
+	/*
+	 * For T5 and later we attempt to set up the Congestion Manager values
+	 * of the new RX Ethernet Queue.  This should really be handled by
+	 * firmware because it's more complex than any host driver wants to
+	 * get involved with and it's different per chip and this is almost
+	 * certainly wrong.  Firmware would be wrong as well, but it would be
+	 * a lot easier to fix in one place ...  For now we do something very
+	 * simple (and hopefully less wrong).
+	 */
+	if (!is_t4(adap->params.chip) && cong >= 0) {
+		u32 param, val;
+		int i;
+
+		param = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
+			 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+			 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id));
+		if (cong == 0) {
+			val = V_CONMCTXT_CNGTPMODE(X_CONMCTXT_CNGTPMODE_QUEUE);
+		} else {
+			val = V_CONMCTXT_CNGTPMODE(
+					X_CONMCTXT_CNGTPMODE_CHANNEL);
+			for (i = 0; i < 4; i++) {
+				if (cong & (1 << i))
+					val |= V_CONMCTXT_CNGCHMAP(1 <<
+								   (i << 2));
+			}
+		}
+		ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
+				    &param, &val);
+		if (ret)
+			dev_warn(adap->pdev_dev, "Failed to set Congestion Manager Context for Ingress Queue %d: %d\n",
+				 iq->cntxt_id, -ret);
+	}
+
+	return 0;
+
+refill_fl_err:
+	t4_iq_free(adap, adap->mbox, adap->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
+		   iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
+fl_nomem:
+	ret = -ENOMEM;
+err:
+	/*
+	 * NOTE(review): iq->desc / fl->desc are only NULLed here, not
+	 * released -- presumably because the rings live in DPDK memzones
+	 * which cannot be freed; confirm against alloc_ring().
+	 */
+	iq->cntxt_id = 0;
+	iq->abs_id = 0;
+	if (iq->desc)
+		iq->desc = NULL;
+
+	if (fl && fl->desc) {
+		rte_free(fl->sdesc);
+		fl->cntxt_id = 0;
+		fl->sdesc = NULL;
+		fl->desc = NULL;
+	}
+	return ret;
+}
+
+/*
+ * Free an ingress queue via firmware and reset its software state,
+ * along with its associated free list (when @fl is non-NULL).
+ */
+static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
+			 struct sge_fl *fl)
+{
+	unsigned int fl_id = 0xffff;
+
+	if (fl)
+		fl_id = fl->cntxt_id;
+
+	t4_iq_free(adap, adap->mbox, adap->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
+		   rq->cntxt_id, fl_id, 0xffff);
+	rq->cntxt_id = 0;
+	rq->abs_id = 0;
+	rq->desc = NULL;
+
+	if (!fl)
+		return;
+
+	free_rx_bufs(fl, fl->avail);
+	rte_free(fl->sdesc);
+	fl->sdesc = NULL;
+	fl->cntxt_id = 0;
+	fl->desc = NULL;
+}
+
+/* Stop and release an Ethernet RX queue, if it was ever allocated. */
+void t4_sge_eth_rxq_release(struct adapter *adap, struct sge_eth_rxq *rxq)
+{
+	struct sge_fl *fl = rxq->fl.size ? &rxq->fl : NULL;
+
+	if (!rxq->rspq.desc)
+		return;
+
+	t4_sge_eth_rxq_stop(adap, &rxq->rspq);
+	free_rspq_fl(adap, &rxq->rspq, fl);
+}
+
+/**
  * t4_sge_init - initialize SGE
  * @adap: the adapter
  *
-- 
2.4.1

  parent reply	other threads:[~2015-06-18 12:18 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-22 13:24 [PATCH 0/5] Chelsio Terminator 5 (T5) 10G/40G Poll Mode Driver Rahul Lakkireddy
2015-05-22 13:24 ` [PATCH 1/5] cxgbe: add hardware specific api for all supported Chelsio T5 series adapters Rahul Lakkireddy
2015-05-22 13:24 ` [PATCH 2/5] cxgbe: add cxgbe poll mode driver Rahul Lakkireddy
2015-05-22 16:42   ` Stephen Hemminger
2015-05-23  5:57     ` Rahul Lakkireddy
2015-05-26 17:02       ` Rahul Lakkireddy
2015-05-26 17:24         ` Stephen Hemminger
2015-05-26 18:13           ` Rahul Lakkireddy
2015-05-22 16:43   ` Stephen Hemminger
2015-05-23  5:56     ` Rahul Lakkireddy
2015-05-22 16:46   ` Stephen Hemminger
2015-05-23  5:53     ` Rahul Lakkireddy
2015-05-27  5:49       ` Thomas Monjalon
2015-05-27 11:26         ` Rahul Lakkireddy
2015-05-22 13:24 ` [PATCH 3/5] doc: add cxgbe PMD documentation under doc/guides/nics/cxgbe.rst Rahul Lakkireddy
2015-05-27  5:38   ` Thomas Monjalon
2015-05-27 11:24     ` Rahul Lakkireddy
2015-05-22 13:24 ` [PATCH 4/5] config: enable cxgbe PMD for compilation and linking Rahul Lakkireddy
2015-05-22 13:24 ` [PATCH 5/5] maintainers: claim responsibility for cxgbe PMD Rahul Lakkireddy
2015-06-01 17:30 ` [PATCH v2 00/11] Chelsio Terminator 5 (T5) 10G/40G Poll Mode Driver Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 01/11] cxgbe: add hardware specific api for all supported Chelsio T5 series adapters Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 02/11] cxgbe: add cxgbe poll mode driver Rahul Lakkireddy
2015-06-17 12:30     ` Thomas Monjalon
2015-06-18  7:06       ` Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 03/11] cxgbe: add device configuration and RX support for cxgbe PMD Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 04/11] cxgbe: add TX " Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 05/11] cxgbe: add device related operations " Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 06/11] cxgbe: add port statistics " Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 07/11] cxgbe: add link related functions " Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 08/11] cxgbe: add flow control " Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 09/11] doc: add cxgbe PMD documentation under doc/guides/nics/cxgbe.rst Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 10/11] config: enable cxgbe PMD for compilation and linking Rahul Lakkireddy
2015-06-01 17:30   ` [PATCH v2 11/11] maintainers: claim responsibility for cxgbe PMD Rahul Lakkireddy
2015-06-18 12:17   ` [PATCH v3 0/9] Chelsio Terminator 5 (T5) 10G/40G Poll Mode Driver Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 1/9] cxgbe: add hardware specific api for all supported Chelsio T5 series adapters Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 2/9] cxgbe: add cxgbe poll mode driver Rahul Lakkireddy
2015-06-28 19:32       ` Thomas Monjalon
2015-06-29 23:23         ` Rahul Lakkireddy
2015-06-18 12:17     ` Rahul Lakkireddy [this message]
2015-06-28 19:34       ` [PATCH v3 3/9] cxgbe: add device configuration and RX support for cxgbe PMD Thomas Monjalon
2015-06-29 23:18         ` Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 4/9] cxgbe: add TX " Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 5/9] cxgbe: add device related operations " Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 6/9] cxgbe: add port statistics " Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 7/9] cxgbe: add link related functions " Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 8/9] cxgbe: add flow control " Rahul Lakkireddy
2015-06-18 12:17     ` [PATCH v3 9/9] doc: add cxgbe PMD documentation under doc/guides/nics/cxgbe.rst Rahul Lakkireddy
2015-06-18 13:47       ` Mcnamara, John
2015-06-18 13:44     ` [PATCH v3 0/9] Chelsio Terminator 5 (T5) 10G/40G Poll Mode Driver Mcnamara, John
2015-06-29 23:28     ` [PATCH v4 " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 1/9] cxgbe: add hardware specific api for all supported Chelsio T5 series adapters Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 2/9] cxgbe: add cxgbe poll mode driver Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 3/9] cxgbe: add device configuration and RX support for cxgbe PMD Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 4/9] cxgbe: add TX " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 5/9] cxgbe: add device related operations " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 6/9] cxgbe: add port statistics " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 7/9] cxgbe: add link related functions " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 8/9] cxgbe: add flow control " Rahul Lakkireddy
2015-06-29 23:28       ` [PATCH v4 9/9] doc: add cxgbe PMD documentation under doc/guides/nics/cxgbe.rst Rahul Lakkireddy
2015-06-30 21:01       ` [PATCH v4 0/9] Chelsio Terminator 5 (T5) 10G/40G Poll Mode Driver Thomas Monjalon
2015-07-01  6:35         ` Rahul Lakkireddy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1221eb246c993a64b88350542ac9198be69876d9.1434628361.git.rahul.lakkireddy@chelsio.com \
    --to=rahul.lakkireddy@chelsio.com \
    --cc=dev@dpdk.org \
    --cc=felix@chelsio.com \
    --cc=kumaras@chelsio.com \
    --cc=nirranjan@chelsio.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.