Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/Kconfig         |  12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/Makefile        |   1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_cq.c         |  14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c     | 255
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_ethtool.c    |  21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_main.c       |   2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  40
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_port.h       |   2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_resources.c  |   6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c         |   4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c         |  84
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h       |  36
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/port.c          |  62
14 files changed, 461 insertions, 98 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 1bb9353..5f027f9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -11,6 +11,18 @@ config MLX4_EN This driver supports Mellanox Technologies ConnectX Ethernet devices. +config MLX4_EN_DCB + bool "Data Center Bridging (DCB) Support" + default y + depends on MLX4_EN && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + If set to N, will not be able to configure QoS and ratelimit attributes. + This flag is depended on the kernel's DCB support. + + If unsure, set to Y + config MLX4_CORE tristate depends on PCI diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile index 4a40ab9..293127d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Makefile +++ b/drivers/net/ethernet/mellanox/mlx4/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \ en_resources.o en_netdev.o en_selftest.o +mlx4_en-$(CONFIG_MLX4_EN_DCB) += en_dcb_nl.o diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 00b8127..908a460 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -124,11 +124,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { - init_timer(&cq->timer); - cq->timer.function = mlx4_en_poll_tx_cq; - cq->timer.data = (unsigned long) cq; - } else { + if (!cq->is_tx) { netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); napi_enable(&cq->napi); } @@ -151,16 +147,12 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { - struct mlx4_en_dev *mdev = priv->mdev; - - if (cq->is_tx) - del_timer(&cq->timer); - else { + if (!cq->is_tx) { napi_disable(&cq->napi); netif_napi_del(&cq->napi); } - mlx4_cq_free(mdev->dev, &cq->mcq); + mlx4_cq_free(priv->mdev->dev, &cq->mcq); } /* Set rx cq moderation parameters */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c new file mode 100644 index 0000000..5d367958 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/dcbnl.h> +#include <linux/math64.h> + +#include "mlx4_en.h" + +static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets *my_ets = &priv->ets; + + /* No IEEE PFC settings available */ + if (!my_ets) + return -EINVAL; + + ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + ets->cbs = my_ets->cbs; + memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw)); + memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa)); + memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc)); + + return 0; +} + +static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) +{ + int i; + int total_ets_bw = 0; + int has_ets_tc = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] > MLX4_EN_NUM_UP) { + en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n", + i, ets->prio_tc[i]); + return -EINVAL; + } + + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = 1; + total_ets_bw += ets->tc_tx_bw[i]; + break; + default: + en_err(priv, "TC[%d]: Not supported TSA: %d\n", + i, ets->tc_tsa[i]); + return -ENOTSUPP; + } + } + + if (has_ets_tc && total_ets_bw != MLX4_EN_BW_MAX) { + en_err(priv, "Bad ETS BW sum: %d. 
Should be exactly 100%%\n", + total_ets_bw); + return -EINVAL; + } + + return 0; +} + +static int mlx4_en_config_port_scheduler(struct mlx4_en_priv *priv, + struct ieee_ets *ets, u16 *ratelimit) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int num_strict = 0; + int i; + __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS] = { 0 }; + __u8 pg[IEEE_8021QAZ_MAX_TCS] = { 0 }; + + ets = ets ?: &priv->ets; + ratelimit = ratelimit ?: priv->maxrate; + + /* higher TC means higher priority => lower pg */ + for (i = IEEE_8021QAZ_MAX_TCS - 1; i >= 0; i--) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + pg[i] = num_strict++; + tc_tx_bw[i] = MLX4_EN_BW_MAX; + break; + case IEEE_8021QAZ_TSA_ETS: + pg[i] = MLX4_EN_TC_ETS; + tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX4_EN_BW_MIN; + break; + } + } + + return mlx4_SET_PORT_SCHEDULER(mdev->dev, priv->port, tc_tx_bw, pg, + ratelimit); +} + +static int +mlx4_en_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + err = mlx4_en_ets_validate(priv, ets); + if (err) + return err; + + err = mlx4_SET_PORT_PRIO2TC(mdev->dev, priv->port, ets->prio_tc); + if (err) + return err; + + err = mlx4_en_config_port_scheduler(priv, ets, NULL); + if (err) + return err; + + memcpy(&priv->ets, ets, sizeof(priv->ets)); + + return 0; +} + +static int mlx4_en_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + pfc->pfc_en = priv->prof->tx_ppp; + + return 0; +} + +static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + en_dbg(DRV, priv, "cap: 0x%x en: 0x%x mbc: 0x%x delay: %d\n", + pfc->pfc_cap, + pfc->pfc_en, + pfc->mbc, + pfc->delay); + + priv->prof->rx_pause = priv->prof->tx_pause = !!pfc->pfc_en; + priv->prof->rx_ppp = priv->prof->tx_ppp = pfc->pfc_en; + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) + en_err(priv, "Failed setting pause params\n"); + + return err; +} + +static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) +{ + return DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + (mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_VER_IEEE) || + !(mode & DCB_CAP_DCBX_HOST)) + return 1; + + return 0; +} + +#define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */ +static int mlx4_en_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int i; + + if (!priv->maxrate) + return -EINVAL; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + maxrate->tc_maxrate[i] = + priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB; + + return 0; +} + +static int mlx4_en_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + u16 tmp[IEEE_8021QAZ_MAX_TCS]; + int i, err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + /* Convert from Kbps into HW units, rounding result up. + * Setting to 0, means unlimited BW. 
+ */ + tmp[i] = div_u64(maxrate->tc_maxrate[i] + + MLX4_RATELIMIT_UNITS_IN_KB - 1, + MLX4_RATELIMIT_UNITS_IN_KB); + } + + err = mlx4_en_config_port_scheduler(priv, NULL, tmp); + if (err) + return err; + + memcpy(priv->maxrate, tmp, sizeof(*priv->maxrate)); + + return 0; +} + +const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { + .ieee_getets = mlx4_en_dcbnl_ieee_getets, + .ieee_setets = mlx4_en_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getdcbx = mlx4_en_dcbnl_getdcbx, + .setdcbx = mlx4_en_dcbnl_setdcbx, +}; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 70346fd..72901ce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -83,7 +83,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS) static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { - "Interupt Test", + "Interrupt Test", "Link Test", "Speed Test", "Register Test", @@ -359,8 +359,8 @@ static int mlx4_en_get_coalesce(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); - coal->tx_coalesce_usecs = 0; - coal->tx_max_coalesced_frames = 0; + coal->tx_coalesce_usecs = priv->tx_usecs; + coal->tx_max_coalesced_frames = priv->tx_frames; coal->rx_coalesce_usecs = priv->rx_usecs; coal->rx_max_coalesced_frames = priv->rx_frames; @@ -388,6 +388,21 @@ static int mlx4_en_set_coalesce(struct net_device *dev, MLX4_EN_RX_COAL_TIME : coal->rx_coalesce_usecs; + /* Setting TX coalescing parameters */ + if (coal->tx_coalesce_usecs != priv->tx_usecs || + coal->tx_max_coalesced_frames != priv->tx_frames) { + priv->tx_usecs = coal->tx_coalesce_usecs; + priv->tx_frames = coal->tx_max_coalesced_frames; + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_cq[i].moder_cnt = priv->tx_frames; + priv->tx_cq[i].moder_time = priv->tx_usecs; + if (mlx4_en_set_cq_moder(priv, &priv->tx_cq[i])) { + en_warn(priv, "Failed changing moderation " + "for TX cq %d\n", i); + } + } + } + /* Set adaptive coalescing params */ priv->pkt_rate_low = coal->pkt_rate_low; priv->rx_usecs_low = coal->rx_coalesce_usecs_low; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 2097a7d..346fdb2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -114,7 +114,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].tx_ring_num = MLX4_EN_NUM_TX_RINGS + - (!!pfcrx) * MLX4_EN_NUM_PPP_RINGS; + MLX4_EN_NUM_PPP_RINGS; params->prof[i].rss_rings = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 31b455a..eaa8fad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -45,6 +45,14 @@ #include "mlx4_en.h" #include "en_port.h" +static int mlx4_en_setup_tc(struct net_device *dev, u8 up) +{ + if (up != MLX4_EN_NUM_UP) + return -EINVAL; + + return 0; +} + static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -421,6 +429,8 @@ static void 
mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; + priv->tx_frames = MLX4_EN_TX_COAL_PKTS; + priv->tx_usecs = MLX4_EN_TX_COAL_TIME; en_dbg(INTR, priv, "Default coalesing params for mtu:%d - " "rx_frames:%d rx_usecs:%d\n", priv->dev->mtu, priv->rx_frames, priv->rx_usecs); @@ -437,8 +447,8 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) for (i = 0; i < priv->tx_ring_num; i++) { cq = &priv->tx_cq[i]; - cq->moder_cnt = MLX4_EN_TX_COAL_PKTS; - cq->moder_time = MLX4_EN_TX_COAL_TIME; + cq->moder_cnt = priv->tx_frames; + cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ @@ -650,12 +660,18 @@ int mlx4_en_start_port(struct net_device *dev) /* Configure ring */ tx_ring = &priv->tx_ring[i]; - err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn); + err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, + max(0, i - MLX4_EN_NUM_TX_RINGS)); if (err) { en_err(priv, "Failed allocating Tx ring\n"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } + tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); + /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; @@ -797,12 +813,15 @@ static void mlx4_en_restart(struct work_struct *work) watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; + int i; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); + for (i = 0; i < priv->tx_ring_num; i++) + netdev_tx_reset_queue(priv->tx_ring[i].tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } @@ -966,6 +985,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); + free_netdev(dev); } @@ -1036,6 +1056,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_poll_controller = mlx4_en_netpoll, #endif .ndo_set_features = mlx4_en_set_features, + .ndo_setup_tc = mlx4_en_setup_tc, }; int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, @@ -1079,6 +1100,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) + dev->dcbnl_ops = &mlx4_en_dcbnl_ops; +#endif /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; @@ -1113,6 +1138,15 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, priv->tx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); + netdev_set_num_tc(dev, MLX4_EN_NUM_UP); + + /* First 9 rings are for UP 0 */ + netdev_set_tc_queue(dev, 0, MLX4_EN_NUM_TX_RINGS + 1, 0); + + /* Partition Tx queues evenly amongst UP's 1-7 */ + for (i = 1; i < MLX4_EN_NUM_UP; i++) + netdev_set_tc_queue(dev, i, 1, MLX4_EN_NUM_TX_RINGS + i); + SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops); /* Set defualt MAC */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h index 6934fd7..745090b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.h +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -39,6 +39,8 @@ #define 
SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 +#define MLX4_EN_NUM_TC 8 + #define VLAN_FLTR_SIZE 128 struct mlx4_set_vlan_fltr_mbox { __be32 entry[VLAN_FLTR_SIZE]; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index bcbc54c..10c24c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -39,7 +39,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, - struct mlx4_qp_context *context) + int user_prio, struct mlx4_qp_context *context) { struct mlx4_en_dev *mdev = priv->mdev; @@ -57,6 +57,10 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; + if (user_prio >= 0) { + context->pri_path.sched_queue |= user_prio << 3; + context->pri_path.feup = 1 << 6; + } context->pri_path.counter_index = 0xff; context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9adbd53..d49a7ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -823,7 +823,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, - qpn, ring->cqn, context); + qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); /* Cancel FCS removal if FW allows */ @@ -890,7 +890,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, - priv->rx_ring[0].cqn, &context); + priv->rx_ring[0].cqn, -1, &context); if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) rss_rings = priv->rx_ring_num; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 1796824..9a38483 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -67,8 +67,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, inline_thold = min(inline_thold, MAX_INLINE); - spin_lock_init(&ring->comp_lock); - tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = vmalloc(tmp); if (!ring->tx_info) @@ -156,7 +154,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int cq) + int cq, int user_prio) { struct mlx4_en_dev *mdev = priv->mdev; int err; @@ -174,7 +172,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, - ring->cqn, &ring->context); + ring->cqn, user_prio, &ring->context); if (ring->bf_enabled) ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); @@ -317,6 +315,8 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; + u32 packets = 0; + u32 bytes = 0; if (!priv->port_up) return; @@ -345,6 +345,8 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) priv, ring, ring_index, 
!!((ring->cons + txbbs_skipped) & ring->size)); + packets++; + bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); ++cons_index; @@ -361,13 +363,14 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this ring stopped it */ if (unlikely(ring->blocked)) { if ((u32) (ring->prod - ring->cons) <= ring->size - HEADROOM - MAX_DESC_TXBBS) { ring->blocked = 0; - netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring)); + netif_tx_wake_queue(ring->tx_queue); priv->port_stats.wake_queue++; } } @@ -377,41 +380,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; - if (!spin_trylock(&ring->comp_lock)) - return; mlx4_en_process_tx_cq(cq->dev, cq); - mod_timer(&cq->timer, jiffies + 1); - spin_unlock(&ring->comp_lock); + mlx4_en_arm_cq(priv, cq); } -void mlx4_en_poll_tx_cq(unsigned long data) -{ - struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; - struct mlx4_en_priv *priv = netdev_priv(cq->dev); - struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; - u32 inflight; - - INC_PERF_COUNTER(priv->pstats.tx_poll); - - if (!spin_trylock_irq(&ring->comp_lock)) { - mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); - return; - } - mlx4_en_process_tx_cq(cq->dev, cq); - inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb); - - /* If there are still packets in flight and the timer has not already - * been scheduled by the Tx routine then schedule it here to guarantee - * completion processing of these packets */ - if (inflight && priv->port_up) - mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); - - spin_unlock_irq(&ring->comp_lock); -} - static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 index, @@ -440,25 +414,6 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, return ring->buf + index * TXBB_SIZE; } -static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) -{ - struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind]; - struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind]; - unsigned long flags; - - /* If we don't have a pending timer, set one up to catch our recent - post in case the interface becomes idle */ - if (!timer_pending(&cq->timer)) - mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); - - /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ - if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) - if (spin_trylock_irqsave(&ring->comp_lock, flags)) { - mlx4_en_process_tx_cq(priv->dev, cq); - spin_unlock_irqrestore(&ring->comp_lock, flags); - } -} - static int is_inline(struct sk_buff *skb, void **pfrag) { void *ptr; @@ -570,13 +525,9 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) { - struct mlx4_en_priv *priv = netdev_priv(dev); u16 vlan_tag = 0; - /* If we support per priority flow control and the packet contains - * a vlan tag, send the packet to the TX ring assigned to that priority - */ - if (priv->prof->rx_ppp && vlan_tx_tag_present(skb)) { + if (vlan_tx_tag_present(skb)) { vlan_tag = vlan_tx_tag_get(skb); return MLX4_EN_NUM_TX_RINGS + (vlan_tag >> 13); } @@ -594,7 +545,6 @@ 
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; - struct mlx4_en_cq *cq; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct skb_frag_struct *frag; @@ -638,13 +588,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(((int)(ring->prod - ring->cons)) > ring->size - HEADROOM - MAX_DESC_TXBBS)) { /* every full Tx ring stops queue */ - netif_tx_stop_queue(netdev_get_tx_queue(dev, tx_ind)); + netif_tx_stop_queue(ring->tx_queue); ring->blocked = 1; priv->port_stats.queue_stopped++; - /* Use interrupts to find out when queue opened */ - cq = &priv->tx_cq[tx_ind]; - mlx4_en_arm_cq(priv, cq); return NETDEV_TX_BUSY; } @@ -707,7 +654,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) priv->port_stats.tso_packets++; i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) + !!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size); - ring->bytes += skb->len + (i - 1) * lso_header_size; + tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size; ring->packets += i; } else { /* Normal (Non LSO) packet */ @@ -715,10 +662,12 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ((ring->prod & ring->size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); data = &tx_desc->data; - ring->bytes += max(skb->len, (unsigned int) ETH_ZLEN); + tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); ring->packets++; } + ring->bytes += tx_info->nr_bytes; + netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes); AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); @@ -792,9 +741,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); } - /* Poll CQ here */ - mlx4_en_xmit_poll(priv, tx_ind); - return NETDEV_TX_OK; tx_drop: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 2a0ff2c..cd56f1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -53,6 +53,26 @@ #define DRV_VERSION "1.1" #define DRV_RELDATE "Dec, 2011" +#define MLX4_NUM_UP 8 +#define MLX4_NUM_TC 8 +#define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */ +#define MLX4_RATELIMIT_DEFAULT 0xffff + +struct mlx4_set_port_prio2tc_context { + u8 prio2tc[4]; +}; + +struct mlx4_port_scheduler_tc_cfg_be { + __be16 pg; + __be16 bw_precentage; + __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */ + __be16 max_bw_value; +}; + +struct mlx4_set_port_scheduler_context { + struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; +}; + enum { MLX4_HCR_BASE = 0x80680, MLX4_HCR_SIZE = 0x0001c, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d69fee4..5d87637 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -40,6 +40,9 @@ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#ifdef CONFIG_MLX4_EN_DCB +#include <linux/dcbnl.h> +#endif #include <linux/mlx4/device.h> #include <linux/mlx4/qp.h> @@ -111,6 +114,7 @@ enum { #define MLX4_EN_NUM_TX_RINGS 8 #define MLX4_EN_NUM_PPP_RINGS 8 #define MAX_TX_RINGS (MLX4_EN_NUM_TX_RINGS + MLX4_EN_NUM_PPP_RINGS) +#define MLX4_EN_NUM_UP 8 #define MLX4_EN_DEF_TX_RING_SIZE 512 #define MLX4_EN_DEF_RX_RING_SIZE 1024 @@ -118,7 +122,7 @@ enum { #define 
MLX4_EN_RX_COAL_TARGET 44 #define MLX4_EN_RX_COAL_TIME 0x10 -#define MLX4_EN_TX_COAL_PKTS 5 +#define MLX4_EN_TX_COAL_PKTS 16 #define MLX4_EN_TX_COAL_TIME 0x80 #define MLX4_EN_RX_RATE_LOW 400000 @@ -196,6 +200,7 @@ enum cq_type { struct mlx4_en_tx_info { struct sk_buff *skb; u32 nr_txbb; + u32 nr_bytes; u8 linear; u8 data_offset; u8 inl; @@ -251,9 +256,9 @@ struct mlx4_en_tx_ring { unsigned long bytes; unsigned long packets; unsigned long tx_csum; - spinlock_t comp_lock; struct mlx4_bf bf; bool bf_enabled; + struct netdev_queue *tx_queue; }; struct mlx4_en_rx_desc { @@ -304,8 +309,6 @@ struct mlx4_en_cq { spinlock_t lock; struct net_device *dev; struct napi_struct napi; - /* Per-core Tx cq processing support */ - struct timer_list timer; int size; int buf_size; unsigned vector; @@ -411,6 +414,15 @@ struct mlx4_en_frag_info { }; +#ifdef CONFIG_MLX4_EN_DCB +/* Minimal TC BW - setting to 0 will block traffic */ +#define MLX4_EN_BW_MIN 1 +#define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ + +#define MLX4_EN_TC_ETS 7 + +#endif + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -484,6 +496,11 @@ struct mlx4_en_priv { int vids[128]; bool wol; struct device *ddev; + +#ifdef CONFIG_MLX4_EN_DCB + struct ieee_ets ets; + u16 maxrate[IEEE_8021QAZ_MAX_TCS]; +#endif }; enum mlx4_en_wol { @@ -512,7 +529,6 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); @@ -522,7 +538,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ri void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int cq); + int cq, int user_prio); void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); @@ -540,8 +556,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, - int is_tx, int rss, int qpn, int cqn, - struct mlx4_qp_context *context); + int is_tx, int rss, int qpn, int cqn, int user_prio, + struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); @@ -558,6 +574,10 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); +#ifdef CONFIG_MLX4_EN_DCB +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; +#endif + #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); u64 mlx4_en_mac_to_u64(u8 *addr); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 77535ff..55b12e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -834,6 +834,68 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, } 
EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); +int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_prio2tc_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + memset(context, 0, sizeof *context); + + for (i = 0; i < MLX4_NUM_UP; i += 2) + context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1]; + + in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC); + +int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, + u8 *pg, u16 *ratelimit) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_scheduler_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + memset(context, 0, sizeof *context); + + for (i = 0; i < MLX4_NUM_TC; i++) { + struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i]; + u16 r = ratelimit && ratelimit[i] ? ratelimit[i] : + MLX4_RATELIMIT_DEFAULT; + + tc->pg = htons(pg[i]); + tc->bw_precentage = htons(tc_tx_bw[i]); + + tc->max_bw_units = htons(MLX4_RATELIMIT_UNITS); + tc->max_bw_value = htons(r); + } + + in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); + int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, |
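
For reference, the ieee_setmaxrate handler added above converts each traffic class's rate from Kbps into 100 Mbps hardware units, rounding up, with zero meaning unlimited. Below is a minimal standalone C sketch of that conversion; the 100000 Kbps unit size and the zero-means-unlimited convention are taken from the patch, while the helper name and the main() harness are hypothetical and not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* MLX4_RATELIMIT_UNITS == 3 selects a 100 Mbps granularity in the patch,
 * i.e. one hardware unit per 100000 Kbps. */
#define RATELIMIT_UNIT_KBPS 100000ULL

/* Round a per-TC rate in Kbps up to whole 100 Mbps hardware units.
 * Zero is passed through: the SET_PORT_SCHEDULER path treats a zero
 * ratelimit as "unlimited" and substitutes the 0xffff default. */
static uint16_t kbps_to_hw_units(uint64_t kbps)
{
    if (!kbps)
        return 0;
    return (uint16_t)((kbps + RATELIMIT_UNIT_KBPS - 1) / RATELIMIT_UNIT_KBPS);
}

int main(void)
{
    /* 1 Gbps -> 10 units, 150 Mbps -> 2 units (rounded up), 0 -> unlimited */
    printf("%u %u %u\n",
           kbps_to_hw_units(1000000),
           kbps_to_hw_units(150000),
           kbps_to_hw_units(0));
    return 0;
}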