From fba75200ad92892bf32d8d6f1443c6f1e4f48676 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Sat, 1 Jul 2006 04:36:09 -0700 Subject: [PATCH] IB/ipath: fixes to performance get counters for IB compliance This patch fixes some problems uncovered during IB compliance testing to return the right values for error counters returned by the Performance Get Counters packet. Signed-off-by: Ralph Campbell Signed-off-by: Bryan O'Sullivan Cc: "Michael S. Tsirkin" Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/hw/ipath/ipath_driver.c | 17 ++++++++++ drivers/infiniband/hw/ipath/ipath_intr.c | 1 + drivers/infiniband/hw/ipath/ipath_kernel.h | 5 +++ drivers/infiniband/hw/ipath/ipath_layer.c | 9 ++++-- drivers/infiniband/hw/ipath/ipath_layer.h | 2 ++ drivers/infiniband/hw/ipath/ipath_mad.c | 52 ++++++++++++++++-------------- drivers/infiniband/hw/ipath/ipath_ud.c | 11 ++++++- drivers/infiniband/hw/ipath/ipath_verbs.c | 20 ++++++++++++ drivers/infiniband/hw/ipath/ipath_verbs.h | 3 ++ 9 files changed, 93 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 979ae29..4109913 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -460,6 +460,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, * by ipath_setup_htconfig. */ dd->ipath_flags = 0; + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors = 0; if (dd->ipath_f_bus(dd, pdev)) ipath_dev_err(dd, "Failed to setup config space; " @@ -942,6 +944,18 @@ reloop: "tlen=%x opcode=%x egridx=%x: %s\n", eflags, l, etype, tlen, bthbytes[0], ips_get_index((__le32 *) rc), emsg); + /* Count local link integrity errors. */ + if (eflags & (INFINIPATH_RHF_H_ICRCERR | + INFINIPATH_RHF_H_VCRCERR)) { + u8 n = (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + + if (++dd->ipath_lli_counter > n) { + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors++; + } + } } else if (etype == RCVHQ_RCV_TYPE_NON_KD) { int ret = __ipath_verbs_rcv(dd, rc + 1, ebuf, tlen); @@ -949,6 +963,9 @@ reloop: ipath_cdbg(VERBOSE, "received IB packet, " "not SMA (QP=%x)\n", qp); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + } else if (etype == RCVHQ_RCV_TYPE_EAGER) { if (qp == IPATH_KD_QP && bthbytes[0] == ipath_layer_rcv_opcode && diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index f6ca59b..30160bc 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -262,6 +262,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, | IPATH_LINKACTIVE | IPATH_LINKARMED); *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + dd->ipath_lli_counter = 0; if (!noprint) { if (((dd->ipath_lastibcstat >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 493100d..fe3c862 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -507,6 +507,11 @@ struct ipath_devdata { u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + + /* local link integrity counter */ + u32 ipath_lli_counter; + /* local link integrity errors */ + u32 ipath_lli_errors; }; extern struct list_head ipath_dev_list; diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index 0f8b529..6e3d9bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -1032,19 +1032,22 @@ int ipath_layer_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); cntrs->link_error_recovery_counter = ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ cntrs->link_downed_counter = ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); cntrs->port_rcv_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); @@ -1058,6 +1061,8 @@ int ipath_layer_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 5dcffc7..ee617e8 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -55,6 +55,8 @@ struct ipath_layer_counters { u64 port_rcv_data; u64 port_xmit_packets; u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; }; /* diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 8f76534..9575692 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -613,6 +613,9 @@ struct ib_pma_portcounters { #define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008) #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010) #define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040) +#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200) +#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400) +#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800) #define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000) #define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000) #define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000) @@ -859,6 +862,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, cntrs.port_rcv_data -= dev->z_port_rcv_data; cntrs.port_xmit_packets -= dev->z_port_xmit_packets; cntrs.port_rcv_packets -= dev->z_port_rcv_packets; + cntrs.local_link_integrity_errors -= + dev->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + dev->z_excessive_buffer_overrun_errors; memset(pmp->data, 0, sizeof(pmp->data)); @@ -896,6 +903,16 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, else p->port_xmit_discards = cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (dev->n_vl15_dropped > 0xFFFFUL) + p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); else @@ -990,6 +1007,17 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) dev->z_port_xmit_discards = cntrs.port_xmit_discards; + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) + dev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) + dev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) + dev->n_vl15_dropped = 0; + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) dev->z_port_xmit_data = cntrs.port_xmit_data; @@ -1275,32 +1303,8 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ipath_ibdev *dev = to_idev(ibdev); int ret; - /* - * Snapshot current HW counters to "clear" them. - * This should be done when the driver is loaded except that for - * some reason we get a zillion errors when brining up the link. - */ - if (dev->rcv_errors == 0) { - struct ipath_layer_counters cntrs; - - ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs); - dev->rcv_errors++; - dev->z_symbol_error_counter = cntrs.symbol_error_counter; - dev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - dev->z_link_downed_counter = cntrs.link_downed_counter; - dev->z_port_rcv_errors = cntrs.port_rcv_errors + 1; - dev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - dev->z_port_xmit_discards = cntrs.port_xmit_discards; - dev->z_port_xmit_data = cntrs.port_xmit_data; - dev->z_port_rcv_data = cntrs.port_rcv_data; - dev->z_port_xmit_packets = cntrs.port_xmit_packets; - dev->z_port_rcv_packets = cntrs.port_rcv_packets; - } switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 3b6d00b..4b05029 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -560,7 +560,16 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, spin_lock_irqsave(&rq->lock, flags); if (rq->tail == rq->head) { spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; + /* + * Count VL15 packets dropped due to no receive buffer. + * Otherwise, count them as buffer overruns since usually, + * the HW will be able to receive packets even if there are + * no QPs with posted receive buffers. + */ + if (qp->ibqp.qp_num == 0) + dev->n_vl15_dropped++; + else + dev->rcv_errors++; goto bail; } /* Silently drop packets which are too big. */ diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 059701d..ab4d533 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -981,6 +981,7 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev); */ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) { + struct ipath_layer_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; int ret; @@ -1031,6 +1032,25 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT; idev->link_width_enabled = 3; /* 1x or 4x */ + /* Snapshot current HW counters to "clear" them. */ + ipath_layer_get_counters(dd, &cntrs); + idev->z_symbol_error_counter = cntrs.symbol_error_counter; + idev->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + idev->z_link_downed_counter = cntrs.link_downed_counter; + idev->z_port_rcv_errors = cntrs.port_rcv_errors; + idev->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + idev->z_port_xmit_discards = cntrs.port_xmit_discards; + idev->z_port_xmit_data = cntrs.port_xmit_data; + idev->z_port_rcv_data = cntrs.port_rcv_data; + idev->z_port_xmit_packets = cntrs.port_xmit_packets; + idev->z_port_rcv_packets = cntrs.port_rcv_packets; + idev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + idev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + /* * The system image GUID is supposed to be the same for all * IB HCAs in a single system but since there can be other diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c57058f..1cb7970 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -460,6 +460,8 @@ struct ipath_ibdev { u64 z_port_xmit_packets; /* starting count for PMA */ u64 z_port_rcv_packets; /* starting count for PMA */ u32 z_pkey_violations; /* starting count for PMA */ + u32 z_local_link_integrity_errors; /* starting count for PMA */ + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ u32 n_rc_resends; u32 n_rc_acks; u32 n_rc_qacks; @@ -469,6 +471,7 @@ struct ipath_ibdev { u32 n_other_naks; u32 n_timeouts; u32 n_pkt_drops; + u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; -- cgit v1.1