From 5c2433cdc72a03446184e2f898be3b05095b5ed0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 24 Oct 2011 21:25:21 +0000 Subject: caif: Fix BUG() with network namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 08613e4626c06ca408fc55071f6aedee36986a87 upstream. The caif code will register its own pernet_operations, and then register a netdevice_notifier. Each time the netdevice_notifier is triggered, it'll do some stuff... including a lookup of its own pernet stuff with net_generic(). If the net_generic() call ever returns NULL, the caif code will BUG(). That doesn't seem *so* unreasonable, I suppose — it does seem like it should never happen. However, it *does* happen. When we clone a network namespace, setup_net() runs through all the pernet_operations one at a time. It gets to loopback before it gets to caif. And loopback_net_init() registers a netdevice... while caif hasn't been initialised. So the caif netdevice notifier triggers, and immediately goes BUG(). We could imagine a complex and overengineered solution to this generic class of problems, but this patch takes the simple approach. It just makes caif_device_notify() *not* go looking for its pernet data structures if the device it's being notified about isn't a caif device in the first place. Signed-off-by: David Woodhouse Acked-by: Sjur Brændeland Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 682c0fe..dbdaa95 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -209,8 +209,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, enum cfcnfg_phy_preference pref; enum cfcnfg_phy_type phy_type; struct cfcnfg *cfg; - struct caif_device_entry_list *caifdevs = - caif_device_list(dev_net(dev)); + struct caif_device_entry_list *caifdevs; if (dev->type != ARPHRD_CAIF) return 0; @@ -219,6 +218,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, if (cfg == NULL) return 0; + caifdevs = caif_device_list(dev_net(dev)); + switch (what) { case NETDEV_REGISTER: caifd = caif_device_alloc(dev); -- cgit v1.1 From 32779fa06584fdcab2228a36c3a846fa0a6f5cdb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Oct 2011 06:24:20 +0000 Subject: rtnetlink: Add missing manual netlink notification in dev_change_net_namespaces commit d2237d35748e7f448a9c2d9dc6a85ef637466e24 upstream. Renato Westphal noticed that since commit a2835763e130c343ace5320c20d33c281e7097b7 "rtnetlink: handle rtnl_link netlink notifications manually" was merged we no longer send a netlink message when a networking device is moved from one network namespace to another. Fix this by adding the missing manual notification in dev_change_net_namespaces. Since all network devices that are processed by dev_change_net_namspaces are in the initialized state the complicated tests that guard the manual rtmsg_ifinfo calls in rollback_registered and register_netdevice are unnecessary and we can just perform a plain notification. Tested-by: Renato Westphal Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9c58c1e..f14f601 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6105,6 +6105,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains -- cgit v1.1 From 4641f8a010eb97303fbc9bd8a094139b52ffd629 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Oct 2011 18:27:10 +0200 Subject: mac80211: fix offchannel TX cookie matching commit 28a1bcdb57d50f3038a255741ecc83e391e5282e upstream. When I introduced in-kernel off-channel TX I introduced a bug -- the work can't be canceled again because the code clear the skb pointer. Fix this by keeping track separately of whether TX status has already been reported. Reported-by: Jouni Malinen Tested-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 1 + net/mac80211/status.c | 2 +- net/mac80211/work.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index be70c70..143a006 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1798,7 +1798,7 @@ ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb) * so in that case userspace will have to deal with it. */ - if (wk->offchan_tx.wait && wk->offchan_tx.frame) + if (wk->offchan_tx.wait && !wk->offchan_tx.status) cfg80211_mgmt_tx_status(wk->sdata->dev, (unsigned long) wk->offchan_tx.frame, wk->ie, wk->ie_len, false, GFP_KERNEL); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 090b0ec..0c5aed2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -328,6 +328,7 @@ struct ieee80211_work { struct { struct sk_buff *frame; u32 wait; + bool status; } offchan_tx; }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 1658efa..04cdbaf 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -336,7 +336,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) continue; if (wk->offchan_tx.frame != skb) continue; - wk->offchan_tx.frame = NULL; + wk->offchan_tx.status = true; break; } rcu_read_unlock(); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index d2e7f0e..dc8ec05 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -553,7 +553,7 @@ ieee80211_offchannel_tx(struct ieee80211_work *wk) /* * After this, offchan_tx.frame remains but now is no * longer a valid pointer -- we still need it as the - * cookie for canceling this work. + * cookie for canceling this work/status matching. */ ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame); -- cgit v1.1 From babba877daf7a7ee0cb03dfb5e63f23e2d32dddf Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Oct 2011 00:49:15 +0000 Subject: net: hold sock reference while processing tx timestamps commit da92b194cc36b5dc1fbd85206aeeffd80bee0c39 upstream. The pair of functions, * skb_clone_tx_timestamp() * skb_complete_tx_timestamp() were designed to allow timestamping in PHY devices. The first function, called during the MAC driver's hard_xmit method, identifies PTP protocol packets, clones them, and gives them to the PHY device driver. 
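As an aside, a minimal sketch of where the first helper sits in a driver's transmit path (foo_start_xmit and foo_hw_queue_xmit are invented placeholders; this is schematic, not a complete driver):

    /* Schematic sketch only -- the driver and its hardware queueing call are hypothetical. */
    static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            /* Identify PTP frames and hand a clone to the PHY for a hardware
             * timestamp; anything else passes through untouched. */
            skb_clone_tx_timestamp(skb);

            return foo_hw_queue_xmit(dev, skb);     /* give the frame to the hardware */
    }
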
The PHY driver may hold onto the packet and deliver it at a later time using the second function, which adds the packet to the socket's error queue. As pointed out by Johannes, nothing prevents the socket from disappearing while the cloned packet is sitting in the PHY driver awaiting a timestamp. This patch fixes the issue by taking a reference on the socket for each such packet. In addition, the comments regarding the usage of these function are expanded to highlight the rule that PHY drivers must use skb_complete_tx_timestamp() to release the packet, in order to release the socket reference, too. These functions first appeared in v2.6.36. Reported-by: Johannes Berg Signed-off-by: Richard Cochran Signed-off-by: Eric Dumazet Reviewed-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/timestamping.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 7e7ca37..97d036a 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) case PTP_CLASS_V2_VLAN: phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) + if (!clone) { + sock_put(sk); return; + } clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -76,8 +80,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock_exterr_skb *serr; int err; - if (!hwtstamps) + if (!hwtstamps) { + sock_put(sk); + kfree_skb(skb); return; + } *skb_hwtstamps(skb) = *hwtstamps; serr = SKB_EXT_ERR(skb); @@ -86,6 +93,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); } -- cgit v1.1 From 99dfac8ab234555a54f7fa6e4b7bb08bb355158b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 6 Oct 2011 11:19:41 +0000 Subject: bridge: fix hang on removal of bridge via netlink [ Upstream commit 1ce5cce895309862d2c35d922816adebe094fe4a ] Need to cleanup bridge device timers and ports when being bridge device is being removed via netlink. This fixes the problem of observed when doing: ip link add br0 type bridge ip link set dev eth1 master br0 ip link set br0 up ip link del br0 which would cause br0 to hang in unregister_netdev because of leftover reference count. Reported-by: Sridhar Samudrala Signed-off-by: Stephen Hemminger Acked-by: Sridhar Samudrala Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_if.c | 9 +++++---- net/bridge/br_netlink.c | 1 + net/bridge/br_private.h | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6f156c1..4490873 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -161,9 +161,10 @@ static void del_nbp(struct net_bridge_port *p) call_rcu(&p->rcu, destroy_nbp_rcu); } -/* called with RTNL */ -static void del_br(struct net_bridge *br, struct list_head *head) +/* Delete bridge device */ +void br_dev_delete(struct net_device *dev, struct list_head *head) { + struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { @@ -268,7 +269,7 @@ int br_del_bridge(struct net *net, const char *name) } else - del_br(netdev_priv(dev), NULL); + br_dev_delete(dev, NULL); rtnl_unlock(); return ret; @@ -445,7 +446,7 @@ void __net_exit br_net_exit(struct net *net) rtnl_lock(); for_each_netdev(net, dev) if (dev->priv_flags & IFF_EBRIDGE) - del_br(netdev_priv(dev), &list); + br_dev_delete(dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ffb0dc4..2c16055 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -208,6 +208,7 @@ static struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .dellink = br_dev_delete, }; int __init br_netlink_init(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 78cc364..857a021 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -294,6 +294,7 @@ static inline int br_is_root_bridge(const struct net_bridge *br) /* br_device.c */ extern void br_dev_setup(struct net_device *dev); +extern void br_dev_delete(struct net_device *dev, struct list_head *list); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER -- cgit v1.1 From 92dc979cf8a5b2439ac2764cd56675407136d329 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 23 Sep 2011 08:23:47 +0000 Subject: can bcm: fix tx_setup off-by-one errors [ Upstream commit aabdcb0b553b9c9547b1a506b34d55a764745870 ] This patch fixes two off-by-one errors that canceled each other out. Checking for the same condition two times in bcm_tx_timeout_tsklet() reduced the count of frames to be sent by one. This did not show up the first time tx_setup is invoked as an additional frame is sent due to TX_ANNONCE. Invoking a second tx_setup on the same item led to a reduced (by 1) number of sent frames. Reported-by: Andre Naujoks Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 184a657..8838b86 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -364,9 +364,6 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - } - - if (op->kt_ival1.tv64 && (op->count > 0)) { /* send (next) frame */ bcm_can_tx(op); @@ -969,8 +966,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - if (op->kt_ival1.tv64 && (op->count > 0)) { - /* op->count-- is done in bcm_tx_timeout_handler */ + /* only start timer when having more frames than sent below */ + if (op->kt_ival1.tv64 && (op->count > 1)) { + /* op->count-- is done in bcm_tx_timeout_tsklet */ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } else @@ -978,8 +976,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, HRTIMER_MODE_REL); } - if (op->flags & TX_ANNOUNCE) + if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); + if (op->kt_ival1.tv64 && (op->count > 0)) + op->count--; + } return msg_head->nframes * CFSIZ + MHSIZ; } -- cgit v1.1 From cbbd42eb61241fcd721f3b02bcb612894e2f02e6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sat, 22 Oct 2011 21:58:20 +0000 Subject: ipv4: fix ipsec forward performance regression [ Upstream commit b73233960a59ee66e09d642f13d0592b13651e94 ] There is bug in commit 5e2b61f(ipv4: Remove flowi from struct rtable). It makes xfrm4_fill_dst() modify wrong data structure. Signed-off-by: Zheng Yan Reported-by: Kim Phillips Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/xfrm4_policy.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43e..581fe0a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - rt->rt_key_dst = fl4->daddr; - rt->rt_key_src = fl4->saddr; - rt->rt_key_tos = fl4->flowi4_tos; - rt->rt_route_iif = fl4->flowi4_iif; - rt->rt_iif = fl4->flowi4_iif; - rt->rt_oif = fl4->flowi4_oif; - rt->rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_key_dst = fl4->daddr; + xdst->u.rt.rt_key_src = fl4->saddr; + xdst->u.rt.rt_key_tos = fl4->flowi4_tos; + xdst->u.rt.rt_route_iif = fl4->flowi4_iif; + xdst->u.rt.rt_iif = fl4->flowi4_iif; + xdst->u.rt.rt_oif = fl4->flowi4_oif; + xdst->u.rt.rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); -- cgit v1.1 From c11deb8d898318a2fea9a291b16a7a2b2507cc94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Oct 2011 05:35:46 +0000 Subject: l2tp: fix a potential skb leak in l2tp_xmit_skb() [ Upstream commit 835acf5da239b91edb9f7ebe36516999e156e6ee ] l2tp_xmit_skb() can leak one skb if skb_cow_head() returns an error. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ed8a233..71c292e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1045,8 +1045,10 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + hdr_len; old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, headroom)) + if (skb_cow_head(skb, headroom)) { + dev_kfree_skb(skb); goto abort; + } new_headroom = skb_headroom(skb); skb_orphan(skb); -- cgit v1.1 From 37c88f5fe7f287a945949e6f4570700c210ebe0f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 29 Sep 2011 17:10:10 +0000 Subject: tcp: properly handle md5sig_pool references [ Upstream commit 260fcbeb1ae9e768a44c9925338fbacb0d7e5ba9 ] tcp_v4_clear_md5_list() assumes that multiple tcp md5sig peers only hold one reference to md5sig_pool. but tcp_v4_md5_do_add() increases use count of md5sig_pool for each peer. This patch makes tcp_v4_md5_do_add() only increases use count for the first tcp md5sig peer. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv6/tcp_ipv6.c | 8 +++++--- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b3e6956..69790aa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -909,18 +909,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + + md5sig = tp->md5sig_info; + if (md5sig->entries4 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } - md5sig = tp->md5sig_info; if (md5sig->alloced4 == md5sig->entries4) { keys = kmalloc((sizeof(*keys) * (md5sig->entries4 + 1)), GFP_ATOMIC); if (!keys) { kfree(newkey); - tcp_free_md5sig_pool(); + if (md5sig->entries4 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -964,6 +967,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; tp->md5sig_info->alloced4 = 0; + tcp_free_md5sig_pool(); } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memmove(&tp->md5sig_info->keys4[i], @@ -971,7 +975,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) (tp->md5sig_info->entries4 - i) * sizeof(struct tcp4_md5sig_key)); } - tcp_free_md5sig_pool(); return 0; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7c43e86..af78a16 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -605,7 +605,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + if (tp->md5sig_info->entries6 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -614,8 +615,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); if (!keys) { - tcp_free_md5sig_pool(); kfree(newkey); + if (tp->md5sig_info->entries6 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -661,6 +663,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) kfree(tp->md5sig_info->keys6); tp->md5sig_info->keys6 = NULL; tp->md5sig_info->alloced6 = 0; + tcp_free_md5sig_pool(); } else { /* shrink the database */ if 
(tp->md5sig_info->entries6 != i) @@ -669,7 +672,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) (tp->md5sig_info->entries6 - i) * sizeof (tp->md5sig_info->keys6[0])); } - tcp_free_md5sig_pool(); return 0; } } -- cgit v1.1 From b00654416d4baccf2269ec888e55d6df069cd36f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 2 Oct 2011 04:21:50 +0000 Subject: tcp: properly update lost_cnt_hint during shifting [ Upstream commit 1e5289e121372a3494402b1b131b41bfe1cf9b7f ] lost_skb_hint is used by tcp_mark_head_lost() to mark the first unhandled skb. lost_cnt_hint is the number of packets or sacked packets before the lost_skb_hint; When shifting a skb that is before the lost_skb_hint, if tcp_is_fack() is ture, the skb has already been counted in the lost_cnt_hint; if tcp_is_fack() is false, tcp_sacktag_one() will increase the lost_cnt_hint. So tcp_shifted_skb() does not need to adjust the lost_cnt_hint by itself. When shifting a skb that is equal to lost_skb_hint, the shifted packets will not be counted by tcp_mark_head_lost(). So tcp_shifted_skb() should adjust the lost_cnt_hint even tcp_is_fack(tp) is true. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b6771f9..c68040f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1380,9 +1380,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Tweak before seqno plays */ - if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && - !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; -- cgit v1.1 From 2146d4667b5fbdb0e186ca8bc6d9cbe7ac42dfdc Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 25 Sep 2011 02:21:30 +0000 Subject: ipv6: nullify ipv6_ac_list and ipv6_fl_list when creating new socket [ Upstream commit 676a1184e8afd4fed7948232df1ff91517400859 ] ipv6_ac_list and ipv6_fl_list from listening socket are inadvertently shared with new socket created for connection. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index af78a16..296510a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1409,6 +1409,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1473,6 +1475,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ -- cgit v1.1 From 62d8d0b9b6c459789876335156c56110042f2158 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Sep 2011 10:38:28 +0000 Subject: make PACKET_STATISTICS getsockopt report consistently between ring and non-ring [ Upstream commit 7091fbd82cd5686444ffe9935ed6a8190101fe9d ] This is a minor change. Up until kernel 2.6.32, getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, ...) would return total and dropped packets since its last invocation. 
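For reference, a minimal user-space sketch of that interface (assumes a Linux host and CAP_NET_RAW; no receive ring is set up, so it exercises the non-ring receive path):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <arpa/inet.h>
    #include <linux/if_ether.h>
    #include <linux/if_packet.h>

    int main(void)
    {
            struct tpacket_stats st;
            socklen_t len = sizeof(st);
            int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

            if (fd < 0) {
                    perror("socket(AF_PACKET)");    /* needs CAP_NET_RAW */
                    return 1;
            }

            sleep(1);       /* let some traffic arrive (or overflow the queue) */

            /* Reading the counters is also what resets them, so each call is
             * meant to report the interval since the previous call. */
            if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) < 0) {
                    perror("getsockopt(PACKET_STATISTICS)");
                    return 1;
            }

            /* tp_packets is reported with tp_drops folded in. */
            printf("packets: %u  drops: %u\n", st.tp_packets, st.tp_drops);
            close(fd);
            return 0;
    }
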
The introduction of socket queue overflow reporting [1] changed drop rate calculation in the normal packet socket path, but not when using a packet ring. As a result, the getsockopt now returns different statistics depending on the reception method used. With a ring, it still returns the count since the last call, as counts are incremented in tpacket_rcv and reset in getsockopt. Without a ring, it returns 0 if no drops occurred since the last getsockopt and the total drops over the lifespan of the socket otherwise. The culprit is this line in packet_rcv, executed on a drop: drop_n_acct: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); As it shows, the new drop number it taken from the socket drop counter, which is not reset at getsockopt. I put together a small example that demonstrates the issue [2]. It runs for 10 seconds and overflows the queue/ring on every odd second. The reported drop rates are: ring: 16, 0, 16, 0, 16, ... non-ring: 0, 15, 0, 30, 0, 46, 0, 60, 0 , 74. Note how the even ring counts monotonically increase. Because the getsockopt adds tp_drops to tp_packets, total counts are similarly reported cumulatively. Long story short, reinstating the original code, as the below patch does, fixes the issue at the cost of additional per-packet cycles. Another solution that does not introduce per-packet overhead is be to keep the current data path, record the value of sk_drops at getsockopt() at call N in a new field in struct packetsock and subtract that when reporting at call N+1. I'll be happy to code that, instead, it's just more messy. [1] http://patchwork.ozlabs.org/patch/35665/ [2] http://kernel.googlecode.com/files/test-packetsock-getstatistics.c Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c0c3cda..fafb968 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -654,7 +654,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); + spin_lock(&sk->sk_receive_queue.lock); + po->stats.tp_drops++; + atomic_inc(&sk->sk_drops); + spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { -- cgit v1.1 From 8adc3d3df0562b8dc4008f458081dcc2d8b98863 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 29 Sep 2011 15:33:47 -0400 Subject: can bcm: fix incomplete tx_setup fix commit 12d0d0d3a7349daa95dbfd5d7df8146255bc7c67 upstream. The commit aabdcb0b553b9c9547b1a506b34d55a764745870 ("can bcm: fix tx_setup off-by-one errors") fixed only a part of the original problem reported by Andre Naujoks. It turned out that the original code needed to be re-ordered to reduce complexity and to finally fix the reported frame counting issues. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 8838b86..c6cc66f 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -343,6 +343,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_start_timer(struct bcm_op *op) +{ + if (op->kt_ival1.tv64 && op->count) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); + else if (op->kt_ival2.tv64) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); +} + static void bcm_tx_timeout_tsklet(unsigned long data) { struct bcm_op *op = (struct bcm_op *)data; @@ -364,23 +376,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - - /* send (next) frame */ bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); - } else { - if (op->kt_ival2.tv64) { + } else if (op->kt_ival2.tv64) + bcm_can_tx(op); - /* send (next) frame */ - bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); - } - } + bcm_tx_start_timer(op); } /* @@ -960,28 +961,21 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_cancel(&op->timer); } - if ((op->flags & STARTTIMER) && - ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { - + if (op->flags & STARTTIMER) { + hrtimer_cancel(&op->timer); /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - - /* only start timer when having more frames than sent below */ - if (op->kt_ival1.tv64 && (op->count > 1)) { - /* op->count-- is done in bcm_tx_timeout_tsklet */ - hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); - } else - hrtimer_start(&op->timer, op->kt_ival2, - HRTIMER_MODE_REL); } if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); - if (op->kt_ival1.tv64 && (op->count > 0)) + if (op->count) op->count--; } + if (op->flags & STARTTIMER) + bcm_tx_start_timer(op); + return msg_head->nframes * CFSIZ + MHSIZ; } -- cgit v1.1 From ce0f562ecf544f386b6ae95f490cd06f7da2deb4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 3 Oct 2011 18:14:45 +0000 Subject: bridge: leave carrier on for empty bridge [ Upstream commit b64b73d7d0c480f75684519c6134e79d50c1b341 ] This resolves a regression seen by some users of bridging. Some users use the bridge like a dummy device. They expect to be able to put an IPv6 address on the device with no ports attached. Although there are better ways of doing this, there is no reason to not allow it. Note: the bridge still will reflect the state of ports in the bridge if there are any added. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_device.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32b8f9f..ff3ed60 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); netdev_update_features(dev); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); - br_stp_disable_bridge(br); br_multicast_stop(br); -- cgit v1.1 From 5796ee30587cb5f887a7fe6182c2bbcc3d31f0ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Oct 2011 02:30:50 +0000 Subject: net: Unlock sock before calling sk_free() [ Upstream commit b0691c8ee7c28a72748ff32e91b165ec12ae4de6 ] Signed-off-by: Thomas Gleixner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6e81978..aebb419 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1257,6 +1257,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); newsk = NULL; goto out; -- cgit v1.1 From 6fa9e3e3e01b8741eead6e00bb968ef3b4fddc3f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 25 Oct 2011 10:25:49 +1100 Subject: NFS/sunrpc: don't use a credential with extra groups. commit dc6f55e9f8dac4b6479be67c5c9128ad37bb491f upstream. The sunrpc layer keeps a cache of recently used credentials and 'unx_match' is used to find the credential which matches the current process. However unx_match allows a match when the cached credential has extra groups at the end of uc_gids list which are not in the process group list. So if a process with a list of (say) 4 group accesses a file and gains access because of the last group in the list, then another process with the same uid and gid, and a gid list being the first tree of the gids of the original process tries to access the file, it will be granted access even though it shouldn't as the wrong rpc credential will be used. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_unix.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4cb70dc..e50502d 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -129,6 +129,9 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) for (i = 0; i < groups ; i++) if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) return 0; + if (groups < NFS_NGROUPS && + cred->uc_gids[groups] != NOGROUP) + return 0; return 1; } -- cgit v1.1 From 632abf8b3f714da01daec2d43ffd9cd7b47f53a9 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 20 Oct 2011 19:05:49 +0200 Subject: mac80211: fix remain_off_channel regression commit eaa7af2ae582c9a8c51b374c48d5970b748a5ce2 upstream. The offchannel code is currently broken - we should remain_off_channel if the work was started, and the work's channel and channel_type are the same as local->tmp_channel and local->tmp_channel_type. However, if wk->chan_type and local->tmp_channel_type coexist (e.g. 
have the same channel type), we won't remain_off_channel. This behavior was introduced by commit da2fd1f ("mac80211: Allow work items to use existing channel type.") Tested-by: Ben Greear Signed-off-by: Eliad Peller Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/work.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index dc8ec05..0199f97 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -1060,8 +1060,8 @@ static void ieee80211_work_work(struct work_struct *work) continue; if (wk->chan != local->tmp_channel) continue; - if (ieee80211_work_ct_coexists(wk->chan_type, - local->tmp_channel_type)) + if (!ieee80211_work_ct_coexists(wk->chan_type, + local->tmp_channel_type)) continue; remain_off_channel = true; } -- cgit v1.1 From 42c6d01ce89d43598fc804cf7c141d3252fe93b5 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 20 Oct 2011 19:05:50 +0200 Subject: mac80211: config hw when going back on-channel commit 6911bf0453e0d6ea8eb694a4ce67a68d071c538e upstream. When going back on-channel, we should reconfigure the hw iff the hardware is not already configured to the operational channel. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/work.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 0199f97..52b758d 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -1067,7 +1067,6 @@ static void ieee80211_work_work(struct work_struct *work) } if (!remain_off_channel && local->tmp_channel) { - bool on_oper_chan = ieee80211_cfg_on_oper_channel(local); local->tmp_channel = NULL; /* If tmp_channel wasn't operating channel, then * we need to go back on-channel. @@ -1077,7 +1076,7 @@ static void ieee80211_work_work(struct work_struct *work) * we still need to do a hardware config. Currently, * we cannot be here while scanning, however. */ - if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan) + if (!ieee80211_cfg_on_oper_channel(local)) ieee80211_hw_config(local, 0); /* At the least, we need to disable offchannel_ps, -- cgit v1.1 From 041f9e20b7f794fd7b4932e05d0c938a9ebbf658 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Oct 2011 11:59:47 +0200 Subject: mac80211: disable powersave for broken APs commit 05cb91085760ca378f28fc274fbf77fc4fd9886c upstream. Only AID values 1-2007 are valid, but some APs have been found to send random bogus values, in the reported case an AP that was sending the AID field value 0xffff, an AID of 0x3fff (16383). There isn't much we can do but disable powersave since there's no way it can work properly in this case. Reported-by: Bill C Riemers Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0c5aed2..3fdac77 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -373,6 +373,7 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ + bool broken_ap; /* AP is broken -- turn off powersave */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ ap_smps, /* smps mode AP thinks we're in */ driver_smps_mode; /* smps mode request */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7a334fd..1563250 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -613,6 +613,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) if (!mgd->powersave) return false; + if (mgd->broken_ap) + return false; + if (!mgd->associated) return false; @@ -1450,10 +1453,21 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", sdata->name, aid); + printk(KERN_DEBUG + "%s: invalid AID value 0x%x; bits 15:14 not set\n", + sdata->name, aid); aid &= ~(BIT(15) | BIT(14)); + ifmgd->broken_ap = false; + + if (aid == 0 || aid > IEEE80211_MAX_AID) { + printk(KERN_DEBUG + "%s: invalid AID value %d (out of range), turn off PS\n", + sdata->name, aid); + aid = 0; + ifmgd->broken_ap = true; + } + pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); -- cgit v1.1 From 3fa57c1bf5fb311544199b7837a08b9f5bf5e6e4 Mon Sep 17 00:00:00 2001 From: dpward Date: Mon, 5 Sep 2011 16:47:24 +0000 Subject: net: Handle different key sizes between address families in flow cache commit aa1c366e4febc7f5c2b84958a2dd7cd70e28f9d0 upstream. With the conversion of struct flowi to a union of AF-specific structs, some operations on the flow cache need to account for the exact size of the key. Signed-off-by: David Ward Signed-off-by: David S. Miller Cc: Kim Phillips Signed-off-by: Greg Kroah-Hartman --- net/core/flow.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 990703b..a6bda2a 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -172,29 +172,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. 
*/ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -215,6 +212,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +220,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -230,11 +233,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { if (tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -248,7 +251,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fle) { fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; -- cgit v1.1 From 492d7eff2def54c6e5521ee82764ca22025e7030 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Nov 2011 12:28:33 +0100 Subject: mac80211: fix NULL dereference in radiotap code commit f8d1ccf15568268c76f913b45ecdd33134387f1a upstream. When receiving failed PLCP frames is enabled, there won't be a rate pointer when we add the radiotap header and thus the kernel will crash. Fix this by not assuming the rate pointer is always valid. It's still always valid for frames that have good PLCP though, and that is checked & enforced. This was broken by my commit fc88518916793af8ad6a02e05ff254d95c36d875 Author: Johannes Berg Date: Fri Jul 30 13:23:12 2010 +0200 mac80211: don't check rates on PLCP error frames where I removed the check in this case but didn't take into account that the rate info would be used. Reported-by: Xiaokang Qin Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7fa8c6b..378bd67 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -140,8 +140,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_RATE */ - if (status->flag & RX_FLAG_HT) { + if (!rate || status->flag & RX_FLAG_HT) { /* + * Without rate information don't add it. If we have, * MCS information is a separate field in radiotap, * added below. The byte here is needed as padding * for the channel though, so initialise it to 0. 
@@ -162,12 +163,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->flag & RX_FLAG_HT) put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ, pos); - else if (rate->flags & IEEE80211_RATE_ERP_G) + else if (rate && rate->flags & IEEE80211_RATE_ERP_G) put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, pos); - else + else if (rate) put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, pos); + else + put_unaligned_le16(IEEE80211_CHAN_2GHZ, pos); pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ -- cgit v1.1 From ae1e9df381d5197015356e22de9d7acaf646a9ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Nov 2011 13:04:41 +0100 Subject: mac80211: fix bug in ieee80211_build_probe_req commit 5b2bbf75a24d6b06afff6de0eb4819413fd81971 upstream. ieee80211_probereq_get() can return NULL in which case we should clean up & return NULL in ieee80211_build_probe_req() as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d3fe2d2..2124db8 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1047,6 +1047,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, skb = ieee80211_probereq_get(&local->hw, &sdata->vif, ssid, ssid_len, buf, buf_len); + if (!skb) + goto out; if (dst) { mgmt = (struct ieee80211_mgmt *) skb->data; @@ -1055,6 +1057,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + out: kfree(buf); return skb; -- cgit v1.1 From c1ce1705eb0778927a0b81a6eb29e8ff193aa3de Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 3 Nov 2011 09:27:01 +0100 Subject: nl80211: fix HT capability attribute validation commit 6c7394197af90f6a332180e33f5d025d3037d883 upstream. Since the NL80211_ATTR_HT_CAPABILITY attribute is used as a struct, it needs a minimum, not maximum length. Enforce that properly. Not doing so could potentially lead to reading after the buffer. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1ac9443..3dac76f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -126,8 +126,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, - [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, - .len = NL80211_HT_CAPABILITY_LEN }, + [NL80211_ATTR_HT_CAPABILITY] = { .len = NL80211_HT_CAPABILITY_LEN }, [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, [NL80211_ATTR_IE] = { .type = NLA_BINARY, -- cgit v1.1 From 5c02e3ae0a16cfc2fb86549cca9898ead2b0d666 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 8 Nov 2011 14:28:06 -0800 Subject: cfg80211: fix bug on regulatory core exit on access to last_request commit 58ebacc66bd11be2327edcefc79de94bd6f5bb4a upstream. Commit 4d9d88d1 by Scott James Remnant added the .uevent() callback for the regulatory device used during the platform device registration. The change was done to account for queuing up udev change requests through udevadm triggers. 
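Looking back at the HT-capability policy fix a few patches up: the behaviour it relies on is that the .len field of struct nla_policy means different things depending on the declared attribute type. A rough sketch of the two idioms (the FOO_* names and the struct are invented for illustration):

    /* For NLA_BINARY, .len is an upper bound on the payload size; for an
     * attribute declared without a type, .len is the minimum length the
     * payload must have -- which is what a fixed-size struct wants. */
    enum { FOO_ATTR_UNSPEC, FOO_ATTR_BLOB, FOO_ATTR_CAPS, __FOO_ATTR_MAX };
    #define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

    struct foo_caps { __u16 cap_info; __u8 supp_mcs[10]; }; /* invented example */

    static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
            [FOO_ATTR_BLOB] = { .type = NLA_BINARY, .len = 32 },   /* at most 32 bytes */
            [FOO_ATTR_CAPS] = { .len = sizeof(struct foo_caps) },  /* at least sizeof() */
    };
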
The change also meant that upon regulatory core exit we will now send a uevent() but the uevent() callback, reg_device_uevent(), also accessed last_request. Right before commiting device suicide we free'd last_request but never set it to NULL so platform_device_unregister() would lead to bogus kernel paging request. Fix this and also simply supress uevents right before we commit suicide as they are pointless. This fix is required for kernels >= v2.6.39 $ git describe --contains 4d9d88d1 v2.6.39-rc1~468^2~25^2^2~21 The impact of not having this present is that a bogus paging access may occur (only read) upon cfg80211 unload time. You may also get this BUG complaint below. Although Johannes could not reproduce the issue this fix is theoretically correct. mac80211_hwsim: unregister radios mac80211_hwsim: closing netlink BUG: unable to handle kernel paging request at ffff88001a06b5ab IP: [] reg_device_uevent+0x1a/0x50 [cfg80211] PGD 1836063 PUD 183a063 PMD 1ffcb067 PTE 1a06b160 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC CPU 0 Modules linked in: cfg80211(-) [last unloaded: mac80211] Pid: 2279, comm: rmmod Tainted: G W 3.1.0-wl+ #663 Bochs Bochs RIP: 0010:[] [] reg_device_uevent+0x1a/0x50 [cfg80211] RSP: 0000:ffff88001c5f9d58 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88001d2eda88 RCX: ffff88001c7468fc RDX: ffff88001a06b5a0 RSI: ffff88001c7467b0 RDI: ffff88001c7467b0 RBP: ffff88001c5f9d58 R08: 000000000000ffff R09: 000000000000ffff R10: 0000000000000000 R11: 0000000000000001 R12: ffff88001c7467b0 R13: ffff88001d2eda78 R14: ffffffff8164a840 R15: 0000000000000001 FS: 00007f8a91d8a6e0(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88001a06b5ab CR3: 000000001c62e000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 2279, threadinfo ffff88001c5f8000, task ffff88000023c780) Stack: ffff88001c5f9d98 ffffffff812ff7e5 ffffffff8176ab3d ffff88001c7468c2 000000000000ffff ffff88001d2eda88 ffff88001c7467b0 ffff880000114820 ffff88001c5f9e38 ffffffff81241dc7 ffff88001c5f9db8 ffffffff81040189 Call Trace: [] dev_uevent+0xc5/0x170 [] kobject_uevent_env+0x1f7/0x490 [] ? sub_preempt_count+0x29/0x60 [] ? _raw_spin_unlock_irqrestore+0x4a/0x90 [] ? devres_release_all+0x27/0x60 [] kobject_uevent+0xb/0x10 [] device_del+0x157/0x1b0 [] platform_device_del+0x1d/0x90 [] platform_device_unregister+0x16/0x30 [] regulatory_exit+0x5d/0x180 [cfg80211] [] cfg80211_exit+0x2b/0x45 [cfg80211] [] sys_delete_module+0x16c/0x220 [] ? trace_hardirqs_on_caller+0x7e/0x120 [] system_call_fastpath+0x16/0x1b Code: RIP [] reg_device_uevent+0x1a/0x50 [cfg80211] RSP CR2: ffff88001a06b5ab ---[ end trace 147c5099a411e8c0 ]--- Reported-by: Johannes Berg Cc: Scott James Remnant Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 379574c..213103e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2254,6 +2254,9 @@ void /* __init_or_exit */ regulatory_exit(void) kfree(last_request); + last_request = NULL; + dev_set_uevent_suppress(®_pdev->dev, true); + platform_device_unregister(reg_pdev); spin_lock_bh(®_pending_beacons_lock); -- cgit v1.1 From d11d8cff78f91d5b956975b5c9e4c271e15f864e Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 10 Nov 2011 15:10:23 +0000 Subject: ip6_tunnel: copy parms.name after register_netdevice commit 731abb9cb27aef6013ce60808a04e04a545f3f4e upstream. Commit 1c5cae815d removed an explicit call to dev_alloc_name in ip6_tnl_create because register_netdevice will now create a valid name. This works for the net_device itself. However the tunnel keeps a copy of the name in the parms structure for the ip6_tnl associated with the tunnel. parms.name is set by copying the net_device name in ip6_tnl_dev_init_gen. That function is called from ip6_tnl_dev_init in ip6_tnl_create, but it is done before register_netdevice is called so the name is set to a bogus value in the parms.name structure. This shows up if you do a simple tunnel add, followed by a tunnel show: [root@localhost ~]# ip -6 tunnel add remote fec0::100 local fec0::200 [root@localhost ~]# ip -6 tunnel show ip6tnl0: ipv6/ipv6 remote :: local :: encaplimit 0 hoplimit 0 tclass 0x00 flowlabel 0x00000 (flowinfo 0x00000000) ip6tnl%d: ipv6/ipv6 remote fec0::100 local fec0::200 encaplimit 4 hoplimit 64 tclass 0x00 flowlabel 0x00000 (flowinfo 0x00000000) [root@localhost ~]# Fix this by moving the strcpy out of ip6_tnl_dev_init_gen, and calling it after register_netdevice has successfully returned. Signed-off-by: Josh Boyer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 36c2842..848e494 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -289,6 +289,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) if ((err = register_netdevice(dev)) < 0) goto failed_free; + strcpy(t->parms.name, dev->name); + dev_hold(dev); ip6_tnl_link(ip6n, t); return t; @@ -1397,7 +1399,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; - strcpy(t->parms.name, dev->name); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; @@ -1477,6 +1478,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) static int __net_init ip6_tnl_init_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip6_tnl *t = NULL; int err; ip6n->tnls[0] = ip6n->tnls_wc; @@ -1497,6 +1499,10 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); return 0; err_register: -- cgit v1.1 From 421f55945fb8515c6f322d20e9b0a2d390235e05 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Nov 2011 14:44:28 +0200 Subject: SUNRPC: Ensure we return EAGAIN in xs_nospace if congestion is cleared commit 24ca9a847791fd53d9b217330b15f3c285827a18 upstream. 
By returning '0' instead of 'EAGAIN' when the tests in xs_nospace() fail to find evidence of socket congestion, we are making the RPC engine believe that the message was incorrectly sent and so it disconnects the socket instead of just retrying. The bug appears to have been introduced by commit 5e3771ce2d6a69e10fcc870cdf226d121d868491 (SUNRPC: Ensure that xs_nospace return values are propagated). Reported-by: Andrew Cooper Signed-off-by: Trond Myklebust Tested-by: Andrew Cooper Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 72abb73..ea75079 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -485,7 +485,7 @@ static int xs_nospace(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int ret = 0; + int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -497,7 +497,6 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size -- cgit v1.1 From e30922bd0c3d24c27f457a64997ed4a47161621e Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 24 Nov 2011 18:13:56 +0200 Subject: nl80211: fix MAC address validation commit e007b857e88097c96c45620bf3b04a4e309053d1 upstream. MAC addresses have a fixed length. The current policy allows passing < ETH_ALEN bytes, which might result in reading beyond the buffer. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3dac76f..0c2b808 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -83,8 +83,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, - [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_MAC] = { .len = ETH_ALEN }, + [NL80211_ATTR_PREV_BSSID] = { .len = ETH_ALEN }, [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, -- cgit v1.1 From 6cb4e0db2f318c0730f31622fc5812a19e7ff379 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 21 Nov 2011 10:44:00 +0100 Subject: cfg80211: fix regulatory NULL dereference commit de3584bd62d87b4c250129fbc46ca52c80330add upstream. By the time userspace returns with a response to the regulatory domain request, the wiphy causing the request might have gone away. If this is so, reject the update but mark the request as having been processed anyway. Cc: Luis R. Rodriguez Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 213103e..3c5ddeaf 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2026,6 +2026,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (!request_wiphy) { + reg_set_request_processed(); + return -ENODEV; + } if (!last_request->intersect) { int r; -- cgit v1.1 From 64a1b241dd84ed6b12b62bbb67b380609bdd50b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Nov 2011 20:06:14 +0100 Subject: mac80211: don't stop a single aggregation session twice commit 24f50a9d165745fd0701c6e089d35f58a229ea69 upstream. Nikolay noticed (by code review) that mac80211 can attempt to stop an aggregation session while it is already being stopped. So to fix it, check whether stop is already being done and bail out if so. Also move setting the STOPPING state into the lock so things are properly atomic. Reported-by: Nikolay Martynov Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c8be8ef..2341875 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -162,6 +162,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } + /* if we're already stopping ignore any new requests to stop */ + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + spin_unlock_bh(&sta->lock); + return -EALREADY; + } + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* not even started yet! */ ieee80211_assign_tid_tx(sta, tid, NULL); @@ -170,6 +176,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return 0; } + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -177,8 +185,6 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - del_timer_sync(&tid_tx->addba_resp_timer); /* -- cgit v1.1 From 20f8d725863ca926a199cab1bc5cf31f8bf53cb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 27 Nov 2011 15:29:44 +0200 Subject: mac80211: fix race between the AGG SM and the Tx data path commit 2a1e0fd175dcfd72096ba9291d31e3b1b5342e60 upstream. When a packet is supposed to sent be as an a-MPDU, mac80211 sets IEEE80211_TX_CTL_AMPDU to let the driver know. On the other hand, mac80211 configures the driver for aggregration with the ampdu_action callback. There is race between these two mechanisms since the following scenario can occur when the BA agreement is torn down: Tx softIRQ drv configuration ========== ================= check OPERATIONAL bit Set the TX_CTL_AMPDU bit in the packet clear OPERATIONAL bit stop Tx AGG Pass Tx packet to the driver. In that case the driver would get a packet with TX_CTL_AMPDU set although it has already been notified that the BA session has been torn down. To fix this, we need to synchronize all the Qdisc activity after we cleared the OPERATIONAL bit. After that step, all the following packets will be buffered until the driver reports it is ready to get new packets for this RA / TID. 
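A compressed sketch of the two racing contexts (the TX-path fragment is a paraphrase of the scenario above; the teardown ordering is what this patch changes):

    /* TX softirq path (simplified): runs inside the RCU/BH read side */
    if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
            info->flags |= IEEE80211_TX_CTL_AMPDU;  /* may race with teardown */

    /* Teardown path, with this fix applied */
    clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
    synchronize_net();      /* wait out every TX path that saw the bit still set */
    /* ... only now tell the driver the BA session is going away ... */
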
This buffering allows not to run into another race that would send packets with TX_CTL_AMPDU unset while the driver hasn't been requested to tear down the BA session yet. This race occurs in practice and iwlwifi complains with a WARN_ON when it happens. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 2341875..42a59c2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -194,6 +194,20 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); + /* + * There might be a few packets being processed right now (on + * another CPU) that have already gotten past the aggregation + * check when it was still OPERATIONAL and consequently have + * IEEE80211_TX_CTL_AMPDU set. In that case, this code might + * call into the driver at the same time or even before the + * TX paths calls into it, which could confuse the driver. + * + * Wait for all currently running TX paths to finish before + * telling the driver. New packets will not go through since + * the aggregation session is no longer OPERATIONAL. + */ + synchronize_net(); + tid_tx->stop_initiator = initiator; tid_tx->tx_stop = tx; -- cgit v1.1 From 3ed26be17352133a2dadbc4212a5d23b403b0980 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 28 Nov 2011 16:47:15 -0500 Subject: cfg80211: fix race on init and driver registration commit a042994dd377d86bff9446ee76151ceb6267c9ba upstream. There is a theoretical race that if hit will trigger a crash. The race is between when we issue the first regulatory hint, regulatory_hint_core(), gets processed by the workqueue and between when the first device gets registered to the wireless core. This is not easy to reproduce but it was easy to do so through the regulatory simulator I have been working on. This is a port of the fix I implemented there [1]. [1] https://github.com/mcgrof/regsim/commit/a246ccf81f059cb662eee288aa13100f631e4cc8 Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3c5ddeaf..0625ada 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -57,8 +57,17 @@ #define REG_DBG_PRINT(args...) 
#endif +static struct regulatory_request core_request_world = { + .initiator = NL80211_REGDOM_SET_BY_CORE, + .alpha2[0] = '0', + .alpha2[1] = '0', + .intersect = false, + .processed = true, + .country_ie_env = ENVIRON_ANY, +}; + /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request; +static struct regulatory_request *last_request = &core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -150,7 +159,7 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(void) +static void reset_regdomains(bool full_reset) { /* avoid freeing static information or freeing something twice */ if (cfg80211_regdomain == cfg80211_world_regdom) @@ -165,6 +174,13 @@ static void reset_regdomains(void) cfg80211_world_regdom = &world_regdom; cfg80211_regdomain = NULL; + + if (!full_reset) + return; + + if (last_request != &core_request_world) + kfree(last_request); + last_request = &core_request_world; } /* @@ -175,7 +191,7 @@ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { BUG_ON(!last_request); - reset_regdomains(); + reset_regdomains(false); cfg80211_world_regdom = rd; cfg80211_regdomain = rd; @@ -1396,7 +1412,8 @@ static int __regulatory_hint(struct wiphy *wiphy, } new_request: - kfree(last_request); + if (last_request != &core_request_world) + kfree(last_request); last_request = pending_request; last_request->intersect = intersect; @@ -1566,9 +1583,6 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - kfree(last_request); - last_request = NULL; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1766,7 +1780,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); + reset_regdomains(true); restore_alpha2(alpha2, reset_user); /* @@ -2035,7 +2049,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) int r; if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2056,7 +2070,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (r) return r; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2081,7 +2095,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2101,7 +2115,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) kfree(rd); rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2254,11 +2268,8 @@ void /* __init_or_exit */ regulatory_exit(void) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); - - kfree(last_request); + reset_regdomains(true); - last_request = NULL; dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); -- cgit v1.1 From 7e06614ab127e8c9de4af2e8f9d66953d3e68297 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 28 Nov 2011 16:47:16 -0500 Subject: cfg80211: amend regulatory NULL dereference fix commit 0bac71af6e66dc798bf07d0c0dd14ee5503362f9 upstream. 
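The reg.c hunks above stop treating last_request as something that can be left NULL or freed unconditionally: a statically allocated world request becomes the permanent fallback, it is never passed to kfree(), and every full reset points last_request back at it. A minimal, hedged sketch of that pattern in plain C follows; the struct and function names are illustrative, not the cfg80211 ones:

    #include <stdlib.h>
    #include <string.h>

    struct reg_request {
            char alpha2[3];
            int processed;
    };

    /* static fallback: always valid, never passed to free() */
    static struct reg_request core_request_world = { .alpha2 = "00", .processed = 1 };
    static struct reg_request *last_request = &core_request_world;

    static void set_request(const char *alpha2)
    {
            struct reg_request *req = calloc(1, sizeof(*req));

            if (!req)
                    return;
            memcpy(req->alpha2, alpha2, 2);
            if (last_request != &core_request_world)
                    free(last_request);          /* only dynamic requests are freed */
            last_request = req;
    }

    static void reset_requests(void)
    {
            if (last_request != &core_request_world)
                    free(last_request);
            last_request = &core_request_world;  /* fall back, never leave NULL */
    }

    int main(void)
    {
            set_request("DE");
            reset_requests();
            return last_request->processed ? 0 : 1;
    }

Readers of last_request therefore always see a usable request, which is the window the follow-up fix below builds on.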
Johannes' patch for "cfg80211: fix regulatory NULL dereference" broke user regulatory hints and it did not address the fact that last_request was left populated even if the previous regulatory hint was stale due to the wiphy disappearing. Fix user regulatory hints by only bailing out for those regulatory hints where a request_wiphy is expected. The stale last_request considerations are addressed through the previous fixes on last_request where we reset the last_request to a static world regdom request upon reset_regdomains(). In this case though we further enhance the effect by simply restoring regulatory settings completely. Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0625ada..ca76d8b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2040,8 +2040,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - if (!request_wiphy) { - reg_set_request_processed(); + if (!request_wiphy && + (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || + last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + schedule_delayed_work(&reg_timeout, 0); return -ENODEV; } -- cgit v1.1 From 5009514a0967db0060656c70de812b8643417da7 Mon Sep 17 00:00:00 2001 From: Nikolay Martynov Date: Sat, 10 Dec 2011 17:31:23 +0100 Subject: mac80211: fix race condition caused by late addBA response Upstream commit d305a6557b2c4dca0110f05ffe745b1ef94adb80. If an addBA response comes in just after addba_resp_timer has expired, mac80211 will still accept it and try to open the aggregation session. This causes drivers to be confused and in some cases even crash. This patch fixes the race condition and makes sure that if addba_resp_timer has expired the addBA response is no longer accepted and we do not try to open a half-closed session. Signed-off-by: Nikolay Martynov [some adjustments] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 42a59c2..db7db43 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -792,12 +792,27 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; } - del_timer(&tid_tx->addba_resp_timer); + del_timer_sync(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); #endif + /* + * addba_resp_timer may have fired before we got here, and + * caused WANT_STOP to be set. If the stop then was already + * processed further, STOPPING might be set. + */ + if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || + test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG + "got addBA resp for tid %d but we already gave up\n", + tid); +#endif + goto out; + } + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { /* -- cgit v1.1 From 919130981e72465867038ac1dec823dbd7a87eb5 Mon Sep 17 00:00:00 2001 From: Ted Feng Date: Thu, 8 Dec 2011 00:46:21 +0000 Subject: ipip, sit: copy parms.name after register_netdevice commit 72b36015ba43a3cca5303f5534d2c3e1899eae29 upstream.
Same fix as 731abb9cb2 for ipip and sit tunnel. Commit 1c5cae815d removed an explicit call to dev_alloc_name in ipip_tunnel_locate and ipip6_tunnel_locate, because register_netdevice will now create a valid name, however the tunnel keeps a copy of the name in the private parms structure. Fix this by copying the name back after register_netdevice has successfully returned. This shows up if you do a simple tunnel add, followed by a tunnel show: $ sudo ip tunnel add mode ipip remote 10.2.20.211 $ ip tunnel tunl0: ip/ip remote any local any ttl inherit nopmtudisc tunl%d: ip/ip remote 10.2.20.211 local any ttl inherit $ sudo ip tunnel add mode sit remote 10.2.20.212 $ ip tunnel sit0: ipv6/ip remote any local any ttl 64 nopmtudisc 6rd-prefix 2002::/16 sit%d: ioctl 89f8 failed: No such device sit%d: ipv6/ip remote 10.2.20.212 local any ttl inherit Signed-off-by: Ted Feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipip.c | 7 ++++++- net/ipv6/sit.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b..6f06f7f 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip_tunnel_link(ipn, nt); return nt; @@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); + struct ip_tunnel *t; int err; ipn->tunnels[0] = ipn->tunnels_wc; @@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(ipn->fb_tunnel_dev); + + strcpy(t->parms.name, ipn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1cca576..38490d5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip6_tunnel_link(sitn, nt); @@ -1141,7 +1143,6 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1204,6 +1205,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; @@ -1228,6 +1230,9 @@ static int __net_init sit_init_net(struct net *net) if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(sitn->fb_tunnel_dev); + + strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: -- cgit v1.1 From afa2450ce311b3182c737c3fda59bb557da93409 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Dec 2011 09:02:21 +0100 Subject: mac80211: fix 
another race in aggregation start commit 15062e6a8524f5977f2cbdf6e3eb2f144262f74e upstream. Emmanuel noticed that when mac80211 stops the queues for aggregation that can leave a packet pending. This packet will be given to the driver after the AMPDU callback, but as a non-aggregated packet which messes up the sequence number etc. I also noticed by looking at the code that if packets are being processed while we clear the WANT_START bit, they might see it cleared already and queue up on tid_tx->pending. If the driver then rejects the new aggregation session we leak the packet. Fix both of these issues by changing this code to not stop the queues at all. Instead, let packets queue up on the tid_tx->pending queue instead of letting them get to the driver, and add code to recover properly in case the driver rejects the session. (The patch looks large because it has to move two functions to before their new use.) Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 86 ++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index db7db43..b7f4f5c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -304,6 +304,38 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + int queue = ieee80211_ac_from_tid(tid); + unsigned long flags; + + ieee80211_stop_queue_agg(local, tid); + + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) +{ + ieee80211_wake_queue_agg(local, tid); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -315,19 +347,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. + * Start queuing up packets for this aggregation session. + * We're going to release them once the driver is OK with + * that. */ - ieee80211_stop_queue_agg(local, tid); - clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* - * make sure no packets are being processed to get - * valid starting sequence number + * Make sure no packets are being processed. This ensures that + * we have a valid starting sequence number and that in-flight + * packets have been flushed out and no packets for this TID + * will go into the driver during the ampdu_action call. 
*/ synchronize_net(); @@ -341,17 +371,15 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) " tid %d\n", tid); #endif spin_lock_bh(&sta->lock); + ieee80211_agg_splice_packets(local, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); + ieee80211_agg_splice_finish(local, tid); spin_unlock_bh(&sta->lock); - ieee80211_wake_queue_agg(local, tid); kfree_rcu(tid_tx, rcu_head); return; } - /* we can take packets again now */ - ieee80211_wake_queue_agg(local, tid); - /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -471,38 +499,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - int queue = ieee80211_ac_from_tid(tid); - unsigned long flags; - - ieee80211_stop_queue_agg(local, tid); - - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) -{ - ieee80211_wake_queue_agg(local, tid); -} - static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { -- cgit v1.1 From 7eac8f9de24674cc55ee9797d05447bbfbdf1a96 Mon Sep 17 00:00:00 2001 From: Alex Juncu Date: Thu, 15 Dec 2011 23:01:25 +0000 Subject: llc: llc_cmsg_rcv was getting called after sk_eat_skb. [ Upstream commit 9cef310fcdee12b49b8b4c96fd8f611c8873d284 ] Received non stream protocol packets were calling llc_cmsg_rcv that used a skb after that skb was released by sk_eat_skb. This caused received STP packets to generate kernel panics. Signed-off-by: Alexandru Juncu Signed-off-by: Kunjan Naik Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index dfd3a64..a18e6c3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } - /* For non stream protcols we get one packet per recvmsg call */ - if (sk->sk_type != SOCK_STREAM) - goto copy_uaddr; - /* Partial read */ if (used + offset < skb->len) continue; @@ -857,6 +857,12 @@ copy_uaddr: } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, 0); + *seq = 0; + } + goto out; } -- cgit v1.1 From 477a897533f9ab9a6ebb6eedfa9ca3760caa94b2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 22 Dec 2011 02:05:07 +0000 Subject: mqprio: Avoid panic if no options are provided [ Upstream commit 7838f2ce36b6ab5c13ef20b1857e3bbd567f1759 ] Userspace may not provide TCA_OPTIONS, in fact tc currently does not do so if no arguments are specified on the command line. Return EINVAL instead of panicking. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_mqprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ea17cbe..59b26b8 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -106,7 +106,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; - if (nla_len(opt) < sizeof(*qopt)) + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); -- cgit v1.1 From 9ec14c04ec6be93ff397adf250bc91ee77742bfb Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Mon, 19 Dec 2011 22:58:04 +0000 Subject: net: have ipconfig not wait if no dev is available [ Upstream commit cd7816d14953c8af910af5bb92f488b0b277e29d ] Previous commit 3fb72f1e6e6165c5f495e8dc11c5bbd14c73385c makes IP-Config wait for carrier on at least one network device. Before waiting (predefined value 120s), check that at least one device was successfully brought up. Otherwise (e.g. buggy bootloader which does not set the MAC address) there is no point in waiting for carrier. Cc: Micha Nelissen Cc: Holger Brunck Signed-off-by: Gerlando Falauto Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipconfig.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e554..7fbcaba 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -252,6 +252,10 @@ static int __init ic_open_devs(void) } } + /* no point in waiting if we could not bring up at least one device */ + if (!ic_first_dev) + goto have_carrier; + /* wait for a carrier on at least one device */ start = jiffies; while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { -- cgit v1.1 From 01d6bbab3834409c220083f25810be9f1a553054 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Dec 2011 23:42:53 +0000 Subject: sch_gred: should not use GFP_KERNEL while holding a spinlock [ Upstream commit 3f1e6d3fd37bd4f25e5b19f1c7ca21850426c33f ] gred_change_vq() is called under sch_tree_lock(sch).
This means a spinlock is held, and we are not allowed to sleep in this context. We might pre-allocate memory using GFP_KERNEL before taking spinlock, but this is not suitable for stable material. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b9493a0..6cd8ddf 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -385,7 +385,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched_data *q; if (table->tab[dp] == NULL) { - table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); + table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC); if (table->tab[dp] == NULL) return -ENOMEM; } -- cgit v1.1 From f6e4c89e089ae671a677242edb9e8b08c369c415 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 16 Dec 2011 12:44:15 +0000 Subject: sctp: fix incorrect overflow check on autoclose [ Upstream commit 2692ba61a82203404abd7dd2a027bda962861f74 ] Commit 8ffd3208 voids the previous patches f6778aab and 810c0719 for limiting the autoclose value. If userspace passes in -1 on 32-bit platform, the overflow check didn't work and autoclose would be set to 0xffffffff. This patch defines a max_autoclose (in seconds) for limiting the value and exposes it through sysctl, with the following intentions. 1) Avoid overflowing autoclose * HZ. 2) Keep the default autoclose bound consistent across 32- and 64-bit platforms (INT_MAX / HZ in this patch). 3) Keep the autoclose value consistent between setsockopt() and getsockopt() calls. Suggested-by: Vlad Yasevich Signed-off-by: Xi Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/associola.c | 2 +- net/sctp/protocol.c | 3 +++ net/sctp/socket.c | 2 -- net/sctp/sysctl.c | 13 +++++++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4a62888..17a6e65 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - (unsigned long)sp->autoclose * HZ; + min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 207175b..946afd6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1144,6 +1144,9 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; + /* Initialize maximum autoclose timeout. */ + sctp_max_autoclose = INT_MAX / HZ; + /* Initialize handle used for association ids. 
*/ idr_init(&sctp_assocs_id); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d3ccf79..fa9b5c7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2129,8 +2129,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; - /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ - sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ); return 0; } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 50cb57f..6752f48 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -53,6 +53,10 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static unsigned long max_autoclose_min = 0; +static unsigned long max_autoclose_max = + (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) + ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -251,6 +255,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &rwnd_scale_max, }, + { + .procname = "max_autoclose", + .data = &sctp_max_autoclose, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &max_autoclose_min, + .extra2 = &max_autoclose_max, + }, { /* sentinel */ } }; -- cgit v1.1 From 0e5fe3ed8d751c7be333fa193882e91dcc289158 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 19 Dec 2011 04:11:40 +0000 Subject: sctp: Do not account for sizeof(struct sk_buff) in estimated rwnd [ Upstream commit a76c0adf60f6ca5ff3481992e4ea0383776b24d2 ] When checking whether a DATA chunk fits into the estimated rwnd a full sizeof(struct sk_buff) is added to the needed chunk size. This quickly exhausts the available rwnd space and leads to packets being sent which are much below the PMTU limit. This can lead to much worse performance. The reason for this behaviour was to avoid putting too much memory pressure on the receiver. The concept is not completely irational because a Linux receiver does in fact clone an skb for each DATA chunk delivered. However, Linux also reserves half the available socket buffer space for data structures therefore usage of it is already accounted for. When proposing to change this the last time it was noted that this behaviour was introduced to solve a performance issue caused by rwnd overusage in combination with small DATA chunks. Trying to reproduce this I found that with the sk_buff overhead removed, the performance would improve significantly unless socket buffer limits are increased. The following numbers have been gathered using a patched iperf supporting SCTP over a live 1 Gbit ethernet network. The -l option was used to limit DATA chunk sizes. The numbers listed are based on the average of 3 test runs each. Default values have been used for sk_(r|w)mem. Chunk Size Unpatched No Overhead ------------------------------------- 4 15.2 Kbit [!] 12.2 Mbit [!] 8 35.8 Kbit [!] 26.0 Mbit [!] 16 95.5 Kbit [!] 54.4 Mbit [!] 32 106.7 Mbit 102.3 Mbit 64 189.2 Mbit 188.3 Mbit 128 331.2 Mbit 334.8 Mbit 256 537.7 Mbit 536.0 Mbit 512 766.9 Mbit 766.6 Mbit 1024 810.1 Mbit 808.6 Mbit Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 8 +------- net/sctp/outqueue.c | 6 ++---- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 08b3cea..817174e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -697,13 +697,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. Include sk_buff overhead - * while updating peer.rwnd so that it reduces the chances of a - * receiver running out of receive buffer space even when receive - * window is still open. This can happen when a sender is sending - * sending small messages. - */ - datasize += sizeof(struct sk_buff); + /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d036821..1f2938f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, chunk->transport->flight_size -= sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); } continue; } @@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); if (chunk->transport) transport->flight_size -= sctp_data_size(chunk); -- cgit v1.1 From 6c3efb1526c3fcdab3e5bbc9c77710b306493507 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Thu, 1 Dec 2011 15:47:06 +0000 Subject: ipv4: flush route cache after change accept_local [ Upstream commit d01ff0a049f749e0bf10a35bb23edd012718c8c2 ] After reset ipv4_devconf->data[IPV4_DEVCONF_ACCEPT_LOCAL] to 0, we should flush route cache, or it will continue receive packets with local source address, which should be dropped. Signed-off-by: Weiping Pan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 4155abc..7d7fb20 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if ((new_value == 0) && (old_value != 0)) + rt_cache_flush(net, 0); } return ret; -- cgit v1.1 From ad5dd5dc45d80c397dfe314934e91d0ead793928 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2011 15:47:16 -0500 Subject: ipv4: reintroduce route cache garbage collector [ Upstream commit 9f28a2fc0bd77511f649c0a788c7bf9a5fd04edb ] Commit 2c8cec5c10b (ipv4: Cache learned PMTU information in inetpeer) removed IP route cache garbage collector a bit too soon, as this gc was responsible for expired routes cleanup, releasing their neighbour reference. As pointed out by Robert Gladewitz, recent kernels can fill and exhaust their neighbour cache. Reintroduce the garbage collection, since we'll have to wait our neighbour lookups become refcount-less to not depend on this stuff. Reported-by: Robert Gladewitz Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 75ef66f..b01f569 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -132,6 +132,9 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; + /* * Interface to generic destination cache. 
*/ @@ -821,6 +824,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } +static void rt_check_expire(void) +{ + static unsigned int rover; + unsigned int i = rover, goal; + struct rtable *rth; + struct rtable __rcu **rthp; + unsigned long samples = 0; + unsigned long sum = 0, sum2 = 0; + unsigned long delta; + u64 mult; + + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; + if (ip_rt_gc_timeout > 1) + do_div(mult, ip_rt_gc_timeout); + goal = (unsigned int)mult; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; + for (; goal > 0; goal--) { + unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; + + i = (i + 1) & rt_hash_mask; + rthp = &rt_hash_table[i].chain; + + if (need_resched()) + cond_resched(); + + samples++; + + if (rcu_dereference_raw(*rthp) == NULL) + continue; + length = 0; + spin_lock_bh(rt_hash_lock_addr(i)); + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { + prefetch(rth->dst.rt_next); + if (rt_is_expired(rth)) { + *rthp = rth->dst.rt_next; + rt_free(rth); + continue; + } + if (rth->dst.expires) { + /* Entry is expired even if it is in use */ + if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: + tmo >>= 1; + rthp = &rth->dst.rt_next; + /* + * We only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + length += has_noalias(rt_hash_table[i].chain, rth); + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; + + /* Cleanup aged off entries. */ + *rthp = rth->dst.rt_next; + rt_free(rth); + } + spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); + } + rover = i; +} + +/* + * rt_worker_func() is run in process context. 
+ * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ + rt_check_expire(); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} + /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -3088,6 +3182,13 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "gc_interval", + .data = &ip_rt_gc_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), @@ -3297,6 +3398,11 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); + if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM -- cgit v1.1 From 732e81a7579eb0adb26aeadb209e919ee984d01e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 22 Dec 2011 17:03:29 +1100 Subject: ipv4: using prefetch requires including prefetch.h [ Upstream commit b9eda06f80b0db61a73bd87c6b0eb67d8aca55ad ] Signed-off-by: Stephen Rothwell Acked-by: Eric Dumazet Acked-by: David Miller Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b01f569..4845bfe 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -91,6 +91,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 49ffa26eca87d3518ed88d3e6feebf1b80837a15 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 9 Jan 2012 14:06:46 -0800 Subject: igmp: Avoid zero delay when receiving odd mixture of IGMP queries commit a8c1f65c79cbbb2f7da782d4c9d15639a9b94b27 upstream. Commit 5b7c84066733c5dfb0e4016d939757b38de189e4 ('ipv4: correct IGMP behavior on v3 query during v2-compatibility mode') added yet another case for query parsing, which can result in max_delay = 0. Substitute a value of 1, as in the usual v3 case. Reported-by: Simon McVittie References: http://bugs.debian.org/654876 Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d577199..e0d42db 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -875,6 +875,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * to be intended in a v3 query. */ max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); + if (!max_delay) + max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return; -- cgit v1.1 From b9e11747e1227d7ad67c5b80be4b206e4059687e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 11 Jan 2012 09:26:54 +0100 Subject: mac80211: fix rx->key NULL pointer dereference in promiscuous mode commit 1140afa862842ac3e56678693050760edc4ecde9 upstream. Since: commit 816c04fe7ef01dd9649f5ccfe796474db8708be5 Author: Christian Lamparter Date: Sat Apr 30 15:24:30 2011 +0200 mac80211: consolidate MIC failure report handling is possible to that we dereference rx->key == NULL when driver set RX_FLAG_MMIC_STRIPPED and not RX_FLAG_IV_STRIPPED and we are in promiscuous mode. 
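The shape of the fix is simply a pointer guard in front of the flag-driven branch. A hedged, self-contained sketch of the same idea in plain C follows; the types and field names are illustrative assumptions, not mac80211's:

    #include <stdbool.h>
    #include <stdio.h>

    struct key { unsigned iv; };

    struct rx_data {
            struct key *key;        /* may be NULL for sniffed, unrelated frames */
            bool iv_stripped;
            bool mic_error;
    };

    static const char *mic_verify(struct rx_data *rx)
    {
            if (rx->mic_error)
                    return "report MIC failure";
            /* without the rx->key test this branch would dereference NULL */
            if (!rx->iv_stripped && rx->key) {
                    rx->key->iv++;
                    return "update IV";
            }
            return "continue";
    }

    int main(void)
    {
            struct rx_data promiscuous_frame = { .key = NULL };
            struct rx_data own_frame = { .key = &(struct key){ 0 } };

            printf("%s\n", mic_verify(&promiscuous_frame));
            printf("%s\n", mic_verify(&own_frame));
            return 0;
    }

A frame picked up only because the interface is promiscuous has no key attached, so the guard skips the IV update instead of crashing, which is exactly the situation described above.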
This happen with rt73usb and rt61pci at least. Before the commit we always check rx->key against NULL, so I assume fix should be done in mac80211 (also mic_fail path has similar check). References: https://bugzilla.redhat.com/show_bug.cgi?id=769766 http://rt2x00.serialmonkey.com/pipermail/users_rt2x00.serialmonkey.com/2012-January/004395.html Reported-by: Stuart D Gathman Reported-by: Kai Wohlfahrt Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8f6a302..aa1c40a 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -109,7 +109,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; - if (!(status->flag & RX_FLAG_IV_STRIPPED)) + if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) goto update_iv; return RX_CONTINUE; -- cgit v1.1 From b09577ca6680033a4315e2f5cb3a95ebbb8dea79 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 22 Dec 2011 18:22:49 -0700 Subject: svcrpc: fix double-free on shutdown of nfsd after changing pool mode commit 61c8504c428edcebf23b97775a129c5b393a302b upstream. The pool_to and to_pool fields of the global svc_pool_map are freed on shutdown, but are initialized in nfsd startup only in the SVC_POOL_PERCPU and SVC_POOL_PERNODE cases. They *are* initialized to zero on kernel startup. So as long as you use only SVC_POOL_GLOBAL (the default), this will never be a problem. You're also OK if you only ever use SVC_POOL_PERCPU or SVC_POOL_PERNODE. However, the following sequence events leads to a double-free: 1. set SVC_POOL_PERCPU or SVC_POOL_PERNODE 2. start nfsd: both fields are initialized. 3. shutdown nfsd: both fields are freed. 4. set SVC_POOL_GLOBAL 5. start nfsd: the fields are left untouched. 6. shutdown nfsd: now we try to free them again. Step 4 is actually unnecessary, since (for some bizarre reason), nfsd automatically resets the pool mode to SVC_POOL_GLOBAL on shutdown. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b90292..131da58 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -287,7 +288,9 @@ svc_pool_map_put(void) if (!--m->count) { m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } -- cgit v1.1 From 7df22768c0af8769d805f6db21144d71d91fe13d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 11:35:35 -0500 Subject: svcrpc: destroy server sockets all at once commit 2fefb8a09e7ed251ae8996e0c69066e74c5aa560 upstream. There's no reason I can see that we need to call sv_shutdown between closing the two lists of sockets. Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc.c | 7 +------ net/sunrpc/svc_xprt.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 131da58..4d5cb99 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -476,16 +476,11 @@ svc_destroy(struct svc_serv *serv) del_timer_sync(&serv->sv_temptimer); - svc_close_all(&serv->sv_tempsocks); + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bd31208..9cb2621 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -936,7 +936,7 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; struct svc_xprt *tmp; @@ -954,6 +954,15 @@ void svc_close_all(struct list_head *xprt_list) } } +void svc_close_all(struct svc_serv *serv) +{ + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); + +} + /* * Handle defer and revisit of requests */ -- cgit v1.1 From a141a5eb3ab45131cb168e7a561d662722b43ec3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 17:00:26 -0500 Subject: svcrpc: avoid memory-corruption on pool shutdown commit b4f36f88b3ee7cf26bf0be84e6c7fc15f84dcb71 upstream. Socket callbacks use svc_xprt_enqueue() to add an xprt to a pool->sp_sockets list. In normal operation a server thread will later come along and take the xprt off that list. On shutdown, after all the threads have exited, we instead manually walk the sv_tempsocks and sv_permsocks lists to find all the xprt's and delete them. So the sp_sockets lists don't really matter any more. As a result, we've mostly just ignored them and hoped they would go away. Which has gotten us into trouble; witness for example ebc63e531cc6 "svcrpc: fix list-corrupting race on nfsd shutdown", the result of Ben Greear noticing that a still-running svc_xprt_enqueue() could re-add an xprt to an sp_sockets list just before it was deleted. The fix was to remove it from the list at the end of svc_delete_xprt(). But that only made corruption less likely--I can see nothing that prevents a svc_xprt_enqueue() from adding another xprt to the list at the same moment that we're removing this xprt from the list. In fact, despite the earlier xpo_detach(), I don't even see what guarantees that svc_xprt_enqueue() couldn't still be running on this xprt. So, instead, note that svc_xprt_enqueue() essentially does: lock sp_lock if XPT_BUSY unset add to sp_sockets unlock sp_lock So, if we do: set XPT_BUSY on every xprt. Empty every sp_sockets list, under the sp_socks locks. Then we're left knowing that the sp_sockets lists are all empty and will stay that way, since any svc_xprt_enqueue() will check XPT_BUSY under the sp_lock and see it set. And *then* we can continue deleting the xprt's. (Thanks to Jeff Layton for being correctly suspicious of this code....) Cc: Ben Greear Cc: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc.c | 10 +++++++++- net/sunrpc/svc_xprt.c | 48 +++++++++++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4d5cb99..ce5f111 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -475,7 +475,15 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ svc_close_all(serv); if (serv->sv_shutdown) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9cb2621..9d7ed0b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -901,14 +901,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -939,28 +932,45 @@ EXPORT_SYMBOL_GPL(svc_close_xprt); static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: - */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + list_for_each_entry(xprt, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_delete_xprt(xprt); + set_bit(XPT_BUSY, &xprt->xpt_flags); } } void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; + struct svc_xprt *xprt; + struct svc_xprt *tmp; + int i; + svc_close_list(&serv->sv_tempsocks); svc_close_list(&serv->sv_permsocks); + + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } + /* + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. 
+ */ + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) + svc_delete_xprt(xprt); + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + BUG_ON(!list_empty(&serv->sv_permsocks)); BUG_ON(!list_empty(&serv->sv_tempsocks)); - } /* -- cgit v1.1 From d253520a7b2c2223fb4f704f06d10f2c547bdeef Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 8 Nov 2011 12:12:44 +0000 Subject: ah: Correctly pass error codes in ahash output callback. commit 069294e813ed5f27f82613b027609bcda5f1b914 upstream. The AH4/6 ahash output callbacks pass nexthdr to xfrm_output_resume instead of the error code. This appears to be a copy+paste error from the input case, where nexthdr is expected. This causes the driver to continuously add AH headers to the datagram until either an allocation fails and the packet is dropped or the ahash driver hits a synchronous fallback and the resulting monstrosity is transmitted. Correct this issue by simply passing the error code unadulterated. Signed-off-by: Nick Bowler Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ah4.c | 2 -- net/ipv6/ah6.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c1f4154..33ca186 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2195ae6..ede4d9d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -324,8 +324,6 @@ static void ah6_output_done(struct crypto_async_request *base, int err) #endif } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } -- cgit v1.1 From c0ab420c6822529fa5aba05668e1e983b065460f Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 8 Nov 2011 12:12:45 +0000 Subject: ah: Read nexthdr value before overwriting it in ahash input callback. commit b7ea81a58adc123a4e980cb0eff9eb5c144b5dc7 upstream. The AH4/6 ahash input callbacks read out the nexthdr field from the AH header *after* they overwrite that header. This is obviously not going to end well. Fix it up. Signed-off-by: Nick Bowler Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ah4.c | 4 ++-- net/ipv6/ah6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 33ca186..c7056b2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -262,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); skb_set_transport_header(skb, -ihl); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ede4d9d..7a33aaa 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -464,12 +464,12 @@ static void ah6_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); skb_set_transport_header(skb, -hdr_len); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); -- cgit v1.1 From ffee9a18f29a0645c2d117083e025f557c738018 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Thu, 10 Nov 2011 09:01:27 +0000 Subject: ah: Don't return NET_XMIT_DROP on input. commit 4b90a603a1b21d63cf743cc833680cb195a729f6 upstream. When the ahash driver returns -EBUSY, AH4/6 input functions return NET_XMIT_DROP, presumably copied from the output code path. But returning transmit codes on input doesn't make a lot of sense. Since NET_XMIT_DROP is a positive int, this gets interpreted as the next header type (i.e., success). As that can only end badly, remove the check. Signed-off-by: Nick Bowler Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ah4.c | 2 -- net/ipv6/ah6.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c7056b2..36d1440 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -369,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7a33aaa..4c0f894 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -581,8 +581,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } -- cgit v1.1 From 561331eae0a03d0c4cf60f3cf485aa3e8aa5ab48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jan 2012 00:41:38 +0000 Subject: netns: fix net_alloc_generic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 073862ba5d249c20bd5c49fc6d904ff0e1f6a672 ] When a new net namespace is created, we should attach to it a "struct net_generic" with enough slots (even empty), or we can hit the following BUG_ON() : [ 200.752016] kernel BUG at include/net/netns/generic.h:40! ... [ 200.752016] [] ? get_cfcnfg+0x3a/0x180 [ 200.752016] [] ? 
lockdep_rtnl_is_held+0x10/0x20 [ 200.752016] [] caif_device_notify+0x2e/0x530 [ 200.752016] [] notifier_call_chain+0x67/0x110 [ 200.752016] [] raw_notifier_call_chain+0x11/0x20 [ 200.752016] [] call_netdevice_notifiers+0x32/0x60 [ 200.752016] [] register_netdevice+0x196/0x300 [ 200.752016] [] register_netdev+0x19/0x30 [ 200.752016] [] loopback_net_init+0x4a/0xa0 [ 200.752016] [] ops_init+0x42/0x180 [ 200.752016] [] setup_net+0x6b/0x100 [ 200.752016] [] copy_net_ns+0x86/0x110 [ 200.752016] [] create_new_namespaces+0xd9/0x190 net_alloc_generic() should take into account the maximum index into the ptr array, as a subsystem might use net_generic() anytime. This also reduces number of reallocations in net_assign_generic() Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: Eric Dumazet Cc: Sjur Brændeland Cc: Eric W. Biederman Cc: Pavel Emelyanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net_namespace.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ea489db..0b0211d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,6 +29,20 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; + +static struct net_generic *net_alloc_generic(void) +{ + struct net_generic *ng; + size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + ng = kzalloc(generic_size, GFP_KERNEL); + if (ng) + ng->len = max_gen_ptrs; + + return ng; +} + static int net_assign_generic(struct net *net, int id, void *data) { struct net_generic *ng, *old_ng; @@ -42,8 +56,7 @@ static int net_assign_generic(struct net *net, int id, void *data) if (old_ng->len >= id) goto assign; - ng = kzalloc(sizeof(struct net_generic) + - id * sizeof(void *), GFP_KERNEL); + ng = net_alloc_generic(); if (ng == NULL) return -ENOMEM; @@ -58,7 +71,6 @@ static int net_assign_generic(struct net *net, int id, void *data) * the old copy for kfree after a grace period. */ - ng->len = id; memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); rcu_assign_pointer(net->gen, ng); @@ -159,18 +171,6 @@ out_undo: goto out; } -static struct net_generic *net_alloc_generic(void) -{ - struct net_generic *ng; - size_t generic_size = sizeof(struct net_generic) + - INITIAL_NET_GEN_PTRS * sizeof(void *); - - ng = kzalloc(generic_size, GFP_KERNEL); - if (ng) - ng->len = INITIAL_NET_GEN_PTRS; - - return ng; -} #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; @@ -481,6 +481,7 @@ again: } return error; } + max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); } error = __register_pernet_operations(list, ops); if (error) { -- cgit v1.1 From 62252cba2867cec7cc484ebb2d3ec705c41d9684 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jan 2012 14:04:53 +0000 Subject: net caif: Register properly as a pernet subsystem. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8a8ee9aff6c3077dd9c2c7a77478e8ed362b96c6 ] caif is a subsystem and as such it needs to register with register_pernet_subsys instead of register_pernet_device. Among other problems using register_pernet_device was resulting in net_generic being called before the caif_net structure was allocated. Which has been causing net_generic to fail with either BUG_ON's or by return NULL pointers. 
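The underlying rule is an initialization-ordering one: a netdevice notifier can fire for early devices such as loopback before a late-registered per-namespace structure exists, so that structure must be set up in an earlier phase than the consumers that assume it. A hedged sketch of the failure and of the ordering that avoids it, in plain C with illustrative names:

    #include <stdio.h>

    struct caif_net_state { int ready; };

    static struct caif_net_state *per_net_state;   /* what the lookup would return */

    static void subsystem_init(void)
    {
            static struct caif_net_state state = { .ready = 1 };
            per_net_state = &state;                /* runs early, before devices */
    }

    static void device_notifier(const char *dev)
    {
            if (!per_net_state) {                  /* the case the BUG_ON() used to hit */
                    printf("%s: notified before our state exists\n", dev);
                    return;
            }
            printf("%s: handled (ready=%d)\n", dev, per_net_state->ready);
    }

    int main(void)
    {
            /* broken order: loopback registration triggers the notifier first */
            device_notifier("lo");

            /* fixed order: subsystem-level init runs before any device shows up */
            subsystem_init();
            device_notifier("caif0");
            return 0;
    }

In the real kernel it is register_pernet_subsys() that guarantees the earlier phase, as the patch below does; the sketch only shows why the ordering matters.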
A more ugly problem that could be caused is packets in flight why the subsystem is shutting down. To remove confusion also remove the cruft cause by inappropriately trying to fix this bug. With the aid of the previous patch I have tested this patch and confirmed that using register_pernet_subsys makes the failure go away as it should. Signed-off-by: Eric W. Biederman Acked-by: Sjur Brændeland Tested-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 11 ++++------- net/caif/cfcnfg.c | 1 - 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index dbdaa95..5ba4366 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -53,7 +53,6 @@ struct cfcnfg *get_cfcnfg(struct net *net) struct caif_net *caifn; BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); return caifn->cfg; } EXPORT_SYMBOL(get_cfcnfg); @@ -63,7 +62,6 @@ static struct caif_device_entry_list *caif_device_list(struct net *net) struct caif_net *caifn; BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); return &caifn->caifdevs; } @@ -92,7 +90,6 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) struct caif_device_entry *caifd; caifdevs = caif_device_list(dev_net(dev)); - BUG_ON(!caifdevs); caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); if (!caifd) @@ -108,7 +105,7 @@ static struct caif_device_entry *caif_get(struct net_device *dev) struct caif_device_entry_list *caifdevs = caif_device_list(dev_net(dev)); struct caif_device_entry *caifd; - BUG_ON(!caifdevs); + list_for_each_entry_rcu(caifd, &caifdevs->list, list) { if (caifd->netdev == dev) return caifd; @@ -349,7 +346,7 @@ static struct notifier_block caif_device_notifier = { static int caif_init_net(struct net *net) { struct caif_net *caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); + INIT_LIST_HEAD(&caifn->caifdevs.list); mutex_init(&caifn->caifdevs.lock); @@ -414,7 +411,7 @@ static int __init caif_device_init(void) { int result; - result = register_pernet_device(&caif_net_ops); + result = register_pernet_subsys(&caif_net_ops); if (result) return result; @@ -427,7 +424,7 @@ static int __init caif_device_init(void) static void __exit caif_device_exit(void) { - unregister_pernet_device(&caif_net_ops); + unregister_pernet_subsys(&caif_net_ops); unregister_netdevice_notifier(&caif_device_notifier); dev_remove_pack(&caif_packet_type); } diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 52fe33b..bca32d7 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -313,7 +313,6 @@ int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, int err; struct cfctrl_link_param param; struct cfcnfg *cfg = get_cfcnfg(net); - caif_assert(cfg != NULL); rcu_read_lock(); err = caif_connect_req_to_link_param(cfg, conn_req, ¶m); -- cgit v1.1 From 1334533665277ccc5568c5104cd2358788a02e02 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 25 Jan 2012 02:39:05 +0000 Subject: l2tp: l2tp_ip - fix possible oops on packet receive [ Upstream commit 68315801dbf3ab2001679fd2074c9dc5dcf87dfa ] When a packet is received on an L2TP IP socket (L2TPv3 IP link encapsulation), the l2tpip socket's backlog_rcv function calls xfrm4_policy_check(). This is not necessary, since it was called before the skb was added to the backlog. With CONFIG_NET_NS enabled, xfrm4_policy_check() will oops if skb->dev is null, so this trivial patch removes the call. 
This bug has always been present, but only when CONFIG_NET_NS is enabled does it cause problems. Most users are probably using UDP encapsulation for L2TP, hence the problem has only recently surfaced. EIP: 0060:[] EFLAGS: 00210246 CPU: 0 EIP is at l2tp_ip_recvmsg+0xd4/0x2a7 EAX: 00000001 EBX: d77b5180 ECX: 00000000 EDX: 00200246 ESI: 00000000 EDI: d63cbd30 EBP: d63cbd18 ESP: d63cbcf4 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Call Trace: [] sock_common_recvmsg+0x31/0x46 [] __sock_recvmsg_nosec+0x45/0x4d [] __sock_recvmsg+0x31/0x3b [] sock_recvmsg+0x96/0xab [] ? might_fault+0x47/0x81 [] ? might_fault+0x47/0x81 [] ? _copy_from_user+0x31/0x115 [] ? copy_from_user+0x8/0xa [] ? verify_iovec+0x3e/0x78 [] __sys_recvmsg+0x10a/0x1aa [] ? sock_recvmsg+0x0/0xab [] ? __lock_acquire+0xbdf/0xbee [] ? do_page_fault+0x193/0x375 [] ? fcheck_files+0x9b/0xca [] ? fget_light+0x2a/0x9c [] sys_recvmsg+0x2b/0x43 [] sys_socketcall+0x16d/0x1a5 [] ? trace_hardirqs_on_thunk+0xc/0x10 [] sysenter_do_call+0x12/0x38 Code: c6 05 8c ea a8 c1 01 e8 0c d4 d9 ff 85 f6 74 07 3e ff 86 80 00 00 00 b9 17 b6 2b c1 ba 01 00 00 00 b8 78 ed 48 c1 e8 23 f6 d9 ff 76 0c 68 28 e3 30 c1 68 2d 44 41 c1 e8 89 57 01 00 83 c4 0c Signed-off-by: James Chapman Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b6466e7..858ca23 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -393,11 +393,6 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) { int rc; - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - - nf_reset(skb); - /* Charge it to the socket, dropping if the queue is full. */ rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) -- cgit v1.1 From f217c4711d71aa6811b6e71d219b9efafa5d55a6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Jan 2012 17:03:44 -0500 Subject: rds: Make rds_sock_lock BH rather than IRQ safe. [ Upstream commit efc3dbc37412c027e363736b4f4c74ee5e8ecffc ] rds_sock_info() triggers locking warnings because we try to perform a local_bh_enable() (via sock_i_ino()) while hardware interrupts are disabled (via taking rds_sock_lock). There is no reason for rds_sock_lock to be a hardware IRQ disabling lock, none of these access paths run in hardware interrupt context. Therefore making it a BH disabling lock is safe and sufficient to fix this bug. Reported-by: Kumar Sanghvi Reported-by: Josh Boyer Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/af_rds.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index bb6ad81..424ff62 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -68,7 +68,6 @@ static int rds_release(struct socket *sock) { struct sock *sk = sock->sk; struct rds_sock *rs; - unsigned long flags; if (!sk) goto out; @@ -94,10 +93,10 @@ static int rds_release(struct socket *sock) rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); rds_sock_count--; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); rds_trans_put(rs->rs_transport); @@ -409,7 +408,6 @@ static const struct proto_ops rds_proto_ops = { static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { - unsigned long flags; struct rds_sock *rs; sock_init_data(sock, sk); @@ -426,10 +424,10 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); rds_sock_count++; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); return 0; } @@ -471,12 +469,11 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, { struct rds_sock *rs; struct rds_incoming *inc; - unsigned long flags; unsigned int total = 0; len /= sizeof(struct rds_info_message); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { read_lock(&rs->rs_recv_lock); @@ -492,7 +489,7 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, read_unlock(&rs->rs_recv_lock); } - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds_info_message); @@ -504,11 +501,10 @@ static void rds_sock_info(struct socket *sock, unsigned int len, { struct rds_info_socket sinfo; struct rds_sock *rs; - unsigned long flags; len /= sizeof(struct rds_info_socket); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) goto out; @@ -529,7 +525,7 @@ out: lens->nr = rds_sock_count; lens->each = sizeof(struct rds_info_socket); - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); } static void rds_exit(void) -- cgit v1.1 From 8b4bb350e120fe0b32a0b1b8d227e65af03e3993 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 28 Jan 2012 17:29:46 +0000 Subject: tcp: fix tcp_trim_head() to adjust segment count with skb MSS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5b35e1e6e9ca651e6b291c96d1106043c9af314a ] This commit fixes tcp_trim_head() to recalculate the number of segments in the skb with the skb's existing MSS, so trimming the head causes the skb segment count to be monotonically non-increasing - it should stay the same or go down, but not increase. Previously tcp_trim_head() used the current MSS of the connection. But if there was a decrease in MSS between original transmission and ACK (e.g. due to PMTUD), this could cause tcp_trim_head() to counter-intuitively increase the segment count when trimming bytes off the head of an skb. 
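
A rough userspace illustration of that effect, using made-up numbers rather than anything taken from a real trace: ceiling-dividing the trimmed length by a *shrunken* current MSS can yield more segments than the skb originally carried, while dividing by the skb's own MSS cannot.

    #include <stdio.h>

    /* ceiling division, like the kernel's DIV_ROUND_UP() */
    static unsigned int pcount(unsigned int len, unsigned int mss)
    {
    	return (len + mss - 1) / mss;
    }

    int main(void)
    {
    	unsigned int skb_len = 2800, skb_mss = 1400;	/* sent as 2 segments */
    	unsigned int cur_mss = 700;			/* MSS later reduced, e.g. by PMTUD */
    	unsigned int trimmed = skb_len - 100;		/* ACK trims 100 bytes off the head */

    	printf("at transmit time          : %u segments\n", pcount(skb_len, skb_mss)); /* 2 */
    	printf("trim, recompute w/ cur_mss: %u segments\n", pcount(trimmed, cur_mss)); /* 4: count grew */
    	printf("trim, recompute w/ skb_mss: %u segments\n", pcount(trimmed, skb_mss)); /* 2: monotonic */
    	return 0;
    }
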
This violated assumptions in tcp_tso_acked() that tcp_trim_head() only decreases the packet count, so that packets_acked in tcp_tso_acked() could underflow, leading tcp_clean_rtx_queue() to pass u32 pkts_acked values as large as 0xffffffff to ca_ops->pkts_acked(). As an aside, if tcp_trim_head() had really wanted the skb to reflect the current MSS, it should have called tcp_set_skb_tso_segs() unconditionally, since a decrease in MSS would mean that a single-packet skb should now be sliced into multiple segments. Signed-off-by: Neal Cardwell Acked-by: Nandita Dukkipati Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0..faf257b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1134,11 +1134,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - /* Any change of skb->len requires recalculation of tso - * factor and mss. - */ + /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); return 0; } -- cgit v1.1 From 81ecd154d0b07bd5dab6e4f09336cb068b70bcb9 Mon Sep 17 00:00:00 2001 From: shawnlu Date: Fri, 20 Jan 2012 12:22:04 +0000 Subject: tcp: md5: using remote adress for md5 lookup in rst packet [ Upstream commit 8a622e71f58ec9f092fc99eacae0e6cf14f6e742 ] md5 key is added in socket through remote address. remote address should be used in finding md5 key when sending out reset packet. Signed-off-by: shawnlu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69790aa..53b0125 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -630,7 +630,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; + key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 296510a..51587a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1096,7 +1096,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); + key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr); #endif if (th->ack) -- cgit v1.1 From 8a533666d1591cf4ea596c6bd710e2fe682cb56a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Feb 2012 16:13:19 -0500 Subject: net: fix NULL dereferences in check_peer_redir() [ Upstream commit d3aaeb38c40e5a6c08dd31a1b64da65c4352be36, along with dependent backports of commits: 69cce1d1404968f78b177a0314f5822d5afdbbfb 9de79c127cccecb11ae6a21ab1499e87aa222880 218fa90f072e4aeff9003d57e390857f4f35513e 580da35a31f91a594f3090b7a2c39b85cb051a12 f7e57044eeb1841847c24aa06766c8290c202583 e049f28883126c689cf95859480d9ee4ab23b7fa ] Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) 
added a race, leading to possible NULL ptr dereference. Since we can now change dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add typical RCU penalty (cache cold effects) Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/atm/clip.c | 16 ++++++++----- net/bridge/br_netfilter.c | 6 +++-- net/core/dst.c | 15 ++++++++---- net/core/neighbour.c | 19 +++++++++------ net/decnet/dn_neigh.c | 8 +++---- net/decnet/dn_route.c | 18 ++++++++------- net/ipv4/arp.c | 28 +++++++++++++--------- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 22 ++++++++++++++---- net/ipv4/route.c | 31 +++++++++++++++---------- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6_output.c | 40 ++++++++++++++++++++++++-------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 59 +++++++++++++++++++++++++++++------------------ net/ipv6/sit.c | 4 ++-- net/sched/sch_teql.c | 31 ++++++++++++++++--------- net/xfrm/xfrm_policy.c | 2 +- 18 files changed, 200 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 1d4be60..5889074 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -364,33 +364,37 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct clip_priv *clip_priv = PRIV(dev); + struct dst_entry *dst = skb_dst(skb); struct atmarp_entry *entry; + struct neighbour *n; struct atm_vcc *vcc; int old; unsigned long flags; pr_debug("(skb %p)\n", skb); - if (!skb_dst(skb)) { + if (!dst) { pr_err("skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - if (!skb_dst(skb)->neighbour) { + n = dst_get_neighbour(dst); + if (!n) { #if 0 - skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1); - if (!skb_dst(skb)->neighbour) { + n = clip_find_neighbour(skb_dst(skb), 1); + if (!n) { dev_kfree_skb(skb); /* lost that one */ dev->stats.tx_dropped++; return 0; } + dst_set_neighbour(dst, n); #endif pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - entry = NEIGH2ENTRY(skb_dst(skb)->neighbour); + entry = NEIGH2ENTRY(n); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ @@ -407,7 +411,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, } pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", n, vcc); if (entry->vccs->encap) { void *here; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 56149ec..3dc7f54 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -343,24 +343,26 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); + neigh = dst_get_neighbour(dst); if (dst->hh) { neigh_hh_bridge(dst->hh, skb); skb->dev = nf_bridge->physindev; return br_handle_frame_finish(skb); - } else if 
(dst->neighbour) { + } else if (neigh) { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return dst->neighbour->output(skb); + return neigh->output(skb); } free_skb: kfree_skb(skb); diff --git a/net/core/dst.c b/net/core/dst.c index 6135f36..8246d47 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); dst->hh = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; @@ -231,7 +231,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); hh = dst->hh; child = dst->child; @@ -240,7 +240,7 @@ again: hh_cache_put(hh); if (neigh) { - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -367,14 +367,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16db887..8c54aff 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1173,12 +1173,17 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb_dst(skb) && skb_dst(skb)->neighbour) - n1 = skb_dst(skb)->neighbour; + if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + n1 = n2; n1->output(skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); @@ -1300,10 +1305,10 @@ EXPORT_SYMBOL(neigh_compat_output); int neigh_resolve_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh; + struct neighbour *neigh = dst_get_neighbour(dst); int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; __skb_pull(skb, skb_network_offset(skb)); @@ -1333,7 +1338,7 @@ out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? 
dst->neighbour : NULL); + dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1347,7 +1352,7 @@ int neigh_connected_output(struct sk_buff *skb) { int err; struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; unsigned int seq; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 602dade..9810610 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -208,7 +208,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; @@ -227,7 +227,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) static int dn_long_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; unsigned char *data; @@ -274,7 +274,7 @@ static int dn_long_output(struct sk_buff *skb) static int dn_short_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -318,7 +318,7 @@ static int dn_short_output(struct sk_buff *skb) static int dn_phase3_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 74544bc..b91b603 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -241,9 +241,11 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { + struct neighbour *n = dst_get_neighbour(dst); u32 min_mtu = 230; - struct dn_dev *dn = dst->neighbour ? - rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; + struct dn_dev *dn; + + dn = n ? 
rcu_dereference_raw(n->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -715,7 +717,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if ((neigh = dst->neighbour) == NULL) + if ((neigh = dst_get_neighbour(dst)) == NULL) goto error; skb->dev = dev; @@ -750,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); int header_len; #ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; @@ -833,11 +835,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && rt->dst.neighbour == NULL) { + if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->dst.neighbour = n; + dst_set_neighbour(&rt->dst, n); } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1144,7 +1146,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); neigh = NULL; rt->dst.lastuse = jiffies; @@ -1416,7 +1418,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch(res.type) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1b74d3b..1d5675e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -518,26 +518,32 @@ EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ +struct neighbour *__arp_bind_neighbour(struct dst_entry *dst, __be32 nexthop) +{ + struct net_device *dev = dst->dev; + + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + nexthop = 0; + return __neigh_lookup_errno( +#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + dev->type == ARPHRD_ATM ? + clip_tbl_hook : +#endif + &arp_tbl, &nexthop, dev); +} + int arp_bind_neighbour(struct dst_entry *dst) { struct net_device *dev = dst->dev; - struct neighbour *n = dst->neighbour; + struct neighbour *n = dst_get_neighbour(dst); if (dev == NULL) return -EINVAL; if (n == NULL) { - __be32 nexthop = ((struct rtable *)dst)->rt_gateway; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - nexthop = 0; - n = __neigh_lookup_errno( -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - dev->type == ARPHRD_ATM ? 
- clip_tbl_hook : -#endif - &arp_tbl, &nexthop, dev); + n = __arp_bind_neighbour(dst, ((struct rtable *)dst)->rt_gateway); if (IS_ERR(n)) return PTR_ERR(n); - dst->neighbour = n; + dst_set_neighbour(dst, n); } return 0; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8871067..d7bb94c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { + struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0c99db4..51a3eec 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -182,6 +182,8 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; + int res; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -203,10 +205,22 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + int res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4845bfe..65ff2e5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -416,7 +416,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; - int len; + struct neighbour *n; + int len, HHUptod; + + rcu_read_lock(); + n = dst_get_neighbour(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", @@ -431,8 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, - r->dst.hh ? 
(r->dst.hh->hh_output == - dev_queue_xmit) : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -1688,23 +1693,25 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(rt->dst.neighbour); - rt->dst.neighbour = NULL; - rt->rt_gateway = peer->redirect_learned.a4; - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); + n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); rt->rt_gateway = orig_gw; return -EAGAIN; } else { rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, - rt->dst.neighbour); + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498b927..0f335c6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (rt->rt6i_nexthop == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4076a0b..0f9b37a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e17596b..9cbf176 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -100,6 +100,8 @@ static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct neighbour *neigh; + int res; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -134,10 +136,22 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -385,6 +399,7 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct neighbour *n; u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) @@ -459,11 +474,10 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. 
*/ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb_sec_path(skb)) { + n = dst_get_neighbour(dst); + if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; - struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same @@ -949,8 +963,11 @@ out: static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { - int err; struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + struct neighbour *n; +#endif + int err; if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); @@ -976,11 +993,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + rcu_read_lock(); + n = dst_get_neighbour(*dst); + if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1020,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7596f07..10a8d41 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); if (rt) - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (rt && lifetime == 0) { neigh_clone(neigh); @@ -1265,7 +1265,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ef1f08..e70e902 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -356,7 +356,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -365,8 +365,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? 
dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -379,8 +381,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -404,8 +409,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = rt->rt6i_nexthop; + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -422,6 +430,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -745,8 +754,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add dst_free(&rt->dst); return NULL; } - rt->rt6i_nexthop = neigh; - + dst_set_neighbour(&rt->dst, neigh); } return rt; @@ -760,7 +768,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_a rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -794,7 +802,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1058,7 +1066,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->rt6i_idev = idev; - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; @@ -1338,12 +1346,12 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_prefsrc.plen = 0; if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(rt->rt6i_nexthop)) { - err = PTR_ERR(rt->rt6i_nexthop); - rt->rt6i_nexthop = NULL; + struct neighbour *neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); goto out; } + dst_set_neighbour(&rt->dst, neigh); } rt->rt6i_flags = cfg->fc_flags; @@ -1574,7 +1582,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->dst.neighbour) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt); @@ -1590,7 +1598,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); - nrt->rt6i_nexthop = neigh_clone(neigh); + dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); if (ip6_ins_rt(nrt)) goto out; @@ -1670,7 +1678,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 
*/ - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2035,7 +2043,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return ERR_CAST(neigh); } - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -2312,6 +2320,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2400,8 +2409,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2585,6 +2597,7 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; + struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2593,12 +2606,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - - if (rt->rt6i_nexthop) { - seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) { + seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 38490d5..f56acd0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -679,7 +679,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -704,7 +704,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 45cd300..4f4c52c 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int -__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, + struct net_device *dev, struct netdev_queue *txq, + struct neighbour *mn) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = skb_dst(skb)->neighbour; + struct teql_sched_data *q = qdisc_priv(txq->qdisc); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } static inline int teql_resolve(struct sk_buff *skb, - struct sk_buff *skb_res, struct net_device *dev) + struct sk_buff *skb_res, + struct net_device *dev, + struct netdev_queue *txq) { - 
struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *mn; + int res; + if (txq->qdisc == &noop_qdisc) return -ENODEV; - if (dev->header_ops == NULL || - skb_dst(skb) == NULL || - skb_dst(skb)->neighbour == NULL) + if (!dev->header_ops || !dst) return 0; - return __teql_resolve(skb, skb_res, dev); + + rcu_read_lock(); + mn = dst_get_neighbour(dst); + res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + rcu_read_unlock(); + + return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) @@ -307,7 +316,7 @@ restart: continue; } - switch (teql_resolve(skb, skb_res, slave)) { + switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5ce74a3..7803eb6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1497,7 +1497,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto free_dst; /* Copy neighbour for reachability confirmation */ - dst0->neighbour = neigh_clone(dst->neighbour); + dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst))); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); -- cgit v1.1 From 36935521cd67e3df9a1db71591cf224252d6082c Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 1 Feb 2012 18:48:09 +0200 Subject: mac80211: timeout a single frame in the rx reorder buffer commit 07ae2dfcf4f7143ce191c6436da1c33f179af0d6 upstream. The current code checks for stored_mpdu_num > 1, causing the reorder_timer to be triggered indefinitely, but the frame is never timed-out (until the next packet is received) Signed-off-by: Eliad Peller Acked-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 378bd67..4100065 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -610,7 +610,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && - tid_agg_rx->stored_mpdu_num > 1) { + tid_agg_rx->stored_mpdu_num) { /* * No buffers ready to be released, but check whether any * frames in the reorder buffer have timed out. -- cgit v1.1 From 0a3e045705af3ea9d61560d4f6ffe2ce62f81992 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Thu, 9 Feb 2012 19:59:43 +0530 Subject: mac80211: Fix a rwlock bad magic bug commit b57e6b560fc2a2742910ac5ca0eb2c46e45aeac2 upstream. read_lock(&tpt_trig->trig.leddev_list_lock) is accessed via the path ieee80211_open (->) ieee80211_do_open (->) ieee80211_mod_tpt_led_trig (->) ieee80211_start_tpt_led_trig (->) tpt_trig_timer before initializing it. the intilization of this read/write lock happens via the path ieee80211_led_init (->) led_trigger_register, but we are doing 'ieee80211_led_init' after 'ieeee80211_if_add' where we register netdev_ops. 
so we access leddev_list_lock before initializing it and causes the following bug in chrome laptops with AR928X cards with the following script while true do sudo modprobe -v ath9k sleep 3 sudo modprobe -r ath9k sleep 3 done BUG: rwlock bad magic on CPU#1, wpa_supplicant/358, f5b9eccc Pid: 358, comm: wpa_supplicant Not tainted 3.0.13 #1 Call Trace: [<8137b9df>] rwlock_bug+0x3d/0x47 [<81179830>] do_raw_read_lock+0x19/0x29 [<8137f063>] _raw_read_lock+0xd/0xf [] tpt_trig_timer+0xc3/0x145 [mac80211] [] ieee80211_mod_tpt_led_trig+0x152/0x174 [mac80211] [] ieee80211_do_open+0x11e/0x42e [mac80211] [] ? ieee80211_check_concurrent_iface+0x26/0x13c [mac80211] [] ieee80211_open+0x48/0x4c [mac80211] [<812dbed8>] __dev_open+0x82/0xab [<812dc0c9>] __dev_change_flags+0x9c/0x113 [<812dc1ae>] dev_change_flags+0x18/0x44 [<8132144f>] devinet_ioctl+0x243/0x51a [<81321ba9>] inet_ioctl+0x93/0xac [<812cc951>] sock_ioctl+0x1c6/0x1ea [<812cc78b>] ? might_fault+0x20/0x20 [<810b1ebb>] do_vfs_ioctl+0x46e/0x4a2 [<810a6ebb>] ? fget_light+0x2f/0x70 [<812ce549>] ? sys_recvmsg+0x3e/0x48 [<810b1f35>] sys_ioctl+0x46/0x69 [<8137fa77>] sysenter_do_call+0x12/0x2 Cc: Gary Morain Cc: Paul Stewart Cc: Abhijit Pradhan Cc: Vasanthakumar Thiagarajan Cc: Rajkumar Manoharan Acked-by: Johannes Berg Tested-by: Mohammed Shafi Shajakhan Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269..1e36fb3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -910,6 +910,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + ieee80211_led_init(local); + rtnl_lock(); result = ieee80211_init_rate_ctrl_alg(local, @@ -931,8 +933,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rtnl_unlock(); - ieee80211_led_init(local); - local->network_latency_notifier.notifier_call = ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, -- cgit v1.1 From c3e8445f6ec4ad66c5143d6df8528f7440429b91 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Feb 2012 15:14:37 -0500 Subject: net: Make qdisc_skb_cb upper size bound explicit. [ Upstream commit 16bda13d90c8d5da243e2cfa1677e62ecce26860 ] Just like skb->cb[], so that qdisc_skb_cb can be encapsulated inside of other data structures. This is intended to be used by IPoIB so that it can remember addressing information stored at hard_header_ops->create() time that it can fetch when the packet gets to the transmit routine. Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_choke.c | 3 +-- net/sched/sch_netem.c | 3 +-- net/sched/sch_sfb.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 06afbae..178ee83 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -225,8 +225,7 @@ struct choke_skb_cb { static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb)); return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 69c35f6..87b9658 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -117,8 +117,7 @@ struct netem_skb_cb { static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0a833d0..47ee29f 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -93,8 +93,7 @@ struct sfb_skb_cb { static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb)); return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; } -- cgit v1.1 From 32fa5d83232b026092505e9165e119e5806b930d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Feb 2012 08:51:50 +0000 Subject: gro: more generic L2 header check [ Upstream commit 5ca3b72c5da47d95b83857b768def6172fbc080a ] Shlomo Pongratz reported GRO L2 header check was suited for Ethernet only, and failed on IB/ipoib traffic. He provided a patch faking a zeroed header to let GRO aggregates frames. Roland Dreier, Herbert Xu, and others suggested we change GRO L2 header check to be more generic, ie not assuming L2 header is 14 bytes, but taking into account hard_header_len. __napi_gro_receive() has special handling for the common case (Ethernet) to avoid a memcmp() call and use an inline optimized function instead. Signed-off-by: Eric Dumazet Reported-by: Shlomo Pongratz Cc: Roland Dreier Cc: Or Gerlitz Cc: Herbert Xu Tested-by: Sean Hefty Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f14f601..17fdbf8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3434,14 +3434,20 @@ static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.1 From 23b139ecf944b097a0493262cbc04886363bd8e6 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 27 Sep 2011 15:16:08 -0400 Subject: ipv6-multicast: Fix memory leak in input path. [ Upstream commit 2015de5fe2a47086a3260802275932bfd810884e ] Have to free the skb before returning if we fail the fib lookup. Signed-off-by: Ben Greear Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 82a8099..450a1ff 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2051,8 +2051,10 @@ int ip6_mr_input(struct sk_buff *skb) int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, -- cgit v1.1 From 24190a04c967d91e718bbe7a871e418edb9424aa Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 23 Sep 2011 13:11:01 +0000 Subject: ipv6-multicast: Fix memory leak in IPv6 multicast. [ Upstream commit 67928c4041606f02725f3c95c4c0404e4532df1b ] If reg_vif_xmit cannot find a routing entry, be sure to free the skb before returning the error. Signed-off-by: Ben Greear Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 450a1ff..86e3cc1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; -- cgit v1.1 From c4f2403478002bdb0a46a62f87909aeda8058d8b Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 8 Nov 2011 21:39:28 +0000 Subject: ipv4: fix for ip_options_rcv_srr() daddr update. [ Upstream commit b12f62efb8ec0b9523bdb6c2d412c07193086de9 ] When opt->srr_is_hit is set skb_rtable(skb) has been updated for 'nexthop' and iph->daddr should always equals to skb_rtable->rt_dst holds, We need update iph->daddr either. Signed-off-by: Li Wei Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_options.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec93335..05d20cc 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -640,6 +640,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) } if (srrptr <= srrspace) { opt->srr_is_hit = 1; + iph->daddr = nexthop; opt->is_changed = 1; } return 0; -- cgit v1.1 From 5805d4729059f578c8283ccc44021342fbe8c93d Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 22 Nov 2011 23:33:10 +0000 Subject: ipv4: Save nexthop address of LSRR/SSRR option to IPCB. [ Upstream commit ac8a48106be49c422575ddc7531b776f8eb49610 ] We can not update iph->daddr in ip_options_rcv_srr(), It is too early. When some exception ocurred later (eg. in ip_forward() when goto sr_failed) we need the ip header be identical to the original one as ICMP need it. Add a field 'nexthop' in struct ip_options to save nexthop of LSRR or SSRR option. Signed-off-by: Li Wei Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_options.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3b34d1c..29a07b6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway) + if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 05d20cc..1e60f76 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) + if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; ip_rt_get_source(&optptr[srrptr-1], skb, rt); + ip_hdr(skb)->daddr = opt->nexthop; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); @@ -640,7 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) } if (srrptr <= srrspace) { opt->srr_is_hit = 1; - iph->daddr = nexthop; + opt->nexthop = nexthop; opt->is_changed = 1; } return 0; -- cgit v1.1 From ab2fd30a38e23f0b6f2e331889b31f4f6fb2378a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 9 Feb 2012 21:15:25 +0000 Subject: ipv4: Fix wrong order of ip_rt_get_source() and update iph->daddr. [ Upstream commit 5dc7883f2a7c25f8df40d7479687153558cd531b ] This patch fix a bug which introduced by commit ac8a4810 (ipv4: Save nexthop address of LSRR/SSRR option to IPCB.).In that patch, we saved the nexthop of SRR in ip_option->nexthop and update iph->daddr until we get to ip_forward_options(), but we need to update it before ip_rt_get_source(), otherwise we may get a wrong src. Signed-off-by: Li Wei Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1e60f76..42dd1a9 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb) } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; - ip_rt_get_source(&optptr[srrptr-1], skb, rt); ip_hdr(skb)->daddr = opt->nexthop; + ip_rt_get_source(&optptr[srrptr-1], skb, rt); optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); -- cgit v1.1 From 1831cd9e1fb43c2e700e795827cb9531e679b0ef Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 10 Feb 2012 04:07:11 +0000 Subject: net: Don't proxy arp respond if iif == rt->dst.dev if private VLAN is disabled [ Upstream commit 70620c46ac2b45c24b0f22002fdf5ddd1f7daf81 ] Commit 653241 (net: RFC3069, private VLAN proxy arp support) changed the behavior of arp proxy to send arp replies back out on the interface the request came in even if the private VLAN feature is disabled. Previously we checked rt->dst.dev != skb->dev for in scenarios, when proxy arp is enabled on for the netdevice and also when individual proxy neighbour entries have been added. This patch adds the check back for the pneigh_lookup() scenario. Signed-off-by: Thomas Graf Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1d5675e..d8f852d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -906,7 +906,8 @@ static int arp_process(struct sk_buff *skb) if (addr_type == RTN_UNICAST && (arp_fwd_proxy(in_dev, dev, rt) || arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || - pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + (rt->dst.dev != dev && + pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); -- cgit v1.1 From 9f8a28dca634c4aa13127ed2ea5de94c1e47dbb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Feb 2012 10:11:59 +0000 Subject: netpoll: netpoll_poll_dev() should access dev->flags [ Upstream commit 58e05f357a039a94aa36475f8c110256f693a239 ] commit 5a698af53f (bond: service netpoll arp queue on master device) tested IFF_SLAVE flag against dev->priv_flags instead of dev->flags Signed-off-by: Eric Dumazet Cc: WANG Cong Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 18d9cbd..05db410 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,7 +193,7 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); - if (dev->priv_flags & IFF_SLAVE) { + if (dev->flags & IFF_SLAVE) { if (dev->npinfo) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; -- cgit v1.1 From 1609e23b0c313d58664bcc0677eadd9464c8c2cc Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Wed, 4 Jan 2012 17:35:26 +0000 Subject: net_sched: Bug in netem reordering [ Upstream commit eb10192447370f19a215a8c2749332afa1199d46 ] Not now, but it looks you are correct. q->qdisc is NULL until another additional qdisc is attached (beside tfifo). See 50612537e9ab2969312. The following patch should work. 
From: Hagen Paul Pfeifer netem: catch NULL pointer by updating the real qdisc statistic Reported-by: Vijay Subramanian Signed-off-by: Hagen Paul Pfeifer Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 87b9658..2f68459 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -381,8 +381,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->counter = 0; __skb_queue_head(&q->qdisc->q, skb); - q->qdisc->qstats.backlog += qdisc_pkt_len(skb); - q->qdisc->qstats.requeues++; + sch->qstats.backlog += qdisc_pkt_len(skb); + sch->qstats.requeues++; ret = NET_XMIT_SUCCESS; } -- cgit v1.1 From 39b73fb4fedd23980b720d0272e5e26510cd6940 Mon Sep 17 00:00:00 2001 From: Shawn Lu Date: Sat, 4 Feb 2012 12:38:09 +0000 Subject: tcp_v4_send_reset: binding oif to iif in no sock case [ Upstream commit e2446eaab5585555a38ea0df4e01ff313dbb4ac9 ] Bind the RST packet's outgoing interface to the incoming interface for tcp v4 when there is no socket associated with it. When sk is not NULL, use sk->sk_bound_dev_if instead (suggested by Eric Dumazet). This has a few benefits: 1. tcp_v6_send_reset already does that. 2. This helps tcp connect with SO_BINDTODEVICE set. When the connection is lost, we are still able to send out the RST using the same interface. 3. We are sending a reply, and it is most likely to succeed if the iif is used. Signed-off-by: Shawn Lu Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 53b0125..04c6592 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -650,6 +650,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. * routing might fail in this case. using iif for oif to * make sure we can deliver it */ + arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, -- cgit v1.1 From 382e8f84cb3db5295aa2f142a11e42eac6544ab4 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 12 Feb 2012 18:37:09 +0000 Subject: tcp: allow tcp_sacktag_one() to tag ranges not aligned with skbs [ Upstream commit cc9a672ee522d4805495b98680f4a3db5d0a0af9 ] This commit allows callers of tcp_sacktag_one() to pass in sequence ranges that do not align with skb boundaries, as tcp_shifted_skb() needs to do in an upcoming fix in this patch series. In fact, now tcp_sacktag_one() does not need to depend on an input skb at all, which makes its semantics and dependencies more clear. Signed-off-by: Neal Cardwell Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c68040f..af41044 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1289,25 +1289,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - struct tcp_sacktag_state *state, +/* Mark the given newly-SACKed range as such, adjusting counters and hints. */ +static u8 tcp_sacktag_one(struct sock *sk, + struct tcp_sacktag_state *state, u8 sacked, + u32 start_seq, u32 end_seq, int dup_sack, int pcount) { struct tcp_sock *tp = tcp_sk(sk); - u8 sacked = TCP_SKB_CB(skb)->sacked; int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans && - after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + after(end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) state->reord = min(fack_count, state->reord); } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + if (!after(end_seq, tp->snd_una)) return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { @@ -1326,13 +1327,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* New sack for not retransmitted frame, * which was in hole. It is reordering. */ - if (before(TCP_SKB_CB(skb)->seq, + if (before(start_seq, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) + if (!after(end_seq, tp->frto_highmark)) state->flag |= FLAG_ONLY_ORIG_SACKED; } @@ -1350,8 +1351,7 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->lost_skb_hint)->seq)) + before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; if (fack_count > tp->fackets_out) @@ -1407,7 +1407,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, } /* We discard results */ - tcp_sacktag_one(skb, sk, state, dup_sack, pcount); + tcp_sacktag_one(sk, state, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, + dup_sack, pcount); /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -1646,10 +1650,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, break; if (in_sack) { - TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, - state, - dup_sack, - tcp_skb_pcount(skb)); + TCP_SKB_CB(skb)->sacked = + tcp_sacktag_one(sk, + state, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, + dup_sack, + tcp_skb_pcount(skb)); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) -- cgit v1.1 From dd31c1ce7ef7b363215081fde02f13bf3e50b5a1 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 12 Feb 2012 18:37:10 +0000 Subject: tcp: fix range tcp_shifted_skb() passes to tcp_sacktag_one() [ Upstream commit daef52bab1fd26e24e8e9578f8fb33ba1d0cb412 ] Fix the newly-SACKed range to be the range of newly-shifted bytes. 
Previously - since 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 - tcp_shifted_skb() incorrectly called tcp_sacktag_one() with the start and end sequence numbers of the skb it passes in set to the range just beyond the range that is newly-SACKed. This commit also removes a special-case adjustment to lost_cnt_hint in tcp_shifted_skb() since the pre-existing adjustment of lost_cnt_hint in tcp_sacktag_one() now properly handles this things now that the correct start sequence number is passed in. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index af41044..78689e5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1370,6 +1370,9 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; } +/* Shift newly-SACKed bytes from this skb to the immediately previous + * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. + */ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss, @@ -1377,12 +1380,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ + u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ BUG_ON(!pcount); - if (skb == tp->lost_skb_hint) - tp->lost_cnt_hint += pcount; - TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; @@ -1406,12 +1408,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* We discard results */ - tcp_sacktag_one(sk, state, - TCP_SKB_CB(skb)->sacked, - TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq, - dup_sack, pcount); + /* Adjust counters and hints for the newly sacked sequence range but + * discard the return value since prev is already marked. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); -- cgit v1.1 From 623f1904ef55789082259573bb6248df5fea3d92 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 13 Feb 2012 20:22:08 +0000 Subject: tcp: fix tcp_shifted_skb() adjustment of lost_cnt_hint for FACK [ Upstream commit 0af2a0d0576205dda778d25c6c344fc6508fc81d ] This commit ensures that lost_cnt_hint is correctly updated in tcp_shifted_skb() for FACK TCP senders. The lost_cnt_hint adjustment in tcp_sacktag_one() only applies to non-FACK senders, so FACK senders need their own adjustment. This applies the spirit of 1e5289e121372a3494402b1b131b41bfe1cf9b7f - except now that the sequence range passed into tcp_sacktag_one() is correct we need only have a special case adjustment for FACK. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 78689e5..ee08f11f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1385,6 +1385,10 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); + /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). 
*/ + if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) + tp->lost_cnt_hint += pcount; + TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; -- cgit v1.1 From bebee22bcbf0026f92141990972bd5863ef9b69c Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 24 Oct 2011 02:56:38 -0400 Subject: route: fix ICMP redirect validation [ Upstream commit 7cc9150ebe8ec06cafea9f1c10d92ddacf88d8ae ] The commit f39925dbde7788cfb96419c0f092b086aa325c0f (ipv4: Cache learned redirect information in inetpeer.) removed some ICMP packet validations which are required by RFC 1122, section 3.2.2.2: ... A Redirect message SHOULD be silently discarded if the new gateway address it specifies is not on the same connected (sub-) net through which the Redirect arrived [INTRO:2, Appendix A], or if the source of the Redirect is not the current first-hop gateway for the specified destination (see Section 3.3.1). Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 65ff2e5..f881be2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1373,7 +1373,12 @@ static void rt_del(unsigned hash, struct rtable *rt) void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { + int s, i; struct in_device *in_dev = __in_dev_get_rcu(dev); + struct rtable *rt; + __be32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; + struct flowi4 fl4; struct inet_peer *peer; struct net *net; @@ -1396,13 +1401,34 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - peer = inet_getpeer_v4(daddr, 1); - if (peer) { - peer->redirect_learned.a4 = new_gw; + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + for (s = 0; s < 2; s++) { + for (i = 0; i < 2; i++) { + fl4.flowi4_oif = ikeys[i]; + fl4.saddr = skeys[s]; + rt = __ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + continue; - inet_putpeer(peer); + if (rt->dst.error || rt->dst.dev != dev || + rt->rt_gateway != old_gw) { + ip_rt_put(rt); + continue; + } - atomic_inc(&__rt_peer_genid); + if (!rt->peer) + rt_bind_peer(rt, rt->rt_dst, 1); + + peer = rt->peer; + if (peer) { + peer->redirect_learned.a4 = new_gw; + atomic_inc(&__rt_peer_genid); + } + + ip_rt_put(rt); + return; + } } return; -- cgit v1.1 From 42ab5316ddcaa0de23e88e8a3d363c767b9ab0b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Nov 2011 15:24:32 -0500 Subject: ipv4: fix redirect handling [ Upstream commit 9cc20b268a5a14f5e57b8ad405a83513ab0d78dc ] commit f39925dbde77 (ipv4: Cache learned redirect information in inetpeer.) introduced a regression in ICMP redirect handling. It assumed ipv4_dst_check() would be called because all possible routes were attached to the inetpeer we modify in ip_rt_redirect(), but thats not true. commit 7cc9150ebe (route: fix ICMP redirect validation) tried to fix this but solution was not complete. (It fixed only one route) So we must lookup existing routes (including different TOS values) and call check_peer_redir() on them. Reported-by: Ivan Zahariev Signed-off-by: Eric Dumazet CC: Flavio Leitner Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 109 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f881be2..6b95f74 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1369,16 +1369,41 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } +static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + struct neighbour *n, *old_n; + + dst_confirm(&rt->dst); + + rt->rt_gateway = peer->redirect_learned.a4; + n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); + rt->rt_gateway = orig_gw; + return -EAGAIN; + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } + return 0; +} + /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { int s, i; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rt; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - struct flowi4 fl4; struct inet_peer *peer; struct net *net; @@ -1401,33 +1426,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = daddr; for (s = 0; s < 2; s++) { for (i = 0; i < 2; i++) { - fl4.flowi4_oif = ikeys[i]; - fl4.saddr = skeys[s]; - rt = __ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - continue; - - if (rt->dst.error || rt->dst.dev != dev || - rt->rt_gateway != old_gw) { - ip_rt_put(rt); - continue; - } + unsigned int hash; + struct rtable __rcu **rthp; + struct rtable *rt; + + hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); + + rthp = &rt_hash_table[hash].chain; + + while ((rt = rcu_dereference(*rthp)) != NULL) { + rthp = &rt->dst.rt_next; + + if (rt->rt_key_dst != daddr || + rt->rt_key_src != skeys[s] || + rt->rt_oif != ikeys[i] || + rt_is_input_route(rt) || + rt_is_expired(rt) || + !net_eq(dev_net(rt->dst.dev), net) || + rt->dst.error || + rt->dst.dev != dev || + rt->rt_gateway != old_gw) + continue; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); + if (!rt->peer) + rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; - if (peer) { - peer->redirect_learned.a4 = new_gw; - atomic_inc(&__rt_peer_genid); + peer = rt->peer; + if (peer) { + if (peer->redirect_learned.a4 != new_gw) { + peer->redirect_learned.a4 = new_gw; + atomic_inc(&__rt_peer_genid); + } + check_peer_redir(&rt->dst, peer); + } } - - ip_rt_put(rt); - return; } } return; @@ -1715,33 +1749,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - struct neighbour *n, *old_n; - - dst_confirm(&rt->dst); - - rt->rt_gateway = peer->redirect_learned.a4; - n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway); - if (IS_ERR(n)) - return PTR_ERR(n); - old_n = xchg(&rt->dst._neighbour, n); - if (old_n) - neigh_release(old_n); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); - rt->rt_gateway = orig_gw; - return -EAGAIN; - } else { - 
rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - return 0; -} - static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; -- cgit v1.1 From 426f45680cc71385a8929f11654c789f5019315c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 27 Jan 2012 10:45:27 +0900 Subject: ipvs: fix matching of fwmark templates during scheduling commit e0aac52e17a3db68fe2ceae281780a70fc69957f upstream. Commit f11017ec2d1859c661f4e2b12c4a8d250e1f47cf (2.6.37) moved the fwmark variable into a subcontext that is invalidated before reaching the ip_vs_ct_in_get call. As vaddr is provided as a pointer in the param structure, make sure the fwmark variable is in the same context. As the fwmark templates cannot be matched, more and more template connections are created and the controlled connections cannot go to a single real server. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 24c28d2..0787bed 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -233,6 +233,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 dport = 0; /* destination port to forward */ unsigned int flags; struct ip_vs_conn_param param; + const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -268,7 +269,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc, { int protocol = iph.protocol; const union nf_inet_addr *vaddr = &iph.daddr; - const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; __be16 vport = 0; if (dst_port == svc->port) { -- cgit v1.1 From 34a9660ba1a8b98adf852f4f1090bdf084ccf991 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Mon, 20 Feb 2012 10:05:31 +0530 Subject: mac80211: zero initialize count field in ieee80211_tx_rate commit 8617b093d0031837a7be9b32bc674580cfb5f6b5 upstream. Rate control algorithms conclude the rate is invalid when rate[i].idx < -1, while they also check that rate[i].count is non-zero. It would be safer to zero-initialize the 'count' field. Recently we had an ath9k rate control crash where the ath9k rate control in ath_tx_status assumed it only had to check for rate[i].count being non-zero in one instance and ended up using an invalid rate index for 'connection monitoring NULL func frames', which eventually led to the crash. Thanks to Pavel Roskin for fixing it and finding the root cause. https://bugzilla.redhat.com/show_bug.cgi?id=768639 Cc: Pavel Roskin Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W.
Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3d5a2cb..816590b 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -314,7 +314,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { info->control.rates[i].idx = -1; info->control.rates[i].flags = 0; - info->control.rates[i].count = 1; + info->control.rates[i].count = 0; } if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) -- cgit v1.1 From 035e3f6e8d1353abcbefd5b87710f8ae8bf1b4f6 Mon Sep 17 00:00:00 2001 From: Michel Machado Date: Tue, 21 Feb 2012 11:04:13 +0000 Subject: neighbour: Fixed race condition at tbl->nht [ Upstream commit 84338a6c9dbb6ff3de4749864020f8f25d86fc81 ] When the fixed race condition happens: 1. While function neigh_periodic_work scans the neighbor hash table pointed by field tbl->nht, it unlocks and locks tbl->lock between buckets in order to call cond_resched. 2. Assume that function neigh_periodic_work calls cond_resched, that is, the lock tbl->lock is available, and function neigh_hash_grow runs. 3. Once function neigh_hash_grow finishes, and RCU calls neigh_hash_free_rcu, the original struct neigh_hash_table that function neigh_periodic_work was using doesn't exist anymore. 4. Once back at neigh_periodic_work, whenever the old struct neigh_hash_table is accessed, things can go badly. Signed-off-by: Michel Machado CC: "David S. Miller" CC: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8c54aff..96bb0a3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -823,6 +823,8 @@ next_elt: write_unlock_bh(&tbl->lock); cond_resched(); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); } /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 -- cgit v1.1 From e38b849e2fc481c5a6924f6872468104969e5d3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2012 10:55:02 +0000 Subject: ipsec: be careful of non existing mac headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 03606895cd98c0a628b17324fd7b5ff15db7e3cd ] Niccolo Belli reported ipsec crashes in case we handle a frame without mac header (atm in his case) Before copying mac header, better make sure it is present. Bugzilla reference: https://bugzilla.kernel.org/show_bug.cgi?id=42809 Reported-by: Niccolò Belli Tested-by: Niccolò Belli Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/xfrm4_mode_beet.c | 5 +---- net/ipv4/xfrm4_mode_tunnel.c | 6 ++---- net/ipv6/xfrm6_mode_beet.c | 6 +----- net/ipv6/xfrm6_mode_tunnel.c | 6 ++---- 4 files changed, 6 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 6341818..e3db3f9 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e..ed4bf11 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 3437d7d..f37cba9 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4d6edff..23ecd68 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: -- cgit v1.1 From 85526d578a0c7b6723c1a429b39870ce3bfec11c Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 26 Feb 2012 10:06:19 +0000 Subject: tcp: fix false reordering signal in tcp_shifted_skb [ Upstream commit 4c90d3b30334833450ccbb02f452d4972a3c3c3f ] When tcp_shifted_skb() shifts bytes from the skb that is currently pointed to by 'highest_sack' 
then the increment of TCP_SKB_CB(skb)->seq implicitly advances tcp_highest_sack_seq(). This implicit advancement, combined with the recent fix to pass the correct SACKed range into tcp_sacktag_one(), caused tcp_sacktag_one() to think that the newly SACKed range was before the tcp_highest_sack_seq(), leading to a call to tcp_update_reordering() with a degree of reordering matching the size of the newly SACKed range (typically just 1 packet, which is a NOP, but potentially larger). This commit fixes this by simply calling tcp_sacktag_one() before the TCP_SKB_CB(skb)->seq advancement that can advance our notion of the highest SACKed sequence. Correspondingly, we can simplify the code a little now that tcp_shifted_skb() should update the lost_cnt_hint in all cases where skb == tp->lost_skb_hint. Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ee08f11f..8eb1302 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1385,8 +1385,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ - if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) + /* Adjust counters and hints for the newly sacked sequence + * range but discard the return value since prev is already + * marked. We must tag the range first because the seq + * advancement below implicitly advances + * tcp_highest_sack_seq() when skb is highest_sack. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); + + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; @@ -1412,12 +1420,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* Adjust counters and hints for the newly sacked sequence range but - * discard the return value since prev is already marked. - */ - tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, - start_seq, end_seq, dup_sack, pcount); - /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); -- cgit v1.1 From 6046dc7d1b061d0f8e820757716de7df5079aa35 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 2 Mar 2012 21:36:51 +0000 Subject: tcp: don't fragment SACKed skbs in tcp_mark_head_lost() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0638c247f559e1a16ee79e54df14bca2cb679ea ] In tcp_mark_head_lost() we should not attempt to fragment a SACKed skb to mark the first portion as lost. This is for two primary reasons: (1) tcp_shifted_skb() coalesces adjacent regions of SACKed skbs. When doing this, it preserves the sum of their packet counts in order to reflect the real-world dynamics on the wire. But given that skbs can have remainders that do not align to MSS boundaries, this packet count preservation means that for SACKed skbs there is not necessarily a direct linear relationship between tcp_skb_pcount(skb) and skb->len. 
Thus tcp_mark_head_lost()'s previous attempts to fragment off and mark as lost a prefix of length (packets - oldcnt)*mss from SACKed skbs were leading to occasional failures of the WARN_ON(len > skb->len) in tcp_fragment() (which used to be a BUG_ON(); see the recent "crash in tcp_fragment" thread on netdev). (2) there is no real point in fragmenting off part of a SACKed skb and calling tcp_skb_mark_lost() on it, since tcp_skb_mark_lost() is a NOP for SACKed skbs. Signed-off-by: Neal Cardwell Acked-by: Ilpo Järvinen Acked-by: Yuchung Cheng Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8eb1302..ab9dcfb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2549,6 +2549,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) { if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; -- cgit v1.1 From b77a726051a42d382ec1d20c49b3a60bf7dd54d8 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 5 Mar 2012 04:52:44 +0000 Subject: bridge: check return value of ipv6_dev_get_saddr() [ Upstream commit d1d81d4c3dd886d5fa25a2c4fa1e39cb89613712 ] otherwise source IPv6 address of ICMPV6_MGM_QUERY packet might be random junk if IPv6 is disabled on interface or link-local address is not yet ready (DAD). Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 995cbe0..c23a4b1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -446,8 +446,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); - ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, - &ip6h->saddr); + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr)) { + kfree_skb(skb); + return NULL; + } ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); -- cgit v1.1 From 619b6e476fdf85f17d80df72f647f2fb85535339 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 5 Mar 2012 19:35:04 +0000 Subject: tcp: fix tcp_shift_skb_data() to not shift SACKed data below snd_una [ Upstream commit 4648dc97af9d496218a05353b0e442b3dfa6aaab ] This commit fixes tcp_shift_skb_data() so that it does not shift SACKed data below snd_una. This fixes an issue whose symptoms exactly match reports showing tp->sacked_out going negative since 3.3.0-rc4 (see "WARNING: at net/ipv4/tcp_input.c:3418" thread on netdev). Since 2008 (832d11c5cd076abc0aa1eaf7be96c81d1a59ce41) tcp_shift_skb_data() had been shifting SACKed ranges that were below snd_una. It checked that the *end* of the skb it was about to shift from was above snd_una, but did not check that the end of the actual shifted range was above snd_una; this commit adds that check. Shifting SACKed ranges below snd_una is problematic because for such ranges tcp_sacktag_one() short-circuits: it does not declare anything as SACKed and does not increase sacked_out. 
Before the fixes in commits cc9a672ee522d4805495b98680f4a3db5d0a0af9 and daef52bab1fd26e24e8e9578f8fb33ba1d0cb412, shifting SACKed ranges below snd_una happened to work because tcp_shifted_skb() was always (incorrectly) passing in to tcp_sacktag_one() an skb whose end_seq tcp_shift_skb_data() had already guaranteed was beyond snd_una. Hence tcp_sacktag_one() never short-circuited and always increased tp->sacked_out in this case. After those two fixes, my testing has verified that shifting SACKed ranges below snd_una could cause tp->sacked_out to go negative with the following sequence of events: (1) tcp_shift_skb_data() sees an skb whose end_seq is beyond snd_una, then shifts a prefix of that skb that is below snd_una (2) tcp_shifted_skb() increments the packet count of the already-SACKed prev sk_buff (3) tcp_sacktag_one() sees the end of the new SACKed range is below snd_una, so it short-circuits and doesn't increase tp->sacked_out (4) tcp_clean_rtx_queue() sees the SACKed skb has been ACKed, decrements tp->sacked_out by this "inflated" pcount that was missing a matching increase in tp->sacked_out, and hence tp->sacked_out underflows to a u32 like 0xFFFFFFFF, which cast to s32 is negative. (5) This leads to the warnings seen in the recent "WARNING: at net/ipv4/tcp_input.c:3418" thread on the netdev list; e.g.: tcp_input.c:3418 WARN_ON((int)tp->sacked_out < 0); More generally, I think this bug can be tickled in some cases where two or more ACKs from the receiver are lost and then a DSACK arrives that is immediately above an existing SACKed skb in the write queue. This fix changes tcp_shift_skb_data() to abort this sequence at step (1) in the scenario above by noticing that the bytes are below snd_una and not shifting them. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab9dcfb..72b1857 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1567,6 +1567,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } } + /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + if (!skb_shift(prev, skb, len)) goto fallback; if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) -- cgit v1.1 From 94962718da9d6df11a3a30511016707e4e9451dc Mon Sep 17 00:00:00 2001 From: Li Wei Date: Mon, 5 Mar 2012 14:45:17 +0000 Subject: IPv6: Fix not join all-router mcast group when forwarding set. [ Upstream commit d6ddef9e641d1229d4ec841dc75ae703171c3e92 ] When forwarding is set and a new net device is registered, we need to add this device to the all-router mcast group. Signed-off-by: Li Wei Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0f335c6..be29337 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -433,6 +433,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + /* Join all-router multicast group if forwarding is set */ + if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST)) + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + return ndev; } -- cgit v1.1 From 137a954db947096bd9378ff5a6a77336231f4a90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 10 Mar 2012 09:20:21 +0000 Subject: tcp: fix syncookie regression [ Upstream commit dfd25ffffc132c00070eed64200e8950da5d7e9d ] commit ea4fc0d619 (ipv4: Don't use rt->rt_{src,dst} in ip_queue_xmit()) added a serious regression on synflood handling. Simon Kirby discovered a successful connection was delayed by 20 seconds before being responsive. In my tests, I discovered that xmit frames were lost, and needed ~4 retransmits and a socket dst rebuild before being really sent. In case of syncookie initiated connection, we use a different path to initialize the socket dst, and inet->cork.fl.u.ip4 is left cleared. As ip_queue_xmit() now depends on inet flow being setup, fix this by copying the temp flowi4 we use in cookie_v4_check(). Reported-by: Simon Kirby Bisected-by: Simon Kirby Signed-off-by: Eric Dumazet Tested-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/syncookies.c | 30 ++++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 10 +++++++--- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4382629..895f215 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -277,6 +277,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct rtable *rt; __u8 rcv_wscale; bool ecn_ok = false; + struct flowi4 fl4; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -344,20 +345,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - { - struct flowi4 fl4; - - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, - inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); - security_req_classify_flow(req, flowi4_to_flowi(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); - if (IS_ERR(rt)) { - reqsk_free(req); - goto out; - } + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, + inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, th->source, th->dest); + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); + if (IS_ERR(rt)) { + reqsk_free(req); + goto out; } /* Try to redo what tcp_v4_send_synack did. 
*/ @@ -371,5 +368,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rcv_wscale = rcv_wscale; ret = get_cookie_sock(sk, skb, req, &rt->dst); + /* ip_queue_xmit() depends on our flow being setup + * Normal sockets get it right from inet_csk_route_child_sock() + */ + if (ret) + inet_sk(ret)->cork.fl.u.ip4 = fl4; out: return ret; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 04c6592..53a5af6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1454,9 +1454,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; - if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) - goto put_and_exit; - + if (!dst) { + dst = inet_csk_route_child_sock(sk, newsk, req); + if (!dst) + goto put_and_exit; + } else { + /* syncookie case : see end of cookie_v4_check() */ + } sk_setup_caps(newsk, dst); tcp_mtup_init(newsk); -- cgit v1.1 From 97490c46fea06ecf95f989789e72c54fba2e2584 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Thu, 15 Mar 2012 22:54:14 +0000 Subject: ipv6: Don't dev_hold(dev) in ip6_mc_find_dev_rcu. [ Upstream commit c577923756b7fe9071f28a76b66b83b306d1d001 ] ip6_mc_find_dev_rcu() is called with rcu_read_lock(), so don't need to dev_hold(). With dev_hold(), not corresponding dev_put(), will lead to leak. [ bug introduced in 96b52e61be1 (ipv6: mcast: RCU conversions) ] Signed-off-by: RongQing.Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ee7839f..2257366 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -257,7 +257,6 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->dst); } } else -- cgit v1.1 From 77d77ab09b1b57cf3cc30d0dbdf8c55137146a8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Mar 2012 13:39:35 -0400 Subject: SUNRPC: We must not use list_for_each_entry_safe() in rpc_wake_up() commit 540a0f7584169651f485e8ab67461fcb06934e38 upstream. The problem is that for the case of priority queues, we have to assume that __rpc_remove_wait_queue_priority will move new elements from the tk_wait.links lists into the queue->tasks[] list. We therefore cannot use list_for_each_entry_safe() on queue->tasks[], since that will skip these new tasks that __rpc_remove_wait_queue_priority is adding. Without this fix, rpc_wake_up and rpc_wake_up_status will both fail to wake up all functions on priority wait queues, which can result in some nasty hangs. 
Reported-by: Andy Adamson Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/sched.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4814e24..b6bb225 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -480,14 +480,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -505,13 +509,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } -- cgit v1.1 From 6a26d49c67b852cfd9144f86f628f57c8ca00977 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 20 Mar 2012 03:57:54 +0000 Subject: Fix pppol2tp getsockname() [ Upstream commit bbdb32cb5b73597386913d052165423b9d736145 ] While testing L2TP functionality, I came across a bug in getsockname(). The IP address returned within the pppol2tp_addr's addr member was not being set to the IP address in use. This bug is caused by using inet_sk() on the wrong socket (the L2TP socket rather than the underlying UDP socket), and was likely introduced during the addition of L2TPv3 support. Signed-off-by: Benjamin LaHaise Signed-off-by: James Chapman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 39a21d0..13f9868 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -908,7 +908,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, goto end_put_sess; } - inet = inet_sk(sk); + inet = inet_sk(tunnel->sock); if (tunnel->version == 2) { struct sockaddr_pppol2tp sp; len = sizeof(sp); -- cgit v1.1 From e033155d0b495becfdc28f46ad20089dc8a13060 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Mar 2012 09:53:52 +0000 Subject: net: fix a potential rcu_read_lock() imbalance in rt6_fill_node() [ Upstream commit 94f826b8076e2cb92242061e92f21b5baa3eccc2 ] Commit f2c31e32b378 (net: fix NULL dereferences in check_peer_redir() ) added a regression in rt6_fill_node(), leading to rcu_read_lock() imbalance. That's because NLA_PUT() can make a jump to the nla_put_failure label. Fix this by using nla_put(). Many thanks to Ben Greear for his help. Reported-by: Ben Greear Reported-by: Dave Jones Signed-off-by: Eric Dumazet Tested-by: Ben Greear Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e70e902..8e600f8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2411,8 +2411,12 @@ static int rt6_fill_node(struct net *net, rcu_read_lock(); n = dst_get_neighbour(&rt->dst); - if (n) - NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + if (n) { + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + } rcu_read_unlock(); if (rt->dst.dev) -- cgit v1.1 From 99b8230daca979e0c0b988cfb80566791354a077 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Mar 2012 06:58:03 +0000 Subject: net: fix napi_reuse_skb() skb reserve [ Upstream commit 2a2a459eeeff48640dc557548ce576d666ab06ed ] napi->skb is allocated in napi_get_frags() using netdev_alloc_skb_ip_align(), with a reserve of NET_SKB_PAD + NET_IP_ALIGN bytes. However, when such skb is recycled in napi_reuse_skb(), it ends with a reserve of NET_IP_ALIGN which is suboptimal. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 17fdbf8..f134f88 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3504,7 +3504,8 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { __skb_pull(skb, skb_headlen(skb)); - skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + /* restore the reserve we had after netdev_alloc_skb_ip_align() */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; -- cgit v1.1 From 276b5b3b43807632637cdd160eba2c5facb7e14a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 19 Mar 2012 13:01:07 +0000 Subject: Remove printk from rds_sendmsg [ Upstream commit a6506e1486181975d318344143aca722b2b91621 ] no socket layer outputs a message for this error and neither should rds. Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/send.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index d58ae5f..c803341 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -932,7 +932,6 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { - printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags); ret = -EOPNOTSUPP; goto out; } -- cgit v1.1 From 096e4eafb36436b6ce6e8dc372bc5f6e11804e68 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 21 Mar 2012 23:36:13 +0000 Subject: xfrm: Access the replay notify functions via the registered callbacks [ Upstream commit 1265fd616782ef03b98fd19f65c2b47fcd4ea11f ] We call the wrong replay notify function when we use ESN replay handling. This leads to the fact that we don't send notifications if we use ESN. Fix this by calling the registered callbacks instead of xfrm_replay_notify(). Signed-off-by: Steffen Klassert Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_replay.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index b11ea69..3235023 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -166,7 +166,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -293,7 +293,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -502,7 +502,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static struct xfrm_replay xfrm_replay_legacy = { -- cgit v1.1 From 8bb8ebe7b77228209006ea945104e37294608b93 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Jan 2012 12:56:02 +0300 Subject: nfsd: don't allow zero length strings in cache_parse() commit 6d8d17499810479eabd10731179c04b2ca22152f upstream. There is no point in passing a zero length string here and quite a few of that cache_parse() implementations will Oops if count is zero. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836..4530a91 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -828,6 +828,8 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, { ssize_t ret; + if (count == 0) + return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; -- cgit v1.1 From cea90bebaab0e962cf9a954ef41cc1893da7b3bf Mon Sep 17 00:00:00 2001 From: "danborkmann@iogearbox.net" Date: Tue, 27 Mar 2012 22:47:43 +0000 Subject: rose_dev: fix memcpy-bug in rose_set_mac_address [ Upstream commit 81213b5e8ae68e204aa7a3f83c4f9100405dbff9 ] If both addresses equal, nothing needs to be done. If the device is down, then we simply copy the new address to dev->dev_addr. If the device is up, then we add another loopback device with the new address, and if that does not fail, we remove the loopback device with the old address. And only then, we update the dev->dev_addr. Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 178ff4f..2679507 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -96,11 +96,11 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) struct sockaddr *sa = addr; int err; - if (!memcpy(dev->dev_addr, sa->sa_data, dev->addr_len)) + if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) return 0; if (dev->flags & IFF_UP) { - err = rose_add_loopback_node((rose_address *)dev->dev_addr); + err = rose_add_loopback_node((rose_address *)sa->sa_data); if (err) return err; -- cgit v1.1 From eb221774b352966c562b5c92a28d01ddc1bc4393 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 19 Mar 2012 16:00:26 +0100 Subject: mac80211: fix possible tid_rx->reorder_timer use after free commit d72308bff5c2fa207949a5925b020bce74495e33 upstream. It is possible that we will arm the tid_rx->reorder_timer after del_timer_sync() in ___ieee80211_stop_rx_ba_session(). We need to stop the timer after the RCU grace period finishes, so move it to ieee80211_free_tid_rx(). The timer will not be armed again, as rcu_dereference(sta->ampdu_mlme.tid_rx[tid]) will return NULL. Debug objects detected the problem with the following warning: ODEBUG: free active (active state 0) object type: timer_list hint: sta_rx_agg_reorder_timer_expired+0x0/0xf0 [mac80211] Bug report (with all warning messages): https://bugzilla.redhat.com/show_bug.cgi?id=804007 Reported-by: "jan p. springer" Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 9c0d76c..1a41b14 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -48,6 +48,8 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) container_of(h, struct tid_ampdu_rx, rcu_head); int i; + del_timer_sync(&tid_rx->reorder_timer); + for (i = 0; i < tid_rx->buf_size; i++) dev_kfree_skb(tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); @@ -87,7 +89,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, tid, 0, reason); del_timer_sync(&tid_rx->session_timer); - del_timer_sync(&tid_rx->reorder_timer); call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } -- cgit v1.1 From 073c4aec557b87d0c5b6f8b6b1910b356088eaf3 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 2 Apr 2012 13:44:56 +0200 Subject: Bluetooth: Fix l2cap conn failures for ssp devices commit 18daf1644e634bae951a6e3d4d19d89170209762 upstream Commit 330605423c fixed l2cap conn establishment for non-ssp remote devices by not setting HCI_CONN_ENCRYPT_PEND every time conn security is tested (which was always returning failure on any subsequent security checks). However, this broke l2cap conn establishment for ssp remote devices when an ACL link was already established at SDP-level security. This fix ensures that encryption must be pending whenever authentication is also pending.
Signed-off-by: Peter Hurley Tested-by: Daniel Wagner Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bcd158f..4bb16b8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -548,6 +548,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; + + /* encrypt must be pending if auth is also pending */ + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); + cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); -- cgit v1.1