From b032981516fbd7efb19efb9b380e1651f751af2e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Apr 2012 09:08:22 +0000 Subject: netem: fix possible skb leak [ Upstream commit 116a0fc31c6c9b8fc821be5a96e5bf0b43260131 ] skb_checksum_help(skb) can return an error, we must free skb in this case. qdisc_drop(skb, sch) can also be fed with a NULL skb (if skb_unshare() failed), so let's use this generic helper. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2f68459..945f3dd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -350,10 +350,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) { - sch->qstats.drops++; - return NET_XMIT_DROP; - } + skb_checksum_help(skb))) + return qdisc_drop(skb, sch); skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } -- cgit v1.1 From 4e133084e946f32d6fc489c43274d68a8428a7ed Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Apr 2012 15:33:35 +0000 Subject: net: In unregister_netdevice_notifier unregister the netdevices. [ Upstream commit 7d3d43dab4e978d8d9ad1acf8af15c9b1c4b0f0f ] We already synthesize events in register_netdevice_notifier and synthesizing events in unregister_netdevice_notifier allows us to remove the need for special case cleanup code. This change should be safe as it adds no new cases for existing callers of unregister_netdevice_notifier to handle. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f134f88..1e77897 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,14 +1406,34 @@ EXPORT_SYMBOL(register_netdevice_notifier); * register_netdevice_notifier(). The notifier is unlinked into the * kernel structures and may then be reused. A negative errno code * is returned on a failure. + * + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code.
*/ int unregister_netdevice_notifier(struct notifier_block *nb) { + struct net_device *dev; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); + if (err) + goto unlock; + + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); + } + } +unlock: rtnl_unlock(); return err; } -- cgit v1.1 From ff422223cc56542d31575afd05b02bd1493abf4b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 2 May 2012 03:58:43 +0000 Subject: net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg [ Upstream commit 84768edbb2721637620b2d84501bb0d5aed603f1 ] l2tp_ip_sendmsg could return without releasing socket lock, making it all the way to userspace, and generating the following warning: [ 130.891594] ================================================ [ 130.894569] [ BUG: lock held when returning to user space! ] [ 130.897257] 3.4.0-rc5-next-20120501-sasha #104 Tainted: G W [ 130.900336] ------------------------------------------------ [ 130.902996] trinity/8384 is leaving the kernel with locks still held! [ 130.906106] 1 lock held by trinity/8384: [ 130.907924] #0: (sk_lock-AF_INET){+.+.+.}, at: [] l2tp_ip_sendmsg+0x2f/0x550 Introduced by commit 2f16270 ("l2tp: Fix locking in l2tp_ip.c"). Signed-off-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 858ca23..ea52d02 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -441,8 +441,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m daddr = lip->l2tp_addr.s_addr; } else { + rc = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; + goto out; daddr = inet->inet_daddr; connected = 1; -- cgit v1.1 From f1aadd585872545e03701a91b1f2e9d66a35d5d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2012 02:28:41 +0000 Subject: tcp: change tcp_adv_win_scale and tcp_rmem[2] [ Upstream commit b49960a05e32121d29316cfdf653894b88ac9190 ] tcp_adv_win_scale default value is 2, meaning we expect a good citizen skb to have skb->len / skb->truesize ratio of 75% (3/4) In 2.6 kernels we (mis)accounted for typical MSS=1460 frame : 1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392. So these skbs were considered as not bloated. With recent truesize fixes, a typical MSS=1460 frame truesize is now the more precise : 2048 + 256 = 2304. But 2304 * 3/4 = 1728. So these skb are not good citizen anymore, because 1460 < 1728 (GRO can escape this problem because it build skbs with a too low truesize.) This also means tcp advertises a too optimistic window for a given allocated rcvspace : When receiving frames, sk_rmem_alloc can hit sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often, especially when application is slow to drain its receive queue or in case of losses (netperf is fast, scp is slow). This is a major latency source. 
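As an aside for readers tracing the arithmetic above: the fraction of receive-buffer space that TCP advertises as window is derived from tcp_adv_win_scale roughly as in the sketch below. This is illustrative only, modeled on the kernel's tcp_win_from_space() helper; that helper is not part of this patch series and its exact definition here is an assumption.

extern int sysctl_tcp_adv_win_scale;

/* Sketch: how tcp_adv_win_scale maps receive-buffer space to window. */
static inline int tcp_win_from_space(int space)
{
	return sysctl_tcp_adv_win_scale <= 0 ?
		(space >> (-sysctl_tcp_adv_win_scale)) :
		space - (space >> sysctl_tcp_adv_win_scale);
}

/*
 * scale = 2: window = space - space/4 = 3/4 of space  (the 75% above)
 * scale = 1: window = space - space/2 = 1/2 of space  (the new 50%)
 *
 * A typical MSS=1460 skb with truesize 2304 has len/truesize of about 63%:
 * too "bloated" for the 75% expectation, but acceptable at 50%.
 */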
We should adjust the len/truesize ratio to 50% instead of 75% This patch : 1) changes tcp_adv_win_scale default to 1 instead of 2 2) increase tcp_rmem[2] limit from 4MB to 6MB to take into account better truesize tracking and to allow autotuning tcp receive window to reach same value than before. Note that same amount of kernel memory is consumed compared to 2.6 kernels. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 9 +++++---- net/ipv4/tcp_input.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 80b988f..74f84a48 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3221,7 +3221,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long limit; - int i, max_share, cnt; + int i, max_rshare, max_wshare, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3285,15 +3285,16 @@ void __init tcp_init(void) /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); - max_share = min(4UL*1024*1024, limit); + max_wshare = min(4UL*1024*1024, limit); + max_rshare = min(6UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_share); + sysctl_tcp_wmem[2] = max(64*1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_share); + sysctl_tcp_rmem[2] = max(87380, max_rshare); printk(KERN_INFO "TCP: Hash tables configured " "(established %u bind %u)\n", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c3a9f03..7410a8c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -83,7 +83,7 @@ int sysctl_tcp_ecn __read_mostly = 2; EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 2; +int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; -- cgit v1.1 From 6e29cea334bf13fcb19f20cc80b65f9a1f2f329e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 17 May 2012 11:14:14 +0000 Subject: tcp: do_tcp_sendpages() must try to push data out on oom conditions commit bad115cfe5b509043b684d3a007ab54b80090aa1 upstream. Since recent changes on TCP splicing (starting with commits 2f533844 "tcp: allow splice() to build full TSO packets" and 35f9c09f "tcp: tcp_sendpages() should call tcp_push() once"), I started seeing massive stalls when forwarding traffic between two sockets using splice() when pipe buffers were larger than socket buffers. Latest changes (net: netdev_alloc_skb() use build_skb()) made the problem even more apparent. The reason seems to be that if do_tcp_sendpages() fails on out of memory condition without being able to send at least one byte, tcp_push() is not called and the buffers cannot be flushed. After applying the attached patch, I cannot reproduce the stalls at all and the data rate it perfectly stable and steady under any condition which previously caused the problem to be permanent. The issue seems to have been there since before the kernel migrated to git, which makes me think that the stalls I occasionally experienced with tux during stress-tests years ago were probably related to the same issue. 
This issue was first encountered on 3.0.31 and 3.2.17, so please backport to -stable. Signed-off-by: Willy Tarreau Acked-by: Eric Dumazet Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 74f84a48..6db041d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -850,8 +850,7 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; -- cgit v1.1 From ec8f0159dc6f3f39db5a644d94fd25709c301934 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 23 Mar 2012 07:23:31 -0700 Subject: cfg80211: warn if db.txt is empty with CONFIG_CFG80211_INTERNAL_REGDB commit 80007efeff0568375b08faf93c7aad65602cb97e upstream. It has happened twice now where elaborate troubleshooting has undergone on systems where CONFIG_CFG80211_INTERNAL_REGDB [0] has been set but yet net/wireless/db.txt was not updated. Despite the documentation on this it seems system integrators could use some more help with this, so throw out a kernel warning at boot time when their database is empty. This does mean that the error-prone system integrator won't likely realize the issue until they boot the machine but -- it does not seem to make sense to enable a build bug breaking random build testing. [0] http://wireless.kernel.org/en/developers/Regulatory/CRDA#CONFIG_CFG80211_INTERNAL_REGDB Cc: Stephen Rothwell Cc: Youngsin Lee Cc: Raja Mani Cc: Senthil Kumar Balasubramanian Cc: Vipin Mehta Cc: yahuan@qca.qualcomm.com Cc: jjan@qca.qualcomm.com Cc: vthiagar@qca.qualcomm.com Cc: henrykim@qualcomm.com Cc: jouni@qca.qualcomm.com Cc: athiruve@qca.qualcomm.com Cc: cjkim@qualcomm.com Cc: philipk@qca.qualcomm.com Cc: sunnykim@qualcomm.com Cc: sskwak@qualcomm.com Cc: kkim@qualcomm.com Cc: mattbyun@qualcomm.com Cc: ryanlee@qualcomm.com Cc: simbap@qualcomm.com Cc: krislee@qualcomm.com Cc: conner@qualcomm.com Cc: hojinkim@qualcomm.com Cc: honglee@qualcomm.com Cc: johnwkim@qualcomm.com Cc: jinyong@qca.qualcomm.com Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ca76d8b..7457697 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -379,7 +379,15 @@ static void reg_regdb_query(const char *alpha2) schedule_work(®_regdb_work); } + +/* Feel free to add any other sanity checks here */ +static void reg_regdb_size_check(void) +{ + /* We should ideally BUILD_BUG_ON() but then random builds would fail */ + WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it..."); +} #else +static inline void reg_regdb_size_check(void) {} static inline void reg_regdb_query(const char *alpha2) {} #endif /* CONFIG_CFG80211_INTERNAL_REGDB */ @@ -2225,6 +2233,8 @@ int __init regulatory_init(void) spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); + reg_regdb_size_check(); + cfg80211_regdomain = cfg80211_world_regdom; user_alpha2[0] = '9'; -- cgit v1.1 From 6ec1d66c8d22bc76ebf37860a87ca399790beb5d Mon Sep 17 00:00:00 2001 From: Eyal Shapira Date: Tue, 29 May 2012 02:00:22 -0700 Subject: mac80211: fix ADDBA declined after suspend with wowlan commit 7b21aea04d084916ac4e0e8852dcc9cd60ec0d1d upstream. WLAN_STA_BLOCK_BA is set while suspending but doesn't get cleared when resuming in case of wowlan. This causes further ADDBA requests received to be rejected. Fix it by clearing it in the wowlan path as well. Signed-off-by: Eyal Shapira Reviewed-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2124db8..11d9d49 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1254,6 +1254,12 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + /* add back keys */ + list_for_each_entry(sdata, &local->interfaces, list) + if (ieee80211_sdata_running(sdata)) + ieee80211_enable_keys(sdata); + + wake_up: /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -1275,12 +1281,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - /* add back keys */ - list_for_each_entry(sdata, &local->interfaces, list) - if (ieee80211_sdata_running(sdata)) - ieee80211_enable_keys(sdata); - - wake_up: ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); -- cgit v1.1 From f77baf324713ef5d6ae9ae63a77aed2bfbe4333d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 May 2012 22:16:32 -0400 Subject: ipv4: Do not use dead fib_info entries. [ Upstream commit dccd9ecc374462e5d6a5b8f8110415a86c2213d8 ] Due to RCU lookups and RCU based release, fib_info objects can be found during lookup which have fi->fib_dead set. We must ignore these entries, otherwise we risk dereferencing the parts of the entry which are being torn down. Reported-by: Yevgen Pronenko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58c25ea..0d884eb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1371,6 +1371,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; + if (fi->fib_dead) + continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); -- cgit v1.1 From 49d7872377ba34fc2b2e1e460073a387e7adcfae Mon Sep 17 00:00:00 2001 From: Yanmin Zhang Date: Wed, 23 May 2012 15:39:45 +0000 Subject: ipv4: fix the rcu race between free_fib_info and ip_route_output_slow [ Upstream commit e49cc0da7283088c5e03d475ffe2fdcb24a6d5b1 ] We hit a kernel OOPS. <3>[23898.789643] BUG: sleeping function called from invalid context at /data/buildbot/workdir/ics/hardware/intel/linux-2.6/arch/x86/mm/fault.c:1103 <3>[23898.862215] in_atomic(): 0, irqs_disabled(): 0, pid: 10526, name: Thread-6683 <4>[23898.967805] HSU serial 0000:00:05.1: 0000:00:05.2:HSU serial prevented me to suspend... <4>[23899.258526] Pid: 10526, comm: Thread-6683 Tainted: G W 3.0.8-137685-ge7742f9 #1 <4>[23899.357404] HSU serial 0000:00:05.1: 0000:00:05.2:HSU serial prevented me to suspend... <4>[23899.904225] Call Trace: <4>[23899.989209] [] ? pgtable_bad+0x130/0x130 <4>[23900.000416] [] __might_sleep+0x10a/0x110 <4>[23900.007357] [] do_page_fault+0xd1/0x3c0 <4>[23900.013764] [] ? restore_all+0xf/0xf <4>[23900.024024] [] ? napi_complete+0x8b/0x690 <4>[23900.029297] [] ? pgtable_bad+0x130/0x130 <4>[23900.123739] [] ? pgtable_bad+0x130/0x130 <4>[23900.128955] [] error_code+0x5f/0x64 <4>[23900.133466] [] ? pgtable_bad+0x130/0x130 <4>[23900.138450] [] ? __ip_route_output_key+0x698/0x7c0 <4>[23900.144312] [] ? __ip_route_output_key+0x38d/0x7c0 <4>[23900.150730] [] ip_route_output_flow+0x1f/0x60 <4>[23900.156261] [] ip4_datagram_connect+0x188/0x2b0 <4>[23900.161960] [] ? _raw_spin_unlock_bh+0x1f/0x30 <4>[23900.167834] [] inet_dgram_connect+0x36/0x80 <4>[23900.173224] [] ? _copy_from_user+0x48/0x140 <4>[23900.178817] [] sys_connect+0x9a/0xd0 <4>[23900.183538] [] ? alloc_file+0xdc/0x240 <4>[23900.189111] [] ? sub_preempt_count+0x3d/0x50 Function free_fib_info resets nexthop_nh->nh_dev to NULL before releasing fi. Other cpu might be accessing fi. Fixing it by delaying the releasing. With the patch, we ran MTBF testing on Android mobile for 12 hours and didn't trigger the issue. Thank Eric for very detailed review/checking the issue. Signed-off-by: Yanmin Zhang Signed-off-by: Kun Jiang Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35..7e454ba 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,18 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + change_nexthops(fi) { + if (nexthop_nh->nh_dev) + dev_put(nexthop_nh->nh_dev); + } endfor_nexthops(fi); + + release_net(fi->fib_net); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -149,14 +161,8 @@ void free_fib_info(struct fib_info *fi) pr_warning("Freeing alive fib_info %p\n", fi); return; } - change_nexthops(fi) { - if (nexthop_nh->nh_dev) - dev_put(nexthop_nh->nh_dev); - nexthop_nh->nh_dev = NULL; - } endfor_nexthops(fi); fib_info_cnt--; - release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) -- cgit v1.1 From 5111df358197ca9c5001bf2bb542fc8c346bb5b5 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sat, 26 May 2012 01:30:53 +0000 Subject: ipv6: fix incorrect ipsec fragment [ Upstream commit 0c1833797a5a6ec23ea9261d979aa18078720b74 ] Since commit ad0081e43a "ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed" the fragment of packets is incorrect. because tunnel mode needs IPsec headers and trailer for all fragments, while on transport mode it is sufficient to add the headers to the first fragment and the trailer to the last. so modify mtu and maxfraglen base on ipsec mode and if fragment is first or last. with my test,it work well(every fragment's size is the mtu) and does not trigger slow fragment path. Changes from v1: though optimization, mtu_prev and maxfraglen_prev can be delete. replace xfrm mode codes with dst_entry's new frag DST_XFRM_TUNNEL. add fuction ip6_append_data_mtu to make codes clearer. Signed-off-by: Gao feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 68 +++++++++++++++++++++++++++++++++++++------------- net/xfrm/xfrm_policy.c | 3 +++ 2 files changed, 53 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9cbf176..ae9f6d4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1194,6 +1194,29 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } +static void ip6_append_data_mtu(int *mtu, + int *maxfraglen, + unsigned int fragheaderlen, + struct sk_buff *skb, + struct rt6_info *rt) +{ + if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { + if (skb == NULL) { + /* first fragment, reserve header_len */ + *mtu = *mtu - rt->dst.header_len; + + } else { + /* + * this fragment is not first, the headers + * space is regarded as data space. 
+ */ + *mtu = dst_mtu(rt->dst.path); + } + *maxfraglen = ((*mtu - fragheaderlen) & ~7) + + fragheaderlen - sizeof(struct frag_hdr); + } +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1203,7 +1226,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; - struct sk_buff *skb; + struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; @@ -1260,8 +1283,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1356,38 +1383,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; - struct sk_buff *skb_prev; alloc_new_skb: - skb_prev = skb; - /* There's no room in the current skb */ - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; + if (skb) + fraggap = skb->len - maxfraglen; else fraggap = 0; + /* update mtu and maxfraglen if necessary */ + if (skb == NULL || skb_prev == NULL) + ip6_append_data_mtu(&mtu, &maxfraglen, + fragheaderlen, skb, rt); + + skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) - datalen = maxfraglen - fragheaderlen; - fraglen = datalen + fragheaderlen; + if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; - /* - * The last fragment gets additional space at tail. - * Note: we overallocate on fragments with MSG_MODE - * because we have no idea if we're the last one. - */ - if (datalen == length + fraggap) - alloclen += rt->dst.trailer_len; + if (datalen != length + fraggap) { + /* + * this is not the last fragment, the trailer + * space is regarded as data space. + */ + datalen += rt->dst.trailer_len; + } + + alloclen += rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7803eb6..0c0e40e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1917,6 +1917,9 @@ no_transform: } ok: xfrm_pols_put(pols, drop_pols); + if (dst && dst->xfrm && + dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + dst->flags |= DST_XFRM_TUNNEL; return dst; nopol: -- cgit v1.1 From 3dc6bc132f2afce8da98e0f047c4cc8ae569d6cb Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 29 May 2012 23:13:23 +0000 Subject: l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case [ Upstream commit c51ce49735c183ef2592db70f918ee698716276b ] An application may call connect() to disconnect a socket using an address with family AF_UNSPEC. The L2TP IP sockets were not handling this case when the socket is not bound and an attempt to connect() using AF_UNSPEC in such cases would result in an oops. This patch addresses the problem by protecting the sk_prot->disconnect() call against trying to unhash the socket before it is bound. The patch also adds more checks that the sockaddr supplied to bind() and connect() calls is valid. RIP: 0010:[] [] inet_unhash+0x50/0xd0 RSP: 0018:ffff88001989be28 EFLAGS: 00010293 Stack: ffff8800407a8000 0000000000000000 ffff88001989be78 ffffffff82e3a249 ffffffff82e3a050 ffff88001989bec8 ffff88001989be88 ffff8800407a8000 0000000000000010 ffff88001989bec8 ffff88001989bea8 ffffffff82e42639 Call Trace: [] udp_disconnect+0x1f9/0x290 [] inet_dgram_connect+0x29/0x80 [] sys_connect+0x9c/0x100 Reported-by: Sasha Levin Signed-off-by: James Chapman Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ea52d02..78bc442 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,9 +251,16 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; - int ret = -EINVAL; + int ret; int chk_addr_ret; + if (!sock_flag(sk, SOCK_ZAPPED)) + return -EINVAL; + if (addr_len < sizeof(struct sockaddr_l2tpip)) + return -EINVAL; + if (addr->l2tp_family != AF_INET) + return -EINVAL; + ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) @@ -283,6 +290,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_del_node_init(sk); write_unlock_bh(&l2tp_ip_lock); ret = 0; + sock_reset_flag(sk, SOCK_ZAPPED); + out: release_sock(sk); @@ -303,13 +312,14 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len __be32 saddr; int oif, rc; - rc = -EINVAL; + if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ + return -EINVAL; + if (addr_len < sizeof(*lsa)) - goto out; + return -EINVAL; - rc = -EAFNOSUPPORT; if (lsa->l2tp_family != AF_INET) - goto out; + return -EAFNOSUPPORT; lock_sock(sk); @@ -363,6 +373,14 @@ out: return rc; } +static int l2tp_ip_disconnect(struct sock *sk, int flags) +{ + if (sock_flag(sk, SOCK_ZAPPED)) + return 0; + + return udp_disconnect(sk, flags); +} + static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { @@ -591,7 +609,7 @@ static struct proto l2tp_ip_prot = { .close = l2tp_ip_close, .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, - .disconnect = udp_disconnect, + .disconnect = 
l2tp_ip_disconnect, .ioctl = udp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, -- cgit v1.1 From 45d21c136849cda838e77928d544fc7cd9d6239c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 May 2012 13:29:51 +0000 Subject: pktgen: fix crash at module unload [ Upstream commit c57b54684060c8aced64a5b78ff69ff289af97b9 ] commit 7d3d43dab4e9 (net: In unregister_netdevice_notifier unregister the netdevices.) makes pktgen crashing at module unload. [ 296.820578] BUG: spinlock bad magic on CPU#6, rmmod/3267 [ 296.820719] lock: ffff880310c38000, .magic: ffff8803, .owner: /-1, .owner_cpu: -1 [ 296.820943] Pid: 3267, comm: rmmod Not tainted 3.4.0-rc5+ #254 [ 296.821079] Call Trace: [ 296.821211] [] spin_dump+0x8a/0x8f [ 296.821345] [] spin_bug+0x21/0x26 [ 296.821507] [] do_raw_spin_lock+0x131/0x140 [ 296.821648] [] _raw_spin_lock+0x1e/0x20 [ 296.821786] [] __pktgen_NN_threads+0x4d/0x140 [pktgen] [ 296.821928] [] pktgen_device_event+0x10d/0x1e0 [pktgen] [ 296.822073] [] unregister_netdevice_notifier+0x7f/0x100 [ 296.822216] [] pg_cleanup+0x48/0x73 [pktgen] [ 296.822357] [] sys_delete_module+0x17e/0x2a0 [ 296.822502] [] system_call_fastpath+0x16/0x1b Hold the pktgen_thread_lock while splicing pktgen_threads, and test pktgen_exiting in pktgen_device_event() to make unload faster. Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/pktgen.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fb..0865ea0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1932,7 +1932,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -3755,12 +3755,18 @@ static void __exit pg_cleanup(void) { struct pktgen_thread *t; struct list_head *q, *n; + struct list_head list; /* Stop all interfaces & threads */ pktgen_exiting = true; - list_for_each_safe(q, n, &pktgen_threads) { + mutex_lock(&pktgen_thread_lock); + list_splice(&list, &pktgen_threads); + mutex_unlock(&pktgen_thread_lock); + + list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); + list_del(&t->th_list); kthread_stop(t->tsk); kfree(t); } -- cgit v1.1 From a2abc1310ff689486730215fcea629b74b01abe4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 May 2012 23:52:26 +0000 Subject: pktgen: fix module unload for good [ Upstream commit d4b1133558e0d417342d5d2c49e4c35b428ff20d ] commit c57b5468406 (pktgen: fix crash at module unload) did a very poor job with list primitives. 1) list_splice() arguments were in the wrong order 2) list_splice(list, head) has undefined behavior if head is not initialized. 3) We should use the list_splice_init() variant to clear pktgen_threads list. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/pktgen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0865ea0..c0e0f76 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3755,13 +3755,13 @@ static void __exit pg_cleanup(void) { struct pktgen_thread *t; struct list_head *q, *n; - struct list_head list; + LIST_HEAD(list); /* Stop all interfaces & threads */ pktgen_exiting = true; mutex_lock(&pktgen_thread_lock); - list_splice(&list, &pktgen_threads); + list_splice_init(&pktgen_threads, &list); mutex_unlock(&pktgen_thread_lock); list_for_each_safe(q, n, &list) { -- cgit v1.1 From 83bba7979059b83df4edc16f747784c6990fc3bb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 May 2012 23:03:34 -0400 Subject: Revert "net: maintain namespace isolation between vlan and real device" [ Upstream commit 59b9997baba5242997ddc7bd96b1391f5275a5a4 ] This reverts commit 8a83a00b0735190384a348156837918271034144. It causes regressions for S390 devices, because it does an unconditional DST drop on SKBs for vlans and the QETH device needs the neighbour entry hung off the DST for certain things on transmit. Arnd can't remember exactly why he even needed this change. Conflicts: drivers/net/macvlan.c net/8021q/vlan_dev.c net/core/dev.c Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 36 +++++------------------------------- 2 files changed, 6 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5b4f51d..d548456 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -154,7 +154,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_info(dev)->real_dev); + skb->dev = vlan_dev_info(dev)->real_dev; len = skb->len; ret = dev_queue_xmit(skb); diff --git a/net/core/dev.c b/net/core/dev.c index 1e77897..a71eafc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1533,10 +1533,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } - skb_set_dev(skb, dev); + skb->dev = dev; + skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1791,36 +1795,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -/** - * skb_dev_set -- assign a new device to a buffer - * @skb: buffer for the new device - * @dev: network device - * - * If an skb is owned by a device already, we have to reset - * all data private to the namespace a device belongs to - * before assigning it a new device. - */ -#ifdef CONFIG_NET_NS -void skb_set_dev(struct sk_buff *skb, struct net_device *dev) -{ - skb_dst_drop(skb); - if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { - secpath_reset(skb); - nf_reset(skb); - skb_init_secmark(skb); - skb->mark = 0; - skb->priority = 0; - skb->nf_trace = 0; - skb->ipvs_property = 0; -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif - } - skb->dev = dev; -} -EXPORT_SYMBOL(skb_set_dev); -#endif /* CONFIG_NET_NS */ - /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. 
-- cgit v1.1 From 337c934a55a7956bcd349b6f56c6add58d2550f3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 4 May 2012 05:24:54 +0000 Subject: sctp: check cached dst before using it [ Upstream commit e0268868ba064980488fc8c194db3d8e9fb2959c ] dst_check() will take care of SA (and obsolete field), hence IPsec rekeying scenario is taken into account. Signed-off-by: Nicolas Dichtel Acked-by: Vlad Yaseivch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 4 +--- net/sctp/transport.c | 17 ----------------- 2 files changed, 1 insertion(+), 20 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 817174e..8fc4dcd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -377,9 +377,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) */ skb_set_owner_w(nskb, sk); - /* The 'obsolete' field of dst is set to 2 when a dst is freed. */ - if (!dst || (dst->obsolete > 1)) { - dst_release(dst); + if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 394c57c..8da4481 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,23 +226,6 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -/* this is a complete rip-off from __sk_dst_check - * the cookie is always 0 since this is how it's used in the - * pmtu code - */ -static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) -{ - struct dst_entry *dst = t->dst; - - if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { - dst_release(t->dst); - t->dst = NULL; - return NULL; - } - - return dst; -} - void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst; -- cgit v1.1 From 570986003b9cd61b7ccf03beacb56f5f5f6f3409 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 24 May 2012 11:32:38 +0000 Subject: xfrm: take net hdr len into account for esp payload size calculation [ Upstream commit 91657eafb64b4cb53ec3a2fbc4afc3497f735788 ] Corrects the function that determines the esp payload size. The calculations done in esp{4,6}_get_mtu() lead to overlength frames in transport mode for certain mtu values and suboptimal frames for others. According to what is done, mainly in esp{,6}_output() and tcp_mtu_to_mss(), net_header_len must be taken into account before doing the alignment calculation. Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/esp4.c | 24 +++++++++--------------- net/ipv6/esp6.c | 18 +++++++----------- 2 files changed, 16 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a5b4134..530787b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -457,28 +457,22 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); u32 align = max_t(u32, blksize, esp->padlen); - u32 rem; - - mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); - rem = mtu & (align - 1); - mtu &= ~(align - 1); + unsigned int net_adj; switch (x->props.mode) { - case XFRM_MODE_TUNNEL: - break; - default: case XFRM_MODE_TRANSPORT: - /* The worst case */ - mtu -= blksize - 4; - mtu += min_t(u32, blksize - 4, rem); - break; case XFRM_MODE_BEET: - /* The worst case. 
*/ - mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem); + net_adj = sizeof(struct iphdr); break; + case XFRM_MODE_TUNNEL: + net_adj = 0; + break; + default: + BUG(); } - return mtu - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - + net_adj) & ~(align - 1)) + (net_adj - 2); } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1ac7938..65dd543 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -411,19 +411,15 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); u32 align = max_t(u32, blksize, esp->padlen); - u32 rem; + unsigned int net_adj; - mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); - rem = mtu & (align - 1); - mtu &= ~(align - 1); - - if (x->props.mode != XFRM_MODE_TUNNEL) { - u32 padsize = ((blksize - 1) & 7) + 1; - mtu -= blksize - padsize; - mtu += min_t(u32, blksize - padsize, rem); - } + if (x->props.mode != XFRM_MODE_TUNNEL) + net_adj = sizeof(struct ipv6hdr); + else + net_adj = 0; - return mtu - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - + net_adj) & ~(align - 1)) + (net_adj - 2); } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.1 From 55fe02e968371dd1c0b5b1f9411f2fc8c2b84e7e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 May 2012 15:09:27 +0200 Subject: mac80211: clean up remain-on-channel on interface stop commit 71ecfa1893034eeb1c93e02e22ee2ad26d080858 upstream. When any interface goes down, it could be the one that we were doing a remain-on-channel with. We therefore need to cancel the remain-on-channel and flush the related work structs so they don't run after the interface has been removed or even destroyed. It's also possible in this case that an off-channel SKB was never transmitted, so free it if this is the case. Note that this can also happen if the driver finishes the off-channel period without ever starting it. Reported-by: Nirav Shah Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 12 ++++++++++++ net/mac80211/offchannel.c | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 895eec1..65f3764c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -498,6 +498,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); break; default: + mutex_lock(&local->mtx); + if (local->hw_roc_dev == sdata->dev && + local->hw_roc_channel) { + /* ignore return value since this is racy */ + drv_cancel_remain_on_channel(local); + ieee80211_queue_work(&local->hw, &local->hw_roc_done); + } + mutex_unlock(&local->mtx); + + flush_work(&local->hw_roc_start); + flush_work(&local->hw_roc_done); + flush_work(&sdata->work); /* * When we get here, the interface is marked down. 
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 13427b1..c55eb9d 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -251,6 +251,22 @@ static void ieee80211_hw_roc_done(struct work_struct *work) return; } + /* was never transmitted */ + if (local->hw_roc_skb) { + u64 cookie; + + cookie = local->hw_roc_cookie ^ 2; + + cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie, + local->hw_roc_skb->data, + local->hw_roc_skb->len, false, + GFP_KERNEL); + + kfree_skb(local->hw_roc_skb); + local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; + } + if (!local->hw_roc_for_tx) cfg80211_remain_on_channel_expired(local->hw_roc_dev, local->hw_roc_cookie, -- cgit v1.1 From f90b005ff35ab8e6ed3ddcbf79dee0baa48c429a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 12:16:50 +0200 Subject: cfg80211: fix interface combinations check commit 463454b5dbd8dbab6e2fc6c557329e5b811b9c32 upstream. If a given interface combination doesn't contain a required interface type then we missed checking that and erroneously allowed it even though iface type wasn't there at all. Add a check that makes sure that all interface types are accounted for. Reported-by: Mohammed Shafi Shajakhan Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 9c22330..30f68dc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -937,6 +937,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { struct wireless_dev *wdev_iter; + u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; int total = 1; int i, j; @@ -970,12 +971,14 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; + used_iftypes |= BIT(wdev_iter->iftype); } mutex_unlock(&rdev->devlist_mtx); for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; c = &rdev->wiphy.iface_combinations[i]; @@ -990,6 +993,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, if (rdev->wiphy.software_iftypes & BIT(iftype)) continue; for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; if (!(limits[j].types & BIT(iftype))) continue; if (limits[j].max < num[iftype]) @@ -997,7 +1001,20 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, limits[j].max -= num[iftype]; } } - /* yay, it fits */ + + /* + * Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* + * This combination covered all interface types and + * supported the requested numbers, so we're good. + */ kfree(limits); return 0; cont: -- cgit v1.1 From 00c4792f7501a6c761c47c5a7c0261e309e61949 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 May 2012 15:26:38 -0400 Subject: NFSv4.1: Fix a request leak on the back channel commit b3b02ae5865c2dcd506322e0fc6def59a042e72f upstream. If the call to svc_process_common() fails, then the request needs to be freed before we can exit bc_svc_process. 
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ce5f111..54c59ab 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1302,7 +1302,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, sizeof(req->rq_snd_buf)); return bc_send(req); } else { - /* Nothing to do to drop request */ + /* drop request */ + xprt_free_bc_request(req); return 0; } } -- cgit v1.1 From 325b4161bab9ba01b345c4483bbb0e36cb571943 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 30 May 2012 21:18:10 +0000 Subject: net: sock: validate data_len before allocating skb in sock_alloc_send_pskb() [ Upstream commit cc9b17ad29ecaa20bfe426a8d4dbfb94b13ff1cc ] We need to validate the number of pages consumed by data_len, otherwise frags array could be overflowed by userspace. So this patch validate data_len and return -EMSGSIZE when data_len may occupies more frags than MAX_SKB_FRAGS. Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index aebb419..b4bb59a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1501,6 +1501,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1519,14 +1524,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { -- cgit v1.1 From e99e096f4b388824e49ce5c6568eacf46c94438f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jun 2012 00:07:20 +0000 Subject: net: l2tp_eth: fix kernel panic on rmmod l2tp_eth [ Upstream commit a06998b88b1651c5f71c0e35f528bf2057188ead ] We must prevent module unloading if some devices are still attached to l2tp_eth driver. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Tested-by: Denys Fedoryshchenko Cc: James Chapman Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_eth.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d2726a7..3c55f63 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -167,6 +167,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (dev) { unregister_netdev(dev); spriv->dev = NULL; + module_put(THIS_MODULE); } } } @@ -254,6 +255,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p if (rc < 0) goto out_del_dev; + __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); -- cgit v1.1 From d3a673fb54a4d7f6d8cb4f7e72dac5f91583e6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 22 Jan 2012 00:20:40 +0000 Subject: ethtool: allow ETHTOOL_GSSET_INFO for users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f80400a26a2e8bff541de12834a1134358bb6642 ] Allow ETHTOOL_GSSET_INFO ethtool ioctl() for unprivileged users. ETHTOOL_GSTRINGS is already allowed, but is unusable without this one. Signed-off-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4fb7704..891b19f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1964,6 +1964,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GTXCSUM: case ETHTOOL_GSG: + case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: -- cgit v1.1 From 4acd9a65e16de99c4afe89e4ac8a16b501c92450 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 26 Jun 2012 05:48:45 +0000 Subject: bridge: Assign rtnl_link_ops to bridge devices created via ioctl (v2) [ Upstream commit 149ddd83a92b02c658d6c61f3276eb6500d585e8 ] This ensures that bridges created with brctl(8) or ioctl(2) directly also carry IFLA_LINKINFO when dumped over netlink. This also allows creating a bridge with ioctl(2) and deleting it with RTM_DELLINK. Signed-off-by: Thomas Graf Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4490873..eae6a4e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -241,6 +241,7 @@ int br_add_bridge(struct net *net, const char *name) return -ENOMEM; dev_net_set(dev, net); + dev->rtnl_link_ops = &br_link_ops; res = register_netdev(dev); if (res) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2c16055..71861a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -203,7 +203,7 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static struct rtnl_link_ops br_link_ops __read_mostly = { +struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1ca1b1c..7c1f3a0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -529,6 +529,7 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) #endif /* br_netlink.c */ +extern struct rtnl_link_ops br_link_ops; extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); -- cgit v1.1 From 5eceb057268c275e8193a03ed159bf540038feac Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Jun 2012 12:08:33 +0000 Subject: ipv6: Move ipv6 proc file registration to end of init order [ Upstream commit d189634ecab947c10f6f832258b103d0bbfe73cc ] /proc/net/ipv6_route reflects the contents of fib_table_hash. The proc handler is installed in ip6_route_net_init() whereas fib_table_hash is allocated in fib6_net_init() _after_ the proc handler has been installed. This opens up a short time frame to access fib_table_hash with its pants down. Move the registration of the proc files to a later point in the init order to avoid the race. Tested :-) Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e600f8..7c5b4cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2846,10 +2846,6 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; -#ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif net->ipv6.ip6_rt_gc_expire = 30*HZ; ret = 0; @@ -2870,10 +2866,6 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { -#ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -#endif kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -2882,11 +2874,33 @@ static void __net_exit ip6_route_net_exit(struct net *net) dst_entries_destroy(&net->ipv6.ip6_dst_ops); } +static int __net_init ip6_route_net_init_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + return 0; +} + +static void __net_exit ip6_route_net_exit_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif +} + static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, }; +static struct pernet_operations ip6_route_net_late_ops = { + .init = ip6_route_net_init_late, + .exit = ip6_route_net_exit_late, +}; + static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = 0, @@ -2936,19 +2950,25 @@ int __init ip6_route_init(void) if (ret) goto xfrm6_init; + ret = register_pernet_subsys(&ip6_route_net_late_ops); + if (ret) + goto fib6_rules_init; + ret = -ENOBUFS; if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) - goto fib6_rules_init; + goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); if (ret) - goto fib6_rules_init; + goto out_register_late_subsys; out: return ret; +out_register_late_subsys: + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: @@ -2967,6 +2987,7 @@ out_kmem_cache: void ip6_route_cleanup(void) { unregister_netdevice_notifier(&ip6_route_dev_notifier); + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); -- cgit v1.1 From c5a07578befc10cde568f54a2103ad4273ef98c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 19:30:21 +0000 Subject: netpoll: fix netpoll_send_udp() bugs [ Upstream commit 954fba0274058d27c7c07b5ea07c41b3b7477894 ] Bogdan Hamciuc diagnosed and fixed following bug in netpoll_send_udp() : "skb->len += len;" instead of "skb_put(skb, len);" Meaning that _if_ a network driver needs to call skb_realloc_headroom(), only packet headers would be copied, leaving garbage in the payload. 
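To make the skb_put() point concrete, here is a minimal illustration of the correct versus the buggy way of appending the payload. It is an illustrative fragment, not code from the patch; the skb helpers named are the standard ones.

/* Illustration: why skb_put() matters in netpoll_send_udp(). */
static void fill_payload(struct sk_buff *skb, const char *msg, int len)
{
	skb_copy_to_linear_data(skb, msg, len);	/* copy bytes into the linear area */
	skb_put(skb, len);			/* advances skb->tail AND skb->len */

	/*
	 * The old code did "skb->len += len;" instead: the length claimed
	 * the bytes but skb->tail never moved, so skb_realloc_headroom()
	 * (which copies the head..tail region into the new buffer) carried
	 * over the headers only and left garbage where the payload belongs.
	 */
}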
However the skb_realloc_headroom() must be avoided as much as possible since it requires memory and netpoll tries hard to work even if memory is exhausted (using a pool of preallocated skbs) It appears netpoll_send_udp() reserved 16 bytes for the ethernet header, which happens to work for typicall drivers but not all. Right thing is to use LL_RESERVED_SPACE(dev) (And also add dev->needed_tailroom of tailroom) This patch combines both fixes. Many thanks to Bogdan for raising this issue. Reported-by: Bogdan Hamciuc Signed-off-by: Eric Dumazet Tested-by: Bogdan Hamciuc Cc: Herbert Xu Cc: Neil Horman Reviewed-by: Neil Horman Reviewed-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/netpoll.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 05db410..207a178 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -357,22 +357,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { - int total_len, eth_len, ip_len, udp_len; + int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; udp_len = len + sizeof(*udph); - ip_len = eth_len = udp_len + sizeof(*iph); - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); - skb = find_skb(np, total_len, total_len - len); + skb = find_skb(np, total_len + np->dev->needed_tailroom, + total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); - skb->len += len; + skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); -- cgit v1.1 From c229e2f6bab8ed64bf44110831d36221c648e1bf Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 12 Jun 2012 12:53:13 +0300 Subject: cfg80211: fix potential deadlock in regulatory commit fe20b39ec32e975f1054c0b7866c873a954adf05 upstream. reg_timeout_work() calls restore_regulatory_settings() which takes cfg80211_mutex. reg_set_request_processed() already holds cfg80211_mutex before calling cancel_delayed_work_sync(reg_timeout), so it might deadlock. Call the async cancel_delayed_work instead, in order to avoid the potential deadlock. This is the relevant lockdep warning: cfg80211: Calling CRDA for country: XX ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.0-rc5-wl+ #26 Not tainted ------------------------------------------------------- kworker/0:2/1391 is trying to acquire lock: (cfg80211_mutex){+.+.+.}, at: [] restore_regulatory_settings+0x34/0x418 [cfg80211] but task is already holding lock: ((reg_timeout).work){+.+...}, at: [] process_one_work+0x1f0/0x480 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 ((reg_timeout).work){+.+...}: [] validate_chain+0xb94/0x10f0 [] __lock_acquire+0x8c8/0x9b0 [] lock_acquire+0xf0/0x114 [] wait_on_work+0x4c/0x154 [] __cancel_work_timer+0xd4/0x11c [] cancel_delayed_work_sync+0x1c/0x20 [] reg_set_request_processed+0x50/0x78 [cfg80211] [] set_regdom+0x550/0x600 [cfg80211] [] nl80211_set_reg+0x218/0x258 [cfg80211] [] genl_rcv_msg+0x1a8/0x1e8 [] netlink_rcv_skb+0x5c/0xc0 [] genl_rcv+0x28/0x34 [] netlink_unicast+0x15c/0x228 [] netlink_sendmsg+0x218/0x298 [] sock_sendmsg+0xa4/0xc0 [] __sys_sendmsg+0x1e4/0x268 [] sys_sendmsg+0x4c/0x70 [] ret_fast_syscall+0x0/0x3c -> #1 (reg_mutex){+.+.+.}: [] validate_chain+0xb94/0x10f0 [] __lock_acquire+0x8c8/0x9b0 [] lock_acquire+0xf0/0x114 [] mutex_lock_nested+0x48/0x320 [] reg_todo+0x30/0x538 [cfg80211] [] process_one_work+0x2a0/0x480 [] worker_thread+0x1bc/0x2bc [] kthread+0x98/0xa4 [] kernel_thread_exit+0x0/0x8 -> #0 (cfg80211_mutex){+.+.+.}: [] print_circular_bug+0x68/0x2cc [] validate_chain+0x978/0x10f0 [] __lock_acquire+0x8c8/0x9b0 [] lock_acquire+0xf0/0x114 [] mutex_lock_nested+0x48/0x320 [] restore_regulatory_settings+0x34/0x418 [cfg80211] [] reg_timeout_work+0x1c/0x20 [cfg80211] [] process_one_work+0x2a0/0x480 [] worker_thread+0x1bc/0x2bc [] kthread+0x98/0xa4 [] kernel_thread_exit+0x0/0x8 other info that might help us debug this: Chain exists of: cfg80211_mutex --> reg_mutex --> (reg_timeout).work Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((reg_timeout).work); lock(reg_mutex); lock((reg_timeout).work); lock(cfg80211_mutex); *** DEADLOCK *** 2 locks held by kworker/0:2/1391: #0: (events){.+.+.+}, at: [] process_one_work+0x1f0/0x480 #1: ((reg_timeout).work){+.+...}, at: [] process_one_work+0x1f0/0x480 stack backtrace: [] (unwind_backtrace+0x0/0x12c) from [] (dump_stack+0x20/0x24) [] (dump_stack+0x20/0x24) from [] (print_circular_bug+0x280/0x2cc) [] (print_circular_bug+0x280/0x2cc) from [] (validate_chain+0x978/0x10f0) [] (validate_chain+0x978/0x10f0) from [] (__lock_acquire+0x8c8/0x9b0) [] (__lock_acquire+0x8c8/0x9b0) from [] (lock_acquire+0xf0/0x114) [] (lock_acquire+0xf0/0x114) from [] (mutex_lock_nested+0x48/0x320) [] (mutex_lock_nested+0x48/0x320) from [] (restore_regulatory_settings+0x34/0x418 [cfg80211]) [] (restore_regulatory_settings+0x34/0x418 [cfg80211]) from [] (reg_timeout_work+0x1c/0x20 [cfg80211]) [] (reg_timeout_work+0x1c/0x20 [cfg80211]) from [] (process_one_work+0x2a0/0x480) [] (process_one_work+0x2a0/0x480) from [] (worker_thread+0x1bc/0x2bc) [] (worker_thread+0x1bc/0x2bc) from [] (kthread+0x98/0xa4) [] (kthread+0x98/0xa4) from [] (kernel_thread_exit+0x0/0x8) cfg80211: Calling CRDA to update world regulatory domain cfg80211: World regulatory domain updated: cfg80211: (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp) cfg80211: (2402000 KHz - 2472000 KHz @ 40000 KHz), (300 mBi, 2000 mBm) cfg80211: (2457000 KHz - 2482000 KHz @ 20000 KHz), (300 mBi, 2000 mBm) cfg80211: (2474000 KHz - 2494000 KHz @ 20000 KHz), (300 mBi, 2000 mBm) cfg80211: (5170000 KHz - 5250000 KHz @ 40000 KHz), (300 mBi, 2000 mBm) cfg80211: (5735000 KHz - 5835000 KHz @ 40000 KHz), (300 mBi, 2000 mBm) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7457697..90b73d1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1358,7 +1358,7 @@ static void 
reg_set_request_processed(void) spin_unlock(®_requests_lock); if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) - cancel_delayed_work_sync(®_timeout); + cancel_delayed_work(®_timeout); if (need_more_processing) schedule_work(®_work); -- cgit v1.1 From 1dc1e5ad5a05da69c51446f9c8a2c097884fece7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 15:38:56 +0200 Subject: mac80211: correct behaviour on unrecognised action frames commit 4b5ebccc40843104d980f0714bc86bfcd5568941 upstream. When receiving an "individually addressed" action frame, the receiver is required to return it to the sender. mac80211 gets this wrong as it also returns group addressed (mcast) frames to the sender. Fix this and update the reference to the new 802.11 standards version since things were shuffled around significantly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4100065..667f559 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2291,7 +2291,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) * frames that we didn't handle, including returning unknown * ones. For all other modes we will return them to the sender, * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. + * 802.11-2012 9.24.4. * Newer versions of hostapd shall also use the management frame * registration mechanisms, but older ones still use cooked * monitor interfaces so push all frames there. @@ -2301,6 +2301,9 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) return RX_DROP_MONITOR; + if (is_multicast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) return RX_DROP_UNUSABLE; -- cgit v1.1 From 1a4eda9788a542347dfc2b1684636bcbe2ee3f79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Dec 2011 23:41:42 +0000 Subject: tcp: drop SYN+FIN messages commit fdf5af0daf8019cec2396cdef8fb042d80fe71fa upstream. Denys Fedoryshchenko reported that SYN+FIN attacks were bringing his linux machines to their limits. Dont call conn_request() if the TCP flags includes SYN flag Reported-by: Denys Fedoryshchenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7410a8c..6e33b79c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5761,6 +5761,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; -- cgit v1.1 From 31b83ef7cfda5a7b74446ca70c1e231b24450cbd Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 8 Jun 2012 10:55:44 +0200 Subject: cfg80211: check iface combinations only when iface is running commit f8cdddb8d61d16a156229f0910f7ecfc7a82c003 upstream. Don't validate interface combinations on a stopped interface. Otherwise we might end up being able to create a new interface with a certain type, but won't be able to change an existing interface into that type. This also skips some other functions when interface is stopped and changing interface type. 
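A side note on the cfg80211 regulatory deadlock fixed earlier in this series: the general rule is to never wait synchronously for a work item while holding a lock that the work handler itself acquires. The following is a minimal, illustrative kernel-style sketch of that pattern; the symbol names are made up and are not the actual cfg80211 code.

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(state_mutex);

static void timeout_fn(struct work_struct *work)
{
	mutex_lock(&state_mutex);	/* the work handler needs the lock */
	/* ... restore default state ... */
	mutex_unlock(&state_mutex);
}
static DECLARE_DELAYED_WORK(timeout_work, timeout_fn);

static void request_processed(void)
{
	mutex_lock(&state_mutex);
	/*
	 * cancel_delayed_work_sync() here could deadlock: if timeout_fn()
	 * is already running it is blocked on state_mutex, which we hold.
	 * cancel_delayed_work() only dequeues a pending work item and does
	 * not wait for a running one, so it is safe under the lock (at the
	 * cost of letting an in-flight run finish on its own).
	 */
	cancel_delayed_work(&timeout_work);
	mutex_unlock(&state_mutex);
}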
Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg [Fixes regression introduced by cherry pick of 463454b5dbd8] Signed-off-by: Paul Gortmaker --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 30f68dc..bbcb58e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -807,7 +807,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, ntype == NL80211_IFTYPE_P2P_CLIENT)) return -EBUSY; - if (ntype != otype) { + if (ntype != otype && netif_running(dev)) { err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); if (err) -- cgit v1.1 From eb65b85e1bce06b665b3568c19a249cd886fa6ff Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:58:51 -0400 Subject: nfs: skip commit in releasepage if we're freeing memory for fs-related reasons commit 5cf02d09b50b1ee1c2d536c9cf64af5a7d433f56 upstream. We've had some reports of a deadlock where rpciod ends up with a stack trace like this: PID: 2507 TASK: ffff88103691ab40 CPU: 14 COMMAND: "rpciod/14" #0 [ffff8810343bf2f0] schedule at ffffffff814dabd9 #1 [ffff8810343bf3b8] nfs_wait_bit_killable at ffffffffa038fc04 [nfs] #2 [ffff8810343bf3c8] __wait_on_bit at ffffffff814dbc2f #3 [ffff8810343bf418] out_of_line_wait_on_bit at ffffffff814dbcd8 #4 [ffff8810343bf488] nfs_commit_inode at ffffffffa039e0c1 [nfs] #5 [ffff8810343bf4f8] nfs_release_page at ffffffffa038bef6 [nfs] #6 [ffff8810343bf528] try_to_release_page at ffffffff8110c670 #7 [ffff8810343bf538] shrink_page_list.clone.0 at ffffffff81126271 #8 [ffff8810343bf668] shrink_inactive_list at ffffffff81126638 #9 [ffff8810343bf818] shrink_zone at ffffffff8112788f #10 [ffff8810343bf8c8] do_try_to_free_pages at ffffffff81127b1e #11 [ffff8810343bf958] try_to_free_pages at ffffffff8112812f #12 [ffff8810343bfa08] __alloc_pages_nodemask at ffffffff8111fdad #13 [ffff8810343bfb28] kmem_getpages at ffffffff81159942 #14 [ffff8810343bfb58] fallback_alloc at ffffffff8115a55a #15 [ffff8810343bfbd8] ____cache_alloc_node at ffffffff8115a2d9 #16 [ffff8810343bfc38] kmem_cache_alloc at ffffffff8115b09b #17 [ffff8810343bfc78] sk_prot_alloc at ffffffff81411808 #18 [ffff8810343bfcb8] sk_alloc at ffffffff8141197c #19 [ffff8810343bfce8] inet_create at ffffffff81483ba6 #20 [ffff8810343bfd38] __sock_create at ffffffff8140b4a7 #21 [ffff8810343bfd98] xs_create_sock at ffffffffa01f649b [sunrpc] #22 [ffff8810343bfdd8] xs_tcp_setup_socket at ffffffffa01f6965 [sunrpc] #23 [ffff8810343bfe38] worker_thread at ffffffff810887d0 #24 [ffff8810343bfee8] kthread at ffffffff8108dd96 #25 [ffff8810343bff48] kernel_thread at ffffffff8100c1ca rpciod is trying to allocate memory for a new socket to talk to the server. The VM ends up calling ->releasepage to get more memory, and it tries to do a blocking commit. That commit can't succeed however without a connected socket, so we deadlock. Fix this by setting PF_FSTRANS on the workqueue task prior to doing the socket allocation, and having nfs_release_page check for that flag when deciding whether to do a commit call. Also, set PF_FSTRANS unconditionally in rpc_async_schedule since that function can also do allocations sometimes. 
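The PF_FSTRANS bracketing described above reduces to a small pattern around the allocation-prone part of the worker. A simplified sketch follows; do_transport_setup() is a placeholder, and unlike the minimal patches below it also shows the save/restore variant for contexts where the flag might already be set.

#include <linux/sched.h>
#include <linux/workqueue.h>

static void do_transport_setup(struct work_struct *work);	/* placeholder */

static void connect_worker(struct work_struct *work)
{
	unsigned int pflags = current->flags;

	/* Tell page reclaim not to recurse into filesystem writeback
	 * while this task allocates memory (e.g. for a new socket). */
	current->flags |= PF_FSTRANS;

	do_transport_setup(work);

	/* Clear the bit only if we were the ones who set it. */
	if (!(pflags & PF_FSTRANS))
		current->flags &= ~PF_FSTRANS;
}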
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/sched.c | 2 ++ net/sunrpc/xprtrdma/transport.c | 3 ++- net/sunrpc/xprtsock.c | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b6bb225..c57f97f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -713,7 +713,9 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + current->flags |= PF_FSTRANS; __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + current->flags &= ~PF_FSTRANS; } /** diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0867070..d0b5210 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) int rc = 0; if (!xprt->shutdown) { + current->flags |= PF_FSTRANS; xprt_clear_connected(xprt); dprintk("RPC: %s: %sconnect\n", __func__, @@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) out: xprt_wake_pending_tasks(xprt, rc); - out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ea75079..554111f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1882,6 +1882,8 @@ static void xs_local_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); @@ -1915,6 +1917,7 @@ static void xs_local_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -1957,6 +1960,8 @@ static void xs_udp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + /* Start by resetting any existing state */ xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, @@ -1975,6 +1980,7 @@ static void xs_udp_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /* @@ -2100,6 +2106,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, @@ -2149,6 +2157,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; return; case -EINVAL: /* Happens, for instance, if the user specified a link @@ -2161,6 +2170,7 @@ out_eagain: out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /** -- cgit v1.1 From 9b9f676623b92483d890d1a3a52611c09fc190f3 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 12 Jul 2012 03:39:11 +0000 Subject: sch_sfb: Fix missing NULL check [ Upstream commit 7ac2908e4b2edaec60e9090ddb4d9ceb76c05e7d ] Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=44461 Signed-off-by: Alan Cox Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_sfb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 47ee29f..e85b2487 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -556,6 +556,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) sch->qstats.backlog = q->qdisc->qstats.backlog; opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); return nla_nest_end(skb, opts); -- cgit v1.1 From 2f890d2777247beb207be1c99835a0c5e09d340c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 16 Jul 2012 09:13:51 +0000 Subject: sctp: Fix list corruption resulting from freeing an association on a list [ Upstream commit 2eebc1e188e9e45886ee00662519849339884d6d ] A few days ago Dave Jones reported this oops: [22766.294255] general protection fault: 0000 [#1] PREEMPT SMP [22766.295376] CPU 0 [22766.295384] Modules linked in: [22766.387137] ffffffffa169f292 6b6b6b6b6b6b6b6b ffff880147c03a90 ffff880147c03a74 [22766.387135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000 [22766.387136] Process trinity-watchdo (pid: 10896, threadinfo ffff88013e7d2000, [22766.387137] Stack: [22766.387140] ffff880147c03a10 [22766.387140] ffffffffa169f2b6 [22766.387140] ffff88013ed95728 [22766.387143] 0000000000000002 [22766.387143] 0000000000000000 [22766.387143] ffff880003fad062 [22766.387144] ffff88013c120000 [22766.387144] [22766.387145] Call Trace: [22766.387145] [22766.387150] [] ? __sctp_lookup_association+0x62/0xd0 [sctp] [22766.387154] [] __sctp_lookup_association+0x86/0xd0 [sctp] [22766.387157] [] sctp_rcv+0x207/0xbb0 [sctp] [22766.387161] [] ? trace_hardirqs_off_caller+0x28/0xd0 [22766.387163] [] ? nf_hook_slow+0x133/0x210 [22766.387166] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387168] [] ip_local_deliver_finish+0x18d/0x4c0 [22766.387169] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387171] [] ip_local_deliver+0x47/0x80 [22766.387172] [] ip_rcv_finish+0x150/0x680 [22766.387174] [] ip_rcv+0x214/0x320 [22766.387176] [] __netif_receive_skb+0x7b7/0x910 [22766.387178] [] ? __netif_receive_skb+0x11c/0x910 [22766.387180] [] ? put_lock_stats.isra.25+0xe/0x40 [22766.387182] [] netif_receive_skb+0x23/0x1f0 [22766.387183] [] ? dev_gro_receive+0x139/0x440 [22766.387185] [] napi_skb_finish+0x70/0xa0 [22766.387187] [] napi_gro_receive+0xf5/0x130 [22766.387218] [] e1000_receive_skb+0x59/0x70 [e1000e] [22766.387242] [] e1000_clean_rx_irq+0x28b/0x460 [e1000e] [22766.387266] [] e1000e_poll+0x78/0x430 [e1000e] [22766.387268] [] net_rx_action+0x1aa/0x3d0 [22766.387270] [] ? account_system_vtime+0x10f/0x130 [22766.387273] [] __do_softirq+0xe0/0x420 [22766.387275] [] call_softirq+0x1c/0x30 [22766.387278] [] do_softirq+0xd5/0x110 [22766.387279] [] irq_exit+0xd5/0xe0 [22766.387281] [] do_IRQ+0x63/0xd0 [22766.387283] [] common_interrupt+0x6f/0x6f [22766.387283] [22766.387284] [22766.387285] [] ? 
retint_swapgs+0x13/0x1b [22766.387285] Code: c0 90 5d c3 66 0f 1f 44 00 00 4c 89 c8 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 <0f> b7 87 98 00 00 00 48 89 fb 49 89 f5 66 c1 c0 08 66 39 46 02 [22766.387307] [22766.387307] RIP [22766.387311] [] sctp_assoc_is_match+0x19/0x90 [sctp] [22766.387311] RSP [22766.387142] ffffffffa16ab120 [22766.599537] ---[ end trace 3f6dae82e37b17f5 ]--- [22766.601221] Kernel panic - not syncing: Fatal exception in interrupt It appears from his analysis and some staring at the code that this is likely occuring because an association is getting freed while still on the sctp_assoc_hashtable. As a result, we get a gpf when traversing the hashtable while a freed node corrupts part of the list. Nominally I would think that an mibalanced refcount was responsible for this, but I can't seem to find any obvious imbalance. What I did note however was that the two places where we create an association using sctp_primitive_ASSOCIATE (__sctp_connect and sctp_sendmsg), have failure paths which free a newly created association after calling sctp_primitive_ASSOCIATE. sctp_primitive_ASSOCIATE brings us into the sctp_sf_do_prm_asoc path, which issues a SCTP_CMD_NEW_ASOC side effect, which in turn adds a new association to the aforementioned hash table. the sctp command interpreter that process side effects has not way to unwind previously processed commands, so freeing the association from the __sctp_connect or sctp_sendmsg error path would lead to a freed association remaining on this hash table. I've fixed this but modifying sctp_[un]hash_established to use hlist_del_init, which allows us to proerly use hlist_unhashed to check if the node is on a hashlist safely during a delete. That in turn alows us to safely call sctp_unhash_established in the __sctp_connect and sctp_sendmsg error paths before freeing them, regardles of what the associations state is on the hash list. I noted, while I was doing this, that the __sctp_unhash_endpoint was using hlist_unhsashed in a simmilar fashion, but never nullified any removed nodes pointers to make that function work properly, so I fixed that up in a simmilar fashion. I attempted to test this using a virtual guest running the SCTP_RR test from netperf in a loop while running the trinity fuzzer, both in a loop. I wasn't able to recreate the problem prior to this fix, nor was I able to trigger the failure after (neither of which I suppose is suprising). Given the trace above however, I think its likely that this is what we hit. Signed-off-by: Neil Horman Reported-by: davej@redhat.com CC: davej@redhat.com CC: "David S. Miller" CC: Vlad Yasevich CC: Sridhar Samudrala CC: linux-sctp@vger.kernel.org Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/input.c | 7 ++----- net/sctp/socket.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 741ed16..cd9eded 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -737,15 +737,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) epb = &ep->base; - if (hlist_unhashed(&epb->node)) - return; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } @@ -826,7 +823,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4434853..b70a3ee 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1160,8 +1160,14 @@ out_free: SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" " kaddrs: %p err: %d\n", asoc, kaddrs, err); - if (asoc) + if (asoc) { + /* sctp_primitive_ASSOCIATE may have added this association + * To the hash table, try to unhash it, just in case, its a noop + * if it wasn't hashed so we're safe + */ + sctp_unhash_established(asoc); sctp_association_free(asoc); + } return err; } @@ -1871,8 +1877,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; out_free: - if (new_asoc) + if (new_asoc) { + sctp_unhash_established(asoc); sctp_association_free(asoc); + } out_unlock: sctp_release_sock(sk); -- cgit v1.1 From 22cb83b5a318697b09fe1d6e237703d8371ab1fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Sun, 15 Jul 2012 10:10:14 +0000 Subject: caif: Fix access to freed pernet memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 96f80d123eff05c3cd4701463786b87952a6c3ac ] unregister_netdevice_notifier() must be called before unregister_pernet_subsys() to avoid accessing already freed pernet memory. This fixes the following oops when doing rmmod: Call Trace: [] caif_device_notify+0x4d/0x5a0 [caif] [] unregister_netdevice_notifier+0xb9/0x100 [] caif_device_exit+0x1c/0x250 [caif] [] sys_delete_module+0x1a4/0x300 [] ? trace_hardirqs_on_caller+0x15d/0x1e0 [] ? trace_hardirqs_on_thunk+0x3a/0x3 [] system_call_fastpath+0x1a/0x1f RIP [] caif_get+0x51/0xb0 [caif] Signed-off-by: Sjur Brændeland Acked-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 5ba4366..804e50f 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -424,9 +424,9 @@ static int __init caif_device_init(void) static void __exit caif_device_exit(void) { - unregister_pernet_subsys(&caif_net_ops); unregister_netdevice_notifier(&caif_device_notifier); dev_remove_pack(&caif_packet_type); + unregister_pernet_subsys(&caif_net_ops); } module_init(caif_device_init); -- cgit v1.1 From bca8ae51a3761a9fc4795641541a3478001975c1 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 17 Jul 2012 11:07:47 +0000 Subject: cipso: don't follow a NULL pointer when setsockopt() is called [ Upstream commit 89d7ae34cdda4195809a5a987f697a517a2a3177 ] As reported by Alan Cox, and verified by Lin Ming, when a user attempts to add a CIPSO option to a socket using the CIPSO_V4_TAG_LOCAL tag the kernel dies a terrible death when it attempts to follow a NULL pointer (the skb argument to cipso_v4_validate() is NULL when called via the setsockopt() syscall). This patch fixes this by first checking to ensure that the skb is non-NULL before using it to find the incoming network interface. In the unlikely case where the skb is NULL and the user attempts to add a CIPSO option with the _TAG_LOCAL tag we return an error as this is not something we want to allow. A simple reproducer, kindly supplied by Lin Ming, although you must have the CIPSO DOI #3 configure on the system first or you will be caught early in cipso_v4_validate(): #include #include #include #include #include struct local_tag { char type; char length; char info[4]; }; struct cipso { char type; char length; char doi[4]; struct local_tag local; }; int main(int argc, char **argv) { int sockfd; struct cipso cipso = { .type = IPOPT_CIPSO, .length = sizeof(struct cipso), .local = { .type = 128, .length = sizeof(struct local_tag), }, }; memset(cipso.doi, 0, 4); cipso.doi[3] = 3; sockfd = socket(AF_INET, SOCK_DGRAM, 0); #define SOL_IP 0 setsockopt(sockfd, SOL_IP, IP_OPTIONS, &cipso, sizeof(struct cipso)); return 0; } CC: Lin Ming Reported-by: Alan Cox Signed-off-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2b3c23c..062876b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is - * not the loopback device drop the packet. */ - if (!(skb->dev->flags & IFF_LOOPBACK)) { + * not the loopback device drop the packet. Further, + * there is no legitimate reason for setting this from + * userspace so reject it if skb is NULL. */ + if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } -- cgit v1.1 From 8a22bda491f4fb197c31c2d50c18a816be494a75 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 24 Jul 2012 08:16:25 +0000 Subject: wanmain: comparing array with NULL [ Upstream commit 8b72ff6484fe303e01498b58621810a114f3cf09 ] gcc really should warn about these ! Signed-off-by: Alan Cox Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/wanrouter/wanmain.c | 51 ++++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 788a12c..2ab7850 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -602,36 +602,31 @@ static int wanrouter_device_new_if(struct wan_device *wandev, * successfully, add it to the interface list. */ - if (dev->name == NULL) { - err = -EINVAL; - } else { +#ifdef WANDEBUG + printk(KERN_INFO "%s: registering interface %s...\n", + wanrouter_modname, dev->name); +#endif - #ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); - #endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; + err = register_netdev(dev); + if (!err) { + struct net_device *slave = NULL; + unsigned long smp_flags=0; + + lock_adapter_irq(&wandev->lock, &smp_flags); + + if (wandev->dev == NULL) { + wandev->dev = dev; + } else { + for (slave=wandev->dev; + DEV_TO_SLAVE(slave); + slave = DEV_TO_SLAVE(slave)) + DEV_TO_SLAVE(slave) = dev; } + ++wandev->ndev; + + unlock_adapter_irq(&wandev->lock, &smp_flags); + err = 0; /* done !!! */ + goto out; } if (wandev->del_if) wandev->del_if(wandev, dev); -- cgit v1.1 From 8d7c99de6821880884e6f9ade1c6f269d40ebbae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 26 Jul 2012 22:52:21 +0000 Subject: tcp: Add TCP_USER_TIMEOUT negative value check [ Upstream commit 42493570100b91ef663c4c6f0c0fdab238f9d3c2 ] TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int. But patch "tcp: Add TCP_USER_TIMEOUT socket option"(dca43c75) didn't check the negative values. If a user assign -1 to it, the socket will set successfully and wait for 4294967295 miliseconds. This patch add a negative value check to avoid this issue. Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6db041d..b6ec23c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2394,7 +2394,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Cap the max timeout in ms TCP will retry/retrans * before giving up and aborting (ETIMEDOUT) a connection. */ - icsk->icsk_user_timeout = msecs_to_jiffies(val); + if (val < 0) + err = -EINVAL; + else + icsk->icsk_user_timeout = msecs_to_jiffies(val); break; default: err = -ENOPROTOOPT; -- cgit v1.1 From c94eb3f964dd7c08486d7c1227c7b5b69d09aabe Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 27 Jul 2012 02:58:22 +0000 Subject: net: fix rtnetlink IFF_PROMISC and IFF_ALLMULTI handling [ Upstream commit b1beb681cba5358f62e6187340660ade226a5fcc ] When device flags are set using rtnetlink, IFF_PROMISC and IFF_ALLMULTI flags are handled specially. Function dev_change_flags sets IFF_PROMISC and IFF_ALLMULTI bits in dev->gflags according to the passed value but do_setlink passes a result of rtnl_dev_combine_flags which takes those bits from dev->flags. 
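As a user-space illustration of the TCP_USER_TIMEOUT check added above: the option takes an unsigned timeout in milliseconds, and after the fix a negative value is rejected instead of silently becoming a huge timeout. This is a sketch only; the fallback define assumes the libc headers do not already provide the option constant.

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_USER_TIMEOUT
#define TCP_USER_TIMEOUT 18	/* in case the libc headers predate it */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	unsigned int ms = 30000;	/* abort after 30 s of unacked data */
	int bogus = -1;

	/* Intended use: an unsigned timeout in milliseconds. */
	setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &ms, sizeof(ms));

	/* Before the fix this was accepted and armed a ~49.7 day timeout
	 * (4294967295 ms); with the fix it fails with EINVAL.           */
	if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &bogus, sizeof(bogus)) < 0)
		perror("TCP_USER_TIMEOUT");

	return 0;
}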
This can be easily trigerred by doing: tcpdump -i eth0 & ip l s up eth0 ip sets IFF_UP flag in ifi_flags and ifi_change, which is combined with IFF_PROMISC by rtnl_dev_combine_flags, causing __dev_change_flags to set IFF_PROMISC in gflags. Reported-by: Max Matveev Signed-off-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index abd936d..861d53f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -647,6 +647,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } +static unsigned int rtnl_dev_get_flags(const struct net_device *dev) +{ + return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | + (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); +} + static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, const struct ifinfomsg *ifm) { @@ -655,7 +661,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); + (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); return flags; } -- cgit v1.1 From 41f079a0e1089bf0ed2b795cc427bf40b72efe0e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Jul 2012 10:38:50 +0000 Subject: tcp: perform DMA to userspace only if there is a task waiting for it [ Upstream commit 59ea33a68a9083ac98515e4861c00e71efdc49a1 ] Back in 2006, commit 1a2449a87b ("[I/OAT]: TCP recv offload to I/OAT") added support for receive offloading to IOAT dma engine if available. The code in tcp_rcv_established() tries to perform early DMA copy if applicable. It however does so without checking whether the userspace task is actually expecting the data in the buffer. This is not a problem under normal circumstances, but there is a corner case where this doesn't work -- and that's when MSG_TRUNC flag to recvmsg() is used. If the IOAT dma engine is not used, the code properly checks whether there is a valid ucopy.task and the socket is owned by userspace, but misses the check in the dmaengine case. This problem can be observed in real trivially -- for example 'tbench' is a good reproducer, as it makes a heavy use of MSG_TRUNC. On systems utilizing IOAT, you will soon find tbench waiting indefinitely in sk_wait_data(), as they have been already early-copied in tcp_rcv_established() using dma engine. This patch introduces the same check we are performing in the simple iovec copy case to the IOAT case as well. It fixes the indefinite recvmsg(MSG_TRUNC) hangs. Signed-off-by: Jiri Kosina Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6e33b79c..b76aa2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5340,7 +5340,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { #ifdef CONFIG_NET_DMA - if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + if (tp->ucopy.task == current && + sock_owned_by_user(sk) && + tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { copied_early = 1; eaten = 1; } -- cgit v1.1 From d90c97ba987ab9f1c0a1e11697269814386243b9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 20 Jul 2012 15:57:48 +0400 Subject: SUNRPC: return negative value in case rpcbind client creation error commit caea33da898e4e14f0ba58173e3b7689981d2c0b upstream. Without this patch kernel will panic on LockD start, because lockd_up() checks lockd_up_net() result for negative value. From my pow it's better to return negative value from rpcbind routines instead of replacing all such checks like in lockd_up(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e45d2fb..bf0a7f6 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -193,7 +193,7 @@ static int rpcb_create_local_unix(void) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } @@ -242,7 +242,7 @@ static int rpcb_create_local_net(void) if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } -- cgit v1.1 From 61e0a9e79d85258d254c5bfb7832c40ba2c46ed4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 21:03:21 +0200 Subject: mac80211: cancel mesh path timer commit dd4c9260e7f23f2e951cbfb2726e468c6d30306c upstream. The mesh path timer needs to be canceled when leaving the mesh as otherwise it could fire after the interface has been removed already. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 29e9980..370aa94 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -490,6 +490,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); + del_timer_sync(&sdata->u.mesh.mesh_path_timer); /* * If the timer fired while we waited for it, it will have * requeued the work. Now the work will be running again -- cgit v1.1 From 118f98c7de67622c27ef9c968958b6de8483357f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Jul 2012 21:23:25 -0400 Subject: net: feed /dev/random with the MAC address when registering a device commit 7bf2357524408b97fec58344caf7397f8140c3fd upstream. 
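An aside on the rpcbind error-path fix above: PTR_ERR() already yields a negative errno for a pointer produced by ERR_PTR(), so negating it turns the error positive and defeats callers that test for a negative return. A minimal sketch of the convention; the types and helpers are placeholders, not the real sunrpc symbols.

#include <linux/err.h>

struct some_client;					/* placeholder type              */
extern struct some_client *make_client(void);		/* returns ERR_PTR(-errno) on    */
extern void register_client(struct some_client *c);	/* failure; both are placeholders */

static int create_local_client(void)
{
	struct some_client *clnt = make_client();

	if (IS_ERR(clnt))
		return PTR_ERR(clnt);	/* already negative, e.g. -ENOMEM;     */
					/* 'return -PTR_ERR(clnt)' would flip  */
					/* it positive and fool 'if (err < 0)' */

	register_client(clnt);
	return 0;
}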
Signed-off-by: "Theodore Ts'o" Cc: David Miller Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 +++ net/core/rtnetlink.c | 1 + 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a71eafc..8235b81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1163,6 +1163,7 @@ static int __dev_open(struct net_device *dev) net_dmaengine_get(); dev_set_rx_mode(dev); dev_activate(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); } return ret; @@ -4730,6 +4731,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + add_device_randomness(dev->dev_addr, dev->addr_len); return err; } EXPORT_SYMBOL(dev_set_mac_address); @@ -5507,6 +5509,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 861d53f..ac49ad5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1304,6 +1304,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; send_addr_notify = 1; modified = 1; + add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { -- cgit v1.1 From b27c59d2c23d5f326a74e02a799cc4d00246165a Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 2 Aug 2012 18:41:48 +0100 Subject: cfg80211: process pending events when unregistering net device commit 1f6fc43e621167492ed4b7f3b4269c584c3d6ccc upstream. libertas currently calls cfg80211_disconnected() when it is being brought down. This causes an event to be allocated, but since the wdev is already removed from the rdev by the time that the event processing work executes, the event is never processed or freed. http://article.gmane.org/gmane.linux.kernel.wireless.general/95666 Fix this leak, and other possible situations, by processing the event queue when a device is being unregistered. Thanks to Johannes Berg for the suggestion. Signed-off-by: Daniel Drake Reviewed-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 5 +++++ net/wireless/core.h | 1 + net/wireless/util.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 880dbe2..498c760 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -959,6 +959,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, */ synchronize_rcu(); INIT_LIST_HEAD(&wdev->list); + /* + * Ensure that all events have been processed and + * freed. 
+ */ + cfg80211_process_wdev_events(wdev); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index a570ff9..8351645 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wdev_events(struct wireless_dev *wdev); int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index bbcb58e..4d69149 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -719,7 +719,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) wdev->connect_keys = NULL; } -static void cfg80211_process_wdev_events(struct wireless_dev *wdev) +void cfg80211_process_wdev_events(struct wireless_dev *wdev) { struct cfg80211_event *ev; unsigned long flags; -- cgit v1.1 From 4f5a5866aa4dccf42b2fa1cdecf372025a3e86cc Mon Sep 17 00:00:00 2001 From: Liang Li Date: Thu, 2 Aug 2012 18:55:41 -0400 Subject: cfg80211: fix interface combinations check for ADHOC(IBSS) partial of commit 8e8b41f9d8c8e63fc92f899ace8da91a490ac573 upstream. As part of commit 463454b5dbd8 ("cfg80211: fix interface combinations check"), this extra check was introduced: if ((all_iftypes & used_iftypes) != used_iftypes) goto cont; However, most wireless NIC drivers did not advertise ADHOC in wiphy.iface_combinations[i].limits[] and hence we'll get -EBUSY when we bring up a ADHOC wlan with commands similar to: # iwconfig wlan0 mode ad-hoc && ifconfig wlan0 up In commit 8e8b41f9d8c8e ("cfg80211: enforce lack of interface combinations"), the change below fixes the issue: if (total == 1) return 0; But it also introduces other dependencies for stable. For example, a full cherry pick of 8e8b41f9d8c8e would introduce additional regressions unless we also start cherry picking driver specific fixes like the following: 9b4760e ath5k: add possible wiphy interface combinations 1ae2fc2 mac80211_hwsim: advertise interface combinations 20c8e8d ath9k: add possible wiphy interface combinations And the purpose of the 'if (total == 1)' is to cover the specific use case (IBSS, adhoc) that was mentioned above. So we just pick the specific part out from 8e8b41f9d8c8e here. Doing so gives stable kernels a way to fix the change introduced by 463454b5dbd8, without having to make cherry picks specific to various NIC drivers. Signed-off-by: Liang Li Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 4d69149..18e22be 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -975,6 +975,9 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, } mutex_unlock(&rdev->devlist_mtx); + if (total == 1) + return 0; + for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; struct ieee80211_iface_limit *limits; -- cgit v1.1 From e684493e06addddaf273964dff6fa0f2204953b8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 9 Aug 2012 18:12:28 -0400 Subject: svcrpc: fix BUG() in svc_tcp_clear_pages commit be1e44441a560c43c136a562d49a1c9623c91197 upstream. 
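For context on the ADHOC(IBSS) combinations patch above: the driver-side alternative it mentions, advertising ADHOC in the wiphy interface-combination limits, looks roughly like the sketch below. The limits are hypothetical and only loosely modeled on the ath5k/ath9k commits cited; a real driver derives them from its hardware.

#include <net/cfg80211.h>

static const struct ieee80211_iface_limit my_if_limits[] = {
	{ .max = 1, .types = BIT(NL80211_IFTYPE_STATION) |
			     BIT(NL80211_IFTYPE_ADHOC) },	/* IBSS advertised here */
	{ .max = 1, .types = BIT(NL80211_IFTYPE_AP) },
};

static const struct ieee80211_iface_combination my_if_comb = {
	.limits			= my_if_limits,
	.n_limits		= ARRAY_SIZE(my_if_limits),
	.max_interfaces		= 2,
	.num_different_channels	= 1,
};

static void my_driver_setup_combinations(struct wiphy *wiphy)
{
	wiphy->iface_combinations   = &my_if_comb;
	wiphy->n_iface_combinations = 1;
}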
Examination of svc_tcp_clear_pages shows that it assumes sk_tcplen is consistent with sk_pages[] (in particular, sk_pages[n] can't be NULL if sk_tcplen would lead us to expect n pages of data). svc_tcp_restore_pages zeroes out sk_pages[] while leaving sk_tcplen. This is OK, since both functions are serialized by XPT_BUSY. However, that means the inconsistency must be repaired before dropping XPT_BUSY. Therefore we should be ensuring that svc_tcp_save_pages repairs the problem before exiting svc_tcp_recv_record on error. Symptoms were a BUG() in svc_tcp_clear_pages. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af04f77..80c6c96 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1122,9 +1122,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len >= 0) svsk->sk_tcplen += len; if (len != want) { + svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) goto err_other; - svc_tcp_save_pages(svsk, rqstp); dprintk("svc: incomplete TCP record (%d of %d)\n", svsk->sk_tcplen, svsk->sk_reclen); goto err_noclose; -- cgit v1.1 From 299ee067572cf58dd38d03770263cbca212e5332 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 17 Aug 2012 17:31:53 -0400 Subject: svcrpc: fix svc_xprt_enqueue/svc_recv busy-looping commit d10f27a750312ed5638c876e4bd6aa83664cccd8 upstream. The rpc server tries to ensure that there will be room to send a reply before it receives a request. It does this by tracking, in xpt_reserved, an upper bound on the total size of the replies that is has already committed to for the socket. Currently it is adding in the estimate for a new reply *before* it checks whether there is space available. If it finds that there is not space, it then subtracts the estimate back out. This may lead the subsequent svc_xprt_enqueue to decide that there is space after all. The results is a svc_recv() that will repeatedly return -EAGAIN, causing server threads to loop without doing any actual work. Reported-by: Michael Tokarev Tested-by: Michael Tokarev Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc_xprt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9d7ed0b..dad7af5 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -323,7 +323,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_serv *serv = xprt->xpt_server; struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; @@ -369,8 +368,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp, rqstp->rq_xprt); rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; wake_up(&rqstp->rq_wait); } else { @@ -650,8 +647,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (xprt) { rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so @@ -748,6 +743,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } svc_xprt_received(xprt); -- cgit v1.1 From b8e52a4288c0c1a66c60e92fc57fd3d5e3918da0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Aug 2012 16:04:40 -0400 Subject: svcrpc: sends on closed socket should stop immediately commit f06f00a24d76e168ecb38d352126fd203937b601 upstream. svc_tcp_sendto sets XPT_CLOSE if we fail to transmit the entire reply. However, the XPT_CLOSE won't be acted on immediately. Meanwhile other threads could send further replies before the socket is really shut down. This can manifest as data corruption: for example, if a truncated read reply is followed by another rpc reply, that second reply will look to the client like further read data. Symptoms were data corruption preceded by svc_tcp_sendto logging something like kernel: rpc-srv/tcp: nfsd: sent only 963696 when sending 1048708 bytes - shutting down socket Reported-by: Malahal Naineni Tested-by: Malahal Naineni Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc_xprt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dad7af5..05dbccf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -801,7 +801,8 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + if (test_bit(XPT_DEAD, &xprt->xpt_flags) + || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; else len = xprt->xpt_ops->xpo_sendto(rqstp); -- cgit v1.1 From a629a20ed248b9680cb0b2b05a751452067beeae Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:54 +0000 Subject: dccp: check ccid before dereferencing commit 276bdb82dedb290511467a5a4fdbe9f0b52dce6f upstream. ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt() might be called with a NULL ccid pointer leading to a NULL pointer dereference. This could lead to a privilege escalation if the attacker is able to map page 0 and prepare it with a fake ccid_ops pointer. Signed-off-by: Mathias Krause Cc: Gerrit Renker Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582..fb85d37 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; -- cgit v1.1 From 7f8742aecd30470b4ae8f9bb6bd0b9b6abb93c9f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 30 Jul 2012 15:57:00 +0000 Subject: net: Allow driver to limit number of GSO segments per skb [ Upstream commit 30b678d844af3305cda5953467005cebb5d7b687 ] A peer (or local user) may cause TCP to use a nominal MSS of as little as 88 (actual MSS of 76 with timestamps). Given that we have a sufficiently prodigious local sender and the peer ACKs quickly enough, it is nevertheless possible to grow the window for such a connection to the point that we will try to send just under 64K at once. This results in a single skb that expands to 861 segments. In some drivers with TSO support, such an skb will require hundreds of DMA descriptors; a substantial fraction of a TX ring or even more than a full ring. The TX queue selected for the skb may stall and trigger the TX watchdog repeatedly (since the problem skb will be retried after the TX reset). This particularly affects sfc, for which the issue is designated as CVE-2012-3412. Therefore: 1. Add the field net_device::gso_max_segs holding the device-specific limit. 2. In netif_skb_features(), if the number of segments is too high then mask out GSO features to force fall back to software GSO. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8235b81..065b342 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2050,6 +2050,9 @@ u32 netif_skb_features(struct sk_buff *skb) __be16 protocol = skb->protocol; u32 features = skb->dev->features; + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + features &= ~NETIF_F_GSO_MASK; + if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; @@ -5870,6 +5873,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); dev->gso_max_size = GSO_MAX_SIZE; + dev->gso_max_segs = GSO_MAX_SEGS; INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); dev->ethtool_ntuple_list.count = 0; -- cgit v1.1 From 09c403dc7c8e73a4f9c553e4d84aef68af25ff65 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 30 Jul 2012 16:11:42 +0000 Subject: tcp: Apply device TSO segment limit earlier [ Upstream commit 1485348d2424e1131ea42efc033cbd9366462b01 ] Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to limit the size of TSO skbs. 
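The gso_max_segs limit introduced above only bites if a driver actually lowers it from the default. A hypothetical driver would do something like the following in its probe path; the ring-size and descriptors-per-segment constants are made up for illustration.

#include <linux/netdevice.h>

#define MY_TX_RING_SIZE		512	/* illustrative hardware limits */
#define MY_MAX_DESCS_PER_SEG	2

static void my_driver_limit_tso(struct net_device *net_dev)
{
	/* Keep a single TSO skb well below one TX ring's worth of
	 * descriptors so it cannot monopolize or stall the queue. */
	net_dev->gso_max_segs = MY_TX_RING_SIZE / MY_MAX_DESCS_PER_SEG / 2;
}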
This avoids the need to fall back to software GSO for local TCP senders. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 1 + net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_cong.c | 3 ++- net/ipv4/tcp_output.c | 21 ++++++++++++--------- 4 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index b4bb59a..56623ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1312,6 +1312,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = dst->dev->gso_max_size; + sk->sk_gso_max_segs = dst->dev->gso_max_segs; } } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b6ec23c..e57df66 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -739,7 +739,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, old_size_goal + mss_now > xmit_size_goal)) { xmit_size_goal = old_size_goal; } else { - tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + tp->xmit_size_goal_segs = + min_t(u16, xmit_size_goal / mss_now, + sk->sk_gso_max_segs); xmit_size_goal = tp->xmit_size_goal_segs * mss_now; } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 850c737..6cebfd2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -290,7 +290,8 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size) + left * tp->mss_cache < sk->sk_gso_max_size && + left < sk->sk_gso_max_segs) return 1; return left <= tcp_max_burst(tp); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index faf257b..e0b8bd1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1310,21 +1310,21 @@ static void tcp_cwnd_validate(struct sock *sk) * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, - unsigned int mss_now, unsigned int cwnd) + unsigned int mss_now, unsigned int max_segs) { struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, cwnd_len; + u32 needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; - cwnd_len = mss_now * cwnd; + max_len = mss_now * max_segs; - if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) - return cwnd_len; + if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) + return max_len; needed = min(skb->len, window); - if (cwnd_len <= needed) - return cwnd_len; + if (max_len <= needed) + return max_len; return needed - needed % mss_now; } @@ -1551,7 +1551,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= sk->sk_gso_max_size) + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, + sk->sk_gso_max_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? 
*/ @@ -1777,7 +1778,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); + min_t(unsigned int, + cwnd_quota, + sk->sk_gso_max_segs)); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) -- cgit v1.1 From d5916dedd20a1714a8436f51ba4df13bd4c56474 Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Fri, 3 Aug 2012 19:57:52 +0900 Subject: net_sched: gact: Fix potential panic in tcf_gact(). [ Upstream commit 696ecdc10622d86541f2e35cc16e15b6b3b1b67e ] gact_rand array is accessed by gact->tcfg_ptype whose value is assumed to less than MAX_RAND, but any range checks are not performed. So add a check in tcf_gact_init(). And in tcf_gact(), we can reduce a branch. Signed-off-by: Hiroaki SHIMODA Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_gact.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 2b4ab4b..89ddcb5 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, struct tcf_common *pc; int ret = 0; int err; +#ifdef CONFIG_GACT_PROB + struct tc_gact_p *p_parm = NULL; +#endif if (nla == NULL) return -EINVAL; @@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) return -EOPNOTSUPP; +#else + if (tb[TCA_GACT_PROB]) { + p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm->ptype >= MAX_RAND) + return -EINVAL; + } #endif pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); @@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, spin_lock_bh(&gact->tcf_lock); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB - if (tb[TCA_GACT_PROB] != NULL) { - struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm) { gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = p_parm->pval; gact->tcfg_ptype = p_parm->ptype; @@ -132,7 +140,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + if (gact->tcfg_ptype) action = gact_rand[gact->tcfg_ptype](gact); else action = gact->tcf_action; -- cgit v1.1 From e869e6223d29115d2d8351e5e3f514ddc1099f29 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 8 Aug 2012 00:33:25 +0000 Subject: net/core: Fix potential memory leak in dev_set_alias() [ Upstream commit 7364e445f62825758fa61195d237a5b8ecdd06ec ] Do not leak memory by updating pointer with potentially NULL realloc return value. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 065b342..6b43ec6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1045,6 +1045,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1058,9 +1060,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; -- cgit v1.1 From 2d1244f43868053b12684af2f9fe918c5641d898 Mon Sep 17 00:00:00 2001 From: "danborkmann@iogearbox.net" Date: Fri, 10 Aug 2012 22:48:54 +0000 Subject: af_packet: remove BUG statement in tpacket_destruct_skb [ Upstream commit 7f5c3e3a80e6654cf48dfba7cf94f88c6b505467 ] Here's a quote of the comment about the BUG macro from asm-generic/bug.h: Don't use BUG() or BUG_ON() unless there's really no way out; one example might be detecting data structure corruption in the middle of an operation that can't be backed out of. If the (sub)system can somehow continue operating, perhaps with reduced functionality, it's probably not BUG-worthy. If you're tempted to BUG(), think again: is completely giving up really the *only* solution? There are usually better options, where users don't need to reboot ASAP and can mostly shut down cleanly. In our case, the status flag of a ring buffer slot is managed from both sides, the kernel space and the user space. This means that even though the kernel side might work as expected, the user space screws up and changes this flag right between the send(2) is triggered when the flag is changed to TP_STATUS_SENDING and a given skb is destructed after some time. Then, this will hit the BUG macro. As David suggested, the best solution is to simply remove this statement since it cannot be used for kernel side internal consistency checks. I've tested it and the system still behaves /stable/ in this case, so in accordance with the above comment, we should rather remove it. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fafb968..1ab5a02 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -866,7 +866,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); -- cgit v1.1 From f9b6caca04a5444f61c1b08ebf04c6f2c6ff2ec9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Aug 2012 08:54:51 +0000 Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock [ Upstream commit 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d ] Cong Wang reports that lockdep detected suspicious RCU usage while enabling IPV6 forwarding: [ 1123.310275] =============================== [ 1123.442202] [ INFO: suspicious RCU usage. 
] [ 1123.558207] 3.6.0-rc1+ #109 Not tainted [ 1123.665204] ------------------------------- [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section! [ 1123.992320] [ 1123.992320] other info that might help us debug this: [ 1123.992320] [ 1124.307382] [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0 [ 1124.522220] 2 locks held by sysctl/5710: [ 1124.648364] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_trylock+0x15/0x17 [ 1124.882211] #1: (rcu_read_lock){.+.+.+}, at: [] rcu_lock_acquire+0x0/0x29 [ 1125.085209] [ 1125.085209] stack backtrace: [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109 [ 1125.441291] Call Trace: [ 1125.545281] [] lockdep_rcu_suspicious+0x109/0x112 [ 1125.667212] [] rcu_preempt_sleep_check+0x45/0x47 [ 1125.781838] [] __might_sleep+0x1e/0x19b [...] [ 1127.445223] [] call_netdevice_notifiers+0x4a/0x4f [...] [ 1127.772188] [] dev_disable_lro+0x32/0x6b [ 1127.885174] [] dev_forward_change+0x30/0xcb [ 1128.013214] [] addrconf_forward_change+0x85/0xc5 [...] addrconf_forward_change() uses RCU iteration over the netdev list, which is unnecessary since it already holds the RTNL lock. We also cannot reasonably require netdevice notifier functions not to sleep. Reported-by: Cong Wang Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index be29337..70d6a7f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -492,8 +492,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -502,7 +501,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) -- cgit v1.1 From 9a897ce370b0bbe57961372c67bd0ffc2dd3f7ea Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:44 +0000 Subject: atm: fix info leak in getsockopt(SO_ATMPVC) [ Upstream commit e862f1a9b7df4e8196ebec45ac62295138aa3fc2 ] The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/atm/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 22b963d..cc859ad 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -784,6 +784,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; -- cgit v1.1 From d5d3ca708a08caa3a55531b6b781edcb0f64724d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:45 +0000 Subject: atm: fix info leak via getsockname() [ Upstream commit 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a ] The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. 
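The padding leaks fixed here come from a C structure-layout detail that is easy to forget: member assignments never touch compiler-inserted padding. A minimal illustration follows; offsets assume a typical ABI where a 32-bit integer needs 4-byte alignment.

#include <linux/string.h>
#include <linux/types.h>

struct example_addr {
	__u8	family;		/* offset 0                                 */
				/* offsets 1-3: compiler-inserted padding;  */
				/* member assignments never write them      */
	__u32	value;		/* offset 4                                 */
};				/* sizeof() == 8, but only 5 bytes are      */
				/* initialized by the assignments below     */

static void fill_addr(struct example_addr *a)
{
	/* Without this memset() the 3 padding bytes keep whatever was on
	 * the kernel stack and leak out when the struct is copied to
	 * user space.                                                    */
	memset(a, 0, sizeof(*a));
	a->family = 1;
	a->value  = 42;
}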
Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/atm/pvc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 437ee70..db0dd47 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -94,6 +94,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; -- cgit v1.1 From 1b917a7e47ff3f86bf55cf22d810b97cc053fac8 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:46 +0000 Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER) [ Upstream commit e15ca9a0ef9a86f0477530b0f44a725d67f889ee ] The HCI code fails to initialize the two padding bytes of struct hci_ufilter before copying it to userland -- thereby leaking two bytes of kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 295e4a8..2dddaa3 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -644,6 +644,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, char { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); -- cgit v1.1 From f1c0a71da10bb2b9357b35a80ad315a353340c46 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:47 +0000 Subject: Bluetooth: HCI - Fix info leak via getsockname() [ Upstream commit 3f68ba07b1da811bf383b4b701b129bfcb2e4988 ] The HCI code fails to initialize the hci_channel member of struct sockaddr_hci and thereby leaks two bytes of kernel stack via the getsockname() syscall. Initialize hci_channel with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2dddaa3..eb5cb6f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -432,6 +432,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *add *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; + haddr->hci_channel= 0; release_sock(sk); return 0; -- cgit v1.1 From 416a675770e625f4f5ccef3ce70b560abc94757a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:49 +0000 Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST) [ Upstream commit f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a ] The RFCOMM code fails to initialize the two padding bytes of struct rfcomm_dev_list_req inserted for alignment before copying it to userland. Additionally there are two padding bytes in each instance of struct rfcomm_dev_info.
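Because this particular fix switches the allocation to kzalloc() instead of adding a memset(), here is a comparable userspace-only sketch (the header-plus-array layout is invented for illustration, and calloc() stands in for kzalloc()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {                  /* hypothetical, not struct rfcomm_dev_info */
	short id;               /* followed by 2 padding bytes               */
	int   channel;
};

struct dev_list {               /* hypothetical, not rfcomm_dev_list_req    */
	int          count;
	struct entry e[];
};

static void fill_and_show(const char *tag, struct dev_list *dl,
			  size_t size, int filled)
{
	const unsigned char *p = (const unsigned char *)dl;
	size_t i;
	int n;

	dl->count = filled;
	for (n = 0; n < filled; n++) {  /* struct holes stay untouched */
		dl->e[n].id      = n;
		dl->e[n].channel = 42;
	}
	printf("%s:", tag);
	for (i = 0; i < size; i++)      /* everything here would be copied out */
		printf(" %02x", p[i]);
	printf("\n");
}

int main(void)
{
	int    max = 4, filled = 2;
	size_t size = sizeof(struct dev_list) + max * sizeof(struct entry);
	char  *scratch = malloc(size);
	struct dev_list *dirty, *clean;

	if (!scratch)
		return 1;
	memset(scratch, 0xaa, size);    /* leave recognizable garbage behind */
	free(scratch);

	dirty = malloc(size);           /* often reuses the dirty chunk above */
	clean = calloc(1, size);        /* zeroed up front, like kzalloc()    */
	if (!dirty || !clean)
		return 1;
	fill_and_show("malloc", dirty, size, filled);
	fill_and_show("calloc", clean, size, filled);
	free(dirty);
	free(clean);
	return 0;
}

Whether the malloc'ed buffer really shows the 0xaa pattern depends on the allocator reusing that chunk, but the point carries over: the structure holes and the unfilled tail of a kmalloc'ed buffer contain whatever the heap last held, and copying the full buffer to userland discloses it.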
The ioctl() thus discloses two bytes plus dev_num times two bytes of uninitialized kernel heap memory. Allocate the memory using kzalloc() to fix this issue. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c258796..bc1eb56 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -471,7 +471,7 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - dl = kmalloc(size, GFP_KERNEL); + dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; -- cgit v1.1 From 00553f5b9fd16a2c2287e41cc18217bdc7f9d310 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:50 +0000 Subject: Bluetooth: RFCOMM - Fix info leak via getsockname() [ Upstream commit 9344a972961d1a6d2c04d9008b13617bcb6ec2ef ] The RFCOMM code fails to initialize the trailing padding byte of struct sockaddr_rc added for alignment. It thereby leaks one byte of kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1b10727..9dfe702 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -544,6 +544,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) -- cgit v1.1 From 6ffb80e739e282d7b9ffa43c2ec2a9766c8099a1 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:51 +0000 Subject: Bluetooth: L2CAP - Fix info leak via getsockname() [ Upstream commit 792039c73cf176c8e39a6e8beef2c94ff46522ed ] The L2CAP code fails to initialize the l2_bdaddr_type member of struct sockaddr_l2 and the padding byte added for alignment. It thereby leaks two bytes of kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 8248303..9810d45 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -329,6 +329,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); -- cgit v1.1 From 27fb5ec5224d73f8e05a0ec6a00bf6b07a591470 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:53 +0000 Subject: llc: fix info leak via getsockname() [ Upstream commit 3592aaeb80290bda0f2cf0b5456c97bfc638b192 ] The LLC code wrongly returns 0, i.e. "success", when the socket is zapped.
Together with the uninitialized uaddrlen pointer argument from sys_getsockname this leads to an arbitrary memory leak of up to 128 bytes of kernel stack via the getsockname() syscall. Return an error instead when the socket is zapped to prevent the info leak. Also remove the unnecessary memset(0). We don't directly write to the memory pointed to by uaddr but memcpy() a local structure at the end of the function that is properly initialized. Signed-off-by: Mathias Krause Cc: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index a18e6c3..99a60d5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -966,14 +966,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = 0; + int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; *uaddrlen = sizeof(sllc); - memset(uaddr, 0, *uaddrlen); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) -- cgit v1.1 From 500e5c989e64489413ffde419b3d94789290a4b7 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:55 +0000 Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO) [ Upstream commit 7b07f8eb75aa3097cdfd4f6eac3da49db787381d ] The CCID3 code fails to initialize the trailing padding bytes of struct tfrc_tx_info added for alignment on 64 bit architectures. It thereby potentially leaks four bytes of kernel stack via the getsockopt() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Gerrit Renker Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid3.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3d604e1..4caf63f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -532,6 +532,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; -- cgit v1.1 From b56518548aa6e99e80c6c67b5a7d7f2c8c614c74 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:56 +0000 Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT) [ Upstream commit 2d8a041b7bfe1097af21441cb77d6af95f4f4680 ] If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is not set, __ip_vs_get_timeouts() does not fully initialize the structure that gets copied to userland and thereby leaks up to 12 bytes of kernel stack. Add an explicit memset(0) before passing the structure to __ip_vs_get_timeouts() to avoid the info leak. Signed-off-by: Mathias Krause Cc: Wensong Zhang Cc: Simon Horman Cc: Julian Anastasov Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_ctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a178cb3..9528ea0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2675,6 +2675,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; + memset(&t, 0, sizeof(t)); __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; -- cgit v1.1 From 7a62b446c607d994f82a578bbca5995a0aa0183d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:57 +0000 Subject: net: fix info leak in compat dev_ifconf() [ Upstream commit 43da5f2e0d0c69ded3d51907d9552310a6b545e8 ] The implementation of dev_ifconf() for the compat ioctl interface uses an intermediate ifc structure allocated in userland for the duration of the syscall. However, it fails to initialize the padding bytes inserted for alignment and thereby leaks four bytes of kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index cf41afc..1b0f0fc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2645,6 +2645,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; -- cgit v1.1 From f8df5b8a9dec89726e6bf8a2073c72a533c6d0c5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 23 Aug 2012 02:09:11 +0000 Subject: netlink: fix possible spoofing from non-root processes [ Upstream commit 20e1db19db5d6b9e4e83021595eab0dc8f107bef ] Non-root user-space processes can send Netlink messages to other processes that are well-known for being subscribed to Netlink asynchronous notifications. This allows an illegitimate non-root process to send forged messages to Netlink subscribers. The userspace process usually verifies the legitimate origin in two ways: a) Socket credentials. If UID != 0, then the message comes from some illegitimate process and the message needs to be dropped. b) Netlink portID. In general, portID == 0 means that the origin of the messages comes from the kernel. Thus, any message not coming from the kernel is discarded. However, ctnetlink sets the portID in event messages that have been triggered by some user-space process, eg. the conntrack utility. So other processes subscribed to ctnetlink events, eg. conntrackd, know that the event was triggered by some user-space action. Neither of the two ways to discard illegitimate messages coming from non-root processes can help for ctnetlink. This patch adds capability validation in case dst_pid is set in netlink_sendmsg(). This approach is aggressive since existing applications using any Netlink bus to deliver messages between two user-space processes will break. Note that the exception is NETLINK_USERSOCK, since it is reserved for netlink-to-netlink userspace communication. Still, if anyone wants their Netlink bus to allow netlink-to-netlink userspace communication, they can set NL_NONROOT_SEND.
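To make checks (a) and (b) above concrete, here is a rough userspace listener that applies check (b) and drops anything whose netlink portID is not zero. It is illustrative only; NETLINK_ROUTE and RTMGRP_LINK are chosen merely as a familiar example of a bus carrying kernel notifications:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct sockaddr_nl local = { .nl_family = AF_NETLINK,
				     .nl_groups = RTMGRP_LINK };
	struct sockaddr_nl peer;
	char buf[8192];
	struct iovec iov = { buf, sizeof(buf) };
	struct msghdr msg = { .msg_name = &peer, .msg_namelen = sizeof(peer),
			      .msg_iov = &iov, .msg_iovlen = 1 };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0 || bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
		perror("netlink");
		return 1;
	}
	for (;;) {
		ssize_t len = recvmsg(fd, &msg, 0);

		if (len <= 0)
			break;
		if (peer.nl_pid != 0) {	/* check (b): sender is not the kernel */
			fprintf(stderr, "dropping %zd bytes from pid %u\n",
				len, peer.nl_pid);
			continue;
		}
		printf("kernel notification, %zd bytes\n", len);
	}
	return 0;
}

The patch exists precisely because this traditional check is not sufficient for ctnetlink, where legitimate events may carry a non-zero portID, so the policing is moved into netlink_sendmsg() on the sender side.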
However, by default, I don't think it makes sense to allow NETLINK_ROUTE to be used as a channel between two user-space processes exchanging arbitrary information that is unrelated to link/neighbouring/routing. They should be using NETLINK_USERSOCK instead for that. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 24bc620..16a94a3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1345,7 +1345,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2098,6 +2099,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } -- cgit v1.1 From 928863a39f0ee47af97e7e17b909334dcb06cf1e Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Fri, 24 Aug 2012 01:07:38 +0000 Subject: l2tp: avoid to use synchronize_rcu in tunnel free function [ Upstream commit 99469c32f79a32d8481f87be0d3c66dad286f4ec ] Avoid using synchronize_rcu in l2tp_tunnel_free because the context may be atomic. Signed-off-by: Dmitry Kozlov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 3 +-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 71c292e..6a3d680 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1252,11 +1252,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) /* Remove from tunnel list */ spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); + kfree_rcu(tunnel, rcu); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - synchronize_rcu(); atomic_dec(&l2tp_tunnel_count); - kfree(tunnel); } /* Create a socket for the tunnel, if one isn't set up by diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a16a48e..4393794 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -157,6 +157,7 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, -- cgit v1.1 From 896b6af471f67fc60cbab5a503cb8a764a8c47a4 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 24 Aug 2012 07:38:35 +0000 Subject: net: ipv4: ipmr_expire_timer causes crash when removing net namespace [ Upstream commit acbb219d5f53821b2d0080d047800410c0420ea1 ] When tearing down a net namespace, ipv4 mr_table structures are freed without first deactivating their timers. This can result in a crash in run_timer_softirq. This patch mimics the corresponding behaviour in ipv6. Locking and synchronization seem to be adequate. We are about to kfree mrt, so existing code should already make sure that no other references to mrt are pending or can be created by incoming traffic.
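The ordering this patch enforces (deactivate the periodic work and wait for it to finish before freeing the structure it touches) can be shown with a stand-alone userspace analogue, where a pthread worker plays the role of ipmr_expire_timer. Illustrative only, not kernel code:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct table {                  /* stands in for struct mr_table */
	atomic_int stop;
	pthread_t  worker;
	int        entries;
};

static void *expire_worker(void *arg)   /* stands in for ipmr_expire_process() */
{
	struct table *t = arg;

	while (!atomic_load(&t->stop)) {
		t->entries = 0;         /* touches *t on every tick */
		usleep(100 * 1000);
	}
	return NULL;
}

static void table_free(struct table *t)
{
	/* the del_timer_sync() analogue: stop the worker and wait for it
	 * before the memory it uses goes away */
	atomic_store(&t->stop, 1);
	pthread_join(t->worker, NULL);
	free(t);
}

int main(void)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return 1;
	if (pthread_create(&t->worker, NULL, expire_worker, t)) {
		free(t);
		return 1;
	}
	sleep(1);
	table_free(t);          /* safe: the worker is guaranteed inactive */
	puts("freed only after the worker stopped");
	return 0;
}

Freeing t while the worker can still fire is the use-after-free the namespace teardown hit; the patch's ipmr_free_table() applies the same discipline with del_timer_sync() and mroute_clean_tables() before kfree().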
The functions invoked here do not cause new references to mrt or other race conditions to be created. Invoking del_timer_sync guarantees that ipmr_expire_timer is inactive. Both ipmr_expire_process (whose completion we may have to wait in del_timer_sync) and mroute_clean_tables internally use mfc_unres_lock or other synchronizations when needed, and they both only modify mrt. Tested in Linux 3.4.8. Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f81af8d..ec7d8e7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static void ipmr_free_table(struct mr_table *mrt); + static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct mr_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - kfree(mrt); + ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - kfree(net->ipv4.mrt); + ipmr_free_table(net->ipv4.mrt); } #endif @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; } +static void ipmr_free_table(struct mr_table *mrt) +{ + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -- cgit v1.1 From 2cfb6b1d3632beffef774aad1f7b906fe1934fb2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 14 Sep 2012 15:36:57 -0700 Subject: cfg80211: fix possible circular lock on reg_regdb_search() commit a85d0d7f3460b1a123b78e7f7e39bf72c37dfb78 upstream. When call_crda() is called we kick off a witch hunt search for the same regulatory domain on our internal regulatory database and that work gets kicked off on a workqueue, this is done while the cfg80211_mutex is held. If that workqueue kicks off it will first lock reg_regdb_search_mutex and later cfg80211_mutex but to ensure two CPUs will not contend against cfg80211_mutex the right thing to do is to have the reg_regdb_search() wait until the cfg80211_mutex is let go. The lockdep report is pasted below. 
cfg80211: Calling CRDA to update world regulatory domain ====================================================== [ INFO: possible circular locking dependency detected ] 3.3.8 #3 Tainted: G O ------------------------------------------------------- kworker/0:1/235 is trying to acquire lock: (cfg80211_mutex){+.+...}, at: [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] but task is already holding lock: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (reg_regdb_search_mutex){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<81645778>] is_world_regdom+0x9f8/0xc74 [cfg80211] -> #1 (reg_mutex#2){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<8164539c>] is_world_regdom+0x61c/0xc74 [cfg80211] -> #0 (cfg80211_mutex){+.+...}: [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] other info that might help us debug this: Chain exists of: cfg80211_mutex --> reg_mutex#2 --> reg_regdb_search_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reg_regdb_search_mutex); lock(reg_mutex#2); lock(reg_regdb_search_mutex); lock(cfg80211_mutex); *** DEADLOCK *** 3 locks held by kworker/0:1/235: #0: (events){.+.+..}, at: [<80089a00>] process_one_work+0x230/0x460 #1: (reg_regdb_work){+.+...}, at: [<80089a00>] process_one_work+0x230/0x460 #2: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] stack backtrace: Call Trace: [<80290fd4>] dump_stack+0x8/0x34 [<80291bc4>] print_circular_bug+0x2ac/0x2d8 [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] Reported-by: Felix Fietkau Tested-by: Felix Fietkau Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 90b73d1..d85a149 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -331,6 +331,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -346,9 +349,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -356,6 +357,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); -- cgit v1.1 From b64295e8b4d340a136f02f08baffa9efc0f2e0bc Mon Sep 17 00:00:00 2001 From: "Rustad, Mark D" Date: Wed, 18 Jul 2012 09:06:07 +0000 Subject: net: Statically initialize init_net.dev_base_head commit 734b65417b24d6eea3e3d7457e1f11493890ee1d upstream. This change eliminates an initialization-order hazard most recently seen when netprio_cgroup is built into the kernel. 
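A small userspace illustration of why compile-time initialization removes this kind of ordering hazard (the GCC/Clang constructor attribute merely simulates code that runs before the normal init functions, and the two-pointer node is a simplified stand-in for a kernel list_head):

#include <stdio.h>

struct node {                   /* simplified stand-in for struct list_head */
	struct node *next, *prev;
};

/* compile-time equivalent of LIST_HEAD_INIT(): the head points at itself
 * before any initialization code has run */
static struct node registry = { &registry, &registry };

__attribute__((constructor))
static void early_user(void)    /* runs before main(), like an early initcall */
{
	printf("usable before main: %s\n",
	       registry.next == &registry ? "yes" : "no");
}

int main(void)
{
	printf("still consistent in main: %s\n",
	       registry.next == &registry ? "yes" : "no");
	return 0;
}

Because the head is initialized by the compiler rather than by some other early function, no caller can observe it in an uninitialized state, which is the property the patch gives init_net.dev_base_head, removing the dependence on initialization ordering.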
With thanks to Eric Dumazet for catching a bug. Signed-off-by: Mark Rustad Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- net/core/net_namespace.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6b43ec6..d8bc889 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6260,7 +6260,8 @@ static struct hlist_head *netdev_create_hash(void) /* Initialize per network namespace state */ static int __net_init netdev_init(struct net *net) { - INIT_LIST_HEAD(&net->dev_base_head); + if (net != &init_net) + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2772ed1..1642c30 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,9 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); -struct net init_net; +struct net init_net = { + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +}; EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ -- cgit v1.1 From 25dee10e0c0b7093df5c32284cc85817820c6003 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Mon, 23 Jul 2012 10:37:48 +0800 Subject: rds: set correct msg_namelen commit 06b6a1cf6e776426766298d055bb3991957d90a7 upstream. Jay Fenlason (fenlason@redhat.com) found a bug: recvfrom() on an RDS socket can return the contents of random kernel memory to userspace if it was called with an address length larger than sizeof(struct sockaddr_in). rds_recvmsg() also fails to set the addr_len parameter properly before returning, but that's just a bug. There are also a number of cases where recvfrom() can return an entirely bogus address. Anything in rds_recvmsg() that returns a non-negative value but does not go through the "sin = (struct sockaddr_in *)msg->msg_name;" code path at the end of the while(1) loop will return up to 128 bytes of kernel memory to userspace. I wrote two test programs to reproduce this bug; you will see that in rds_server, fromAddr will be overwritten and the following sock_fd will be destroyed. Yes, it is the programmer's fault to set msg_namelen incorrectly, but it is better to make the kernel copy the real length of the address to user space in such a case. How to run the test programs? I tested them on a 32bit x86 system, 3.5.0-rc7. 1 compile gcc -o rds_client rds_client.c gcc -o rds_server rds_server.c 2 run ./rds_server on one console 3 run ./rds_client on another console 4 you will see something like: server is waiting to receive data...
old socket fd=3 server received data from client:data from client msg.msg_namelen=32 new socket fd=-1067277685 sendmsg() : Bad file descriptor /***************** rds_client.c ********************/ int main(void) { int sock_fd; struct sockaddr_in serverAddr; struct sockaddr_in toAddr; char recvBuffer[128] = "data from client"; struct msghdr msg; struct iovec iov; sock_fd = socket(AF_RDS, SOCK_SEQPACKET, 0); if (sock_fd < 0) { perror("create socket error\n"); exit(1); } memset(&serverAddr, 0, sizeof(serverAddr)); serverAddr.sin_family = AF_INET; serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); serverAddr.sin_port = htons(4001); if (bind(sock_fd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) < 0) { perror("bind() error\n"); close(sock_fd); exit(1); } memset(&toAddr, 0, sizeof(toAddr)); toAddr.sin_family = AF_INET; toAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); toAddr.sin_port = htons(4000); msg.msg_name = &toAddr; msg.msg_namelen = sizeof(toAddr); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_iov->iov_base = recvBuffer; msg.msg_iov->iov_len = strlen(recvBuffer) + 1; msg.msg_control = 0; msg.msg_controllen = 0; msg.msg_flags = 0; if (sendmsg(sock_fd, &msg, 0) == -1) { perror("sendto() error\n"); close(sock_fd); exit(1); } printf("client send data:%s\n", recvBuffer); memset(recvBuffer, '\0', 128); msg.msg_name = &toAddr; msg.msg_namelen = sizeof(toAddr); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_iov->iov_base = recvBuffer; msg.msg_iov->iov_len = 128; msg.msg_control = 0; msg.msg_controllen = 0; msg.msg_flags = 0; if (recvmsg(sock_fd, &msg, 0) == -1) { perror("recvmsg() error\n"); close(sock_fd); exit(1); } printf("receive data from server:%s\n", recvBuffer); close(sock_fd); return 0; } /***************** rds_server.c ********************/ int main(void) { struct sockaddr_in fromAddr; int sock_fd; struct sockaddr_in serverAddr; unsigned int addrLen; char recvBuffer[128]; struct msghdr msg; struct iovec iov; sock_fd = socket(AF_RDS, SOCK_SEQPACKET, 0); if(sock_fd < 0) { perror("create socket error\n"); exit(0); } memset(&serverAddr, 0, sizeof(serverAddr)); serverAddr.sin_family = AF_INET; serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); serverAddr.sin_port = htons(4000); if (bind(sock_fd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) < 0) { perror("bind error\n"); close(sock_fd); exit(1); } printf("server is waiting to receive data...\n"); msg.msg_name = &fromAddr; /* * I add 16 to sizeof(fromAddr), ie 32, * and pay attention to the definition of fromAddr, * recvmsg() will overwrite sock_fd, * since kernel will copy 32 bytes to userspace. * * If you just use sizeof(fromAddr), it works fine. * */ msg.msg_namelen = sizeof(fromAddr) + 16; /* msg.msg_namelen = sizeof(fromAddr); */ msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_iov->iov_base = recvBuffer; msg.msg_iov->iov_len = 128; msg.msg_control = 0; msg.msg_controllen = 0; msg.msg_flags = 0; while (1) { printf("old socket fd=%d\n", sock_fd); if (recvmsg(sock_fd, &msg, 0) == -1) { perror("recvmsg() error\n"); close(sock_fd); exit(1); } printf("server received data from client:%s\n", recvBuffer); printf("msg.msg_namelen=%d\n", msg.msg_namelen); printf("new socket fd=%d\n", sock_fd); strcat(recvBuffer, "--data from server"); if (sendmsg(sock_fd, &msg, 0) == -1) { perror("sendmsg()\n"); close(sock_fd); exit(1); } } close(sock_fd); return 0; } Signed-off-by: Weiping Pan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/recv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/rds/recv.c b/net/rds/recv.c index 596689e..51a8f8e 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -409,6 +409,8 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); + msg->msg_namelen = 0; + if (msg_flags & MSG_OOB) goto out; @@ -484,6 +486,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sin->sin_port = inc->i_hdr.h_sport; sin->sin_addr.s_addr = inc->i_saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + msg->msg_namelen = sizeof(*sin); } break; } -- cgit v1.1 From 34c0bc428c081c314c4b4ebf6c0fd335cf53c133 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:48 +0000 Subject: drop_monitor: fix sleeping in invalid context warning commit cde2e9a651b76d8db36ae94cd0febc82b637e5dd upstream. Eric Dumazet pointed out this warning in the drop_monitor protocol to me: [ 38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85 [ 38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch [ 38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71 [ 38.352582] Call Trace: [ 38.352592] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352599] [] __might_sleep+0xca/0xf0 [ 38.352606] [] mutex_lock+0x26/0x50 [ 38.352610] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352616] [] tracepoint_probe_register+0x29/0x90 [ 38.352621] [] set_all_monitor_traces+0x105/0x170 [ 38.352625] [] net_dm_cmd_trace+0x2a/0x40 [ 38.352630] [] genl_rcv_msg+0x21a/0x2b0 [ 38.352636] [] ? zone_statistics+0x99/0xc0 [ 38.352640] [] ? genl_rcv+0x30/0x30 [ 38.352645] [] netlink_rcv_skb+0xa9/0xd0 [ 38.352649] [] genl_rcv+0x20/0x30 [ 38.352653] [] netlink_unicast+0x1ae/0x1f0 [ 38.352658] [] netlink_sendmsg+0x2b6/0x310 [ 38.352663] [] sock_sendmsg+0x10f/0x130 [ 38.352668] [] ? move_addr_to_kernel+0x60/0xb0 [ 38.352673] [] ? verify_iovec+0x64/0xe0 [ 38.352677] [] __sys_sendmsg+0x386/0x390 [ 38.352682] [] ? handle_mm_fault+0x139/0x210 [ 38.352687] [] ? do_page_fault+0x1ec/0x4f0 [ 38.352693] [] ? set_next_entity+0x9d/0xb0 [ 38.352699] [] ? tty_ldisc_deref+0x9/0x10 [ 38.352703] [] ? pick_next_task_fair+0x63/0x140 [ 38.352708] [] sys_sendmsg+0x44/0x80 [ 38.352713] [] system_call_fastpath+0x16/0x1b It stems from holding a spinlock (trace_state_lock) while attempting to register or unregister tracepoint hooks, making in_atomic() true in this context, leading to the warning when the tracepoint calls might_sleep() while its taking a mutex. Since we only use the trace_state_lock to prevent trace protocol state races, as well as hardware stat list updates on an rcu write side, we can just convert the spinlock to a mutex to avoid this problem. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7f36b38..f74d7d7 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -42,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_SPINLOCK(trace_state_lock); +static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -213,7 +213,7 @@ static int set_all_monitor_traces(int state) struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *temp; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); if (state == trace_state) { rc = -EAGAIN; @@ -252,7 +252,7 @@ static int set_all_monitor_traces(int state) rc = -EINPROGRESS; out_unlock: - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); return rc; } @@ -295,12 +295,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; case NETDEV_UNREGISTER: - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -311,7 +311,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; } out: -- cgit v1.1 From b2f89a7caf4491dba4086663139e6fbc1ab59711 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:49 +0000 Subject: drop_monitor: Make updating data->skb smp safe commit 3885ca785a3618593226687ced84f3f336dc3860 upstream. Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in its smp protections. Specifically, its possible to replace data->skb while its being written. This patch corrects that by making data->skb an rcu protected variable. That will prevent it from being overwritten while a tracepoint is modifying it. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 70 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index f74d7d7..d75cbfc 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; - struct sk_buff *skb; + struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; }; @@ -73,35 +73,58 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); +static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; + struct sk_buff *skb; + struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); - data->skb = genlmsg_new(al, GFP_KERNEL); - genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - atomic_set(&data->dm_hit_count, dm_hit_limit); + skb = genlmsg_new(al, GFP_KERNEL); + + if (skb) { + genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); + memset(msg, 0, al); + } else if (initialized) + schedule_work_on(smp_processor_id(), &data->dm_alert_work); + + /* + * Don't need to lock this, since we are guaranteed to only + * run this on a single cpu at a time. + * Note also that we only update data->skb if the old and new skb + * pointers don't match. This ensures that we don't continually call + * synchornize_rcu if we repeatedly fail to alloc a new netlink message. + */ + if (skb != oskb) { + rcu_assign_pointer(data->skb, skb); + + synchronize_rcu(); + + atomic_set(&data->dm_hit_count, dm_hit_limit); + } + } static void send_dm_alert(struct work_struct *unused) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); /* * Grab the skb we're about to send */ - skb = data->skb; + skb = rcu_dereference_protected(data->skb, 1); /* * Replace it with a new one @@ -111,8 +134,10 @@ static void send_dm_alert(struct work_struct *unused) /* * Ship it! 
*/ - genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + if (skb) + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + put_cpu_var(dm_cpu_data); } /* @@ -123,9 +148,11 @@ static void send_dm_alert(struct work_struct *unused) */ static void sched_send_work(unsigned long unused) { - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + schedule_work_on(smp_processor_id(), &data->dm_alert_work); - schedule_work(&data->dm_alert_work); + put_cpu_var(dm_cpu_data); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlmsghdr *nlh; struct nlattr *nla; int i; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct sk_buff *dskb; + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + rcu_read_lock(); + dskb = rcu_dereference(data->skb); + + if (!dskb) + goto out; + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { /* * we're already at zero, discard this hit @@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } - nlh = (struct nlmsghdr *)data->skb->data; + nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { @@ -157,7 +191,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) /* * We need to create a new entry */ - __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; @@ -169,6 +203,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) } out: + rcu_read_unlock(); + put_cpu_var(dm_cpu_data); return; } @@ -374,6 +410,8 @@ static int __init init_net_drop_monitor(void) data->send_timer.function = sched_send_work; } + initialized = 1; + goto out; out_unreg: -- cgit v1.1 From 2c51de7f1479784bf4f7d8fb86968fc87c252a3a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 1 May 2012 08:18:02 +0000 Subject: drop_monitor: prevent init path from scheduling on the wrong cpu commit 4fdcfa12843bca38d0c9deff70c8720e4e8f515f upstream. I just noticed after some recent updates that the init path for the drop monitor protocol has a minor error. drop monitor maintains a per cpu structure that gets initialized from a single cpu. Normally this is fine, as the protocol isn't in use yet, but I recently made a change that causes a failed skb allocation to reschedule itself. Given the current code, the implication is that this workqueue reschedule will take place on the wrong cpu. If drop monitor is used early during the boot process, it's possible that two cpus will access a single per-cpu structure in parallel, possibly leading to data corruption. This patch fixes the situation by storing the cpu number that a given instance of this per-cpu data should be accessed from. In the case of a need for a reschedule, the cpu stored in the struct is assigned the reschedule, rather than the currently executing cpu. Tested successfully by myself. Signed-off-by: Neil Horman CC: David Miller Signed-off-by: David S.
Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d75cbfc..e836592 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -49,6 +49,7 @@ struct per_cpu_dm_data { struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; + int cpu; }; struct dm_hw_stat_delta { @@ -73,7 +74,6 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { @@ -96,8 +96,8 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) sizeof(struct net_dm_alert_msg)); msg = nla_data(nla); memset(msg, 0, al); - } else if (initialized) - schedule_work_on(smp_processor_id(), &data->dm_alert_work); + } else + schedule_work_on(data->cpu, &data->dm_alert_work); /* * Don't need to lock this, since we are guaranteed to only @@ -121,6 +121,8 @@ static void send_dm_alert(struct work_struct *unused) struct sk_buff *skb; struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + WARN_ON_ONCE(data->cpu != smp_processor_id()); + /* * Grab the skb we're about to send */ @@ -403,14 +405,14 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - reset_per_cpu_data(data); + data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); data->send_timer.data = cpu; data->send_timer.function = sched_send_work; + reset_per_cpu_data(data); } - initialized = 1; goto out; -- cgit v1.1 From 81cee4e9e6329bff1adafefed0ff2db85b360a41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Jun 2012 00:18:19 +0000 Subject: drop_monitor: dont sleep in atomic context commit bec4596b4e6770c7037f21f6bd27567b152dc0d6 upstream. drop_monitor calls several sleeping functions while in atomic context. BUG: sleeping function called from invalid context at mm/slub.c:943 in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2 Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55 Call Trace: [] __might_sleep+0xca/0xf0 [] kmem_cache_alloc_node+0x1b3/0x1c0 [] ? queue_delayed_work_on+0x11c/0x130 [] __alloc_skb+0x4b/0x230 [] ? reset_per_cpu_data+0x160/0x160 [drop_monitor] [] reset_per_cpu_data+0x2f/0x160 [drop_monitor] [] send_dm_alert+0x4b/0xb0 [drop_monitor] [] process_one_work+0x130/0x4c0 [] worker_thread+0x159/0x360 [] ? manage_workers.isra.27+0x240/0x240 [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread_freezable_should_stop+0x80/0x80 [] ? gs_change+0xb/0xb Rework the logic to call the sleeping functions in right context. Use standard timer/workqueue api to let system chose any cpu to perform the allocation and netlink send. Also avoid a loop if reset_per_cpu_data() cannot allocate memory : use mod_timer() to wait 1/10 second before next try. Signed-off-by: Eric Dumazet Cc: Neil Horman Reviewed-by: Neil Horman Signed-off-by: David S. 
Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 101 ++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index e836592..b856f87 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -33,9 +33,6 @@ #define TRACE_ON 1 #define TRACE_OFF 0 -static void send_dm_alert(struct work_struct *unused); - - /* * Globals, our netlink socket pointer * and the work handle that will send up @@ -45,11 +42,10 @@ static int trace_state = TRACE_OFF; static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { - struct work_struct dm_alert_work; - struct sk_buff __rcu *skb; - atomic_t dm_hit_count; - struct timer_list send_timer; - int cpu; + spinlock_t lock; + struct sk_buff *skb; + struct work_struct dm_alert_work; + struct timer_list send_timer; }; struct dm_hw_stat_delta { @@ -75,13 +71,13 @@ static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static void reset_per_cpu_data(struct per_cpu_dm_data *data) +static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; struct sk_buff *skb; - struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); + unsigned long flags; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -96,65 +92,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) sizeof(struct net_dm_alert_msg)); msg = nla_data(nla); memset(msg, 0, al); - } else - schedule_work_on(data->cpu, &data->dm_alert_work); - - /* - * Don't need to lock this, since we are guaranteed to only - * run this on a single cpu at a time. - * Note also that we only update data->skb if the old and new skb - * pointers don't match. This ensures that we don't continually call - * synchornize_rcu if we repeatedly fail to alloc a new netlink message. - */ - if (skb != oskb) { - rcu_assign_pointer(data->skb, skb); - - synchronize_rcu(); - - atomic_set(&data->dm_hit_count, dm_hit_limit); + } else { + mod_timer(&data->send_timer, jiffies + HZ / 10); } + spin_lock_irqsave(&data->lock, flags); + swap(data->skb, skb); + spin_unlock_irqrestore(&data->lock, flags); + + return skb; } -static void send_dm_alert(struct work_struct *unused) +static void send_dm_alert(struct work_struct *work) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data; - WARN_ON_ONCE(data->cpu != smp_processor_id()); + data = container_of(work, struct per_cpu_dm_data, dm_alert_work); - /* - * Grab the skb we're about to send - */ - skb = rcu_dereference_protected(data->skb, 1); + skb = reset_per_cpu_data(data); - /* - * Replace it with a new one - */ - reset_per_cpu_data(data); - - /* - * Ship it! - */ if (skb) genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); - - put_cpu_var(dm_cpu_data); } /* * This is the timer function to delay the sending of an alert * in the event that more drops will arrive during the - * hysteresis period. Note that it operates under the timer interrupt - * so we don't need to disable preemption here + * hysteresis period. 
*/ -static void sched_send_work(unsigned long unused) +static void sched_send_work(unsigned long _data) { - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; - schedule_work_on(smp_processor_id(), &data->dm_alert_work); - - put_cpu_var(dm_cpu_data); + schedule_work(&data->dm_alert_work); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -164,22 +135,17 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlattr *nla; int i; struct sk_buff *dskb; - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); - + struct per_cpu_dm_data *data; + unsigned long flags; - rcu_read_lock(); - dskb = rcu_dereference(data->skb); + local_irq_save(flags); + data = &__get_cpu_var(dm_cpu_data); + spin_lock(&data->lock); + dskb = data->skb; if (!dskb) goto out; - if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { - /* - * we're already at zero, discard this hit - */ - goto out; - } - nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); @@ -189,7 +155,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } } - + if (msg->entries == dm_hit_limit) + goto out; /* * We need to create a new entry */ @@ -201,13 +168,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location) if (!timer_pending(&data->send_timer)) { data->send_timer.expires = jiffies + dm_delay * HZ; - add_timer_on(&data->send_timer, smp_processor_id()); + add_timer(&data->send_timer); } out: - rcu_read_unlock(); - put_cpu_var(dm_cpu_data); - return; + spin_unlock_irqrestore(&data->lock, flags); } static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) @@ -405,11 +370,11 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); - data->send_timer.data = cpu; + data->send_timer.data = (unsigned long)data; data->send_timer.function = sched_send_work; + spin_lock_init(&data->lock); reset_per_cpu_data(data); } -- cgit v1.1 From 7218addc4b8bec641937e8236099f52974cf5687 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 4 Sep 2012 00:03:29 +0000 Subject: xfrm: Workaround incompatibility of ESN and async crypto [ Upstream commit 3b59df46a449ec9975146d71318c4777ad086744 ] ESN for esp is defined in RFC 4303. This RFC assumes that the sequence number counters are always up to date. However, this is not true if an async crypto algorithm is employed. If the sequence number counters are not up to date on sequence number check, we may incorrectly update the upper 32 bit of the sequence number. This leads to a DOS. We workaround this by comparing the upper sequence number, (used for authentication) with the upper sequence number computed after the async processing. We drop the packet if these numbers are different. To do this, we introduce a recheck function that does this check in the ESN case. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 54a0dc2..ab2bb42 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,7 +212,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->check(x, skb, seq)) { + if (async && x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 3235023..379c176 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -437,6 +437,18 @@ err: return -EINVAL; } +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -508,6 +520,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, + .recheck = xfrm_replay_check, .notify = xfrm_replay_notify, .overflow = xfrm_replay_overflow, }; @@ -515,6 +528,7 @@ static struct xfrm_replay xfrm_replay_legacy = { static struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_bmp, }; @@ -522,6 +536,7 @@ static struct xfrm_replay xfrm_replay_bmp = { static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_esn, }; -- cgit v1.1 From 66c41c804c27187c20f1c29aed3216caf69cca4f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 13 Sep 2012 11:41:26 +0000 Subject: xfrm_user: return error pointer instead of NULL [ Upstream commit 864745d291b5ba80ea0bd0edcbe67273de368836 ] When dump_one_state() returns an error, e.g. because of a too small buffer to dump the whole xfrm state, xfrm_state_netlink() returns NULL instead of an error pointer. But its callers expect an error pointer and therefore continue to operate on a NULL skbuff. This could lead to a privilege escalation (execution of user code in kernel context) if the attacker has CAP_NET_ADMIN and is able to map address 0. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c658cb3..dbd2852 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -862,6 +862,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -872,9 +873,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.1 From 182d22d51bc2f57cded9eed61dbbcfb82b87da1c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 14 Sep 2012 09:58:32 +0000 Subject: xfrm_user: return error pointer instead of NULL #2 [ Upstream commit c25463722509fef0ed630b271576a8c9a70236f3 ] When dump_one_policy() returns an error, e.g. because of a too small buffer to dump the whole xfrm policy, xfrm_policy_netlink() returns NULL instead of an error pointer. But its caller expects an error pointer and therefore continues to operate on a NULL skbuff. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dbd2852..59801ce 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1531,6 +1531,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1541,9 +1542,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.1 From 72ab84bd1945bb593047564680ea919b8e13beeb Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2012 22:40:10 +0000 Subject: xfrm: fix a read lock imbalance in make_blackhole [ Upstream commit 433a19548061bb5457b6ab77ed7ea58ca6e43ddb ] if xfrm_policy_get_afinfo returns 0, it has already released the read lock, xfrm_policy_put_afinfo should not be called again. Signed-off-by: Li RongQing Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0c0e40e..7c8e0cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1759,7 +1759,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } -- cgit v1.1 From 2ed1aeaca76644bf96d32fdd491e0d18afdcadbd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:38 +0000 Subject: xfrm_user: fix info leak in copy_to_user_auth() [ Upstream commit 4c87308bdea31a7b4828a51f6156e6f721a1fcc9 ] copy_to_user_auth() fails to initialize the remainder of alg_name and therefore discloses up to 54 bytes of heap memory via netlink to userland. Use strncpy() instead of strcpy() to fill the trailing bytes of alg_name with null bytes. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 59801ce..d78ebe2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -742,7 +742,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; -- cgit v1.1 From 2f21f42628061faa605c76c53449a325597137a7 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:39 +0000 Subject: xfrm_user: fix info leak in copy_to_user_state() [ Upstream commit f778a636713a435d3a922c60b1622a91136560c1 ] The memory reserved to dump the xfrm state includes the padding bytes of struct xfrm_usersa_info added by the compiler for alignment (7 for amd64, 3 for i386). Add an explicit memset(0) before filling the buffer to avoid the info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d78ebe2..8e878eb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -689,6 +689,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); -- cgit v1.1 From a601da719c73cedba80c788719594990e30a972f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:40 +0000 Subject: xfrm_user: fix info leak in copy_to_user_policy() [ Upstream commit 7b789836f434c87168eab067cfbed1ec4783dffd ] The memory reserved to dump the xfrm policy includes multiple padding bytes added by the compiler for alignment (padding bytes in struct xfrm_selector and struct xfrm_userpolicy_info). Add an explicit memset(0) before filling the buffer to avoid the heap info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8e878eb..b5215b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1300,6 +1300,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); -- cgit v1.1 From c33fcb85ee97f354c5fbdb841b0be01a9c90f9b5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:41 +0000 Subject: xfrm_user: fix info leak in copy_to_user_tmpl() [ Upstream commit 1f86840f897717f86d523a13e99a447e6a5d2fa5 ] The memory used for the template copy is a local stack variable. As struct xfrm_user_tmpl contains multiple holes added by the compiler for alignment, not initializing the memory will lead to leaking stack bytes to userland. Add an explicit memset(0) to avoid the info leak. Initial version of the patch by Brad Spengler. Signed-off-by: Mathias Krause Cc: Brad Spengler Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5215b4..de4874f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1405,6 +1405,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); -- cgit v1.1 From cc4d0d8d729d4195bb22bff0de4139a3050a8c4f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:43 +0000 Subject: xfrm_user: don't copy esn replay window twice for new states [ Upstream commit e3ac104d41a97b42316915020ba228c505447d21 ] The ESN replay window was already fully initialized in xfrm_alloc_replay_state_esn(). No need to copy it again. Signed-off-by: Mathias Krause Cc: Steffen Klassert Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index de4874f..7be5d6a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -442,10 +442,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? 
attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -555,7 +556,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -1801,7 +1802,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; -- cgit v1.1 From 3d39c3b09b4ef1fd7febdcf88f6bb9437cf4c141 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 20 Sep 2012 10:01:49 +0000 Subject: xfrm_user: ensure user supplied esn replay window is valid [ Upstream commit ecd7918745234e423dd87fcc0c077da557909720 ] The current code fails to ensure that the netlink message actually contains as many bytes as the header indicates. If a user creates a new state or updates an existing one but does not supply the bytes for the whole ESN replay window, the kernel copies random heap bytes into the replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL netlink attribute. This leads to following issues: 1. The replay window has random bits set confusing the replay handling code later on. 2. A malicious user could use this flaw to leak up to ~3.5kB of heap memory when she has access to the XFRM netlink interface (requires CAP_NET_ADMIN). Known users of the ESN replay window are strongSwan and Steffen's iproute2 patch (). The latter uses the interface with a bitmap supplied while the former does not. strongSwan is therefore prone to run into issue 1. To fix both issues without breaking existing userland allow using the XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a fully specified one. For the former case we initialize the in-kernel bitmap with zero, for the latter we copy the user supplied bitmap. For state updates the full bitmap must be supplied. To prevent overflows in the bitmap length calculation the maximum size of bmp_len is limited to 128 by this patch -- resulting in a maximum replay window of 4096 packets. This should be sufficient for all real life scenarios (RFC 4303 recommends a default replay window size of 64). Signed-off-by: Mathias Krause Cc: Steffen Klassert Cc: Martin Willi Cc: Ben Hutchings Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7be5d6a..05f82e6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; -- cgit v1.1 From 829f2161f7057a511df7a41e52c5a43cbf5a49d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Sep 2012 13:11:12 +0000 Subject: net-sched: sch_cbq: avoid infinite loop [ Upstream commit bdfc87f7d1e253e0a61e2fc6a75ea9d76f7fc03a ] Its possible to setup a bad cbq configuration leading to an infinite loop in cbq_classify() DEV_OUT=eth0 ICMP="match ip protocol 1 0xff" U32="protocol ip u32" DST="match ip dst" tc qdisc add dev $DEV_OUT root handle 1: cbq avpkt 1000 \ bandwidth 100mbit tc class add dev $DEV_OUT parent 1: classid 1:1 cbq \ rate 512kbit allot 1500 prio 5 bounded isolated tc filter add dev $DEV_OUT parent 1: prio 3 $U32 \ $ICMP $DST 192.168.3.234 flowid 1: Reported-by: Denys Fedoryschenko Tested-by: Denys Fedoryschenko Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cbq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 24d94c0..599f67a 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL || cl->level >= head->level) + if (cl == NULL) goto fallback; } - + if (cl->level >= head->level) + goto fallback; #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: -- cgit v1.1 From 3f99feef88eb867056bd4459e4cf68da33af8861 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 15 Sep 2012 00:41:35 +0000 Subject: pkt_sched: fix virtual-start-time update in QFQ [ Upstream commit 71261956973ba9e0637848a5adb4a5819b4bae83 ] If the old timestamps of a class, say cl, are stale when the class becomes active, then QFQ may assign to cl a much higher start time than the maximum value allowed. This may happen when QFQ assigns to the start time of cl the finish time of a group whose classes are characterized by a higher value of the ratio max_class_pkt/weight_of_the_class with respect to that of cl. Inserting a class with a too high start time into the bucket list corrupts the data structure and may eventually lead to crashes. This patch limits the maximum start time assigned to a class. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 1033434..f86bc72 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -829,7 +829,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } -- cgit v1.1 From 4ea3465a8c4f9aee60d5aee02715f04423d0da01 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Oct 2012 06:14:17 +0000 Subject: 8021q: fix mac_len recomputation in vlan_untag() [ Upstream commit 5316cf9a5197eb80b2800e1acadde287924ca975 ] skb_reset_mac_len() relies on the value of the skb->network_header pointer, therefore we must wait for such pointer to be recalculated before computing the new mac_len value. Signed-off-by: Antonio Quartulli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 27263fb..c177f9e 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -106,7 +106,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) return NULL; memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; - skb_reset_mac_len(skb); return skb; } @@ -173,6 +172,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + return skb; err_free: -- cgit v1.1 From 61c7891cbfa587d9cdcede0e5441c3900e862df9 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 19 Sep 2012 19:25:34 +0000 Subject: ipv6: release reference of ip6_null_entry's dst entry in __ip6_del_rt [ Upstream commit 6825a26c2dc21eb4f8df9c06d3786ddec97cf53b ] as we hold dst_entry before we call __ip6_del_rt, so we should alse call dst_release not only return -ENOENT when the rt6_info is ip6_null_entry. and we already hold the dst entry, so I think it's safe to call dst_release out of the write-read lock. Signed-off-by: Gao feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7c5b4cb..9172568 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1399,17 +1399,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->rt6i_dev); - if (rt == net->ipv6.ip6_null_entry) - return -ENOENT; + if (rt == net->ipv6.ip6_null_entry) { + err = -ENOENT; + goto out; + } table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, info); - dst_release(&rt->dst); - write_unlock_bh(&table->tb6_lock); +out: + dst_release(&rt->dst); return err; } -- cgit v1.1 From 75cb41f8ea4e0fc3b6292eb8d7fcddfeafb4f718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Fri, 14 Sep 2012 04:59:52 +0000 Subject: tcp: flush DMA queue before sk_wait_data if rcv_wnd is zero [ Upstream commit 15c041759bfcd9ab0a4e43f1c16e2644977d0467 ] If recv() syscall is called for a TCP socket so that - IOAT DMA is used - MSG_WAITALL flag is used - requested length is bigger than sk_rcvbuf - enough data has already arrived to bring rcv_wnd to zero then when tcp_recvmsg() gets to calling sk_wait_data(), receive window can be still zero while sk_async_wait_queue exhausts enough space to keep it zero. As this queue isn't cleaned until the tcp_service_net_dma() call, sk_wait_data() cannot receive any data and blocks forever. If zero receive window and non-empty sk_async_wait_queue is detected before calling sk_wait_data(), process the queue first. Signed-off-by: Michal Kubecek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e57df66..dd3af6c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1592,8 +1592,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ -- cgit v1.1 From 126268e1d7790725c2bb0e394652c70ced6ee2ea Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 3 Sep 2012 04:27:42 +0000 Subject: sctp: Don't charge for data in sndbuf again when transmitting packet [ Upstream commit 4c3a5bdae293f75cdf729c6c00124e8489af2276 ] SCTP charges wmem_alloc via sctp_set_owner_w() in sctp_sendmsg() and via skb_set_owner_w() in sctp_packet_transmit(). If a sender runs out of sndbuf it will sleep in sctp_wait_for_sndbuf() and expects to be waken up by __sctp_write_space(). Buffer space charged via sctp_set_owner_w() is released in sctp_wfree() which calls __sctp_write_space() directly. Buffer space charged via skb_set_owner_w() is released via sock_wfree() which calls sk->sk_write_space() _if_ SOCK_USE_WRITE_QUEUE is not set. sctp_endpoint_init() sets SOCK_USE_WRITE_QUEUE on all sockets. Therefore if sctp_packet_transmit() manages to queue up more than sndbuf bytes, sctp_wait_for_sndbuf() will never be woken up again unless it is interrupted by a signal. This could be fixed by clearing the SOCK_USE_WRITE_QUEUE flag but ... Charging for the data twice does not make sense in the first place, it leads to overcharging sndbuf by a factor 2. Therefore this patch only charges a single byte in wmem_alloc when transmitting an SCTP packet to ensure that the socket stays alive until the packet has been released. This means that control chunks are no longer accounted for in wmem_alloc which I believe is not a problem as skb->truesize will typically lead to overcharging anyway and thus compensates for any control overhead. Signed-off-by: Thomas Graf CC: Vlad Yasevich CC: Neil Horman CC: David Miller Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 8fc4dcd..32ba8d0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -334,6 +334,25 @@ finish: return retval; } +static void sctp_packet_release_owner(struct sk_buff *skb) +{ + sk_free(skb->sk); +} + +static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sctp_packet_release_owner; + + /* + * The data chunks have already been accounted for in sctp_sendmsg(), + * therefore only reserve a single byte to keep socket around until + * the packet has been transmitted. + */ + atomic_inc(&sk->sk_wmem_alloc); +} + /* All packets are sent to the network through this function from * sctp_outq_tail(). * @@ -375,7 +394,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Set the owning socket so that we know where to get the * destination IP address. 
*/ - skb_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(nskb, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); -- cgit v1.1 From 74665a9b4fca3420c07f1e583242a477b2eb34b0 Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Fri, 7 Sep 2012 13:40:50 +0000 Subject: net: small bug on rxhash calculation [ Upstream commit 6862234238e84648c305526af2edd98badcad1e0 ] In the current rxhash calculation function, while the sorting of the ports/addrs is coherent (you get the same rxhash for packets sharing the same 4-tuple, in both directions), ports and addrs are sorted independently. This implies packets from a connection between the same addresses but crossed ports hash to the same rxhash. For example, traffic between A=S:l and B=L:s is hashed (in both directions) from {L, S, {s, l}}. The same rxhash is obtained for packets between C=S:s and D=L:l. This patch ensures that you either swap both addrs and ports, or you swap none. Traffic between A and B, and traffic between C and D, get their rxhash from different sources ({L, S, {l, s}} for A<->B, and {L, S, {s, l}} for C<->D) The patch is co-written with Eric Dumazet Signed-off-by: Chema Gonzalez Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d8bc889..df6ee28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2559,16 +2559,17 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) poff = proto_ports_offset(ip_proto); if (poff >= 0) { nhoff += ihl * 4 + poff; - if (pskb_may_pull(skb, nhoff + 4)) { + if (pskb_may_pull(skb, nhoff + 4)) ports.v32 = * (__force u32 *) (skb->data + nhoff); - if (ports.v16[1] < ports.v16[0]) - swap(ports.v16[0], ports.v16[1]); - } } /* get a consistent hash (same value on both flow directions) */ - if (addr2 < addr1) + if (addr2 < addr1 || + (addr2 == addr1 && + ports.v16[1] < ports.v16[0])) { swap(addr1, addr2); + swap(ports.v16[0], ports.v16[1]); + } hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); if (!hash) -- cgit v1.1 From 1a6b2c9da08fe3dc1fa825dfefcc70010c088a35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Sep 2012 07:00:11 +0000 Subject: net: guard tcp_set_keepalive() to tcp sockets [ Upstream commit 3e10986d1d698140747fcfc2761ec9cb64c1d582 ] Its possible to use RAW sockets to get a crash in tcp_set_keepalive() / sk_reset_timer() Fix is to make sure socket is a SOCK_STREAM one. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 56623ad..3da11ba 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -594,7 +594,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); -- cgit v1.1 From a1b995a2f5c69ae3088b153ed5d095561ded6eb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 Sep 2012 00:08:29 +0000 Subject: ipv4: raw: fix icmp_filter() [ Upstream commit ab43ed8b7490cb387782423ecf74aeee7237e591 ] icmp_filter() should not modify its input, or else its caller would need to recompute ip_hdr() if skb->head is reallocated. 
Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d4..3d8bb18 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -130,18 +130,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ -- cgit v1.1 From 27ab68c347da3242fc9f98bea187946e444a5f75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: ipv6: raw: fix icmpv6_filter() [ Upstream commit 1b05c4b50edbddbdde715c4a7350629819f6655e ] icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc7313b..fb812a6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -106,21 +106,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -- cgit v1.1 From 92da074473066d572bc39761a16e44299e104546 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: ipv6: mip6: fix mip6_mh_filter() [ Upstream commit 96af69ea2a83d292238bdba20e4508ee967cf8cb ] mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 43242e6..42853c4 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -84,28 +84,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } -- cgit v1.1 From 60e6a188d4cb2ef0fb9865cd7b3d4fca7cf7213e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 15:54:55 -0400 Subject: l2tp: fix a typo in l2tp_eth_dev_recv() [ Upstream commit c0cc88a7627c333de50b07b7c60b1d49d9d2e6cc ] While investigating l2tp bug, I hit a bug in eth_type_trans(), because not enough bytes were pulled in skb head. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 3c55f63..2cef50b 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, printk("\n"); } - if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) + if (!pskb_may_pull(skb, ETH_HLEN)) goto error; secpath_reset(skb); -- cgit v1.1 From 6a992a944a1c283359959745cb5e1f1dbca40a16 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 4 Sep 2012 04:13:18 +0000 Subject: netrom: copy_datagram_iovec can fail [ Upstream commit 6cf5c951175abcec4da470c50565cc0afe6cd11d ] Check for an error from this and if so bail properly. Signed-off-by: Alan Cox Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netrom/af_netrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 732152f..f156382 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1170,7 +1170,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_TRUNC; } - skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (er < 0) { + skb_free_datagram(sk, skb); + release_sock(sk); + return er; + } if (sax != NULL) { sax->sax25_family = AF_NETROM; -- cgit v1.1 From 70875a0484cf2ce1864dab9f453bcc072f4b71c7 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 19 Sep 2012 15:49:00 +0000 Subject: net: do not disable sg for packets requiring no checksum [ Upstream commit c0d680e577ff171e7b37dbdb1b1bf5451e851f04 ] A change in a series of VLAN-related changes appears to have inadvertently disabled the use of the scatter gather feature of network cards for transmission of non-IP ethernet protocols like ATA over Ethernet (AoE). Below is a reference to the commit that introduces a "harmonize_features" function that turns off scatter gather when the NIC does not support hardware checksumming for the ethernet protocol of an sk buff. commit f01a5236bd4b140198fbcc550f085e8361fd73fa Author: Jesse Gross Date: Sun Jan 9 06:23:31 2011 +0000 net offloading: Generalize netif_get_vlan_features(). The can_checksum_protocol function is not equipped to consider a protocol that does not require checksumming. Calling it for a protocol that requires no checksum is inappropriate. The patch below has harmonize_features call can_checksum_protocol when the protocol needs a checksum, so that the network layer is not forced to perform unnecessary skb linearization on the transmission of AoE packets. Unnecessary linearization results in decreased performance and increased memory pressure, as reported here: http://www.spinics.net/lists/linux-mm/msg15184.html The problem has probably not been widely experienced yet, because only recently has the kernel.org-distributed aoe driver acquired the ability to use payloads of over a page in size, with the patchset recently included in the mm tree: https://lkml.org/lkml/2012/8/28/140 The coraid.com-distributed aoe driver already could use payloads of greater than a page in size, but its users generally do not use the newest kernels. Signed-off-by: Ed Cashin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index df6ee28..5b84eaf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2038,7 +2038,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.1 From ef9fd53c07237e36bd396ffc1784714243869cda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Sep 2012 16:49:15 -0400 Subject: SUNRPC: Ensure that the TCP socket is closed when in CLOSE_WAIT commit a519fc7a70d1a918574bb826cc6905b87b482eb9 upstream. 
Instead of doing a shutdown() call, we need to do an actual close(). Ditto if/when the server is sending us junk RPC headers. Signed-off-by: Trond Myklebust Tested-by: Simon Kirby Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 554111f..cfd7d15 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1015,6 +1015,16 @@ static void xs_udp_data_ready(struct sock *sk, int len) read_unlock_bh(&sk->sk_callback_lock); } +/* + * Helper function to force a TCP close if the server is sending + * junk and/or it has put us in CLOSE_WAIT + */ +static void xs_tcp_force_close(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); +} + static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1041,7 +1051,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea /* Sanity check of the record length */ if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); - xprt_force_disconnect(xprt); + xs_tcp_force_close(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", @@ -1122,7 +1132,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, break; default: dprintk("RPC: invalid request message type\n"); - xprt_force_disconnect(&transport->xprt); + xs_tcp_force_close(&transport->xprt); } xs_tcp_check_fraghdr(transport); } @@ -1445,6 +1455,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) static void xs_sock_mark_closed(struct rpc_xprt *xprt) { smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1502,8 +1514,8 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - xprt_force_disconnect(xprt); xprt->connect_cookie++; + xs_tcp_force_close(xprt); case TCP_CLOSING: /* * If the server closed down the connection, make sure that @@ -2146,8 +2158,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) /* We're probably in TIME_WAIT. Get rid of existing socket, * and retry */ - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); - xprt_force_disconnect(xprt); + xs_tcp_force_close(xprt); break; case -ECONNREFUSED: case -ECONNRESET: -- cgit v1.1 From dd81262194af3606f714c8b39c8eb35da8c78f00 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 9 Oct 2012 17:48:16 +0000 Subject: pktgen: fix crash when generating IPv6 packets commit 5aa8b572007c4bca1e6d3dd4c4820f1ae49d6bb2 upstream. For IPv6, sizeof(struct ipv6hdr) = 40, thus the following expression will result negative: datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - pkt_dev->pkt_overhead; And, the check "if (datalen < sizeof(struct pktgen_hdr))" will be passed as "datalen" is promoted to unsigned, therefore will cause a crash later. This is a quick fix by checking if "datalen" is negative. The following patch will increase the default value of 'min_pkt_size' for IPv6. This bug should exist for a long time, so Cc -stable too. Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c0e0f76..01890e1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2932,7 +2932,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, sizeof(struct ipv6hdr) - sizeof(struct udphdr) - pkt_dev->pkt_overhead; - if (datalen < sizeof(struct pktgen_hdr)) { + if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); if (net_ratelimit()) pr_info("increased datalen to %d\n", datalen); -- cgit v1.1 From e3c418797de13ba083bede8192a7873483feaf1d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Jul 2012 20:30:11 +0300 Subject: ipvs: fix oops in ip_vs_dst_event on rmmod commit 283283c4da91adc44b03519f434ee1e7e91d6fdb upstream. After commit 39f618b4fd95ae243d940ec64c961009c74e3333 (3.4) "ipvs: reset ipvs pointer in netns" we can oops in ip_vs_dst_event on rmmod ip_vs because ip_vs_control_cleanup is called after the ipvs_core_ops subsys is unregistered and net->ipvs is NULL. Fix it by exiting early from ip_vs_dst_event if ipvs is NULL. It is safe because all services and dests for the net are already freed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9528ea0..d75eb39 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1520,11 +1520,12 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER) + if (event != NETDEV_UNREGISTER || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); @@ -1550,7 +1551,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + list_for_each_entry(dest, &ipvs->dest_trash, n_list) { __ip_vs_dev_reset(dest, dev); } mutex_unlock(&__ip_vs_mutex); -- cgit v1.1 From 716362d00bff2312c0afffacbb9d4c9deb3faa96 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 16:25:49 +0000 Subject: netfilter: nf_conntrack: fix racy timer handling with reliable events commit 5b423f6a40a0327f9d40bc8b97ce9be266f74368 upstream. Existing code assumes that del_timer returns true for alive conntrack entries. However, this is not true if reliable events are enabled. In that case, del_timer may return true for entries that were just inserted in the dying list. Note that packets / ctnetlink may hold references to conntrack entries that were just inserted to such list. This patch fixes the issue by adding an independent timer for event delivery. This increases the size of the ecache extension. Still we can revisit this later and use variable size extensions to allocate this area on demand. 
Tested-by: Oliver Smith Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f7af8b8..dff164e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -247,12 +247,15 @@ static void death_by_event(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ - ct->timeout.expires = jiffies + + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); return; } /* we've got the event delivered, now it's dying */ @@ -266,6 +269,9 @@ static void death_by_event(unsigned long ul_conntrack) void nf_ct_insert_dying_list(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); /* add this conntrack to the dying list */ spin_lock_bh(&nf_conntrack_lock); @@ -273,10 +279,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); /* set a new timer to retry event delivery */ - setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); - ct->timeout.expires = jiffies + + setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); -- cgit v1.1 From 7e3cf6ea62cdaf3c556e43b09883e5d63c94536f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 3 Apr 2012 22:02:01 +0200 Subject: netfilter: nf_ct_ipv4: packets with wrong ihl are invalid commit 07153c6ec074257ade76a461429b567cff2b3a1e upstream. It was reported that the Linux kernel sometimes logs: klogd: [2629147.402413] kernel BUG at net / netfilter / nf_conntrack_proto_tcp.c: 447! klogd: [1072212.887368] kernel BUG at net / netfilter / nf_conntrack_proto_tcp.c: 392 ipv4_get_l4proto() in nf_conntrack_l3proto_ipv4.c and tcp_error() in nf_conntrack_proto_tcp.c should catch malformed packets, so the errors at the indicated lines - TCP options parsing - should not happen. However, tcp_error() relies on the "dataoff" offset to the TCP header, calculated by ipv4_get_l4proto(). But ipv4_get_l4proto() does not check bogus ihl values in IPv4 packets, which then can slip through tcp_error() and get caught at the TCP options parsing routines. The patch fixes ipv4_get_l4proto() by invalidating packets with bogus ihl value. The patch closes netfilter bugzilla id 771. 
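A minimal userspace sketch of the bounds check described above; the helper name and the packet sizes are made up for illustration, while the real code operates on struct sk_buff fields and returns -NF_ACCEPT for a bogus header:

#include <stdio.h>

/* Sketch of the check added to ipv4_get_l4proto(): nhoff is the offset of the
 * IPv4 header, ihl the header-length field in 32-bit words, skblen the amount
 * of data actually present. All values below are examples. */
static int l4_offset_ok(unsigned int nhoff, unsigned int ihl, unsigned int skblen)
{
	unsigned int dataoff = nhoff + (ihl << 2);	/* where L4 parsing would start */

	return dataoff <= skblen;
}

int main(void)
{
	printf("ihl=5:  %s\n", l4_offset_ok(0, 5, 40) ? "ok" : "bogus");	/* 20 <= 40 */
	printf("ihl=15: %s\n", l4_offset_ok(0, 15, 40) ? "ok" : "bogus");	/* 60 > 40 */
	return 0;
}

With ihl forged to 15, dataoff lands past the end of the 40-byte packet, which is exactly the case the added check rejects before the TCP option parser ever sees it.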
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index de9da21..d7d63f4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -84,6 +84,14 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, *dataoff = nhoff + (iph->ihl << 2); *protonum = iph->protocol; + /* Check bogus IP headers */ + if (*dataoff > skb->len) { + pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " + "nhoff %u, ihl %u, skblen %u\n", + nhoff, iph->ihl << 2, skb->len); + return -NF_ACCEPT; + } + return NF_ACCEPT; } -- cgit v1.1 From 2b3e2b53d651ffe0b5256cbcb94e5b08b9e5d1cc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 15:24:09 +0000 Subject: netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation commit 3f509c689a07a4aa989b426893d8491a7ffcc410 upstream. We're hitting bug while trying to reinsert an already existing expectation: kernel BUG at kernel/timer.c:895! invalid opcode: 0000 [#1] SMP [...] Call Trace: [] nf_ct_expect_related_report+0x4a0/0x57a [nf_conntrack] [] ? in4_pton+0x72/0x131 [] ip_nat_sdp_media+0xeb/0x185 [nf_nat_sip] [] set_expected_rtp_rtcp+0x32d/0x39b [nf_conntrack_sip] [] process_sdp+0x30c/0x3ec [nf_conntrack_sip] [] ? irq_exit+0x9a/0x9c [] ? ip_nat_sdp_media+0x185/0x185 [nf_nat_sip] We have to remove the RTP expectation if the RTCP expectation hits EBUSY since we keep trying with other ports until we succeed. Reported-by: Rafal Fitt Acked-by: David Miller Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_nat_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e40cf78..b1a5e9d 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -501,7 +501,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); port = 0; break; -- cgit v1.1 From 01f66df0b96ac087c782c1890c6827d99776ac2f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:47 +0000 Subject: netfilter: nf_nat_sip: fix via header translation with multiple parameters commit f22eb25cf5b1157b29ef88c793b71972efc47143 upstream. Via-headers are parsed beginning at the first character after the Via-address. When the address is translated first and its length decreases, the offset to start parsing at is incorrect and header parameters might be missed. Update the offset after translating the Via-address to fix this. 
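The offset arithmetic can be seen in a small userspace sketch; splice_replace() and the sample Via value are invented for illustration, whereas the kernel rewrites the skb through its mangle helpers and tracks the new length in *datalen:

#include <stdio.h>
#include <string.h>

/* Replace buf[off..off+len) with rep in place and return the new total length.
 * This models the length bookkeeping only, not the kernel's mangle helpers. */
static size_t splice_replace(char *buf, size_t total, size_t off, size_t len,
			     const char *rep)
{
	size_t rlen = strlen(rep);

	memmove(buf + off + rlen, buf + off + len, total - off - len + 1);
	memcpy(buf + off, rep, rlen);
	return total + rlen - len;
}

int main(void)
{
	char via[128] = "SIP/2.0/UDP 192.168.100.100:5060;branch=z9hG4bK";
	size_t oldtotal = strlen(via);
	size_t matchoff = 12;				/* start of addr:port */
	size_t matchlen = strlen("192.168.100.100:5060");
	size_t newtotal = splice_replace(via, oldtotal, matchoff, matchlen,
					 "10.0.0.1:5060");
	/* parameter parsing must resume at the translated end of the match */
	size_t matchend = matchoff + matchlen + newtotal - oldtotal;

	printf("rewritten: %s\n", via);
	printf("stale offset resumes at: \"%s\"\n", via + matchoff + matchlen);
	printf("fixed offset resumes at: \"%s\"\n", via + matchend);
	return 0;
}

Because the translated address is shorter, the stale offset already sits inside the first parameter, so its name is never seen by the parser; adding the length delta keeps parsing aligned with the rewritten header.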
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_nat_sip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b1a5e9d..cd6881e 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; + unsigned int olen, matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; /* We're only interested in headers related to this @@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, goto next; } + olen = *datalen; if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ -- cgit v1.1 From de07e511bed085d75bb16e7fb499d4a16f0d4475 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Aug 2012 02:25:24 +0200 Subject: netfilter: nf_ct_expect: fix possible access to uninitialized timer commit 2614f86490122bf51eb7c12ec73927f1900f4e7d upstream. In __nf_ct_expect_check, the function refresh_timer returns 1 if a matching expectation is found and its timer is successfully refreshed. This results in nf_ct_expect_related returning 0. Note that at this point: - the passed expectation is not inserted in the expectation table and its timer was not initialized, since we have refreshed one matching/existing expectation. - nf_ct_expect_alloc uses kmem_cache_alloc, so the expectation timer is in some undefined state just after the allocation, until it is appropriately initialized. This can be a problem for the SIP helper during the expectation addition: ... if (nf_ct_expect_related(rtp_exp) == 0) { if (nf_ct_expect_related(rtcp_exp) != 0) nf_ct_unexpect_related(rtp_exp); ... Note that nf_ct_expect_related(rtp_exp) may return 0 for the timer refresh case that is detailed above. Then, if nf_ct_unexpect_related(rtcp_exp) returns != 0, nf_ct_unexpect_related(rtp_exp) is called, which does: spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } spin_unlock_bh(&nf_conntrack_lock); Note that del_timer always returns false if the timer has been initialized. However, the timer was not initialized since setup_timer was not called, therefore, the expectation timer remains in some undefined state. If I'm not missing anything, this may lead to the removal an unexistent expectation. To fix this, the optimization that allows refreshing an expectation is removed. Now nf_conntrack_expect_related looks more consistent to me since it always add the expectation in case that it returns success. Thanks to Patrick McHardy for participating in the discussion of this patch. 
I think this may be the source of the problem described by: http://marc.info/?l=netfilter-devel&m=134073514719421&w=2 Reported-by: Rafal Fitt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_expect.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cd1e8e0..a3dffab 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -364,23 +364,6 @@ static void evict_oldest_expect(struct nf_conn *master, } } -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - const struct nf_conntrack_expect_policy *p; - - if (!del_timer(&i->timeout)) - return 0; - - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[i->class]; - i->timeout.expires = jiffies + p->timeout * HZ; - add_timer(&i->timeout); - return 1; -} - static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; @@ -388,7 +371,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n; + struct hlist_node *n, *next; unsigned int h; int ret = 1; @@ -399,12 +382,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_ct_expect_put(i); + break; } } else if (expect_clash(i, expect)) { ret = -EBUSY; -- cgit v1.1 From fb3c4ac3ad18c262fed504ab6f666edbff304e63 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2012 10:51:43 +0000 Subject: netfilter: limit, hashlimit: avoid duplicated inline commit 7a909ac70f6b0823d9f23a43f19598d4b57ac901 upstream. credit_cap can be set to credit, which avoids inlining user2credits twice. Also, remove inline keyword and let compiler decide. old: 684 192 0 876 36c net/netfilter/xt_limit.o 4927 344 32 5303 14b7 net/netfilter/xt_hashlimit.o now: 668 192 0 860 35c net/netfilter/xt_limit.o 4793 344 32 5169 1431 net/netfilter/xt_hashlimit.o Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_hashlimit.c | 8 +++----- net/netfilter/xt_limit.c | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9228ee0..6092b0c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -392,8 +392,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) /* Precision saver. */ -static inline u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... 
*/ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -403,7 +402,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } -static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) { dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; if (dh->rateinfo.credit > dh->rateinfo.credit_cap) @@ -534,8 +533,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.prev = jiffies; dh->rateinfo.credit = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); + dh->rateinfo.credit_cap = dh->rateinfo.credit; dh->rateinfo.cost = user2credits(hinfo->cfg.avg); } else { /* update expiration timeout */ diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 32b7a57..5c22ce8 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -88,8 +88,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* Precision saver. */ -static u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -123,7 +122,7 @@ static int limit_mt_check(const struct xt_mtchk_param *par) 128. */ priv->prev = jiffies; priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } return 0; -- cgit v1.1 From dfd5603c251a87fbfcaef5d492656ae9011c9f7a Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 22:26:52 +0000 Subject: netfilter: xt_limit: have r->cost != 0 case work commit 82e6bfe2fbc4d48852114c4f979137cd5bf1d1a8 upstream. Commit v2.6.19-rc1~1272^2~41 tells us that r->cost != 0 can happen when a running state is saved to userspace and then reinstated from there. Make sure that private xt_limit area is initialized with correct values. Otherwise, random matchings due to use of uninitialized memory. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso Acked-by: David Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_limit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8..a4c1e45 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } -- cgit v1.1 From 7a104fcedf491fe1470dab36605f1b1d55ad893d Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 17 Jul 2012 00:01:26 +0200 Subject: SUNRPC: Prevent kernel stack corruption on long values of flush commit 212ba90696ab4884e2025b0b13726d67aadc2cd4 upstream. The buffer size in read_flush() is too small for the longest possible values for it. 
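For the worst case, assuming a 64-bit unsigned long, a quick userspace check (not kernel code) shows why 20 bytes cannot hold the formatted value:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	char tbuf[22];	/* 20 digits for 2^64 - 1, plus '\n' and the NUL */
	int n = snprintf(tbuf, sizeof(tbuf), "%lu\n", ULONG_MAX);

	printf("\"%lu\\n\" needs %d characters plus the terminator\n",
	       ULONG_MAX, n);
	return 0;
}

2^64 - 1 formats to 20 digits, so with the trailing newline and NUL terminator 22 bytes are required, which is why an sprintf() into a 20-byte buffer can run past the end.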
This can lead to a kernel stack corruption: [ 43.047329] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff833e64b4 [ 43.047329] [ 43.049030] Pid: 6015, comm: trinity-child18 Tainted: G W 3.5.0-rc7-next-20120716-sasha #221 [ 43.050038] Call Trace: [ 43.050435] [] panic+0xcd/0x1f4 [ 43.050931] [] ? read_flush.isra.7+0xe4/0x100 [ 43.051602] [] __stack_chk_fail+0x16/0x20 [ 43.052206] [] read_flush.isra.7+0xe4/0x100 [ 43.052951] [] ? read_flush_pipefs+0x30/0x30 [ 43.053594] [] read_flush_procfs+0x2c/0x30 [ 43.053596] [] proc_reg_read+0x9c/0xd0 [ 43.053596] [] ? proc_reg_write+0xd0/0xd0 [ 43.053596] [] do_loop_readv_writev+0x4b/0x90 [ 43.053596] [] do_readv_writev+0xf6/0x1d0 [ 43.053596] [] vfs_readv+0x3e/0x60 [ 43.053596] [] sys_readv+0x48/0xb0 [ 43.053596] [] system_call_fastpath+0x1a/0x1f Signed-off-by: Sasha Levin Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4530a91..237a2ee 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1404,11 +1404,11 @@ static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { - char tbuf[20]; + char tbuf[22]; unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); + snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; -- cgit v1.1 From 5891cb7c82658d26ca323639553b94b7272ebb68 Mon Sep 17 00:00:00 2001 From: "ramesh.nagappa@gmail.com" Date: Fri, 5 Oct 2012 19:10:15 +0000 Subject: net: Fix skb_under_panic oops in neigh_resolve_output [ Upstream commit e1f165032c8bade3a6bdf546f8faf61fda4dd01c ] The retry loop in neigh_resolve_output() and neigh_connected_output() call dev_hard_header() with out reseting the skb to network_header. This causes the retry to fail with skb_under_panic. The fix is to reset the network_header within the retry loop. Signed-off-by: Ramesh Nagappa Reviewed-by: Shawn Lu Reviewed-by: Robert Coulson Reviewed-by: Billie Alsup Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 96bb0a3..eb8857a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1313,8 +1313,6 @@ int neigh_resolve_output(struct sk_buff *skb) if (!dst) goto discard; - __skb_pull(skb, skb_network_offset(skb)); - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; @@ -1326,6 +1324,7 @@ int neigh_resolve_output(struct sk_buff *skb) neigh_hh_init(neigh, dst, dst->ops->protocol); do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); @@ -1358,9 +1357,8 @@ int neigh_connected_output(struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - __skb_pull(skb, skb_network_offset(skb)); - do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); -- cgit v1.1 From 445b290bd3c3470a220049648150eefaec64937a Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Mon, 8 Oct 2012 18:57:27 +0000 Subject: RDS: fix rds-ping spinlock recursion [ Upstream commit 5175a5e76bbdf20a614fb47ce7a38f0f39e70226 ] This is the revised patch for fixing rds-ping spinlock recursion according to Venkat's suggestions. RDS ping/pong over TCP feature has been broken for years(2.6.39 to 3.6.0) since we have to set TCP cork and call kernel_sendmsg() between ping/pong which both need to lock "struct sock *sk". However, this lock has already been hold before rds_tcp_data_ready() callback is triggerred. As a result, we always facing spinlock resursion which would resulting in system panic. Given that RDS ping is only used to test the connectivity and not for serious performance measurements, we can queue the pong transmit to rds_wq as a delayed response. Reported-by: Dan Carpenter CC: Venkat Venkatsubra CC: David S. Miller CC: James Morris Signed-off-by: Jie Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index c803341..f6bdfb0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1121,7 +1121,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rds_stats_inc(s_send_pong); if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - rds_send_xmit(conn); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); rds_message_put(rm); return 0; -- cgit v1.1 From 4d306c27d6032f5b666e187a8d02bdd288025031 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Fri, 12 Oct 2012 04:34:17 +0000 Subject: tcp: resets are misrouted [ Upstream commit 4c67525849e0b7f4bd4fab2487ec9e43ea52ef29 ] After commit e2446eaa ("tcp_v4_send_reset: binding oif to iif in no sock case").. tcp resets are always lost, when routing is asymmetric. Yes, backing out that patch will result in misrouting of resets for dead connections which used interface binding when were alive, but we actually cannot do anything here. What's died that's died and correct handling normal unbound connections is obviously a priority. Comment to comment: > This has few benefits: > 1. tcp_v6_send_reset already did that. It was done to route resets for IPv6 link local addresses. It was a mistake to do so for global addresses. The patch fixes this as well. 
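The link-local distinction can be sketched in userspace as follows; is_link_local() and the sample addresses are illustrative only, the kernel itself tests ipv6_addr_type() for IPV6_ADDR_LINKLOCAL:

#include <stdio.h>
#include <arpa/inet.h>

/* fe80::/10 test; purely for illustration of the address classes involved. */
static int is_link_local(const struct in6_addr *a)
{
	return a->s6_addr[0] == 0xfe && (a->s6_addr[1] & 0xc0) == 0x80;
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "fe80::1", &a);
	printf("fe80::1     -> bind reply to ingress interface: %d\n",
	       is_link_local(&a));
	inet_pton(AF_INET6, "2001:db8::1", &a);
	printf("2001:db8::1 -> leave oif unset, let routing decide: %d\n",
	       is_link_local(&a));
	return 0;
}

Only the fe80::/10 case needs the reset pinned to the ingress interface; for global addresses the route lookup must stay free to pick the correct egress path on asymmetric setups.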
Actually, the problem appears to be even more serious than guaranteed loss of resets. As reported by Sergey Soloviev, those misrouted resets create a lot of arp traffic and a huge amount of unresolved arp entries, bringing NAT firewalls that use asymmetric routing to their knees. Signed-off-by: Alexey Kuznetsov Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 7 ++++--- net/ipv6/tcp_ipv6.c | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 53a5af6..d645c6f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -651,10 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; /* When socket is gone, all binding information is lost. - * routing might fail in this case. using iif for oif to - * make sure we can deliver it + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. */ - arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 848f963..a6d5850 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1060,7 +1060,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); -- cgit v1.1 From 5b34d96acadef305c862006ce5d079dfb6526ee8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Oct 2012 17:14:36 -0400 Subject: SUNRPC: Get rid of the xs_error_report socket callback commit f878b657ce8e7d3673afe48110ec208a29e38c4a upstream. Chris Perl reports that we're seeing races between the wakeup call in xs_error_report and the connect attempts. Basically, Chris has shown that in certain circumstances, the call to xs_error_report causes the rpc_task that is responsible for reconnecting to wake up early, thus triggering a disconnect and retry. Since the sk->sk_error_report() calls in the socket layer are always followed by a tcp_done() in the cases where we care about waking up the rpc_tasks, just let the state_change callbacks take responsibility for those wake ups.
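For readers who do not have net/sunrpc/xprtsock.c in front of them: the transport overrides several struct sock callbacks and saves the originals so they can be restored at teardown, and the hunks below simply take sk_error_report out of that set. What follows is only a sketch of that save/override/restore pattern; the my_* names are hypothetical stand-ins for the real xs_* handlers, and the real code additionally holds sk->sk_callback_lock around these assignments.

    #include <net/sock.h>

    /* Hypothetical replacement handlers; what they do is irrelevant here. */
    static void my_data_ready(struct sock *sk, int bytes)
    {
            /* transport-specific receive handling would go here */
    }
    static void my_state_change(struct sock *sk)
    {
            /* react to sk->sk_state transitions here */
    }
    static void my_write_space(struct sock *sk)
    {
            /* resume blocked senders here */
    }

    struct my_transport {
            void (*old_data_ready)(struct sock *, int);
            void (*old_state_change)(struct sock *);
            void (*old_write_space)(struct sock *);
    };

    static void my_save_and_override(struct my_transport *t, struct sock *sk)
    {
            /* remember the socket's current callbacks ... */
            t->old_data_ready   = sk->sk_data_ready;
            t->old_state_change = sk->sk_state_change;
            t->old_write_space  = sk->sk_write_space;
            /* ... then point the socket at our own */
            sk->sk_data_ready   = my_data_ready;
            sk->sk_state_change = my_state_change;
            sk->sk_write_space  = my_write_space;
    }

    static void my_restore(struct my_transport *t, struct sock *sk)
    {
            sk->sk_data_ready   = t->old_data_ready;
            sk->sk_state_change = t->old_state_change;
            sk->sk_write_space  = t->old_write_space;
    }

With sk_error_report no longer part of this set, error wake-ups are left entirely to the state change path, which is exactly what the message above argues for.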
Reported-by: Chris Perl Signed-off-by: Trond Myklebust Tested-by: Chris Perl Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cfd7d15..fad08c8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -243,7 +243,6 @@ struct sock_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); - void (*old_error_report)(struct sock *); }; /* @@ -768,7 +767,6 @@ static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) transport->old_data_ready = sk->sk_data_ready; transport->old_state_change = sk->sk_state_change; transport->old_write_space = sk->sk_write_space; - transport->old_error_report = sk->sk_error_report; } static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) @@ -776,7 +774,6 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_data_ready = transport->old_data_ready; sk->sk_state_change = transport->old_state_change; sk->sk_write_space = transport->old_write_space; - sk->sk_error_report = transport->old_error_report; } static void xs_reset_transport(struct sock_xprt *transport) @@ -1539,25 +1536,6 @@ static void xs_tcp_state_change(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } -/** - * xs_error_report - callback mainly for catching socket errors - * @sk: socket - */ -static void xs_error_report(struct sock *sk) -{ - struct rpc_xprt *xprt; - - read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) - goto out; - dprintk("RPC: %s client %p...\n" - "RPC: error %d\n", - __func__, xprt, sk->sk_err); - xprt_wake_pending_tasks(xprt, -EAGAIN); -out: - read_unlock_bh(&sk->sk_callback_lock); -} - static void xs_write_space(struct sock *sk) { struct socket *sock; @@ -1857,7 +1835,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_local_data_ready; sk->sk_write_space = xs_udp_write_space; - sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; xprt_clear_connected(xprt); @@ -1946,7 +1923,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; - sk->sk_error_report = xs_error_report; sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; @@ -2062,7 +2038,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ -- cgit v1.1 From 5acfec95af2aa4a9982534557e3ad035e3e5ea86 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Oct 2012 11:35:47 -0400 Subject: SUNRPC: Clear the connect flag when socket state is TCP_CLOSE_WAIT commit d0bea455dd48da1ecbd04fedf00eb89437455fdc upstream. This is needed to ensure that we call xprt_connect() upon the next call to call_connect(). 
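As a rough sketch of where that one-line change sits, here is a heavily simplified state change callback; the real xs_tcp_state_change() takes sk->sk_callback_lock and handles more states, and only the TCP_* constants and XPRT_CONNECTED below are real kernel symbols (the surrounding names are illustrative, headers omitted).

    static void my_tcp_state_change(struct sock *sk)
    {
            struct rpc_xprt *xprt = sk->sk_user_data;

            if (!xprt)
                    return;

            switch (sk->sk_state) {
            case TCP_ESTABLISHED:
                    /* transport is usable again */
                    break;
            case TCP_CLOSE_WAIT:
                    /* server initiated a shutdown: make sure the next
                     * call_connect() really triggers xprt_connect() */
                    clear_bit(XPRT_CONNECTED, &xprt->state);
                    break;
            case TCP_CLOSE:
                    /* fully closed: wake up pending tasks */
                    break;
            }
    }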
Signed-off-by: Trond Myklebust Tested-by: Chris Perl Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fad08c8..6defa77 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1512,6 +1512,7 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt->connect_cookie++; + clear_bit(XPRT_CONNECTED, &xprt->state); xs_tcp_force_close(xprt); case TCP_CLOSING: /* -- cgit v1.1 From 16b7109680b1a0c8b3ea6271645c5853f0ffaa3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Oct 2012 11:40:02 -0400 Subject: Revert "SUNRPC: Ensure we close the socket on EPIPE errors too..." commit b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 upstream. This reverts commit 55420c24a0d4d1fce70ca713f84aa00b6b74a70e. Now that we clear the connected flag when entering TCP_CLOSE_WAIT, the deadlock described in this commit is no longer possible. Instead, the resulting call to xs_tcp_shutdown() can interfere with pending reconnection attempts. Reported-by: Chris Perl Signed-off-by: Trond Myklebust Tested-by: Chris Perl Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6defa77..2b17766 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -725,10 +725,10 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ECONNRESET: - case -EPIPE: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: + case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } -- cgit v1.1 From 910e425b29da7b601a99f49e81da666ab8ed5920 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Oct 2012 17:50:07 -0400 Subject: SUNRPC: Prevent races in xs_abort_connection() commit 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 upstream. The call to xprt_disconnect_done() that is triggered by a successful connection reset will trigger another automatic wakeup of all tasks on the xprt->pending rpc_wait_queue. In particular it will cause an early wake up of the task that called xprt_connect(). 
All we really want to do here is clear all the socket-specific state flags, so we split that functionality out of xs_sock_mark_closed() into a helper that can be called by xs_abort_connection(). Reported-by: Chris Perl Signed-off-by: Trond Myklebust Tested-by: Chris Perl Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2b17766..2202a14 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1449,7 +1449,7 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) xprt_clear_connecting(xprt); } -static void xs_sock_mark_closed(struct rpc_xprt *xprt) +static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); @@ -1457,6 +1457,11 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); } @@ -1991,10 +1996,8 @@ static void xs_abort_connection(struct sock_xprt *transport) any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); if (!result) - xs_sock_mark_closed(&transport->xprt); - else - dprintk("RPC: AF_UNSPEC connect return code %d\n", - result); + xs_sock_reset_connection_flags(&transport->xprt); + dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } static void xs_tcp_reuse_connection(struct sock_xprt *transport) -- cgit v1.1 From 0dab9d11a7900ebd2be763831e686c1da4789166 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 2 Oct 2012 21:34:23 +0200 Subject: mac80211: check if key has TKIP type before updating IV commit 4045f72bcf3c293c7c5932ef001742d8bb5ded76 upstream. This patch fixes a corruption which can manifest itself as the following crash when switching on the rfkill switch with the rt2x00 driver: https://bugzilla.redhat.com/attachment.cgi?id=615362 The pointer key->u.ccmp.tfm of a group key gets corrupted in: ieee80211_rx_h_michael_mic_verify(): /* update IV in key information to be able to detect replays */ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32; rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16; because rt2x00 always sets RX_FLAG_MMIC_STRIPPED, even if the key is not TKIP. We already check the type of the key on a different path in the ieee80211_rx_h_michael_mic_verify() function, so adding an additional check here is reasonable. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W.
Linville Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index aa1c40a..d9e03cf 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -109,7 +109,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; - if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) + if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key && + rx->key->conf.cipher == WLAN_CIPHER_SUITE_TKIP) goto update_iv; return RX_CONTINUE; -- cgit v1.1 From 58bca02682d3df9975ccabae2196f6aefcfeda3d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 17 Oct 2012 13:56:19 +0200 Subject: cfg80211: fix antenna gain handling commit c4a9fafc77a5318f5ed26c509bbcddf03e18c201 upstream. No driver initializes chan->max_antenna_gain to something sensible, and the only place where it is being used right now is inside ath9k. This leads to ath9k potentially using less tx power than it can use, which can decrease performance/range in some rare cases. Rather than going through every single driver, this patch initializes chan->orig_mag in wiphy_register(), ignoring whatever value the driver left in there. If a driver for some reason wishes to limit it independent from regulatory rulesets, it can do so internally. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 498c760..12c80cc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -544,8 +544,7 @@ int wiphy_register(struct wiphy *wiphy) for (i = 0; i < sband->n_channels; i++) { sband->channels[i].orig_flags = sband->channels[i].flags; - sband->channels[i].orig_mag = - sband->channels[i].max_antenna_gain; + sband->channels[i].orig_mag = INT_MAX; sband->channels[i].orig_mpwr = sband->channels[i].max_power; sband->channels[i].band = band; -- cgit v1.1 From 4bdd5ed8d955d712e77bbabb42e9a3c4c50a47df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Oct 2012 21:51:59 +0200 Subject: wireless: drop invalid mesh address extension frames commit 7dd111e8ee10cc6816669eabcad3334447673236 upstream. The mesh header can have address extension by a 4th or a 5th and 6th address, but never both. Drop such frames in 802.11 -> 802.3 conversion along with any frames that have the wrong extension. 
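The rule being enforced is compact enough to show on its own. Below is a self-contained sketch, not a copy of the kernel's ieee80211_get_mesh_hdrlen() (which maps the invalid encoding to a 6-byte length and leaves the dropping to its caller); the flag values mirror the 802.11s Address Extension subfield and should be read as assumptions.

    #include <stdio.h>

    #define MESH_FLAGS_AE_A4     0x1    /* one extra address, 6 bytes    */
    #define MESH_FLAGS_AE_A5_A6  0x2    /* two extra addresses, 12 bytes */
    #define MESH_FLAGS_AE        0x3    /* mask for the AE subfield      */

    /* 6-byte fixed mesh header plus the address extension; A4 together
     * with A5/A6 is not a valid encoding, so report it as malformed. */
    static int mesh_hdrlen(unsigned char flags)
    {
            switch (flags & MESH_FLAGS_AE) {
            case 0:
                    return 6;
            case MESH_FLAGS_AE_A4:
                    return 12;
            case MESH_FLAGS_AE_A5_A6:
                    return 18;
            default:
                    return -1;      /* both bits set: drop the frame */
            }
    }

    int main(void)
    {
            printf("%d %d %d %d\n", mesh_hdrlen(0x0), mesh_hdrlen(0x1),
                   mesh_hdrlen(0x2), mesh_hdrlen(0x3));   /* 6 12 18 -1 */
            return 0;
    }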
Reviewed-by: Javier Cardona Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 18e22be..a0004a4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -298,18 +298,15 @@ EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) { int ae = meshhdr->flags & MESH_FLAGS_AE; - /* 7.1.3.5a.2 */ + /* 802.11-2012, 8.2.4.7.3 */ switch (ae) { + default: case 0: return 6; case MESH_FLAGS_AE_A4: return 12; case MESH_FLAGS_AE_A5_A6: return 18; - case (MESH_FLAGS_AE_A4 | MESH_FLAGS_AE_A5_A6): - return 24; - default: - return 6; } } @@ -359,6 +356,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, /* make sure meshdr->flags is on the linear part */ if (!pskb_may_pull(skb, hdrlen + 1)) return -1; + if (meshdr->flags & MESH_FLAGS_AE_A4) + return -1; if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), @@ -383,6 +382,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, /* make sure meshdr->flags is on the linear part */ if (!pskb_may_pull(skb, hdrlen + 1)) return -1; + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) + return -1; if (meshdr->flags & MESH_FLAGS_AE_A4) skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), -- cgit v1.1 From 6f8acfdc74e6b117623f84de77c4822656f95c53 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 25 Oct 2012 11:10:18 -0700 Subject: mac80211: don't inspect Sequence Control field on control frames commit f7fbf70ee9db6da6033ae50d100e017ac1f26555 upstream. Per IEEE Std. 802.11-2012, Sec 8.2.4.4.1, the sequence Control field is not present in control frames. We noticed this problem when processing Block Ack Requests. Signed-off-by: Javier Cardona Signed-off-by: Javier Lopez Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 667f559..10437a1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1352,6 +1352,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *)rx->skb->data; fc = hdr->frame_control; + + if (ieee80211_is_ctl(fc)) + return RX_CONTINUE; + sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; -- cgit v1.1 From 03938ad82dcc45ecc5695ed14f7869b06b233b8d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 00:33:36 +0200 Subject: mac80211: check management frame header length commit 4a4f1a5808c8bb0b72a4f6e5904c53fb8c9cd966 upstream. Due to pskb_may_pull() checking the skb length, all non-management frames are checked on input whether their 802.11 header is fully present. Also add that check for management frames and remove a check that is now duplicate. This prevents accessing skb data beyond the frame end. 
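The added check boils down to comparing the buffer length against the fixed part of the management MAC header before anything past the frame control field is read. A standalone sketch of that idea follows; the in-tree ieee80211_hdrlen() remains the authoritative version and also accounts for the optional HT Control field.

    #include <stddef.h>
    #include <stdint.h>

    /* Return non-zero if a buffer claiming to be a management frame is at
     * least long enough for the fixed 24-byte MAC header:
     * FC(2) + duration(2) + addr1/2/3 (18) + sequence control(2). */
    static int mgmt_frame_hdr_ok(const uint8_t *buf, size_t len)
    {
            if (len < 2)
                    return 0;       /* cannot even read the frame control field */
            if ((buf[0] & 0x0c) != 0x00)
                    return 1;       /* not a management frame; out of scope here */
            return len >= 24;
    }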
Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 10437a1..785b6e9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1360,7 +1360,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) frag = sc & IEEE80211_SCTL_FRAG; if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || - (rx->skb)->len < 24 || is_multicast_ether_addr(hdr->addr1))) { /* not fragmented */ goto out; @@ -2772,10 +2771,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, test_bit(SCAN_SW_SCANNING, &local->scanning))) status->rx_flags |= IEEE80211_RX_IN_SCAN; - if (ieee80211_is_mgmt(fc)) - err = skb_linearize(skb); - else + if (ieee80211_is_mgmt(fc)) { + /* drop frame if too short for header */ + if (skb->len < ieee80211_hdrlen(fc)) + err = -ENOBUFS; + else + err = skb_linearize(skb); + } else { err = !pskb_may_pull(skb, ieee80211_hdrlen(fc)); + } if (err) { dev_kfree_skb(skb); -- cgit v1.1 From 9c750c9809ddeb36dffc376ad26e7864d4c2a259 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 26 Oct 2012 18:54:25 +0200 Subject: mac80211: fix SSID copy on IBSS JOIN commit badecb001a310408d3473b1fc2ed5aefd0bc92a9 upstream. The 'ssid' field of the cfg80211_ibss_params is a u8 pointer and its length is likely to be less than IEEE80211_MAX_SSID_LEN most of the time. This patch fixes the ssid copy in ieee80211_ibss_join() by using the SSID length to prevent it from reading beyond the string. Signed-off-by: Antonio Quartulli [rewrapped commit message, small rewording] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 56c24ca..8adcc9e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -940,7 +940,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; sdata->u.ibss.ibss_join_req = jiffies; - memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); + memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); sdata->u.ibss.ssid_len = params->ssid_len; mutex_unlock(&sdata->u.ibss.mtx); -- cgit v1.1 From b23270416da409bd4e637a5acbe31a1126235fb6 Mon Sep 17 00:00:00 2001 From: Zijie Pan Date: Mon, 15 Oct 2012 03:56:39 +0000 Subject: sctp: fix call to SCTP_CMD_PROCESS_SACK in sctp_cmd_interpreter() [ Upstream commit f6e80abeab928b7c47cc1fbf53df13b4398a2bec ] Bug introduced by commit edfee0339e681a784ebacec7e8c2dc97dc6d2839 (sctp: check src addr when processing SACK to update transport state) Signed-off-by: Zijie Pan Signed-off-by: Nicolas Dichtel Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6e0f882..edac24a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1604,8 +1604,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->outqueue.outstanding_bytes; sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; + chunk->subh.sack_hdr = &sackh; sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, - SCTP_SACKH(&sackh)); + SCTP_CHUNK(chunk)); break; case SCTP_CMD_DISCARD_PACKET: -- cgit v1.1 From fdc00abab6858f9da6a40e03c831c252b4f419bd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Oct 2012 03:21:55 +0000 Subject: netlink: use kfree_rcu() in netlink_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d772ac5578f711d1ce7b03535d1c95bffb21dff ] On some suspend/resume operations involving wimax device, we have noticed some intermittent memory corruptions in netlink code. Stéphane Marchesin tracked this corruption in netlink_update_listeners() and suggested a patch. It appears netlink_release() should use kfree_rcu() instead of kfree() for the listeners structure as it may be used by other cpus using RCU protection. netlink_release() must set to NULL the listeners pointer when it is about to be freed. Also have to protect netlink_update_listeners() and netlink_has_listeners() if listeners is NULL. Add a nl_deref_protected() lockdep helper to properly document which locks protects us. Reported-by: Jonathan Kliegman Signed-off-by: Eric Dumazet Cc: Stéphane Marchesin Cc: Sam Leffler Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 16a94a3..d29c222 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -137,6 +137,8 @@ static void netlink_destroy_callback(struct netlink_callback *cb); static DEFINE_RWLOCK(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); +#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); + static ATOMIC_NOTIFIER_HEAD(netlink_chain); static u32 netlink_group_mask(u32 group) @@ -331,6 +333,11 @@ netlink_update_listeners(struct sock *sk) struct hlist_node *node; unsigned long mask; unsigned int i; + struct listeners *listeners; + + listeners = nl_deref_protected(tbl->listeners); + if (!listeners) + return; for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; @@ -338,7 +345,7 @@ netlink_update_listeners(struct sock *sk) if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } - tbl->listeners->masks[i] = mask; + listeners->masks[i] = mask; } /* this function is only called with the netlink table "grabbed", which * makes sure updates are visible before bind or setsockopt return. 
*/ @@ -519,7 +526,11 @@ static int netlink_release(struct socket *sock) if (netlink_is_kernel(sk)) { BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { - kfree(nl_table[sk->sk_protocol].listeners); + struct listeners *old; + + old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); + RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); + kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].registered = 0; } @@ -950,7 +961,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) rcu_read_lock(); listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); - if (group - 1 < nl_table[sk->sk_protocol].groups) + if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) res = test_bit(group - 1, listeners->masks); rcu_read_unlock(); @@ -1585,7 +1596,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; - old = rcu_dereference_raw(tbl->listeners); + old = nl_deref_protected(tbl->listeners); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); -- cgit v1.1 From 3297d60d4ebc83a4dc6c44abad18d181c6680b9e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Oct 2012 09:14:12 +0000 Subject: tcp: fix FIONREAD/SIOCINQ [ Upstream commit a3374c42aa5f7237e87ff3b0622018636b0c847e ] tcp_ioctl() tries to take into account whether the tcp socket received a FIN, in order to report the correct number of bytes in the receive queue. But it's flaky, because if the application ate the last skb, we return 1 instead of 0. The correct way to detect that a FIN was received is to test SOCK_DONE. Reported-by: Elliot Hughes Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dd3af6c..2c423b6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -481,14 +481,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { - struct sk_buff *skb; answ = tp->rcv_nxt - tp->copied_seq; - /* Subtract 1, if FIN is in queue. */ - skb = skb_peek_tail(&sk->sk_receive_queue); - if (answ && skb) - answ -= tcp_hdr(skb)->fin; + /* Subtract 1, if FIN was received */ + if (answ && sock_flag(sk, SOCK_DONE)) + answ--; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); -- cgit v1.1 From db138ef294594d3721dae6409d9e13d6ebf3df39 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 24 Oct 2012 14:01:18 +0800 Subject: ipv6: Set default hoplimit as zero. [ Upstream commit 14edd87dc67311556f1254a8f29cf4dd6cb5b7d1 ] Commit a02e4b7dae4551 (Demark default hoplimit as zero) only changes the hoplimit checking condition and default value in ip6_dst_hoplimit; it does not zero all hoplimit default values. Keep the zeroing of ip6_template_metrics[RTAX_HOPLIMIT - 1] to force it into the const section, as in a37e6e344910 (net: force dst_default_metrics to const section). Signed-off-by: Li RongQing Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9172568..da056c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -171,7 +171,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { }; static const u32 ip6_template_metrics[RTAX_MAX] = { - [RTAX_HOPLIMIT - 1] = 255, + [RTAX_HOPLIMIT - 1] = 0, }; static struct rt6_info ip6_null_entry_template = { @@ -1068,7 +1068,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); rt->dst.output = ip6_output; spin_lock_bh(&icmp6_dst_lock); -- cgit v1.1 From 798c7dba9071df9cdd935da16066e1a17580d9ee Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 31 Oct 2012 02:45:32 +0000 Subject: net: fix divide by zero in tcp algorithm illinois [ Upstream commit 8f363b77ee4fbf7c3bbcf5ec2c5ca482d396d664 ] Reading TCP stats when using TCP Illinois congestion control algorithm can cause a divide by zero kernel oops. The division by zero occur in tcp_illinois_info() at: do_div(t, ca->cnt_rtt); where ca->cnt_rtt can become zero (when rtt_reset is called) Steps to Reproduce: 1. Register tcp_illinois: # sysctl -w net.ipv4.tcp_congestion_control=illinois 2. Monitor internal TCP information via command "ss -i" # watch -d ss -i 3. Establish new TCP conn to machine Either it fails at the initial conn, or else it needs to wait for a loss or a reset. This is only related to reading stats. The function avg_delay() also performs the same divide, but is guarded with a (ca->cnt_rtt > 0) at its calling point in update_params(). Thus, simply fix tcp_illinois_info(). Function tcp_illinois_info() / get_info() is called without socket lock. Thus, eliminate any race condition on ca->cnt_rtt by using a local stack variable. Simply reuse info.tcpv_rttcnt, as its already set to ca->cnt_rtt. Function avg_delay() is not affected by this race condition, as its called with the socket lock. Cc: Petr Matousek Signed-off-by: Jesper Dangaard Brouer Acked-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_illinois.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 813b43a..834857f 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, .tcpv_rttcnt = ca->cnt_rtt, .tcpv_minrtt = ca->base_rtt, }; - u64 t = ca->sum_rtt; - do_div(t, ca->cnt_rtt); - info.tcpv_rtt = t; + if (info.tcpv_rttcnt > 0) { + u64 t = ca->sum_rtt; + do_div(t, info.tcpv_rttcnt); + info.tcpv_rtt = t; + } nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } } -- cgit v1.1 From 5a3e425d67aab6b6f0b3bd3fb372b8044b8e981b Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Mon, 29 Oct 2012 23:41:48 +0000 Subject: l2tp: fix oops in l2tp_eth_create() error path [ Upstream commit 789336360e0a2aeb9750c16ab704a02cbe035e9e ] When creating an L2TPv3 Ethernet session, if register_netdev() should fail for any reason (for example, automatic naming for "l2tpeth%d" interfaces hits the 32k-interface limit), the netdev is freed in the error path. 
However, the l2tp_eth_sess structure's dev pointer is left uncleared, and this results in l2tp_eth_delete() then attempting to unregister the same netdev later in the session teardown. This results in an oops. To avoid this, clear the session dev pointer in the error path. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_eth.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 2cef50b..64164fb 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -269,6 +269,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p out_del_dev: free_netdev(dev); + spriv->dev = NULL; out_del_session: l2tp_session_delete(session); out: -- cgit v1.1 From 2e570a4eeaf48f9a8302a0f71ff96ff9d0b723f6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 6 Nov 2012 16:18:41 +0000 Subject: ipv6: send unsolicited neighbour advertisements to all-nodes [ Upstream commit 60713a0ca7fd6651b951cc1b4dbd528d1fc0281b ] As documented in RFC4861 (Neighbor Discovery for IP version 6) 7.2.6., unsolicited neighbour advertisements should be sent to the all-nodes multicast address. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 10a8d41..31ba78c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -615,7 +615,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; - struct in6_addr mcaddr; + struct in6_addr mcaddr = IN6ADDR_LINKLOCAL_ALLNODES_INIT; idev = in6_dev_get(dev); if (!idev) @@ -623,7 +623,6 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - addrconf_addr_solict_mult(&ifa->addr, &mcaddr); ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, -- cgit v1.1
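For reference, the all-nodes group the patch now targets is ff02::1, which the kernel spells IN6ADDR_LINKLOCAL_ALLNODES_INIT. A tiny userspace sketch, using only standard libc calls and nothing taken from the patch itself, that builds and prints the same address:

    #include <stdio.h>
    #include <arpa/inet.h>

    int main(void)
    {
            struct in6_addr all_nodes;
            char buf[INET6_ADDRSTRLEN];

            /* ff02::1 is the link-local all-nodes multicast group named by
             * RFC 4861 7.2.6 as the destination for unsolicited NAs. */
            if (inet_pton(AF_INET6, "ff02::1", &all_nodes) != 1)
                    return 1;
            printf("unsolicited NAs are sent to %s\n",
                   inet_ntop(AF_INET6, &all_nodes, buf, sizeof(buf)));
            return 0;
    }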