From 98bdb0aa007ff7e8e0061936d8d0e210faf2e655 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 25 Jan 2011 08:17:48 -0800
Subject: libceph: fix socket read error handling

If we get EAGAIN when trying to read from the socket, it is not an error.
Return 0 from the helper in this case to simplify the error handling cases
in the caller (indirectly, try_read).

Fix try_read to pass any error to it's caller (con_work) instead of almost
always returning 0.  This let's us respond to things like socket
disconnects.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dff633d..d95576d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
 {
 	struct kvec iov = {buf, len};
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+	int r;
 
-	return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+	r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+	if (r == -EAGAIN)
+		r = 0;
+	return r;
 }
 
 /*
@@ -1821,19 +1825,17 @@ more:
 			dout("try_read connecting\n");
 			ret = read_partial_banner(con);
 			if (ret <= 0)
-				goto done;
-			if (process_banner(con) < 0) {
-				ret = -1;
 				goto out;
-			}
+			ret = process_banner(con);
+			if (ret < 0)
+				goto out;
 		}
 		ret = read_partial_connect(con);
 		if (ret <= 0)
-			goto done;
-		if (process_connect(con) < 0) {
-			ret = -1;
 			goto out;
-		}
+		ret = process_connect(con);
+		if (ret < 0)
+			goto out;
 		goto more;
 	}
 
@@ -1848,7 +1850,7 @@ more:
 		dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
 		ret = ceph_tcp_recvmsg(con->sock, buf, skip);
 		if (ret <= 0)
-			goto done;
+			goto out;
 		con->in_base_pos += ret;
 		if (con->in_base_pos)
 			goto more;
@@ -1859,7 +1861,7 @@ more:
 		 */
 		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
 		if (ret <= 0)
-			goto done;
+			goto out;
 		dout("try_read got tag %d\n", (int)con->in_tag);
 		switch (con->in_tag) {
 		case CEPH_MSGR_TAG_MSG:
@@ -1870,7 +1872,7 @@ more:
 			break;
 		case CEPH_MSGR_TAG_CLOSE:
 			set_bit(CLOSED, &con->state);   /* fixme */
-			goto done;
+			goto out;
 		default:
 			goto bad_tag;
 		}
@@ -1882,13 +1884,12 @@ more:
 			case -EBADMSG:
 				con->error_msg = "bad crc";
 				ret = -EIO;
-				goto out;
+				break;
 			case -EIO:
 				con->error_msg = "io error";
-				goto out;
-			default:
-				goto done;
+				break;
 			}
+			goto out;
 		}
 		if (con->in_tag == CEPH_MSGR_TAG_READY)
 			goto more;
@@ -1898,15 +1899,13 @@ more:
 	if (con->in_tag == CEPH_MSGR_TAG_ACK) {
 		ret = read_partial_ack(con);
 		if (ret <= 0)
-			goto done;
+			goto out;
 		process_ack(con);
 		goto more;
 	}
 
-done:
-	ret = 0;
 out:
-	dout("try_read done on %p\n", con);
+	dout("try_read done on %p ret %d\n", con, ret);
 	return ret;
 
 bad_tag:
-- 
cgit v1.1


From 42961d2333a1855c649fa3790e258ab4f0fa66a4 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 25 Jan 2011 08:19:34 -0800
Subject: libceph: fix socket write error handling

Pass errors from writing to the socket up the stack.  If we get -EAGAIN,
return 0 from the helper to simplify the callers' checks.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index d95576d..35b36b8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -268,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
 		     size_t kvlen, size_t len, int more)
 {
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+	int r;
 
 	if (more)
 		msg.msg_flags |= MSG_MORE;
 	else
 		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
 
-	return kernel_sendmsg(sock, &msg, iov, kvlen, len);
+	r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
+	if (r == -EAGAIN)
+		r = 0;
+	return r;
 }
 
 
@@ -851,6 +855,8 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 		    (msg->pages || msg->pagelist || msg->bio || in_trail))
 			kunmap(page);
 
+		if (ret == -EAGAIN)
+			ret = 0;
 		if (ret <= 0)
 			goto out;
 
@@ -1741,16 +1747,12 @@ more_kvec:
 	if (con->out_skip) {
 		ret = write_partial_skip(con);
 		if (ret <= 0)
-			goto done;
-		if (ret < 0) {
-			dout("try_write write_partial_skip err %d\n", ret);
-			goto done;
-		}
+			goto out;
 	}
 	if (con->out_kvec_left) {
 		ret = write_partial_kvec(con);
 		if (ret <= 0)
-			goto done;
+			goto out;
 	}
 
 	/* msg pages? */
@@ -1765,11 +1767,11 @@ more_kvec:
 		if (ret == 1)
 			goto more_kvec;  /* we need to send the footer, too! */
 		if (ret == 0)
-			goto done;
+			goto out;
 		if (ret < 0) {
 			dout("try_write write_partial_msg_pages err %d\n",
 			     ret);
-			goto done;
+			goto out;
 		}
 	}
 
@@ -1793,10 +1795,9 @@ do_next:
 	/* Nothing to do! */
 	clear_bit(WRITE_PENDING, &con->state);
 	dout("try_write nothing else to write.\n");
-done:
 	ret = 0;
 out:
-	dout("try_write done on %p\n", con);
+	dout("try_write done on %p ret %d\n", con, ret);
 	return ret;
 }
 
-- 
cgit v1.1


From 5f04d5068a90602b93a7953e9a47c496705c6976 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 20 Feb 2011 11:49:45 -0800
Subject: net: Fix more stale on-stack list_head objects.

From: Eric W. Biederman <ebiederm@xmission.com>

In the beginning with batching unreg_list was a list that was used only
once in the lifetime of a network device (I think).  Now we have calls
using the unreg_list that can happen multiple times in the life of a
network device like dev_deactivate and dev_close that are also using the
unreg_list.  In addition in unregister_netdevice_queue we also do a
list_move because for devices like veth pairs it is possible that
unregister_netdevice_queue will be called multiple times.

So I think the change below to fix dev_deactivate which Eric D. missed
will fix this problem.  Now to go test that.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/iface.c    | 1 +
 net/sched/sch_generic.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8acba45..7a10a8d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 	}
 	mutex_unlock(&local->iflist_mtx);
 	unregister_netdevice_many(&unreg_list);
+	list_del(&unreg_list);
 }
 
 static u32 ieee80211_idle_off(struct ieee80211_local *local,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598..1bc6980 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev)
 
 	list_add(&dev->unreg_list, &single);
 	dev_deactivate_many(&single);
+	list_del(&single);
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
-- 
cgit v1.1


From c24f691b56107feeba076616982093ee2d3c8fb5 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 7 Feb 2011 12:57:04 +0000
Subject: tcp: undo_retrans counter fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a bug that undo_retrans is incorrectly decremented when undo_marker is
not set or undo_retrans is already 0. This happens when sender receives
more DSACK ACKs than packets retransmitted during the current
undo phase. This may also happen when sender receives DSACK after
the undo operation is completed or cancelled.

Fix another bug that undo_retrans is incorrectly incremented when
sender retransmits an skb and tcp_skb_pcount(skb) > 1 (TSO). This case
is rare but not impossible.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 5 +++--
 net/ipv4/tcp_output.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82e..65f6c04 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 	}
 
 	/* D-SACK for already forgotten data... Do dumb counting. */
-	if (dup_sack &&
+	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
 	    !after(end_seq_0, prior_snd_una) &&
 	    after(end_seq_0, tp->undo_marker))
 		tp->undo_retrans--;
@@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+		if (tp->undo_marker && tp->undo_retrans &&
+		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
 			tp->undo_retrans--;
 		if (sacked & TCPCB_SACKED_ACKED)
 			state->reord = min(fack_count, state->reord);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 406f320..dfa5beb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-		tp->undo_retrans++;
+		tp->undo_retrans += tcp_skb_pcount(skb);
 
 		/* snd_nxt is stored to detect loss of retransmitted segment,
 		 * see tcp_input.c tcp_sacktag_write_queue().
-- 
cgit v1.1


From 4f919a3bc54da01db829c520ce4b1fabfde1c3f7 Mon Sep 17 00:00:00 2001
From: Daniel J Blueman <daniel.blueman@gmail.com>
Date: Tue, 22 Feb 2011 00:11:06 +0800
Subject: fix cfg80211_wext_siwfreq lock ordering...

I previously managed to reproduce a hang while scanning wireless
channels (reproducible with airodump-ng hopping channels); subsequent
lockdep instrumentation revealed a lock ordering issue.

Without knowing the design intent, it looks like the locks should be
taken in reverse order; please comment.

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.38-rc5-341cd #4
-------------------------------------------------------
airodump-ng/15445 is trying to acquire lock:
 (&rdev->devlist_mtx){+.+.+.}, at: [<ffffffff816b1266>]
cfg80211_wext_siwfreq+0xc6/0x100

but task is already holding lock:
 (&wdev->mtx){+.+.+.}, at: [<ffffffff816b125c>] cfg80211_wext_siwfreq+0xbc/0x100

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&wdev->mtx){+.+.+.}:
       [<ffffffff810a79d6>] lock_acquire+0xc6/0x280
       [<ffffffff816d6bce>] mutex_lock_nested+0x6e/0x4b0
       [<ffffffff81696080>] cfg80211_netdev_notifier_call+0x430/0x5f0
       [<ffffffff8109351b>] notifier_call_chain+0x8b/0x100
       [<ffffffff810935b1>] raw_notifier_call_chain+0x11/0x20
       [<ffffffff81576d92>] call_netdevice_notifiers+0x32/0x60
       [<ffffffff815771a4>] __dev_notify_flags+0x34/0x80
       [<ffffffff81577230>] dev_change_flags+0x40/0x70
       [<ffffffff8158587c>] do_setlink+0x1fc/0x8d0
       [<ffffffff81586042>] rtnl_setlink+0xf2/0x140
       [<ffffffff81586923>] rtnetlink_rcv_msg+0x163/0x270
       [<ffffffff8159d741>] netlink_rcv_skb+0xa1/0xd0
       [<ffffffff815867b0>] rtnetlink_rcv+0x20/0x30
       [<ffffffff8159d39a>] netlink_unicast+0x2ba/0x300
       [<ffffffff8159dd57>] netlink_sendmsg+0x267/0x3e0
       [<ffffffff8155e364>] sock_sendmsg+0xe4/0x110
       [<ffffffff8155f3a3>] sys_sendmsg+0x253/0x3b0
       [<ffffffff81003192>] system_call_fastpath+0x16/0x1b

-> #0 (&rdev->devlist_mtx){+.+.+.}:
       [<ffffffff810a7222>] __lock_acquire+0x1622/0x1d10
       [<ffffffff810a79d6>] lock_acquire+0xc6/0x280
       [<ffffffff816d6bce>] mutex_lock_nested+0x6e/0x4b0
       [<ffffffff816b1266>] cfg80211_wext_siwfreq+0xc6/0x100
       [<ffffffff816b2fad>] ioctl_standard_call+0x5d/0xd0
       [<ffffffff816b3223>] T.808+0x163/0x170
       [<ffffffff816b326a>] wext_handle_ioctl+0x3a/0x90
       [<ffffffff815798d2>] dev_ioctl+0x6f2/0x830
       [<ffffffff8155cf3d>] sock_ioctl+0xfd/0x290
       [<ffffffff8117dffd>] do_vfs_ioctl+0x9d/0x590
       [<ffffffff8117e53a>] sys_ioctl+0x4a/0x80
       [<ffffffff81003192>] system_call_fastpath+0x16/0x1b

other info that might help us debug this:

2 locks held by airodump-ng/15445:
 #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff81586782>] rtnl_lock+0x12/0x20
 #1:  (&wdev->mtx){+.+.+.}, at: [<ffffffff816b125c>]
cfg80211_wext_siwfreq+0xbc/0x100

stack backtrace:
Pid: 15445, comm: airodump-ng Not tainted 2.6.38-rc5-341cd #4
Call Trace:
 [<ffffffff810a3f0a>] ? print_circular_bug+0xfa/0x100
 [<ffffffff810a7222>] ? __lock_acquire+0x1622/0x1d10
 [<ffffffff810a1f99>] ? trace_hardirqs_off_caller+0x29/0xc0
 [<ffffffff810a79d6>] ? lock_acquire+0xc6/0x280
 [<ffffffff816b1266>] ? cfg80211_wext_siwfreq+0xc6/0x100
 [<ffffffff810a31d7>] ? mark_held_locks+0x67/0x90
 [<ffffffff816d6bce>] ? mutex_lock_nested+0x6e/0x4b0
 [<ffffffff816b1266>] ? cfg80211_wext_siwfreq+0xc6/0x100
 [<ffffffff810a31d7>] ? mark_held_locks+0x67/0x90
 [<ffffffff816b1266>] ? cfg80211_wext_siwfreq+0xc6/0x100
 [<ffffffff816b1266>] ? cfg80211_wext_siwfreq+0xc6/0x100
 [<ffffffff816b2fad>] ? ioctl_standard_call+0x5d/0xd0
 [<ffffffff8157818b>] ? __dev_get_by_name+0x9b/0xc0
 [<ffffffff816b2f50>] ? ioctl_standard_call+0x0/0xd0
 [<ffffffff816b3223>] ? T.808+0x163/0x170
 [<ffffffff8112ddf2>] ? might_fault+0x72/0xd0
 [<ffffffff816b326a>] ? wext_handle_ioctl+0x3a/0x90
 [<ffffffff8112de3b>] ? might_fault+0xbb/0xd0
 [<ffffffff815798d2>] ? dev_ioctl+0x6f2/0x830
 [<ffffffff810a1bae>] ? put_lock_stats+0xe/0x40
 [<ffffffff810a1c8c>] ? lock_release_holdtime+0xac/0x150
 [<ffffffff8155cf3d>] ? sock_ioctl+0xfd/0x290
 [<ffffffff8117dffd>] ? do_vfs_ioctl+0x9d/0x590
 [<ffffffff8116c8ff>] ? fget_light+0x1df/0x3c0
 [<ffffffff8117e53a>] ? sys_ioctl+0x4a/0x80
 [<ffffffff81003192>] ? system_call_fastpath+0x16/0x1b

Signed-off-by: Daniel J Blueman <daniel.blueman@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/wext-compat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4..d112f03 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
 			return freq;
 		if (freq == 0)
 			return -EINVAL;
-		wdev_lock(wdev);
 		mutex_lock(&rdev->devlist_mtx);
+		wdev_lock(wdev);
 		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
-		mutex_unlock(&rdev->devlist_mtx);
 		wdev_unlock(wdev);
+		mutex_unlock(&rdev->devlist_mtx);
 		return err;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.1


From 9cc6e0c4c457f84bedcfb04e7dd58a36909c4ef7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 15 Feb 2011 13:19:17 +0000
Subject: bridge: Fix IPv6 multicast snooping by storing correct protocol type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The protocol type for IPv6 entries in the hash table for multicast
bridge snooping is falsely set to ETH_P_IP, marking it as an IPv4
address, instead of setting it to ETH_P_IPV6, which results in negative
look-ups in the hash table later.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 09d5c098..17708fc 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -784,7 +784,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 		return 0;
 
 	ipv6_addr_copy(&br_group.u.ip6, group);
-	br_group.proto = htons(ETH_P_IP);
+	br_group.proto = htons(ETH_P_IPV6);
 
 	return br_multicast_add_group(br, port, &br_group);
 }
-- 
cgit v1.1


From 649e984d00416cb1a254fdbebd6d3f9fa01c32fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 15 Feb 2011 13:19:18 +0000
Subject: bridge: Fix IPv6 multicast snooping by correcting offset in MLDv2
 report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We actually want a pointer to the grec_nsrcr and not the following
field. Otherwise we can get very high values for *nsrcs as the first two
bytes of the IPv6 multicast address are being used instead, leading to
a failing pskb_may_pull() which results in MLDv2 reports not being
parsed.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 17708fc..d69beaf 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1013,7 +1013,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 
 		nsrcs = skb_header_pointer(skb,
 					   len + offsetof(struct mld2_grec,
-							  grec_mca),
+							  grec_nsrcs),
 					   sizeof(_nsrcs), &_nsrcs);
 		if (!nsrcs)
 			return -EINVAL;
-- 
cgit v1.1


From d41db9f3f71548f07b8b6d81a88220d0035b04f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 15 Feb 2011 13:19:19 +0000
Subject: bridge: Add missing ntohs()s for MLDv2 report parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nsrcs number is 2 Byte wide, therefore we need to call ntohs()
before using it.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d69beaf..9ce2af1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1020,11 +1020,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 
 		if (!pskb_may_pull(skb,
 				   len + sizeof(*grec) +
-				   sizeof(struct in6_addr) * (*nsrcs)))
+				   sizeof(struct in6_addr) * ntohs(*nsrcs)))
 			return -EINVAL;
 
 		grec = (struct mld2_grec *)(skb->data + len);
-		len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs);
+		len += sizeof(*grec) +
+		       sizeof(struct in6_addr) * ntohs(*nsrcs);
 
 		/* We treat these as MLDv1 reports for now. */
 		switch (grec->grec_type) {
-- 
cgit v1.1


From e4de9f9e8333fbbae951c6e068f501f955123cf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 15 Feb 2011 13:19:21 +0000
Subject: bridge: Allow mcast snooping for transient link local addresses too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the multicast bridge snooping support is not active for
link local multicast. I assume this has been done to leave
important multicast data untouched, like IPv6 Neighborhood Discovery.

In larger, bridged, local networks it could however be desirable to
optimize for instance local multicast audio/video streaming too.

With the transient flag in IPv6 multicast addresses we have an easy
way to optimize such multimedia traffic without tempering with the
high priority multicast data from well-known addresses.

This patch alters the multicast bridge snooping for IPv6, to take
effect for transient multicast addresses instead of non-link-local
addresses.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9ce2af1..1207a5a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,10 +37,9 @@
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
+static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
 {
-	if (ipv6_addr_is_multicast(addr) &&
-	    IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
+	if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
 		return 1;
 	return 0;
 }
@@ -780,7 +779,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 {
 	struct br_ip br_group;
 
-	if (ipv6_is_local_multicast(group))
+	if (!ipv6_is_transient_multicast(group))
 		return 0;
 
 	ipv6_addr_copy(&br_group.u.ip6, group);
@@ -1341,7 +1340,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 {
 	struct br_ip br_group;
 
-	if (ipv6_is_local_multicast(group))
+	if (!ipv6_is_transient_multicast(group))
 		return;
 
 	ipv6_addr_copy(&br_group.u.ip6, group);
-- 
cgit v1.1


From 36cff5a10c6b003fa2d0464848d5664b2bf723e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Thu, 17 Feb 2011 08:17:51 +0000
Subject: bridge: Fix MLD queries' ethernet source address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Map the IPv6 header's destination multicast address to an ethernet
source address instead of the MLD queries multicast address.

For instance for a general MLD query (multicast address in the MLD query
set to ::), this would wrongly be mapped to 33:33:00:00:00:00, although
an MLD queries destination MAC should always be 33:33:00:00:00:01 which
matches the IPv6 header's multicast destination ff02::1.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1207a5a..c1f24e4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -434,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	eth = eth_hdr(skb);
 
 	memcpy(eth->h_source, br->dev->dev_addr, 6);
-	ipv6_eth_mc_map(group, eth->h_dest);
 	eth->h_proto = htons(ETH_P_IPV6);
 	skb_put(skb, sizeof(*eth));
 
@@ -448,6 +447,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->hop_limit = 1;
 	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
 	hopopt[0] = IPPROTO_ICMPV6;		/* next hdr */
-- 
cgit v1.1


From fe29ec41aaa51902aebd63658dfb04fe6fea8be5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Thu, 17 Feb 2011 08:17:52 +0000
Subject: bridge: Use IPv6 link-local address for multicast listener queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the bridge multicast snooping feature periodically issues
IPv6 general multicast listener queries to sense the absence of a
listener.

For this, it uses :: as its source address - however RFC 2710 requires:
"To be valid, the Query message MUST come from a link-local IPv6 Source
Address". Current Linux kernel versions seem to follow this requirement
and ignore our bogus MLD queries.

With this commit a link local address from the bridge interface is being
used to issue the MLD query, resulting in other Linux devices which are
multicast listeners in the network to respond with a MLD response (which
was not the case before).

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index c1f24e4..030a002 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -445,7 +445,8 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->payload_len = htons(8 + sizeof(*mldq));
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
-	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
+	ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+			   &ip6h->saddr);
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
-- 
cgit v1.1


From c486da34390846b430896a407b47f0cea3a4189c Mon Sep 17 00:00:00 2001
From: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
Date: Thu, 24 Feb 2011 19:48:03 +0000
Subject: sysctl: ipv6: use correct net in ipv6_sysctl_rtcache_flush

Before this patch issuing these commands:

  fd = open("/proc/sys/net/ipv6/route/flush")
  unshare(CLONE_NEWNET)
  write(fd, "stuff")

would flush the newly created net, not the original one.

The equivalent ipv4 code is correct (stores the net inside ->extra1).
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a998db6..904312e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2557,14 +2557,16 @@ static
 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct net *net = current->nsproxy->net_ns;
-	int delay = net->ipv6.sysctl.flush_delay;
-	if (write) {
-		proc_dointvec(ctl, write, buffer, lenp, ppos);
-		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
-		return 0;
-	} else
+	struct net *net;
+	int delay;
+	if (!write)
 		return -EINVAL;
+
+	net = (struct net *)ctl->extra1;
+	delay = net->ipv6.sysctl.flush_delay;
+	proc_dointvec(ctl, write, buffer, lenp, ppos);
+	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+	return 0;
 }
 
 ctl_table ipv6_route_table_template[] = {
@@ -2651,6 +2653,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 
 	if (table) {
 		table[0].data = &net->ipv6.sysctl.flush_delay;
+		table[0].extra1 = net;
 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
-- 
cgit v1.1


From 0a93ea2e897bd793cc0aaaddc397eff32ac8d6fe Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 25 Feb 2011 15:33:17 +0000
Subject: RxRPC: Allocate tokens with kzalloc to avoid oops in rxrpc_destroy

With slab poisoning enabled, I see the following oops:

  Unable to handle kernel paging request for data at address 0x6b6b6b6b6b6b6b73
  ...
  NIP [c0000000006bc61c] .rxrpc_destroy+0x44/0x104
  LR [c0000000006bc618] .rxrpc_destroy+0x40/0x104
  Call Trace:
  [c0000000feb2bc00] [c0000000006bc618] .rxrpc_destroy+0x40/0x104 (unreliable)
  [c0000000feb2bc90] [c000000000349b2c] .key_cleanup+0x1a8/0x20c
  [c0000000feb2bd40] [c0000000000a2920] .process_one_work+0x2f4/0x4d0
  [c0000000feb2be00] [c0000000000a2d50] .worker_thread+0x254/0x468
  [c0000000feb2bec0] [c0000000000a868c] .kthread+0xbc/0xc8
  [c0000000feb2bf90] [c000000000020e00] .kernel_thread+0x54/0x70

We aren't initialising token->next, but the code in destroy_context relies
on the list being NULL terminated. Use kzalloc to zero out all the fields.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rxrpc/ar-key.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 5ee16f0..d763793 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -89,11 +89,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
 		return ret;
 
 	plen -= sizeof(*token);
-	token = kmalloc(sizeof(*token), GFP_KERNEL);
+	token = kzalloc(sizeof(*token), GFP_KERNEL);
 	if (!token)
 		return -ENOMEM;
 
-	token->kad = kmalloc(plen, GFP_KERNEL);
+	token->kad = kzalloc(plen, GFP_KERNEL);
 	if (!token->kad) {
 		kfree(token);
 		return -ENOMEM;
@@ -731,10 +731,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 		goto error;
 
 	ret = -ENOMEM;
-	token = kmalloc(sizeof(*token), GFP_KERNEL);
+	token = kzalloc(sizeof(*token), GFP_KERNEL);
 	if (!token)
 		goto error;
-	token->kad = kmalloc(plen, GFP_KERNEL);
+	token->kad = kzalloc(plen, GFP_KERNEL);
 	if (!token->kad)
 		goto error_free;
 
-- 
cgit v1.1


From 5aca1a9e880e06bb7e5fd553a86a330ae7e218b5 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Fri, 25 Feb 2011 13:58:54 -0800
Subject: net: handle addr_type of 0 properly

addr_type of 0 means that the type should be adopted from from_dev and
not from __hw_addr_del_multiple(). Unfortunately it isn't so and
addr_type will always be considered. Fix this by implementing the
considered and documented behavior.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_addr_lists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c1..133fd22 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
 
 	list_for_each_entry(ha, &from_list->list, list) {
 		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
 	}
 }
 EXPORT_SYMBOL(__hw_addr_del_multiple);
-- 
cgit v1.1


From b44d211e166b4b0dae8ce379f9d2e3ac164b5b60 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Mon, 21 Feb 2011 02:40:47 +0000
Subject: netlink: handle errors from netlink_dump()

netlink_dump() may failed, but nobody handle its error.
It generates output data, when a previous portion has been returned to
user space. This mechanism works when all data isn't go in skb. If we
enter in netlink_recvmsg() and skb is absent in the recv queue, the
netlink_dump() will not been executed. So if netlink_dump() is failed
one time, the new data never appear and the reader will sleep forever.

netlink_dump() is called from two places:

1. from netlink_sendmsg->...->netlink_dump_start().
   In this place we can report error directly and it will be returned
   by sendmsg().

2. from netlink_recvmsg
   There we can't report error directly, because we have a portion of
   valid output data and call netlink_dump() for prepare the next portion.
   If netlink_dump() is failed, the socket will be mark as error and the
   next recvmsg will be failed.

Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d..1f92459 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	int noblock = flags&MSG_DONTWAIT;
 	size_t copied;
 	struct sk_buff *skb, *data_skb;
-	int err;
+	int err, ret;
 
 	if (flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 
 	skb_free_datagram(sk, skb);
 
-	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
-		netlink_dump(sk);
+	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
+		ret = netlink_dump(sk);
+		if (ret) {
+			sk->sk_err = ret;
+			sk->sk_error_report(sk);
+		}
+	}
 
 	scm_recv(sock, msg, siocb->scm, flags);
 out:
@@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_callback *cb;
 	struct sock *sk;
 	struct netlink_sock *nlk;
+	int ret;
 
 	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 	if (cb == NULL)
@@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	nlk->cb = cb;
 	mutex_unlock(nlk->cb_mutex);
 
-	netlink_dump(sk);
+	ret = netlink_dump(sk);
+
 	sock_put(sk);
 
+	if (ret)
+		return ret;
+
 	/* We successfully started a dump, by returning -EINTR we
 	 * signal not to send ACK even if it was requested.
 	 */
-- 
cgit v1.1


From ff75f40f44ae9b79d520bf32a05d35af74a805c0 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 22 Feb 2011 10:40:25 +0200
Subject: ipvs: fix dst_lock locking on dest update

	Fix dst_lock usage in __ip_vs_update_dest. We need
_bh locking because destination is updated in user context.
Can cause lockups on frequent destination updates.
Problem reported by Simon Kirby. Bug was introduced
in 2.6.37 from the "ipvs: changes for local real server"
change.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5..ba98e13 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	dest->u_threshold = udest->u_threshold;
 	dest->l_threshold = udest->l_threshold;
 
-	spin_lock(&dest->dst_lock);
+	spin_lock_bh(&dest->dst_lock);
 	ip_vs_dst_reset(dest);
-	spin_unlock(&dest->dst_lock);
+	spin_unlock_bh(&dest->dst_lock);
 
 	if (add)
 		ip_vs_new_estimator(&dest->stats);
-- 
cgit v1.1


From 720dc34bbbe9493c7bd48b2243058b4e447a929d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 1 Mar 2011 23:02:07 -0800
Subject: dccp: fix oops on Reset after close

This fixes a bug in the order of dccp_rcv_state_process() that still permitted
reception even after closing the socket. A Reset after close thus causes a NULL
pointer dereference by not preventing operations on an already torn-down socket.

 dccp_v4_do_rcv()
	|
	| state other than OPEN
	v
 dccp_rcv_state_process()
	|
	| DCCP_PKT_RESET
	v
 dccp_rcv_reset()
	|
	v
 dccp_time_wait()

 WARNING: at net/ipv4/inet_timewait_sock.c:141 __inet_twsk_hashdance+0x48/0x128()
 Modules linked in: arc4 ecb carl9170 rt2870sta(C) mac80211 r8712u(C) crc_ccitt ah
 [<c0038850>] (unwind_backtrace+0x0/0xec) from [<c0055364>] (warn_slowpath_common)
 [<c0055364>] (warn_slowpath_common+0x4c/0x64) from [<c0055398>] (warn_slowpath_n)
 [<c0055398>] (warn_slowpath_null+0x1c/0x24) from [<c02b72d0>] (__inet_twsk_hashd)
 [<c02b72d0>] (__inet_twsk_hashdance+0x48/0x128) from [<c031caa0>] (dccp_time_wai)
 [<c031caa0>] (dccp_time_wait+0x40/0xc8) from [<c031c15c>] (dccp_rcv_state_proces)
 [<c031c15c>] (dccp_rcv_state_process+0x120/0x538) from [<c032609c>] (dccp_v4_do_)
 [<c032609c>] (dccp_v4_do_rcv+0x11c/0x14c) from [<c0286594>] (release_sock+0xac/0)
 [<c0286594>] (release_sock+0xac/0x110) from [<c031fd34>] (dccp_close+0x28c/0x380)
 [<c031fd34>] (dccp_close+0x28c/0x380) from [<c02d9a78>] (inet_release+0x64/0x70)

The fix is by testing the socket state first. Receiving a packet in Closed state
now also produces the required "No connection" Reset reply of RFC 4340, 8.3.1.

Reported-and-tested-by: Johan Hovold <jhovold@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009..4222e7a 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Caller (dccp_v4_do_rcv) will send Reset */
 		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
+	} else if (sk->sk_state == DCCP_CLOSED) {
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+		return 1;
 	}
 
 	if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	switch (sk->sk_state) {
-	case DCCP_CLOSED:
-		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
-		return 1;
-
 	case DCCP_REQUESTING:
 		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
 		if (queued >= 0)
-- 
cgit v1.1


From 9ef0298a8e5730d9a46d640014c727f3b4152870 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 2 Mar 2011 12:10:13 +0100
Subject: netfilter: nf_log: avoid oops in (un)bind with invalid nfproto values

Like many other places, we have to check that the array index is
within allowed limits, or otherwise, a kernel oops and other nastiness
can ensue when we access memory beyond the end of the array.

[ 5954.115381] BUG: unable to handle kernel paging request at 0000004000000000
[ 5954.120014] IP:  __find_logger+0x6f/0xa0
[ 5954.123979]  nf_log_bind_pf+0x2b/0x70
[ 5954.123979]  nfulnl_recv_config+0xc0/0x4a0 [nfnetlink_log]
[ 5954.123979]  nfnetlink_rcv_msg+0x12c/0x1b0 [nfnetlink]
...

The problem goes back to v2.6.30-rc1~1372~1342~31 where nf_log_bind
was decoupled from nf_log_register.

Reported-by: Miguel Di Ciurcio Filho <miguel.filho@gmail.com>,
  via irc.freenode.net/#netfilter
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_log.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393e..9181699 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
 
 int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
 {
+	if (pf >= ARRAY_SIZE(nf_loggers))
+		return -EINVAL;
 	mutex_lock(&nf_log_mutex);
 	if (__find_logger(pf, logger->name) == NULL) {
 		mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
 
 void nf_log_unbind_pf(u_int8_t pf)
 {
+	if (pf >= ARRAY_SIZE(nf_loggers))
+		return;
 	mutex_lock(&nf_log_mutex);
 	rcu_assign_pointer(nf_loggers[pf], NULL);
 	mutex_unlock(&nf_log_mutex);
-- 
cgit v1.1


From f3d7bc57c71eba3f279785111bb473b1ef68dcb6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 2 Mar 2011 10:35:33 +0000
Subject: net: dcbnl: check correct ops in dcbnl_ieee_set()

The incorrect ops routine was being tested for in
DCB_ATTR_IEEE_PFC attributes. This patch corrects
it.

Currently, every driver implementing ieee_setets also
implements ieee_setpfc so this bug is not actualized
yet.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d5074a5..c44348a 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			goto err;
 	}
 
-	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) {
+	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
 		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
 		err = ops->ieee_setpfc(netdev, pfc);
 		if (err)
-- 
cgit v1.1


From 10003453479ef287a73f8a39593f8f42687ea565 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 28 Feb 2011 03:27:43 +0000
Subject: AF_RXRPC: Handle receiving ACKALL packets

The OpenAFS server is now sending ACKALL packets, so we need to handle them.
Otherwise we report a protocol error and abort.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-input.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 8931500..1a2b0633 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 			goto protocol_error;
 		}
 
+	case RXRPC_PACKET_TYPE_ACKALL:
 	case RXRPC_PACKET_TYPE_ACK:
 		/* ACK processing is done in process context */
 		read_lock_bh(&call->state_lock);
-- 
cgit v1.1


From 1362fa078dae16776cd439791c6605b224ea6171 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 3 Mar 2011 11:28:58 +0000
Subject: DNS: Fix a NULL pointer deref when trying to read an error key
 [CVE-2011-1076]

When a DNS resolver key is instantiated with an error indication, attempts to
read that key will result in an oops because user_read() is expecting there to
be a payload - and there isn't one [CVE-2011-1076].

Give the DNS resolver key its own read handler that returns the error cached in
key->type_data.x[0] as an error rather than crashing.

Also make the kenter() at the beginning of dns_resolver_instantiate() limit the
amount of data it prints, since the data is not necessarily NUL-terminated.

The buggy code was added in:

	commit 4a2d789267e00b5a1175ecd2ddefcc78b83fbf09
	Author: Wang Lei <wang840925@gmail.com>
	Date:   Wed Aug 11 09:37:58 2010 +0100
	Subject: DNS: If the DNS server returns an error, allow that to be cached [ver #2]

This can trivially be reproduced by any user with the following program
compiled with -lkeyutils:

	#include <stdlib.h>
	#include <keyutils.h>
	#include <err.h>
	static char payload[] = "#dnserror=6";
	int main()
	{
		key_serial_t key;
		key = add_key("dns_resolver", "a", payload, sizeof(payload),
			      KEY_SPEC_SESSION_KEYRING);
		if (key == -1)
			err(1, "add_key");
		if (keyctl_read(key, NULL, 0) == -1)
			err(1, "read_key");
		return 0;
	}

What should happen is that keyctl_read() reports error 6 (ENXIO) to the user:

	dns-break: read_key: No such device or address

but instead the kernel oopses.

This cannot be reproduced with the 'keyutils add' or 'keyutils padd' commands
as both of those cut the data down below the NUL termination that must be
included in the data.  Without this dns_resolver_instantiate() will return
-EINVAL and the key will not be instantiated such that it can be read.

The oops looks like:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff811b99f7>] user_read+0x4f/0x8f
PGD 3bdf8067 PUD 385b9067 PMD 0
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:19.0/irq
CPU 0
Modules linked in:

Pid: 2150, comm: dns-break Not tainted 2.6.38-rc7-cachefs+ #468                  /DG965RY
RIP: 0010:[<ffffffff811b99f7>]  [<ffffffff811b99f7>] user_read+0x4f/0x8f
RSP: 0018:ffff88003bf47f08  EFLAGS: 00010246
RAX: 0000000000000001 RBX: ffff88003b5ea378 RCX: ffffffff81972368
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003b5ea378
RBP: ffff88003bf47f28 R08: ffff88003be56620 R09: 0000000000000000
R10: 0000000000000395 R11: 0000000000000002 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: ffffffffffffffa1
FS:  00007feab5751700(0000) GS:ffff88003e000000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000010 CR3: 000000003de40000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process dns-break (pid: 2150, threadinfo ffff88003bf46000, task ffff88003be56090)
Stack:
 ffff88003b5ea378 ffff88003b5ea3a0 0000000000000000 0000000000000000
 ffff88003bf47f68 ffffffff811b708e ffff88003c442bc8 0000000000000000
 00000000004005a0 00007fffba368060 0000000000000000 0000000000000000
Call Trace:
 [<ffffffff811b708e>] keyctl_read_key+0xac/0xcf
 [<ffffffff811b7c07>] sys_keyctl+0x75/0xb6
 [<ffffffff81001f7b>] system_call_fastpath+0x16/0x1b
Code: 75 1f 48 83 7b 28 00 75 18 c6 05 58 2b fb 00 01 be bb 00 00 00 48 c7 c7 76 1c 75 81 e8 13 c2 e9 ff 4c 8b b3 e0 00 00 00 4d 85 ed <41> 0f b7 5e 10 74 2d 4d 85 e4 74 28 e8 98 79 ee ff 49 39 dd 48
RIP  [<ffffffff811b99f7>] user_read+0x4f/0x8f
 RSP <ffff88003bf47f08>
CR2: 0000000000000010

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
cc: Wang Lei <wang840925@gmail.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 net/dns_resolver/dns_key.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a..cfa7a5e 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
 	size_t result_len = 0;
 	const char *data = _data, *end, *opt;
 
-	kenter("%%%d,%s,'%s',%zu",
-	       key->serial, key->description, data, datalen);
+	kenter("%%%d,%s,'%*.*s',%zu",
+	       key->serial, key->description,
+	       (int)datalen, (int)datalen, data, datalen);
 
 	if (datalen <= 1 || !data || data[datalen - 1] != '\0')
 		return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
 		seq_printf(m, ": %u", key->datalen);
 }
 
+/*
+ * read the DNS data
+ * - the key's semaphore is read-locked
+ */
+static long dns_resolver_read(const struct key *key,
+			      char __user *buffer, size_t buflen)
+{
+	if (key->type_data.x[0])
+		return key->type_data.x[0];
+
+	return user_read(key, buffer, buflen);
+}
+
 struct key_type key_type_dns_resolver = {
 	.name		= "dns_resolver",
 	.instantiate	= dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
 	.revoke		= user_revoke,
 	.destroy	= user_destroy,
 	.describe	= dns_resolver_describe,
-	.read		= user_read,
+	.read		= dns_resolver_read,
 };
 
 static int __init init_dns_resolver(void)
-- 
cgit v1.1