From ab434b60ab07f8c44246b6fb0cddee436687a09a Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 13 Jan 2012 22:22:03 -0800
Subject: ceph: initialize client debugfs outside of monc->mutex

Initializing debugfs under monc->mutex introduces a lock dependency for
sb->s_type->i_mutex_key, which (combined with several other dependencies)
leads to an annoying lockdep warning.  There's no particular reason to do
the debugfs setup under this lock, so move it out.

It used to be the case that our first monmap could come from the OSD; that
is no longer the case with recent servers, so we will reliably set up the
client entry during the initial authentication.

We don't have to worry about racing with debugfs teardown by
ceph_debugfs_client_cleanup() because ceph_destroy_client() calls
ceph_msgr_flush() first, which will wait for the message dispatch work to
complete (and the debugfs init to complete).

Fixes: #1940
Signed-off-by: Sage Weil
---
 net/ceph/ceph_common.c |  2 --
 net/ceph/mon_client.c  | 13 ++++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97f70e5..761ad9d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -85,8 +85,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
 	} else {
 		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
 		memcpy(&client->fsid, fsid, sizeof(*fsid));
-		ceph_debugfs_client_init(client);
-		client->have_fsid = true;
 	}
 	return 0;
 }
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 0b62dea..1845cde 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -8,8 +8,8 @@
 
 #include
 #include
+#include
 #include
-
 #include
 
 /*
@@ -340,8 +340,19 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 	client->monc.monmap = monmap;
 	kfree(old);
 
+	if (!client->have_fsid) {
+		client->have_fsid = true;
+		mutex_unlock(&monc->mutex);
+		/*
+		 * do debugfs initialization without mutex to avoid
+		 * creating a locking dependency
+		 */
+		ceph_debugfs_client_init(client);
+		goto out_unlocked;
+	}
 out:
 	mutex_unlock(&monc->mutex);
+out_unlocked:
 	wake_up_all(&client->auth_wq);
 }
 
--
cgit v1.1

From e2446eaab5585555a38ea0df4e01ff313dbb4ac9 Mon Sep 17 00:00:00 2001
From: Shawn Lu
Date: Sat, 4 Feb 2012 12:38:09 +0000
Subject: tcp_v4_send_reset: binding oif to iif in no sock case

Bind the RST packet's outgoing interface to the incoming interface for
TCP v4 when there is no socket associated with it.  When sk is not NULL,
use sk->sk_bound_dev_if instead (suggested by Eric Dumazet).

This has a few benefits:
1. tcp_v6_send_reset already does this.
2. It helps TCP connections made with SO_BINDTODEVICE set: when the
   connection is lost, we are still able to send the RST out on the
   same interface.
3. We are sending a reply, and it is most likely to succeed if the
   iif is used.

Signed-off-by: Shawn Lu
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4c..94d683a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -651,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+	/* When socket is gone, all binding information is lost.
+	 * routing might fail in this case. using iif for oif to
+	 * make sure we can deliver it
+	 */
+	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
 
 	net = dev_net(skb_dst(skb)->dev);
 	arg.tos = ip_hdr(skb)->tos;
--
cgit v1.1
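Benefit 2 is easiest to picture from user space: an application that pins a TCP
socket to one interface with SO_BINDTODEVICE expects replies for that flow,
including kernel-generated resets, to stay on that link.  A minimal illustration
of such a socket setup (ordinary Linux socket API; the helper name and the
"eth0" interface are invented for the example, nothing here is taken from the
patch itself):

    #include <string.h>
    #include <sys/socket.h>

    /* Pin every packet this socket sends to one device.  With the fix above,
     * an RST the kernel has to generate after the socket is already gone is
     * routed out the interface the offending segment arrived on (the iif),
     * instead of whatever interface the routing table happens to pick.
     */
    static int bind_to_device(int fd, const char *ifname)
    {
            return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                              ifname, strlen(ifname) + 1);
    }

    /* usage: bind_to_device(sockfd, "eth0") before connect() */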
From 6d25886ee2fbc05a7bf4dae5f5ae345cb73df2fd Mon Sep 17 00:00:00 2001
From: Anisse Astier
Date: Tue, 7 Feb 2012 07:39:11 +0000
Subject: net: Fix build regression when INET_UDP_DIAG=y and IPV6=m

Tested-by: Anisse Astier
Signed-off-by: David S. Miller
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index aa2a2c7..d183262 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,7 +409,7 @@ config INET_TCP_DIAG
 
 config INET_UDP_DIAG
 	tristate "UDP: socket monitoring interface"
-	depends on INET_DIAG
+	depends on INET_DIAG && (IPV6 || IPV6=n)
 	default n
 	---help---
 	  Support for UDP socket monitoring interface used by the ss tool.
--
cgit v1.1

From 5ca3b72c5da47d95b83857b768def6172fbc080a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 8 Feb 2012 08:51:50 +0000
Subject: gro: more generic L2 header check

Shlomo Pongratz reported that the GRO L2 header check was suited for
Ethernet only, and failed on IB/ipoib traffic.  He provided a patch
faking a zeroed header to let GRO aggregate frames.

Roland Dreier, Herbert Xu, and others suggested we change the GRO L2
header check to be more generic, i.e. not assuming the L2 header is
14 bytes, but taking hard_header_len into account.

__napi_gro_receive() has special handling for the common case (Ethernet)
to avoid a memcmp() call and use an inline optimized function instead.

Signed-off-by: Eric Dumazet
Reported-by: Shlomo Pongratz
Cc: Roland Dreier
Cc: Or Gerlitz
Cc: Herbert Xu
Tested-by: Sean Hefty
Signed-off-by: David S. Miller
---
 net/core/dev.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 115dee1..6ca32f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3500,14 +3500,20 @@ static inline gro_result_t __napi_gro_receive(struct napi_struct *napi,
 					      struct sk_buff *skb)
 {
 	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
 
 	for (p = napi->gro_list; p; p = p->next) {
 		unsigned long diffs;
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		diffs |= compare_ether_header(skb_mac_header(p),
-					      skb_gro_mac_header(skb));
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
--
cgit v1.1

From 16bda13d90c8d5da243e2cfa1677e62ecce26860 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Mon, 6 Feb 2012 15:14:37 -0500
Subject: net: Make qdisc_skb_cb upper size bound explicit.

Just like skb->cb[], so that qdisc_skb_cb can be encapsulated inside of
other data structures.

This is intended to be used by IPoIB so that it can remember addressing
information stored at hard_header_ops->create() time that it can fetch
when the packet gets to the transmit routine.

Signed-off-by: David S. Miller
---
 net/sched/sch_choke.c | 3 +--
 net/sched/sch_netem.c | 3 +--
 net/sched/sch_sfb.c   | 3 +--
 net/sched/sch_sfq.c   | 5 ++---
 4 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index e465064..7e267d7 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -148,8 +148,7 @@ struct choke_skb_cb {
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
 	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2776012..e83d61c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -130,8 +130,7 @@ struct netem_skb_cb {
 
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 96e42ca..d7eea99 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -94,8 +94,7 @@ struct sfb_skb_cb {
 
 static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb));
 	return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 67494ae..60d4718 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -166,9 +166,8 @@ struct sfq_skb_cb {
 
 static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
-	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
+	qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
+	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
--
cgit v1.1
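The qdisc_cb_private_validate() helper these call sites now use is defined
outside the net/-limited diff shown here, presumably in
include/net/sch_generic.h next to struct qdisc_skb_cb.  Judging from the
BUILD_BUG_ON() lines it replaces, it centralizes the same compile-time size
check; a rough sketch (not the exact upstream definition) looks like this:

    /* sketch: reject at compile time any scheduler whose private cb area,
     * stacked on top of the generic qdisc cb, no longer fits in skb->cb[].
     * With the size bound on struct qdisc_skb_cb made explicit, that struct
     * can also be embedded in other data structures (the IPoIB use case
     * named in the commit message).
     */
    static inline void qdisc_cb_private_validate(const struct sk_buff *skb,
                                                 int sz)
    {
            BUILD_BUG_ON(sizeof(skb->cb) <
                         sizeof(struct qdisc_skb_cb) + sz);
    }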
From a87dfe14a78501c931a4d5481efff6a809aa907d Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:36 +0000
Subject: netprio_cgroup: fix an off-by-one bug

 # mount -t cgroup xxx /mnt
 # mkdir /mnt/tmp
 # cat /mnt/tmp/net_prio.ifpriomap
 lo 0
 eth0 0
 virbr0 0
 # echo 'lo 999' > /mnt/tmp/net_prio.ifpriomap
 # cat /mnt/tmp/net_prio.ifpriomap
 lo 999
 eth0 0
 virbr0 4101267344

We got weird output because we exceeded the boundary of the array.  We may
even crash the kernel.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/netprio_cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 9ae183a..72c6387 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -108,7 +108,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 static void update_netdev_tables(void)
 {
 	struct net_device *dev;
-	u32 max_len = atomic_read(&max_prioidx);
+	u32 max_len = atomic_read(&max_prioidx) + 1;
 	struct netprio_map *map;
 
 	rtnl_lock();
--
cgit v1.1
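The fix is a single "+ 1", but the reasoning is the classic index-versus-count
confusion: prioidx values are handed out starting at 0, so a table that must
hold an entry for index max_prioidx needs max_prioidx + 1 elements.  A
contrived user-space illustration of the same mistake (names and allocator
invented for the example; this is not kernel code):

    #include <stdlib.h>

    /* illustration: size the table by element count, not by highest index */
    static unsigned int *alloc_priomap(unsigned int max_idx)
    {
            /* calloc(max_idx, ...) would leave index max_idx one element
             * past the end -- the same out-of-bounds read that produced the
             * garbage "virbr0 4101267344" entry in the transcript above.
             */
            return calloc(max_idx + 1, sizeof(unsigned int));
    }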
From f5c38208d32412d72b97a4f0d44af0eb39feb20b Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:37 +0000
Subject: netprio_cgroup: don't allocate prio table when a device is registered

Delay the allocation until a priority is actually set through the cgroup;
this makes skb_update_priority() faster when no priority is set.

This also eliminates an off-by-one bug similar to the one fixed in the
previous patch.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/netprio_cgroup.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 72c6387..4dacc44 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -271,7 +271,6 @@ static int netprio_device_event(struct notifier_block *unused,
 {
 	struct net_device *dev = ptr;
 	struct netprio_map *old;
-	u32 max_len = atomic_read(&max_prioidx);
 
 	/*
 	 * Note this is called with rtnl_lock held so we have update side
@@ -279,11 +278,6 @@
 	 */
 
 	switch (event) {
-
-	case NETDEV_REGISTER:
-		if (max_len)
-			extend_netdev_table(dev, max_len);
-		break;
 	case NETDEV_UNREGISTER:
 		old = rtnl_dereference(dev->priomap);
 		RCU_INIT_POINTER(dev->priomap, NULL);
--
cgit v1.1

From 2b73bc65e2771372c818db7955709c8caedbf8b9 Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:38 +0000
Subject: netprio_cgroup: fix wrong memory access when NETPRIO_CGROUP=m

When the netprio_cgroup module is not loaded, net_prio_subsys_id is -1,
and so sock_update_prioidx() accesses the cgroup_subsys array with a
negative index, subsys[-1].

Make the code resemble the cls_cgroup code, which is bug free.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/sock.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 3e81fd2..02f8dfe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1171,13 +1171,10 @@ EXPORT_SYMBOL(sock_update_classid);
 
 void sock_update_netprioidx(struct sock *sk)
 {
-	struct cgroup_netprio_state *state;
 	if (in_interrupt())
 		return;
-	rcu_read_lock();
-	state = task_netprio_state(current);
-	sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
-	rcu_read_unlock();
+
+	sk->sk_cgrp_prioidx = task_netprioidx(current);
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
--
cgit v1.1
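task_netprioidx() itself is not visible in this net/-only diff; it presumably
lives in include/net/netprio_cgroup.h and, mirroring the cls_cgroup pattern the
message points to, hides both the RCU locking and the subsystem-id handling so
the caller can never index cgroup_subsys with -1.  A rough sketch of such a
helper (simplified; the exact upstream definition may differ, and the
NETPRIO_CGROUP=m variant additionally has to re-read net_prio_subsys_id under
RCU):

    /* sketch: return the current task's netprio index, or 0 when the netprio
     * cgroup subsystem is not registered (module not loaded), instead of
     * indexing subsys[-1] as the old open-coded path could.
     */
    static inline u32 task_netprioidx(struct task_struct *p)
    {
            struct cgroup_netprio_state *state;
            u32 idx = 0;

            rcu_read_lock();
            if (net_prio_subsys_id >= 0) {
                    state = container_of(task_subsys_state(p, net_prio_subsys_id),
                                         struct cgroup_netprio_state, css);
                    idx = state->prioidx;
            }
            rcu_read_unlock();
            return idx;
    }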
From 5dc7883f2a7c25f8df40d7479687153558cd531b Mon Sep 17 00:00:00 2001
From: Li Wei
Date: Thu, 9 Feb 2012 21:15:25 +0000
Subject: ipv4: Fix wrong order of ip_rt_get_source() and update iph->daddr.

This patch fixes a bug introduced by commit ac8a4810 (ipv4: Save nexthop
address of LSRR/SSRR option to IPCB.).  In that patch we saved the SRR
nexthop in ip_option->nexthop and deferred updating iph->daddr until
ip_forward_options(), but it needs to be updated before ip_rt_get_source()
is called, otherwise we may record a wrong source address.

Signed-off-by: Li Wei
Signed-off-by: David S. Miller
---
 net/ipv4/ip_options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1e60f76..42dd1a9 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb)
 		}
 		if (srrptr + 3 <= srrspace) {
 			opt->is_changed = 1;
-			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			ip_hdr(skb)->daddr = opt->nexthop;
+			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
 			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
--
cgit v1.1

From 70620c46ac2b45c24b0f22002fdf5ddd1f7daf81 Mon Sep 17 00:00:00 2001
From: Thomas Graf
Date: Fri, 10 Feb 2012 04:07:11 +0000
Subject: net: Don't proxy arp respond if iif == rt->dst.dev if private VLAN is disabled

Commit 653241 (net: RFC3069, private VLAN proxy arp support) changed the
behavior of proxy arp to send arp replies back out on the interface the
request came in on, even if the private VLAN feature is disabled.

Previously we checked rt->dst.dev != skb->dev both in the scenario where
proxy arp is enabled for the netdevice and in the scenario where individual
proxy neighbour entries have been added.

This patch adds the check back for the pneigh_lookup() scenario.

Signed-off-by: Thomas Graf
Acked-by: Jesper Dangaard Brouer
Signed-off-by: David S. Miller
---
 net/ipv4/arp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59402be..63e4989 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,7 +863,8 @@ static int arp_process(struct sk_buff *skb)
 		if (addr_type == RTN_UNICAST &&
 		    (arp_fwd_proxy(in_dev, dev, rt) ||
 		     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
-		     pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+		     (rt->dst.dev != dev &&
+		      pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
 			n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 			if (n)
 				neigh_release(n);
--
cgit v1.1