From ab434b60ab07f8c44246b6fb0cddee436687a09a Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 13 Jan 2012 22:22:03 -0800
Subject: ceph: initialize client debugfs outside of monc->mutex

Initializing debugfs under monc->mutex introduces a lock dependency for
sb->s_type->i_mutex_key, which (combined with several other dependencies)
leads to an annoying lockdep warning.  There's no particular reason to do
the debugfs setup under this lock, so move it out.

It used to be the case that our first monmap could come from the OSD; that
is no longer the case with recent servers, so we will reliably set up the
client entry during the initial authentication.

We don't have to worry about racing with debugfs teardown by
ceph_debugfs_client_cleanup() because ceph_destroy_client() calls
ceph_msgr_flush() first, which will wait for the message dispatch work to
complete (and the debugfs init to complete).

Fixes: #1940
Signed-off-by: Sage Weil
---
 net/ceph/ceph_common.c |  2 --
 net/ceph/mon_client.c  | 13 ++++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97f70e5..761ad9d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -85,8 +85,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
 	} else {
 		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
 		memcpy(&client->fsid, fsid, sizeof(*fsid));
-		ceph_debugfs_client_init(client);
-		client->have_fsid = true;
 	}
 	return 0;
 }
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 0b62dea..1845cde 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -8,8 +8,8 @@
 
 #include
 #include
+#include
 #include
-
 #include
 
 /*
@@ -340,8 +340,19 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 	client->monc.monmap = monmap;
 	kfree(old);
 
+	if (!client->have_fsid) {
+		client->have_fsid = true;
+		mutex_unlock(&monc->mutex);
+		/*
+		 * do debugfs initialization without mutex to avoid
+		 * creating a locking dependency
+		 */
+		ceph_debugfs_client_init(client);
+		goto out_unlocked;
+	}
 out:
 	mutex_unlock(&monc->mutex);
+out_unlocked:
 	wake_up_all(&client->auth_wq);
 }
 
--
cgit v1.1

From e2446eaab5585555a38ea0df4e01ff313dbb4ac9 Mon Sep 17 00:00:00 2001
From: Shawn Lu
Date: Sat, 4 Feb 2012 12:38:09 +0000
Subject: tcp_v4_send_reset: binding oif to iif in no sock case

Bind the RST packet's outgoing interface to the incoming interface for
TCP v4 when there is no socket associated with it.  When sk is not NULL,
use sk->sk_bound_dev_if instead (suggested by Eric Dumazet).

This has a few benefits:
1. tcp_v6_send_reset already does this.
2. It helps TCP connections made with SO_BINDTODEVICE set: when the
   connection is lost, we are still able to send the RST out on the
   same interface.
3. We are sending a reply, and it is most likely to succeed if the
   iif is used.

Signed-off-by: Shawn Lu
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4c..94d683a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -651,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+	/* When socket is gone, all binding information is lost.
+	 * routing might fail in this case. using iif for oif to
+	 * make sure we can deliver it
+	 */
+	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
 
 	net = dev_net(skb_dst(skb)->dev);
 	arg.tos = ip_hdr(skb)->tos;
--
cgit v1.1
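Benefit 2 is easiest to picture from user space: an application that pins a TCP
socket to one interface with SO_BINDTODEVICE expects replies for that flow,
including kernel-generated resets, to stay on that link.  A minimal illustration
of such a socket setup (ordinary Linux socket API; the helper name and the
"eth0" interface are invented for the example, nothing here is taken from the
patch itself):

    #include <string.h>
    #include <sys/socket.h>

    /* Pin every packet this socket sends to one device.  With the fix above,
     * an RST the kernel has to generate after the socket is already gone is
     * routed out the interface the offending segment arrived on (the iif),
     * instead of whatever interface the routing table happens to pick.
     */
    static int bind_to_device(int fd, const char *ifname)
    {
            return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                              ifname, strlen(ifname) + 1);
    }

    /* usage: bind_to_device(sockfd, "eth0") before connect() */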
From 6d25886ee2fbc05a7bf4dae5f5ae345cb73df2fd Mon Sep 17 00:00:00 2001
From: Anisse Astier
Date: Tue, 7 Feb 2012 07:39:11 +0000
Subject: net: Fix build regression when INET_UDP_DIAG=y and IPV6=m

Tested-by: Anisse Astier
Signed-off-by: David S. Miller
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index aa2a2c7..d183262 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,7 +409,7 @@ config INET_TCP_DIAG
 
 config INET_UDP_DIAG
 	tristate "UDP: socket monitoring interface"
-	depends on INET_DIAG
+	depends on INET_DIAG && (IPV6 || IPV6=n)
 	default n
 	---help---
 	  Support for UDP socket monitoring interface used by the ss tool.
--
cgit v1.1

From 5ca3b72c5da47d95b83857b768def6172fbc080a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 8 Feb 2012 08:51:50 +0000
Subject: gro: more generic L2 header check

Shlomo Pongratz reported that the GRO L2 header check was suited for
Ethernet only, and failed on IB/ipoib traffic.  He provided a patch
faking a zeroed header to let GRO aggregate frames.

Roland Dreier, Herbert Xu, and others suggested we change the GRO L2
header check to be more generic, i.e. not assuming the L2 header is
14 bytes, but taking hard_header_len into account.

__napi_gro_receive() has special handling for the common case (Ethernet)
to avoid a memcmp() call and use an inline optimized function instead.

Signed-off-by: Eric Dumazet
Reported-by: Shlomo Pongratz
Cc: Roland Dreier
Cc: Or Gerlitz
Cc: Herbert Xu
Tested-by: Sean Hefty
Signed-off-by: David S. Miller
---
 net/core/dev.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 115dee1..6ca32f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3500,14 +3500,20 @@ static inline gro_result_t __napi_gro_receive(struct napi_struct *napi,
 					      struct sk_buff *skb)
 {
 	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
 
 	for (p = napi->gro_list; p; p = p->next) {
 		unsigned long diffs;
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		diffs |= compare_ether_header(skb_mac_header(p),
-					      skb_gro_mac_header(skb));
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
--
cgit v1.1

From 16bda13d90c8d5da243e2cfa1677e62ecce26860 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Mon, 6 Feb 2012 15:14:37 -0500
Subject: net: Make qdisc_skb_cb upper size bound explicit.

Just like skb->cb[], so that qdisc_skb_cb can be encapsulated inside of
other data structures.

This is intended to be used by IPoIB so that it can remember addressing
information stored at hard_header_ops->create() time that it can fetch
when the packet gets to the transmit routine.

Signed-off-by: David S. Miller
---
 net/sched/sch_choke.c | 3 +--
 net/sched/sch_netem.c | 3 +--
 net/sched/sch_sfb.c   | 3 +--
 net/sched/sch_sfq.c   | 5 ++---
 4 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index e465064..7e267d7 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -148,8 +148,7 @@ struct choke_skb_cb {
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
 	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2776012..e83d61c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -130,8 +130,7 @@ struct netem_skb_cb {
 
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 96e42ca..d7eea99 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -94,8 +94,7 @@ struct sfb_skb_cb {
 
 static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
+	qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb));
 	return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 67494ae..60d4718 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -166,9 +166,8 @@ struct sfq_skb_cb {
 
 static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <
-		sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
-	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
+	qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
+	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
--
cgit v1.1
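The qdisc_cb_private_validate() helper these call sites now use is defined
outside the net/-limited diff shown here, presumably in
include/net/sch_generic.h next to struct qdisc_skb_cb.  Judging from the
BUILD_BUG_ON() lines it replaces, it centralizes the same compile-time size
check; a rough sketch (not the exact upstream definition) looks like this:

    /* sketch: reject at compile time any scheduler whose private cb area,
     * stacked on top of the generic qdisc cb, no longer fits in skb->cb[].
     * With the size bound on struct qdisc_skb_cb made explicit, that struct
     * can also be embedded in other data structures (the IPoIB use case
     * named in the commit message).
     */
    static inline void qdisc_cb_private_validate(const struct sk_buff *skb,
                                                 int sz)
    {
            BUILD_BUG_ON(sizeof(skb->cb) <
                         sizeof(struct qdisc_skb_cb) + sz);
    }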
From a87dfe14a78501c931a4d5481efff6a809aa907d Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:36 +0000
Subject: netprio_cgroup: fix an off-by-one bug

 # mount -t cgroup xxx /mnt
 # mkdir /mnt/tmp
 # cat /mnt/tmp/net_prio.ifpriomap
 lo 0
 eth0 0
 virbr0 0
 # echo 'lo 999' > /mnt/tmp/net_prio.ifpriomap
 # cat /mnt/tmp/net_prio.ifpriomap
 lo 999
 eth0 0
 virbr0 4101267344

We got weird output because we exceeded the boundary of the array.  We may
even crash the kernel.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/netprio_cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 9ae183a..72c6387 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -108,7 +108,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 static void update_netdev_tables(void)
 {
 	struct net_device *dev;
-	u32 max_len = atomic_read(&max_prioidx);
+	u32 max_len = atomic_read(&max_prioidx) + 1;
 	struct netprio_map *map;
 
 	rtnl_lock();
--
cgit v1.1
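The fix is a single "+ 1", but the reasoning is the classic index-versus-count
confusion: prioidx values are handed out starting at 0, so a table that must
hold an entry for index max_prioidx needs max_prioidx + 1 elements.  A
contrived user-space illustration of the same mistake (names and allocator
invented for the example; this is not kernel code):

    #include <stdlib.h>

    /* illustration: size the table by element count, not by highest index */
    static unsigned int *alloc_priomap(unsigned int max_idx)
    {
            /* calloc(max_idx, ...) would leave index max_idx one element
             * past the end -- the same out-of-bounds read that produced the
             * garbage "virbr0 4101267344" entry in the transcript above.
             */
            return calloc(max_idx + 1, sizeof(unsigned int));
    }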
From f5c38208d32412d72b97a4f0d44af0eb39feb20b Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:37 +0000
Subject: netprio_cgroup: don't allocate prio table when a device is registered

Delay the allocation until a priority is actually set through the cgroup;
this makes skb_update_priority() faster when no priority is set.

This also eliminates an off-by-one bug similar to the one fixed in the
previous patch.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/netprio_cgroup.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 72c6387..4dacc44 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -271,7 +271,6 @@ static int netprio_device_event(struct notifier_block *unused,
 {
 	struct net_device *dev = ptr;
 	struct netprio_map *old;
-	u32 max_len = atomic_read(&max_prioidx);
 
 	/*
 	 * Note this is called with rtnl_lock held so we have update side
@@ -279,11 +278,6 @@
 	 */
 
 	switch (event) {
-
-	case NETDEV_REGISTER:
-		if (max_len)
-			extend_netdev_table(dev, max_len);
-		break;
 	case NETDEV_UNREGISTER:
 		old = rtnl_dereference(dev->priomap);
 		RCU_INIT_POINTER(dev->priomap, NULL);
--
cgit v1.1

From 2b73bc65e2771372c818db7955709c8caedbf8b9 Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Fri, 10 Feb 2012 05:43:38 +0000
Subject: netprio_cgroup: fix wrong memory access when NETPRIO_CGROUP=m

When the netprio_cgroup module is not loaded, net_prio_subsys_id is -1,
and so sock_update_prioidx() accesses the cgroup_subsys array with a
negative index, subsys[-1].

Make the code resemble the cls_cgroup code, which is bug free.

Originally-authored-by: Li Zefan
Signed-off-by: Li Zefan
Signed-off-by: Neil Horman
CC: "David S. Miller"
Signed-off-by: David S. Miller
---
 net/core/sock.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 3e81fd2..02f8dfe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1171,13 +1171,10 @@ EXPORT_SYMBOL(sock_update_classid);
 
 void sock_update_netprioidx(struct sock *sk)
 {
-	struct cgroup_netprio_state *state;
 	if (in_interrupt())
 		return;
-	rcu_read_lock();
-	state = task_netprio_state(current);
-	sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
-	rcu_read_unlock();
+
+	sk->sk_cgrp_prioidx = task_netprioidx(current);
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
--
cgit v1.1
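task_netprioidx() itself is not visible in this net/-only diff; it presumably
lives in include/net/netprio_cgroup.h and, mirroring the cls_cgroup pattern the
message points to, hides both the RCU locking and the subsystem-id handling so
the caller can never index cgroup_subsys with -1.  A rough sketch of such a
helper (simplified; the exact upstream definition may differ, and the
NETPRIO_CGROUP=m variant additionally has to re-read net_prio_subsys_id under
RCU):

    /* sketch: return the current task's netprio index, or 0 when the netprio
     * cgroup subsystem is not registered (module not loaded), instead of
     * indexing subsys[-1] as the old open-coded path could.
     */
    static inline u32 task_netprioidx(struct task_struct *p)
    {
            struct cgroup_netprio_state *state;
            u32 idx = 0;

            rcu_read_lock();
            if (net_prio_subsys_id >= 0) {
                    state = container_of(task_subsys_state(p, net_prio_subsys_id),
                                         struct cgroup_netprio_state, css);
                    idx = state->prioidx;
            }
            rcu_read_unlock();
            return idx;
    }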
From 5dc7883f2a7c25f8df40d7479687153558cd531b Mon Sep 17 00:00:00 2001
From: Li Wei
Date: Thu, 9 Feb 2012 21:15:25 +0000
Subject: ipv4: Fix wrong order of ip_rt_get_source() and update iph->daddr.

This patch fixes a bug introduced by commit ac8a4810 (ipv4: Save nexthop
address of LSRR/SSRR option to IPCB.).  In that patch we saved the SRR
nexthop in ip_option->nexthop and deferred updating iph->daddr until
ip_forward_options(), but it needs to be updated before ip_rt_get_source()
is called, otherwise we may record a wrong source address.

Signed-off-by: Li Wei
Signed-off-by: David S. Miller
---
 net/ipv4/ip_options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1e60f76..42dd1a9 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb)
 		}
 		if (srrptr + 3 <= srrspace) {
 			opt->is_changed = 1;
-			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			ip_hdr(skb)->daddr = opt->nexthop;
+			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
 			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
--
cgit v1.1

From 70620c46ac2b45c24b0f22002fdf5ddd1f7daf81 Mon Sep 17 00:00:00 2001
From: Thomas Graf
Date: Fri, 10 Feb 2012 04:07:11 +0000
Subject: net: Don't proxy arp respond if iif == rt->dst.dev if private VLAN is disabled

Commit 653241 (net: RFC3069, private VLAN proxy arp support) changed the
behavior of proxy arp to send arp replies back out on the interface the
request came in on, even if the private VLAN feature is disabled.

Previously we checked rt->dst.dev != skb->dev both in the scenario where
proxy arp is enabled for the netdevice and in the scenario where individual
proxy neighbour entries have been added.

This patch adds the check back for the pneigh_lookup() scenario.

Signed-off-by: Thomas Graf
Acked-by: Jesper Dangaard Brouer
Signed-off-by: David S. Miller
---
 net/ipv4/arp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59402be..63e4989 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,7 +863,8 @@ static int arp_process(struct sk_buff *skb)
 		if (addr_type == RTN_UNICAST &&
 		    (arp_fwd_proxy(in_dev, dev, rt) ||
 		     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
-		     pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+		     (rt->dst.dev != dev &&
+		      pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
 			n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 			if (n)
 				neigh_release(n);
--
cgit v1.1