From 22e091e5253da1e9ad7c0a82c2c84446fc403efe Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Fri, 12 Nov 2010 08:51:55 +0100 Subject: netfilter: ipv6: fix overlap check for fragments The type of FRAG6_CB(prev)->offset is int, skb->len is *unsigned* int, and offset is int. Without this patch, type conversion occurred to this expression, when (FRAG6_CB(prev)->offset + prev->len) is less than offset. Signed-off-by: Shan Wei Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3a3f129..79d43aa 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -286,7 +286,7 @@ found: /* Check for overlap with preceding fragment. */ if (prev && - (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) goto discard_fq; /* Look for overlap with succeeding segment. */ -- cgit v1.1 From 2de795707294972f6c34bae9de713e502c431296 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 27 Oct 2010 18:16:49 +0000 Subject: ipv6: addrconf: don't remove address state on ifdown if the address is being kept Currently, addrconf_ifdown does not delete statically configured IPv6 addresses when the interface is brought down. The intent is that when the interface comes back up the address will be usable again. However, this doesn't actually work, because the system stops listening on the corresponding solicited-node multicast address, so the address cannot respond to neighbor solicitations and thus receive traffic. Also, the code notifies the rest of the system that the address is being deleted (e.g, RTM_DELADDR), even though it is not. Fix it so that none of this state is updated if the address is being kept on the interface. Tested: Added a statically configured IPv6 address to an interface, started ping, brought link down, brought link up again. When link came up ping kept on going and "ip -6 maddr" showed that the host was still subscribed to there Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e048ec6..b41ce0f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2740,10 +2740,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; ifa->state = INET6_IFADDR_STATE_DAD; - - write_unlock_bh(&idev->lock); - - in6_ifa_hold(ifa); } else { list_del(&ifa->if_list); @@ -2758,19 +2754,15 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifa->state_lock); - if (state == INET6_IFADDR_STATE_DEAD) - goto put_ifa; + if (state == INET6_IFADDR_STATE_DEAD) { + in6_ifa_put(ifa); + } else { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, + NETDEV_DOWN, ifa); + } + write_lock_bh(&idev->lock); } - - __ipv6_ifa_notify(RTM_DELADDR, ifa); - if (ifa->state == INET6_IFADDR_STATE_DEAD) - atomic_notifier_call_chain(&inet6addr_chain, - NETDEV_DOWN, ifa); - -put_ifa: - in6_ifa_put(ifa); - - write_lock_bh(&idev->lock); } list_splice(&keep_list, &idev->addr_list); -- cgit v1.1 From 403856532734317d25ec86ab1e75b8133db7acc6 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 8 Nov 2010 12:33:48 +0000 Subject: ipv6: Warn users if maximum number of routes is reached. This gives users at least some clue as to what the problem might be and how to go about fixing it. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fc32833..96455ffb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1945,8 +1945,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); struct neighbour *neigh; - if (rt == NULL) + if (rt == NULL) { + if (net_ratelimit()) + pr_warning("IPv6: Maximum number of routes reached," + " consider increasing route/max_size.\n"); return ERR_PTR(-ENOMEM); + } dev_hold(net->loopback_dev); in6_dev_hold(idev); -- cgit v1.1 From 8a22c99a80b0926585cfcbcc423ee2c49c1fd820 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 14 Nov 2010 17:05:00 +0000 Subject: net/ipv6/mcast.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d1444b9..9c50745 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -257,7 +257,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return NULL; idev = __in6_dev_get(dev); if (!idev) - return NULL;; + return NULL; read_lock_bh(&idev->lock); if (idev->dead) { read_unlock_bh(&idev->lock); -- cgit v1.1 From 9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 15 Nov 2010 20:29:21 +0000 Subject: ipv6: fix missing in6_ifa_put in addrconf Fix ref count bug introduced by commit 2de795707294972f6c34bae9de713e502c431296 Author: Lorenzo Colitti Date: Wed Oct 27 18:16:49 2010 +0000 ipv6: addrconf: don't remove address state on ifdown if the address is being kept Fix logic so that addrconf_ifdown() decrements the inet6_ifaddr refcnt correctly with in6_ifa_put(). Reported-by: Stephen Hemminger Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b41ce0f..aaa3ca4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2754,13 +2754,13 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifa->state_lock); - if (state == INET6_IFADDR_STATE_DEAD) { - in6_ifa_put(ifa); - } else { + if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); } + + in6_ifa_put(ifa); write_lock_bh(&idev->lock); } } -- cgit v1.1 From c31504dc0d1dc853dcee509d9999169a9097a717 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 19:58:26 +0000 Subject: udp: use atomic_inc_not_zero_hint UDP sockets refcount is usually 2, unless an incoming frame is going to be queued in receive or backlog queue. Using atomic_inc_not_zero_hint() permits to reduce latency, because processor issues less memory transactions. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93..b541a4e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -227,7 +227,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -294,7 +294,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, daddr, dport, dif) < badness)) { -- cgit v1.1 From b382b191ea9e9ccefc437433d23befe91f4a8925 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:33:57 +0000 Subject: ipv6: AF_INET6 link address family IPv6 already exposes some address family data via netlink in the IFLA_PROTINFO attribute if RTM_GETLINK request is sent with the address family set to AF_INET6. We take over this format and reuse all the code. Signed-off-by: Thomas Graf Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 122 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 33 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index aaa3ca4..470e7ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3831,6 +3831,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; } +static inline size_t inet6_ifla6_size(void) +{ + return nla_total_size(4) /* IFLA_INET6_FLAGS */ + + nla_total_size(sizeof(struct ifla_cacheinfo)) + + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ +} + static inline size_t inet6_if_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -3838,13 +3847,7 @@ static inline size_t inet6_if_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size( /* IFLA_PROTINFO */ - nla_total_size(4) /* IFLA_INET6_FLAGS */ - + nla_total_size(sizeof(struct ifla_cacheinfo)) - + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ - + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ - ); + + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ } static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, @@ -3891,15 +3894,76 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } +static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) +{ + struct nlattr *nla; + struct ifla_cacheinfo ci; + + NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); + + ci.max_reasm_len = IPV6_MAXPLEN; + ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.reachable_time = idev->nd_parms->reachable_time; + ci.retrans_time = idev->nd_parms->retrans_time; + NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); + + nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); + if (nla == NULL) + goto nla_put_failure; + ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); + + /* XXX - MC not implemented */ + + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); + + nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static size_t inet6_get_link_af_size(const struct net_device *dev) +{ + if (!__in6_dev_get(dev)) + return 0; + + return inet6_ifla6_size(); +} + +static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct inet6_dev *idev = __in6_dev_get(dev); + + if (!idev) + return -ENODATA; + + if (inet6_fill_ifla6_attrs(skb, idev) < 0) + return -EMSGSIZE; + + return 0; +} + +static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla) +{ + return -EOPNOTSUPP; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; - struct nlattr *nla; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - struct ifla_cacheinfo ci; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) @@ -3926,31 +3990,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, if (protoinfo == NULL) goto nla_put_failure; - NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - - ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.reachable_time = idev->nd_parms->reachable_time; - ci.retrans_time = idev->nd_parms->retrans_time; - NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - - nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) - goto nla_put_failure; - ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); - - /* XXX - MC not implemented */ - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (inet6_fill_ifla6_attrs(skb, idev) < 0) goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla_nest_end(skb, protoinfo); return nlmsg_end(skb, nlh); @@ -4621,6 +4662,13 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_inet6addr_notifier); +static struct rtnl_af_ops inet6_ops = { + .family = AF_INET6, + .fill_link_af = inet6_fill_link_af, + .get_link_af_size = inet6_get_link_af_size, + .parse_link_af = inet6_parse_link_af, +}; + /* * Init / cleanup code */ @@ -4672,6 +4720,10 @@ int __init addrconf_init(void) addrconf_verify(0); + err = rtnl_af_register(&inet6_ops); + if (err < 0) + goto errout_af; + err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); if (err < 0) goto errout; @@ -4687,6 +4739,8 @@ int __init addrconf_init(void) return 0; errout: + rtnl_af_unregister(&inet6_ops); +errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); @@ -4707,6 +4761,8 @@ void addrconf_cleanup(void) rtnl_lock(); + __rtnl_af_unregister(&inet6_ops); + /* clean dev list */ for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) -- cgit v1.1 From 5811662b15db018c740c57d037523683fd3e6123 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 12 Nov 2010 18:43:55 +0000 Subject: net: use the macros defined for the members of flowi Use the macros defined for the members of flowi to clean the code up. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 +--- net/ipv6/netfilter.c | 6 ++---- net/ipv6/route.c | 24 ++++++------------------ net/ipv6/sit.c | 14 ++++++-------- 4 files changed, 15 insertions(+), 33 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6f32ffc..9fab274 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, fl = (struct flowi) { .oif = vif->link, - .nl_u = { .ip6_u = - { .daddr = ipv6h->daddr, } - } + .fl6_dst = ipv6h->daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 7155b24..35915e8 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct flowi fl = { .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, }; dst = ip6_route_output(net, skb->sk, &fl); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96455ffb..c346ccf 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -558,11 +558,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, { struct flowi fl = { .oif = oif, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; @@ -778,13 +774,9 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - }, - }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, + .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, .mark = skb->mark, .proto = iph->nexthdr, }; @@ -1463,12 +1455,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = *dest, - .saddr = *src, - }, - }, + .fl6_dst = *dest, + .fl6_src = *src, }, }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d6bfaec..6e48a80 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -730,10 +730,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) } }, + struct flowi fl = { .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { @@ -855,10 +854,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, + struct flowi fl = { .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; -- cgit v1.1 From 93908d192686d8285dd6441ff855df92a40103d2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 17 Nov 2010 01:44:24 +0000 Subject: ipv6: Expose IFLA_PROTINFO timer values in msecs instead of jiffies IFLA_PROTINFO exposes timer related per device settings in jiffies. Change it to expose these values in msecs like the sysctl interface does. I did not find any users of IFLA_PROTINFO which rely on any of these values and even if there are, they are likely already broken because there is no way for them to reliably convert such a value to another time format. Signed-off-by: Thomas Graf Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b41ce0f..52b7df7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3798,8 +3798,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_AUTOCONF] = cnf->autoconf; array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; - array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; - array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; + array[DEVCONF_RTR_SOLICIT_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_DELAY] = + jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; #ifdef CONFIG_IPV6_PRIVACY array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; @@ -3813,7 +3815,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; - array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; + array[DEVCONF_RTR_PROBE_INTERVAL] = + jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif -- cgit v1.1 From 18a31e1e282f9ed563b131526a88162ccbe04ee3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 17 Nov 2010 04:12:02 +0000 Subject: ipv6: Expose reachable and retrans timer values as msecs Expose reachable and retrans timer values in msecs instead of jiffies. Both timer values are already exposed as msecs in the neighbour table netlink interface. The creation timestamp format with increased precision is kept but cleaned up. Signed-off-by: Thomas Graf Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 52b7df7..2fc35b3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -98,7 +98,11 @@ #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) + +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) #define ADDRCONF_TIMER_FUZZ (HZ / 4) @@ -3444,10 +3448,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, { struct ifa_cacheinfo ci; - ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); ci.ifa_prefered = preferred; ci.ifa_valid = valid; @@ -3932,10 +3934,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.reachable_time = idev->nd_parms->reachable_time; - ci.retrans_time = idev->nd_parms->retrans_time; + ci.tstamp = cstamp_delta(idev->tstamp); + ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); + ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); -- cgit v1.1 From 88b2a9a3d98a19496d64aadda7158c0ad51cbe7d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 15 Nov 2010 20:29:21 +0000 Subject: ipv6: fix missing in6_ifa_put in addrconf Fix ref count bug introduced by commit 2de795707294972f6c34bae9de713e502c431296 Author: Lorenzo Colitti Date: Wed Oct 27 18:16:49 2010 +0000 ipv6: addrconf: don't remove address state on ifdown if the address is being kept Fix logic so that addrconf_ifdown() decrements the inet6_ifaddr refcnt correctly with in6_ifa_put(). Reported-by: Stephen Hemminger Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fc35b3..23cc8e1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2758,13 +2758,13 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifa->state_lock); - if (state == INET6_IFADDR_STATE_DEAD) { - in6_ifa_put(ifa); - } else { + if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); } + + in6_ifa_put(ifa); write_lock_bh(&idev->lock); } } -- cgit v1.1 From 4de58dfebe6882dc1e8e8dc5ec062e28e99623cd Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Sun, 21 Nov 2010 15:03:19 +0000 Subject: Net: ipv6: netfiliter: Makefile: Remove deprecated kbuild goal definitions Changed Makefile to use -y instead of -objs because -objs is deprecated and not mentioned in Documentation/kbuild/makefiles.txt. Signed-off-by: Tracey Dent Signed-off-by: David S. Miller --- net/ipv6/netfilter/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0a432c9..abfee91 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack -nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o +nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o # defrag -nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o +nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # matches -- cgit v1.1 From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 13:12:15 +0000 Subject: ipv6: mcast: RCU conversion ipv6_sk_mc_lock rwlock becomes a spinlock. readers (inet6_mc_check()) now takes rcu_read_lock() instead of read lock. Writers dont need to disable BH anymore. struct ipv6_mc_socklist objects are reclaimed after one RCU grace period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 75 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 31 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9c50745..49f986d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } -- cgit v1.1 From cf7afbfeb8ceb0187348d0a1a0db61305e25f05f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 22 Nov 2010 01:31:54 +0000 Subject: rtnl: make link af-specific updates atomic As David pointed out correctly, updates to af-specific attributes are currently not atomic. If multiple changes are requested and one of them fails, previous updates may have been applied already leaving the link behind in a undefined state. This patch splits the function parse_link_af() into two functions validate_link_af() and set_link_at(). validate_link_af() is placed to validate_linkmsg() check for errors as early as possible before any changes to the link have been made. set_link_af() is called to commit the changes later. This method is not fail proof, while it is currently sufficient to make set_link_af() inerrable and thus 100% atomic, the validation function method will not be able to detect all error scenarios in the future, there will likely always be errors depending on states which are f.e. not protected by rtnl_mutex and thus may change between validation and setting. Also, instead of silently ignoring unknown address families and config blocks for address families which did not register a set function the errors EAFNOSUPPORT respectively EOPNOSUPPORT are returned to avoid comitting 4 out of 5 update requests without notifying the user. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4cf7605..1023ad0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3956,11 +3956,6 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } -static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla) -{ - return -EOPNOTSUPP; -} - static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { @@ -4670,7 +4665,6 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, - .parse_link_af = inet6_parse_link_af, }; /* -- cgit v1.1 From d3c15cab213becc49a6f2ad7f48a59513a5f17dd Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 24 Nov 2010 21:47:56 +0000 Subject: ipv6: kill two unused macro definition 1. IPV6_TLV_TEL_DST_SIZE This has not been using for several years since created. 2. RT6_INFO_LEN commit 33120b30 kill all RT6_INFO_LEN's references, but only this definition remained. commit 33120b30cc3b8665204d4fcde7288638b0dd04d5 Author: Alexey Dobriyan Date: Tue Nov 6 05:27:11 2007 -0800 [IPV6]: Convert /proc/net/ipv6_route to seq_file interface Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 -- net/ipv6/route.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2a59610..b115555 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); -#define IPV6_TLV_TEL_DST_SIZE 8 - #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) #else diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c346ccf..a0c4ad1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2453,8 +2453,6 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) - struct rt6_proc_arg { char *buffer; -- cgit v1.1 From 82a39eb6b3829a02e235656feddb4542517fcabc Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 25 Nov 2010 03:15:07 +0000 Subject: ipv6: Prepare the tree for un-inlined jhash. jhash is widely used in the kernel and because the functions are inlined, the cost in size is significant. Also, the new jhash functions are slightly larger than the previous ones so better un-inline. As a preparation step, the calls to the internal macros are replaced with the plain jhash function calls. Signed-off-by: Jozsef Kadlecsik Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 22 ++++++++++------------ net/ipv6/reassembly.c | 36 ++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 32 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a16280..861d252 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -60,18 +60,16 @@ EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - u32 a = (__force u32)raddr->s6_addr32[0]; - u32 b = (__force u32)raddr->s6_addr32[1]; - u32 c = (__force u32)raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)raddr->s6_addr32[3]; - b += (__force u32)rport; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)raddr->s6_addr32[0], + (__force u32)raddr->s6_addr32[1], + (__force u32)raddr->s6_addr32[2], + rnd); + + c = jhash_2words((__force u32)raddr->s6_addr32[3], + (__force u32)rport, + c); return c & (synq_hsize - 1); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0f27664..07beeb0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr, u32 rnd) { - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)saddr->s6_addr32[0], + (__force u32)saddr->s6_addr32[1], + (__force u32)saddr->s6_addr32[2], + rnd); + + c = jhash_3words((__force u32)saddr->s6_addr32[3], + (__force u32)daddr->s6_addr32[0], + (__force u32)daddr->s6_addr32[1], + c); + + c = jhash_3words((__force u32)daddr->s6_addr32[2], + (__force u32)daddr->s6_addr32[3], + (__force u32)id, + c); return c & (INETFRAGS_HASHSZ - 1); } -- cgit v1.1 From b3419363808f2481b24a817f491878e1795db4c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Nov 2010 12:27:11 -0800 Subject: ipv6: Add infrastructure to bind inet_peer objects to routes. They are only allowed on cached ipv6 routes. Signed-off-by: David S. Miller --- net/ipv6/route.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a0c4ad1..026caef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -188,11 +188,29 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct inet_peer *peer = rt->rt6i_peer; if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); } + if (peer) { + BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); + rt->rt6i_peer = NULL; + inet_putpeer(peer); + } +} + +void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer *peer; + + if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) + return; + + peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + inet_putpeer(peer); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, -- cgit v1.1 From 3f419d2d487821093ee46e898b5f8747f9edc9cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Nov 2010 13:37:14 -0800 Subject: inet: Turn ->remember_stamp into ->get_peer in connection AF ops. Then we can make a completely generic tcp_remember_stamp() that uses ->get_peer() as a helper, minimizing the AF specific code and minimizing the eventual code duplication when we implement the ipv6 side of TW recycling. Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7e41e2c..e394d00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1818,10 +1818,10 @@ do_time_wait: goto discard_it; } -static int tcp_v6_remember_stamp(struct sock *sk) +struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { /* Alas, not yet... */ - return 0; + return NULL; } static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1830,7 +1830,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v6_remember_stamp, + .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -1862,7 +1862,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, -- cgit v1.1 From 381601e5bbae78d7c18d946fe874a63957edea13 Mon Sep 17 00:00:00 2001 From: Anders Franzen Date: Wed, 24 Nov 2010 05:47:18 +0000 Subject: Make the ip6_tunnel reflect the true mtu. The ip6_tunnel always assumes it consumes 40 bytes (ip6 hdr) of the mtu of the underlaying device. So for a normal ethernet bearer, the mtu of the ip6_tunnel is 1460. However, when creating a tunnel the encap limit option is enabled by default, and it consumes 8 bytes more, so the true mtu shall be 1452. I dont really know if this breaks some statement in some RFC, so this is a request for comments. Signed-off-by: Anders Franzen Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2a59610..70e891a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1175,6 +1175,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) sizeof (struct ipv6hdr); dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1363,12 +1365,17 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { + struct ip6_tnl *t; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_NETNS_LOCAL; -- cgit v1.1 From 6dcdd1b3694a4fa2b85167a9c860c7613a7553c7 Mon Sep 17 00:00:00 2001 From: David McCullough Date: Mon, 29 Nov 2010 19:32:34 +0000 Subject: net/ipv6/sit.c: return unhandled skb to tunnel4_rcv I found a problem using an IPv6 over IPv4 tunnel. When CONFIG_IPV6_SIT was enabled, the packets would be rejected as net/ipv6/sit.c was catching all IPPROTO_IPV6 packets and returning an ICMP port unreachable error. I think this patch fixes the problem cleanly. I believe the code in net/ipv4/tunnel4.c:tunnel4_rcv takes care of it properly if none of the handlers claim the skb. Signed-off-by: David McCullough Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d6bfaec..8c4d00c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb) return 0; } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + /* no tunnel matched, let upstream know, ipsec may handle it */ rcu_read_unlock(); + return 1; out: kfree_skb(skb); return 0; -- cgit v1.1 From ccb7c410ddc054b8c1ae780319bc98ae092d3854 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 18:09:13 -0800 Subject: timewait_sock: Create and use getpeer op. The only thing AF-specific about remembering the timestamp for a time-wait TCP socket is getting the peer. Abstract that behind a new timewait_sock_ops vector. Support for real IPV6 sockets is not filled in yet, but curiously this makes timewait recycling start to work for v4-mapped ipv6 sockets. Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e394d00..5f73a18 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -906,12 +906,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static struct timewait_sock_ops tcp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - static void __tcp_v6_send_check(struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -1818,12 +1812,30 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +{ + /* Alas, not yet... */ + return NULL; +} + +static void *tcp_v6_tw_get_peer(struct sock *sk) { + struct inet_timewait_sock *tw = inet_twsk(sk); + + if (tw->tw_family == AF_INET) + return tcp_v4_tw_get_peer(sk); + /* Alas, not yet... */ return NULL; } +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v6_tw_get_peer, +}; + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit v1.1 From ae4694b2d3e4c0f47c0e804a68417be57e5daf85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Dec 2010 10:59:22 -0800 Subject: ipv6: Create inet6_csk_route_req(). Brother of ipv4's inet_csk_route_req(). Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 861d252..e46305d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,6 +54,38 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); +struct dst_entry *inet6_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct inet6_request_sock *treq = inet6_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *final_p, final; + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + final_p = fl6_update_dst(&fl, np->opt, &final); + ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, &fl); + + if (ip6_dst_lookup(sk, &dst, &fl)) + return NULL; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + return NULL; + + return dst; +} + /* * request_sock (formerly open request) hash tables. */ -- cgit v1.1 From 493f377d6dd56f4e98b198d637fe714ab124681b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Dec 2010 12:14:29 -0800 Subject: tcp: Add timewait recycling bits to ipv6 connect code. This will also improve handling of ipv6 tcp socket request backlog when syncookies are not enabled. When backlog becomes very deep, last quarter of backlog is limited to validated destinations. Previously only ipv4 implemented this logic, but now ipv6 does too. Now we are only one step away from enabling timewait recycling for ipv6, and that step is simply filling in the implementation of tcp_v6_get_peer() and tcp_v6_tw_get_peer(). Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 101 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5f73a18..c2ebbe1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct rt6_info *rt; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); + rt = (struct rt6_info *) dst; + if (tcp_death_row.sysctl_tw_recycle && + !tp->rx_opt.ts_recent_stamp && + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { + struct inet_peer *peer = rt6_get_peer(rt); + /* + * VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state + * TIME-WAIT * and initialize rx_opt.ts_recent from it, + * when trying new connection. + */ + if (peer) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } + } + } + icsk->icsk_ext_hdr_len = 0; if (np->opt) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + @@ -1170,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1267,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, tcp_hdr(skb)); if (!isn) { + struct inet_peer *peer = NULL; + if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1279,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) treq->iif = inet6_iif(skb); - if (!want_cookie) { - isn = tcp_v6_init_sequence(skb); - } else { + + if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; + goto have_isn; + } + + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && + tcp_death_row.sysctl_tw_recycle && + (dst = inet6_csk_route_req(sk, req)) != NULL && + (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && + ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + &treq->rmt_addr)) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && + (s32)(peer->tcp_ts - req->ts_recent) > + TCP_PAWS_WINDOW) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + (!peer || !peer->tcp_ts_stamp) && + (!dst || !dst_metric(dst, RTAX_RTT))) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", + &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + goto drop_and_release; } + + isn = tcp_v6_init_sequence(skb); } +have_isn: tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); @@ -1298,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; +drop_and_release: + dst_release(dst); drop_and_free: reqsk_free(req); drop: @@ -1376,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (dst == NULL) { - struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + if (!dst) { + dst = inet6_csk_route_req(sk, req); + if (!dst) goto out; } -- cgit v1.1 From db3949c4506a21633469d71f2915cf660eea0a35 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Dec 2010 11:52:07 -0800 Subject: tcp: Implement ipv6 ->get_peer() and ->tw_get_peer(). Now ipv6 timewait recycling is fully implemented and enabled. Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c2ebbe1..3194585 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1865,19 +1865,33 @@ do_time_wait: static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { - /* Alas, not yet... */ - return NULL; + struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_peer *peer; + + if (!rt || + !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { + peer = inet_getpeer_v6(&np->daddr, 1); + *release_it = true; + } else { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + peer = rt->rt6i_peer; + *release_it = true; + } + + return peer; } static void *tcp_v6_tw_get_peer(struct sock *sk) { + struct inet6_timewait_sock *tw6 = inet6_twsk(sk); struct inet_timewait_sock *tw = inet_twsk(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); - /* Alas, not yet... */ - return NULL; + return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); } static struct timewait_sock_ops tcp6_timewait_sock_ops = { -- cgit v1.1 From b672083ed36a49c323737b7c7e1d5264a7c193af Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Dec 2010 18:05:12 +0000 Subject: ipv6: use ND_REACHABLE_TIME and ND_RETRANS_TIMER instead of magic number ND_REACHABLE_TIME and ND_RETRANS_TIMER have defined since v2.6.12-rc2, but never been used. So use them instead of magic number. This patch also changes original code style to read comfortably . Thank YOSHIFUJI Hideaki for pointing it out. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 998d6d2..e18f841 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -141,18 +141,18 @@ struct neigh_table nd_tbl = { .proxy_redo = pndisc_redo, .id = "ndisc_cache", .parms = { - .tbl = &nd_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len = 3, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, + .tbl = &nd_tbl, + .base_reachable_time = ND_REACHABLE_TIME, + .retrans_time = ND_RETRANS_TIMER, + .gc_staletime = 60 * HZ, + .reachable_time = ND_REACHABLE_TIME, + .delay_probe_time = 5 * HZ, + .queue_len = 3, + .ucast_probes = 3, + .mcast_probes = 3, + .anycast_delay = 1 * HZ, + .proxy_delay = (8 * HZ) / 10, + .proxy_qlen = 64, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, -- cgit v1.1 From defb3519a64141608725e2dac5a5aa9a3c644bae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Dec 2010 21:16:57 -0800 Subject: net: Abstract away all dst_entry metrics accesses. Use helper functions to hide all direct accesses, especially writes, to dst_entry metrics values. This will allow us to: 1) More easily change how the metrics are stored. 2) Implement COW for metrics. In particular this will help us put metrics into the inetpeer cache if that is what we end up doing. We can make the _metrics member a pointer instead of an array, initially have it point at the read-only metrics in the FIB, and then on the first set grab an inetpeer entry and point the _metrics member there. Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- net/ipv6/ndisc.c | 5 +++-- net/ipv6/route.c | 66 +++++++++++++++++++++++++++++++------------------------- 2 files changed, 40 insertions(+), 31 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e18f841..2342545 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: @@ -1377,7 +1378,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 026caef..4aed081 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -129,7 +129,6 @@ static struct rt6_info ip6_null_entry_template = { .__use = 1, .obsolete = -1, .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, }, @@ -150,7 +149,6 @@ static struct rt6_info ip6_prohibit_entry_template = { .__use = 1, .obsolete = -1, .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, }, @@ -166,7 +164,6 @@ static struct rt6_info ip6_blk_hole_entry_template = { .__use = 1, .obsolete = -1, .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, }, @@ -844,7 +841,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -928,10 +925,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { + u32 features = dst_metric(dst, RTAX_FEATURES); mtu = IPV6_MIN_MTU; - dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(dst, RTAX_FEATURES, features); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -989,9 +988,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); - rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1305,17 +1304,17 @@ install_route: goto out; } - rt->dst.metrics[type - 1] = nla_get_u32(nla); + dst_metric_set(&rt->dst, type, nla_get_u32(nla)); } } } if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1541,9 +1540,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst)); + dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); + dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst))); if (ip6_ins_rt(nrt)) goto out; @@ -1602,9 +1601,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&rt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_FEATURES, features); + } dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1621,9 +1623,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_FEATURES, features); + } /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1674,7 +1679,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; - memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->dst.dev = ort->dst.dev; if (rt->dst.dev) @@ -1966,9 +1971,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; @@ -2068,8 +2073,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) >= arg->mtu || (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - rt->dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); } return 0; } @@ -2295,7 +2300,7 @@ static int rt6_fill_node(struct net *net, NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->dst.neighbour) @@ -2686,6 +2691,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2696,6 +2702,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2705,6 +2712,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); #endif net->ipv6.sysctl.flush_delay = 0; -- cgit v1.1 From 68835aba4d9b74e2f94106d13b6a4bddc447c4c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2010 19:04:07 +0000 Subject: net: optimize INET input path further Followup of commit b178bb3dfc30 (net: reorder struct sock fields) Optimize INET input path a bit further, by : 1) moving sk_refcnt close to sk_lock. This reduces number of dirtied cache lines by one on 64bit arches (and 64 bytes cache line size). 2) moving inet_daddr & inet_rcv_saddr at the beginning of sk (same cache line than hash / family / bound_dev_if / nulls_node) This reduces number of accessed cache lines in lookups by one, and dont increase size of inet and timewait socks. inet and tw sockets now share same place-holder for these fields. Before patch : offsetof(struct sock, sk_refcnt) = 0x10 offsetof(struct sock, sk_lock) = 0x40 offsetof(struct sock, sk_receive_queue) = 0x60 offsetof(struct inet_sock, inet_daddr) = 0x270 offsetof(struct inet_sock, inet_rcv_saddr) = 0x274 After patch : offsetof(struct sock, sk_refcnt) = 0x44 offsetof(struct sock, sk_lock) = 0x48 offsetof(struct sock, sk_receive_queue) = 0x68 offsetof(struct inet_sock, inet_daddr) = 0x0 offsetof(struct inet_sock, inet_rcv_saddr) = 0x4 compute_score() (udp or tcp) now use a single cache line per ignored item, instead of two. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b541a4e..7aad127 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); -- cgit v1.1 From 5f75a1042feca37c0a436ba42a4b1f7f75c35778 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 7 Dec 2010 23:38:31 +0000 Subject: ipv6: fix nl group when advertising a new link New idev are advertised with NL group RTNLGRP_IPV6_IFADDR, but should use RTNLGRP_IPV6_IFINFO. Bug was introduced by commit 8d7a76c9. Signed-off-by: Wang Xuefu Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 23cc8e1..93b7a93 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4021,11 +4021,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC); return; errout: if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err); } static inline size_t inet6_prefix_nlmsg_size(void) -- cgit v1.1 From 457de4383ec6144df7d5a82cdfb110c825305a51 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 10 Dec 2010 13:16:09 -0800 Subject: ipv6: Fix 'release_it' logic in tcp_v6_get_peer() We accidently set it to "true" for the case where we are using a route bound peer. Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3194585..fee0768 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1877,7 +1877,7 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); peer = rt->rt6i_peer; - *release_it = true; + *release_it = false; } return peer; -- cgit v1.1 From c07224005dd3fe746246acadc9be652a588a4d7f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Dec 2010 03:40:30 +0000 Subject: net/ipv6/udp.c: fix typo in flush_stack() skb1 should be passed as parameter to sk_rcvqueues_full() here. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7aad127..26a8da3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -602,7 +602,7 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { - if (sk_rcvqueues_full(sk, skb)) { + if (sk_rcvqueues_full(sk, skb1)) { kfree_skb(skb1); goto drop; } -- cgit v1.1 From 040253c931e336360453c8d81f76d1b010b2b5e7 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 8 Dec 2010 04:37:51 +0000 Subject: xfrm: Traffic Flow Confidentiality for IPv6 ESP Add TFC padding to all packets smaller than the boundary configured on the xfrm state. If the boundary is larger than the PMTU, limit padding to the PMTU. Signed-off-by: Martin Willi Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ee9b93b..1b5c982 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -49,6 +49,8 @@ struct esp_skb_cb { #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) +static u32 esp6_get_mtu(struct xfrm_state *x, int mtu); + /* * Allocate an AEAD request structure with extra space for SG and IV. * @@ -140,6 +142,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int blksize; int clen; int alen; + int plen; + int tfclen; int nfrags; u8 *iv; u8 *tail; @@ -148,18 +152,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ err = -ENOMEM; - /* Round to block size */ - clen = skb->len; - aead = esp->aead; alen = crypto_aead_authsize(aead); + tfclen = 0; + if (x->tfcpad) { + struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); + u32 padto; + + padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached)); + if (skb->len < padto) + tfclen = padto - skb->len; + } blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(clen + 2, blksize); + clen = ALIGN(skb->len + 2 + tfclen, blksize); if (esp->padlen) clen = ALIGN(clen, esp->padlen); + plen = clen - skb->len - tfclen; - if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) + err = skb_cow_data(skb, tfclen + plen + alen, &trailer); + if (err < 0) goto error; nfrags = err; @@ -174,13 +186,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) /* Fill padding... */ tail = skb_tail_pointer(trailer); + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } do { int i; - for (i=0; ilen - 2; i++) + for (i = 0; i < plen - 2; i++) tail[i] = i + 1; } while (0); - tail[clen-skb->len - 2] = (clen - skb->len) - 2; - tail[clen - skb->len - 1] = *skb_mac_header(skb); + tail[plen - 2] = plen - 2; + tail[plen - 1] = *skb_mac_header(skb); pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); -- cgit v1.1 From 376d940ee91318cc6becefbb9454bb4454d7473f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 9 Dec 2010 04:37:48 +0000 Subject: inet6: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted here. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 54e8e42..059a3de 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -810,7 +810,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) } rcu_read_unlock(); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) goto out; for (skb = segs; skb; skb = skb->next) { -- cgit v1.1 From abbf46ae0e4954584eac599bec73502c1c805e9e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Dec 2010 21:14:46 -0800 Subject: ipv6: Use ip6_dst_hoplimit() instead of direct dst_metric() calls. Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/route.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 2933396..bf998fe 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -124,7 +124,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6h->version = 6; - ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4aed081..9b2d7bc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1119,6 +1119,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) } return hoplimit; } +EXPORT_SYMBOL(ip6_dst_hoplimit); /* * diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index b809812..645cb96 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); + top_iph->hop_limit = ip6_dst_hoplimit(dst->child); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); return 0; -- cgit v1.1 From 5170ae824ddf1988a63fb12cbedcff817634c444 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Dec 2010 21:35:57 -0800 Subject: net: Abstract RTAX_HOPLIMIT metric accesses behind helper. Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9b2d7bc..d9405d1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1104,7 +1104,7 @@ static int ipv6_get_mtu(struct net_device *dev) int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; @@ -1310,7 +1310,7 @@ install_route: } } - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + if (dst_metric_raw(&rt->dst, RTAX_HOPLIMIT) == 0) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); -- cgit v1.1 From a02e4b7dae455151c423e2f69ef222c502a321fd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Dec 2010 21:39:02 -0800 Subject: ipv6: Demark default hoplimit as zero. This is for consistency with ipv4. Using "-1" makes no sense. It was made this way a long time ago merely to be consistent with how the ipv6 socket hoplimit "default" is stored. Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9405d1..98796b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1105,7 +1105,7 @@ static int ipv6_get_mtu(struct net_device *dev) int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - if (hoplimit < 0) { + if (hoplimit == 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; @@ -1310,8 +1310,6 @@ install_route: } } - if (dst_metric_raw(&rt->dst, RTAX_HOPLIMIT) == 0) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); if (!dst_metric(&rt->dst, RTAX_ADVMSS)) -- cgit v1.1 From 0dbaee3b37e118a96bb7b8eb0d9bbaeeb46264be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 13 Dec 2010 12:52:14 -0800 Subject: net: Abstract default ADVMSS behind an accessor. Make all RTAX_ADVMSS metric accesses go through a new helper function, dst_metric_advmss(). Leave the actual default metric as "zero" in the real metric slot, and compute the actual default value dynamically via a new dst_ops AF specific callback. For stacked IPSEC routes, we use the advmss of the path which preserves existing behavior. Unlike ipv4/ipv6, DecNET ties the advmss to the mtu and thus updates advmss on pmtu updates. This inconsistency in advmss handling results in more raw metric accesses than I wish we ended up with. Signed-off-by: David S. Miller --- net/ipv6/route.c | 16 +++++++--------- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98796b0..d9cb832 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -76,6 +76,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); +static unsigned int ip6_default_advmss(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -103,6 +104,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, + .default_advmss = ip6_default_advmss, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -937,8 +939,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) +static unsigned int ip6_default_advmss(const struct dst_entry *dst) { + struct net_device *dev = dst->dev; + unsigned int mtu = dst_mtu(dst); + struct net *net = dev_net(dev); + mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) @@ -990,7 +996,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1312,8 +1317,6 @@ install_route: if (!dst_mtu(&rt->dst)) dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); - if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1540,8 +1543,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); - dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst))); if (ip6_ins_rt(nrt)) goto out; @@ -1971,7 +1972,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; @@ -2041,7 +2041,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; - struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -2073,7 +2072,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); } return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fee0768..20aa95e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1521,7 +1521,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + newtp->advmss = dst_metric_advmss(dst); tcp_initialize_rcv_mss(newsk); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; -- cgit v1.1 From d33e455337ea2c71d09d7f4367d6ad6dd32b6965 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 14 Dec 2010 13:01:14 -0800 Subject: net: Abstract default MTU metric calculation behind an accessor. Like RTAX_ADVMSS, make the default calculation go through a dst_ops method rather than caching the computation in the routing cache entries. Now dst metrics are pretty much left as-is when new entries are created, thus optimizing metric sharing becomes a real possibility. Signed-off-by: David S. Miller --- net/ipv6/route.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9cb832..e7efb26 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -77,6 +77,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); +static unsigned int ip6_default_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -105,6 +106,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, + .default_mtu = ip6_default_mtu, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -937,8 +939,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static int ipv6_get_mtu(struct net_device *dev); - static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -961,6 +961,20 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } +static unsigned int ip6_default_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = IPV6_MIN_MTU; + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); @@ -995,7 +1009,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1094,19 +1107,6 @@ out: Remove it only when all the things will work! */ -static int ipv6_get_mtu(struct net_device *dev) -{ - int mtu = IPV6_MIN_MTU; - struct inet6_dev *idev; - - rcu_read_lock(); - idev = __in6_dev_get(dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - return mtu; -} - int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); @@ -1315,8 +1315,6 @@ install_route: } } - if (!dst_mtu(&rt->dst)) - dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1541,8 +1539,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); - /* Reset pmtu, it may be better */ - dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); if (ip6_ins_rt(nrt)) goto out; @@ -1971,7 +1967,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; -- cgit v1.1 From d3052b557a1c94c21f50465702fa886753ce6b43 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Sat, 11 Dec 2010 15:20:11 +0000 Subject: ipv6: delete expired route in ip6_pmtu_deliver The first big packets sent to a "low-MTU" client correctly triggers the creation of a temporary route containing the reduced MTU. But after the temporary route has expired, new ICMP6 "packet too big" will be sent, rt6_pmtu_discovery will find the previous EXPIRED route check that its mtu isn't bigger then in icmp packet and do nothing before the temporary route will not deleted by gc. I make the simple experiment: while :; do time ( dd if=/dev/zero bs=10K count=1 | ssh hostname dd of=/dev/null ) || break; done The "time" reports real 0m0.197s if a temporary route isn't expired, but it reports real 0m52.837s (!!!!) immediately after a temporare route has expired. Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96455ffb..7659d6f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, { struct rt6_info *rt, *nrt; int allfrag = 0; - +again: rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + goto again; + } + if (pmtu >= dst_mtu(&rt->dst)) goto out; -- cgit v1.1 From fcbdf09d9652c8919dcf47072e3ae7dcb4eb98ac Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 16 Dec 2010 14:26:56 -0800 Subject: net: fix nulls list corruptions in sk_prot_alloc Special care is taken inside sk_port_alloc to avoid overwriting skc_node/skc_nulls_node. We should also avoid overwriting skc_bind_node/skc_portaddr_node. The patch fixes the following crash: BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] udp4_lib_lookup2+0xad/0x370 [] __udp4_lib_lookup+0x282/0x360 [] __udp4_lib_rcv+0x31e/0x700 [] ? ip_local_deliver_finish+0x65/0x190 [] ? ip_local_deliver+0x88/0xa0 [] udp_rcv+0x15/0x20 [] ip_local_deliver_finish+0x65/0x190 [] ip_local_deliver+0x88/0xa0 [] ip_rcv_finish+0x32d/0x6f0 [] ? netif_receive_skb+0x99c/0x11c0 [] ip_rcv+0x2bb/0x350 [] netif_receive_skb+0x99c/0x11c0 Signed-off-by: Leonard Crestez Signed-off-by: Octavian Purdila Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 1 + net/ipv6/udplite.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93..cd6cb7c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1477,6 +1477,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5f48fad..986c4de 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,6 +55,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { -- cgit v1.1 From 29ba5fed1bbd09c2cba890798c8f9eaab251401d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Dec 2010 11:28:12 +0000 Subject: ipv6: don't flush routes when setting loopback down When loopback device is being brought down, then keep the route table entries because they are special. The entries in the local table for linklocal routes and ::1 address should not be purged. This is a sub optimal solution to the problem and should be replaced by a better fix in future. Signed-off-by: Stephen Hemminger Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93b7a93..848b355 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2669,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - rt6_ifdown(net, dev); + /* Flush routes if device is being removed or it is not loopback */ + if (how || !(dev->flags & IFF_LOOPBACK)) + rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); -- cgit v1.1 From bc3ef6605ea325e41b586a76aadc3f731c317504 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Dec 2010 17:42:40 +0000 Subject: ipv6: fib6_ifdown cleanup Remove (unnecessary) casts to make code cleaner. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d5c3b45..373bd04 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2009,11 +2009,11 @@ struct arg_dev_net { static int fib6_ifdown(struct rt6_info *rt, void *arg) { - struct net_device *dev = ((struct arg_dev_net *)arg)->dev; - struct net *net = ((struct arg_dev_net *)arg)->net; + const struct arg_dev_net *adn = arg; + const struct net_device *dev = adn->dev; - if (((void *)rt->rt6i_dev == dev || dev == NULL) && - rt != net->ipv6.ip6_null_entry) { + if ((rt->rt6i_dev == dev || dev == NULL) && + rt != adn->net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } -- cgit v1.1 From d1ed113f1669390da9898da3beddcc058d938587 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Dec 2010 17:42:54 +0000 Subject: ipv6: remove duplicate neigh_ifdown When device is being set to down, neigh_ifdown was being called twice. Once from addrconf notifier and once from ndisc notifier. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 99d1888..5b189c9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2672,7 +2672,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flush routes if device is being removed or it is not loopback */ if (how || !(dev->flags & IFF_LOOPBACK)) rt6_ifdown(net, dev); - neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); if (idev == NULL) -- cgit v1.1 From ad0081e43af6de3fecf308b0d098f9611835766b Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 17 Dec 2010 11:42:42 +0000 Subject: ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed. This patch modifies IPsec6 to fragment IPv6 packets that are locally generated as needed. This version of the patch only fragments in tunnel mode, so that fragment headers will not be obscured by ESP in transport mode. Signed-off-by: David L Stevens Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 12 ++---------- net/ipv6/xfrm6_output.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 99157b4..94b5bf1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include #include -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int __ip6_local_out(struct sk_buff *skb) { @@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) -{ - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || @@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6434bd5..8e688b3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,7 @@ #include #include #include +#include #include int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb) return xfrm_output(skb); } +static int __xfrm6_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + if ((x && x->props.mode == XFRM_MODE_TUNNEL) && + ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb)))) { + return ip6_fragment(skb, xfrm6_output_finish); + } + return xfrm6_output_finish(skb); +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, xfrm6_output_finish); + skb_dst(skb)->dev, __xfrm6_output); } -- cgit v1.1