From fa86d322d89995fef1bfb5cc768b89d8c22ea0d9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 24 Mar 2008 14:48:59 -0700 Subject: [NEIGH]: Fix race between pneigh deletion and ipv6's ndisc_recv_ns (v3). Proxy neighbors do not have any reference counting, so any caller of pneigh_lookup (unless it's a netlink triggered add/del routine) should _not_ perform any actions on the found proxy entry. There's one exception from this rule - the ipv6's ndisc_recv_ns() uses found entry to check the flags for NTF_ROUTER. This creates a race between the ndisc and pneigh_delete - after the pneigh is returned to the caller, the nd_tbl.lock is dropped and the deleting procedure may proceed. One of the fixes would be to add a reference counting, but this problem exists for ndisc only. Besides such a patch would be too big for -rc4. So I propose to introduce a __pneigh_lookup() which is supposed to be called with the lock held and use it in ndisc code to check the flags on alive pneigh entry. Changes from v2: As David noticed, Exported the __pneigh_lookup() to ipv6 module. The checkpatch generates a warning on it, since the EXPORT_SYMBOL does not follow the symbol itself, but in this file all the exports come at the end, so I decided no to break this harmony. Changes from v1: Fixed comments from YOSHIFUJI - indentation of prototype in header and the pndisc_check_router() name - and a compilation fix, pointed by Daniel - the is_routed was (falsely) considered as uninitialized by gcc. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/neighbour.c | 23 +++++++++++++++++++++++ net/ipv6/ndisc.c | 22 ++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9a02b2..19b8e00 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -466,6 +466,28 @@ out_neigh_release: goto out; } +struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev) +{ + struct pneigh_entry *n; + int key_len = tbl->key_len; + u32 hash_val = *(u32 *)(pkey + key_len - 4); + + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && + (n->dev == dev || !n->dev)) + break; + } + + return n; +} + struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, int creat) @@ -2803,6 +2825,7 @@ EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); +EXPORT_SYMBOL_GPL(__pneigh_lookup); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 51557c2..452a2ac 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -676,6 +676,20 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) } } +static struct pneigh_entry *pndisc_check_router(struct net_device *dev, + struct in6_addr *addr, int *is_router) +{ + struct pneigh_entry *n; + + read_lock_bh(&nd_tbl.lock); + n = __pneigh_lookup(&nd_tbl, &init_net, addr, dev); + if (n != NULL) + *is_router = (n->flags & NTF_ROUTER); + read_unlock_bh(&nd_tbl.lock); + + return n; +} + static void ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); @@ -692,7 +706,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; - int is_router; + int is_router = 0; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -790,8 +804,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, &init_net, - &msg->target, dev, 0)) != NULL)) { + (pneigh = pndisc_check_router(dev, &msg->target, + &is_router)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -812,7 +826,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); + is_router = !!(pneigh ? is_router : idev->cnf.forwarding); if (dad) { struct in6_addr maddr; -- cgit v1.1 From df9dcb4588aca9cc243cf1f3f454361a84e1cbdb Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Mon, 24 Mar 2008 14:51:51 -0700 Subject: [IPSEC]: Fix inter address family IPsec tunnel handling. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: David S. Miller --- net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 22 +++++++++++++++--- net/xfrm/xfrm_output.c | 18 ++++++++++++++- net/xfrm/xfrm_state.c | 54 ++++++++++++++++++++++++++++++++++++++------ net/xfrm/xfrm_user.c | 7 ++---- 9 files changed, 90 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8dee617..584e6d7 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; - top_iph->protocol = x->inner_mode->afinfo->proto; + top_iph->protocol = xfrm_af2proto(skb->dst->ops->family); /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5a58a8..8c3180a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -56,7 +56,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 0c742fa..e20529b 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = x->inner_mode->afinfo->proto; + top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family); dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 79ccfb0..0af823c 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -62,7 +62,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486..e9ef9af 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1219,7 +1219,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 62188c6..7527940 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -84,14 +84,21 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_mode *inner_mode = x->inner_mode; int err; err = x->outer_mode->afinfo->extract_input(x, skb); if (err) return err; - skb->protocol = x->inner_mode->afinfo->eth_proto; - return x->inner_mode->input2(x, skb); + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + return -EAFNOSUPPORT; + } + + skb->protocol = inner_mode->afinfo->eth_proto; + return inner_mode->input2(x, skb); } EXPORT_SYMBOL(xfrm_prepare_input); @@ -101,6 +108,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) __be32 seq; struct xfrm_state *x; xfrm_address_t *daddr; + struct xfrm_mode *inner_mode; unsigned int family; int decaps = 0; int async = 0; @@ -207,7 +215,15 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (x->inner_mode->input(x, skb)) { + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + goto drop; + } + + if (inner_mode->input(x, skb)) { XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 569d377..2519129 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -124,7 +124,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) if (!x) return dst_output(skb); - err = nf_hook(x->inner_mode->afinfo->family, + err = nf_hook(skb->dst->ops->family, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm_output2); if (unlikely(err != 1)) @@ -193,4 +193,20 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } + +int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode; + if (x->sel.family == AF_UNSPEC) + inner_mode = xfrm_ip2inner_mode(x, + xfrm_af2proto(skb->dst->ops->family)); + else + inner_mode = x->inner_mode; + + if (inner_mode == NULL) + return -EAFNOSUPPORT; + return inner_mode->afinfo->extract_output(x, skb); +} + EXPORT_SYMBOL_GPL(xfrm_output); +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e8..58f1f93 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -388,6 +388,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->coaddr); if (x->inner_mode) xfrm_put_mode(x->inner_mode); + if (x->inner_mode_iaf) + xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); if (x->type) { @@ -523,6 +525,8 @@ struct xfrm_state *xfrm_state_alloc(void) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; + x->inner_mode = NULL; + x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -796,7 +800,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, x->sel.family) || + if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || @@ -1944,6 +1948,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *inner_mode; int family = x->props.family; int err; @@ -1962,13 +1967,48 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; - x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (x->inner_mode == NULL) - goto error; - if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) - goto error; + if (x->sel.family != AF_UNSPEC) { + inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) { + xfrm_put_mode(inner_mode); + goto error; + } + + x->inner_mode = inner_mode; + } else { + struct xfrm_mode *inner_mode_iaf; + + inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode); + goto error; + } + + inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); + if (inner_mode_iaf == NULL) + goto error; + + if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode_iaf); + goto error; + } + + if (x->props.family == AF_INET) { + x->inner_mode = inner_mode; + x->inner_mode_iaf = inner_mode_iaf; + } else { + x->inner_mode = inner_mode_iaf; + x->inner_mode_iaf = inner_mode; + } + } x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5..5d96f27 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -288,12 +288,9 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - /* - * Set inner address family if the KM left it as zero. - * See comment in validate_tmpl. - */ - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = p->family; + } /* -- cgit v1.1 From 0ed21b321a13421e2dfeaa70a6c324e05e3e91e6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Mar 2008 00:15:17 -0700 Subject: [VLAN]: Don't copy ALLMULTI/PROMISC flags from underlying device Changing these flags requires to use dev_set_allmulti/dev_set_promiscuity or dev_change_flags. Setting it directly causes two unwanted effects: - the next dev_change_flags call will notice a difference between dev->gflags and the actual flags, enable promisc/allmulti mode and incorrectly update dev->gflags - this keeps the underlying device in promisc/allmulti mode until the VLAN device is deleted Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8fbcefe..480ea90 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -660,7 +660,7 @@ static int vlan_dev_init(struct net_device *dev) int subclass = 0; /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~IFF_UP; + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.1 From 8c7230f781749cd7261b504c0bfa188bb96e77ee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 00:55:50 -0700 Subject: [IRDA]: Store irnet_socket termios properly. It should be a "struct ktermios" not a "struct termios". Based upon a build warning reported by Stephen Rothwell. Signed-off-by: David S. Miller --- net/irda/irnet/irnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bc2e15c..7873c39 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -405,7 +405,7 @@ typedef struct irnet_socket /* "pppd" interact directly with us on a /dev/ file */ struct file * file; /* File descriptor of this instance */ /* TTY stuff - to keep "pppd" happy */ - struct termios termios; /* Various tty flags */ + struct ktermios termios; /* Various tty flags */ /* Stuff for the control channel */ int event_index; /* Last read in the event log */ -- cgit v1.1 From 61ee6bd487b9cc160e533034eb338f2085dc7922 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Mar 2008 02:12:11 -0700 Subject: [NET]: Fix multicast device ioctl checks SIOCADDMULTI/SIOCDELMULTI check whether the driver has a set_multicast_list method to determine whether it supports multicast. Drivers implementing secondary unicast support use set_rx_mode however. Check for both dev->set_multicast_mode and dev->set_rx_mode to determine multicast capabilities. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fcdf03c..460e7f9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3329,7 +3329,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if (!dev->set_multicast_list || + if ((!dev->set_multicast_list && !dev->set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3338,7 +3338,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if (!dev->set_multicast_list || + if ((!dev->set_multicast_list && !dev->set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) -- cgit v1.1 From 5c2e2e239ebe44e3fdc5f2ae270d96c4ceee4e9a Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 26 Mar 2008 02:14:38 -0700 Subject: [AX25]: Remove obsolete references to BKL from TODO file. Given that there are no apparent calls to lock_kernel() or unlock_kernel() under net/ax25, delete the TODO reference related to that. Signed-off-by: Robert P. J. Day Signed-off-by: David S. Miller --- net/ax25/TODO | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ax25/TODO b/net/ax25/TODO index 4089c49..69fb4e3 100644 --- a/net/ax25/TODO +++ b/net/ax25/TODO @@ -9,10 +9,6 @@ being used. Routes to a device being taken down might be deleted by ax25_rt_device_down but added by somebody else before the device has been deleted fully. -Massive amounts of lock_kernel / unlock_kernel are just a temporary solution to -get around the removal of SOCKOPS_WRAP. A serious locking strategy has to be -implemented. - The ax25_rt_find_route synopsys is pervert but I somehow had to deal with the race caused by the static variable in it's previous implementation. -- cgit v1.1 From 7c0ecc4c4f8fd90988aab8a95297b9c0038b6160 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 26 Mar 2008 02:27:09 -0700 Subject: [ICMP]: Dst entry leak in icmp_send host re-lookup code (v2). Commit 8b7817f3a959ed99d7443afc12f78a7e1fcc2063 ([IPSEC]: Add ICMP host relookup support) introduced some dst leaks on error paths: the rt pointer can be forgotten to be put. Fix it bu going to a proper label. Found after net namespace's lo refused to unregister :) Many thanks to Den for valuable help during debugging. Herbert pointed out, that xfrm_lookup() will put the rtable in case of error itself, so the first goto fix is redundant. Signed-off-by: Pavel Emelyanov Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a13c074..a944e80 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -591,7 +591,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto out_unlock; + goto ende; if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) err = __ip_route_output_key(net, &rt2, &fl); @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) - goto out_unlock; + goto ende; /* Ugh! */ odst = skb_in->dst; @@ -614,7 +614,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (err) - goto out_unlock; + goto ende; err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); -- cgit v1.1 From 732c8bd590625e8bc0b88313b82930e336b2bec4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 Mar 2008 16:51:09 -0700 Subject: [IPSEC]: Fix BEET output The IPv6 BEET output function is incorrectly including the inner header in the payload to be protected. This causes a crash as the packet doesn't actually have that many bytes for a second header. The IPv4 BEET output on the other hand is broken when it comes to handling an inner IPv6 header since it always assumes an inner IPv4 header. This patch fixes both by making sure that neither BEET output function touches the inner header at all. All access is now done through the protocol-independent cb structure. Two new attributes are added to make this work, the IP header length and the IPv4 option length. They're filled in by the inner mode's output function. Thanks to Joakim Koskela for finding this problem. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_mode_beet.c | 11 +++++------ net/ipv4/xfrm4_state.c | 2 ++ net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_state.c | 2 ++ 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index b47030b..9c798ab 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -39,13 +39,11 @@ static void xfrm4_beet_make_header(struct sk_buff *skb) static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { struct ip_beet_phdr *ph; - struct iphdr *iph, *top_iph; + struct iphdr *top_iph; int hdrlen, optlen; - iph = ip_hdr(skb); - hdrlen = 0; - optlen = iph->ihl * 4 - sizeof(*iph); + optlen = XFRM_MODE_SKB_CB(skb)->optlen; if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); @@ -53,11 +51,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); xfrm4_beet_make_header(skb); - ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + ph = (struct ip_beet_phdr *) + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); top_iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index fdeebe6..07735ed 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -52,10 +52,12 @@ int xfrm4_extract_header(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; XFRM_MODE_SKB_CB(skb)->tos = iph->tos; XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 0527d11..d6ce400 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -45,6 +45,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index dc817e0..ff1e1db 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -174,10 +174,12 @@ int xfrm6_extract_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = 0; XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -- cgit v1.1 From 920fc941a9617f95ccb283037fe6f8a38d95bb69 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 27 Mar 2008 16:08:03 -0700 Subject: [ESP]: Ensure IV is in linear part of the skb to avoid BUG() due to OOB access ESP does not account for the IV size when calling pskb_may_pull() to ensure everything it accesses directly is within the linear part of a potential fragment. This results in a BUG() being triggered when the both the IPv4 and IPv6 ESP stack is fed with an skb where the first fragment ends between the end of the esp header and the end of the IV. This bug was found by Dirk Nehring . Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f3ceca3..4e73e57 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -336,7 +336,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct scatterlist *asg; int err = -EINVAL; - if (!pskb_may_pull(skb, sizeof(*esph))) + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) goto out; if (elen <= 0) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 0ec1402..c6bb4c6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -282,7 +282,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct scatterlist *sg; struct scatterlist *asg; - if (!pskb_may_pull(skb, sizeof(*esph))) { + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) { ret = -EINVAL; goto out; } -- cgit v1.1 From 8eeee8b152ae6bbe181518efaf62ba8e9c613693 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 27 Mar 2008 16:55:53 -0700 Subject: [NETFILTER]: Replate direct proc_fops assignment with proc_create call. This elliminates infamous race during module loading when one could lookup proc entry without proc_fops assigned. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 8 +++----- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++--- net/ipv4/netfilter/ipt_recent.c | 3 +-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7 ++----- net/ipv6/netfilter/ip6_queue.c | 8 +++----- net/netfilter/nf_conntrack_standalone.c | 9 +++------ net/netfilter/nf_log.c | 8 ++------ net/netfilter/nf_queue.c | 7 ++----- net/netfilter/nfnetlink_log.c | 9 ++------- net/netfilter/nfnetlink_queue.c | 9 ++------- net/netfilter/xt_hashlimit.c | 16 ++++++++-------- 11 files changed, 30 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index fe05da4..4dc1628 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -588,11 +588,9 @@ static int __init ip_queue_init(void) } #ifdef CONFIG_PROC_FS - proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); - if (proc) { - proc->owner = THIS_MODULE; - proc->proc_fops = &ip_queue_proc_fops; - } else { + proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, + &ip_queue_proc_fops); + if (!proc) { printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index c6cf84c..52926c8 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -167,14 +167,13 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, /* create proc dir entry */ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir); + c->pde = proc_create(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir, &clusterip_proc_fops); if (!c->pde) { kfree(c); return NULL; } } - c->pde->proc_fops = &clusterip_proc_fops; c->pde->data = c; #endif diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 8e8f042..50e0669 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -276,12 +276,11 @@ recent_mt_check(const char *tablename, const void *ip, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = create_proc_entry(t->name, ip_list_perms, proc_dir); + t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); if (t->proc == NULL) { kfree(t); goto out; } - t->proc->proc_fops = &recent_fops; t->proc->uid = ip_list_uid; t->proc->gid = ip_list_gid; t->proc->data = t; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 089252e..f500b0f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -395,13 +395,10 @@ int __init nf_conntrack_ipv4_compat_init(void) if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); + proc_stat = proc_create("ip_conntrack", S_IRUGO, + init_net.proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; - - proc_stat->proc_fops = &ct_cpu_seq_fops; - proc_stat->owner = THIS_MODULE; - return 0; err3: diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index cc2f9af..8d366f7 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -591,11 +591,9 @@ static int __init ip6_queue_init(void) } #ifdef CONFIG_PROC_FS - proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); - if (proc) { - proc->owner = THIS_MODULE; - proc->proc_fops = &ip6_queue_proc_fops; - } else { + proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, + &ip6_queue_proc_fops); + if (!proc) { printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e88e96a..8599068 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid); static int __init nf_conntrack_standalone_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc, *proc_stat; + struct proc_dir_entry *proc; #endif int ret = 0; @@ -407,12 +407,9 @@ static int __init nf_conntrack_standalone_init(void) proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); - if (!proc_stat) + if (!proc_create("nf_conntrack", S_IRUGO, + init_net.proc_net_stat, &ct_cpu_seq_fops)) goto cleanup_proc; - - proc_stat->proc_fops = &ct_cpu_seq_fops; - proc_stat->owner = THIS_MODULE; #endif #ifdef CONFIG_SYSCTL nf_ct_sysctl_header = register_sysctl_paths(nf_ct_path, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index cec9976..bc11d70 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -168,13 +168,9 @@ static const struct file_operations nflog_file_ops = { int __init netfilter_log_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; - - pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); - if (!pde) + if (!proc_create("nf_log", S_IRUGO, + proc_net_netfilter, &nflog_file_ops)) return -1; - - pde->proc_fops = &nflog_file_ops; #endif return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ddc80ea..bbd2689 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -348,12 +348,9 @@ static const struct file_operations nfqueue_file_ops = { int __init netfilter_queue_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; - - pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); - if (!pde) + if (!proc_create("nf_queue", S_IRUGO, + proc_net_netfilter, &nfqueue_file_ops)) return -1; - pde->proc_fops = &nfqueue_file_ops; #endif return 0; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index bf3f19b..b8173af 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -923,9 +923,6 @@ static const struct file_operations nful_file_ops = { static int __init nfnetlink_log_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nful; -#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -943,11 +940,9 @@ static int __init nfnetlink_log_init(void) } #ifdef CONFIG_PROC_FS - proc_nful = create_proc_entry("nfnetlink_log", 0440, - proc_net_netfilter); - if (!proc_nful) + if (!proc_create("nfnetlink_log", 0440, + proc_net_netfilter, &nful_file_ops)) goto cleanup_subsys; - proc_nful->proc_fops = &nful_file_ops; #endif return status; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 012cb69..10522c0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -896,9 +896,6 @@ static const struct file_operations nfqnl_file_ops = { static int __init nfnetlink_queue_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nfqueue; -#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -911,11 +908,9 @@ static int __init nfnetlink_queue_init(void) } #ifdef CONFIG_PROC_FS - proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); - if (!proc_nfqueue) + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) goto cleanup_subsys; - proc_nfqueue->proc_fops = &nfqnl_file_ops; #endif register_netdevice_notifier(&nfqnl_dev_notifier); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5418ce5..dc29007 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -237,14 +237,14 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6); + hinfo->pde = proc_create(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6, + &dl_file_ops); if (!hinfo->pde) { vfree(hinfo); return -1; } - hinfo->pde->proc_fops = &dl_file_ops; hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); @@ -301,14 +301,14 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6); + hinfo->pde = proc_create(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6, + &dl_file_ops); if (hinfo->pde == NULL) { vfree(hinfo); return -1; } - hinfo->pde->proc_fops = &dl_file_ops; hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); -- cgit v1.1