diff options
Diffstat (limited to 'net')
146 files changed, 5727 insertions, 899 deletions
diff --git a/net/Kconfig b/net/Kconfig index 7dfc949..9156578 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -37,6 +37,7 @@ config NETDEBUG source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" +source "net/iucv/Kconfig" config INET bool "TCP/IP networking" diff --git a/net/Makefile b/net/Makefile index ad4d14f..4854ac5 100644 --- a/net/Makefile +++ b/net/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ +obj-$(CONFIG_IUCV) += iucv/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/atm/common.c b/net/atm/common.c index fbabff4..a2878e9 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -816,7 +816,8 @@ static void __exit atm_exit(void) proto_unregister(&vcc_proto); } -module_init(atm_init); +subsys_initcall(atm_init); + module_exit(atm_exit); MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ea3337a..a25fa8c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -949,44 +949,29 @@ static ctl_table brnf_net_table[] = { }; #endif -int br_netfilter_init(void) +int __init br_netfilter_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) { - int ret; - - if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0) - continue; - - while (i--) - nf_unregister_hook(&br_nf_ops[i]); + int ret; + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret < 0) return ret; - } - #ifdef CONFIG_SYSCTL brnf_sysctl_header = register_sysctl_table(brnf_net_table, 0); if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) - nf_unregister_hook(&br_nf_ops[i]); - return -EFAULT; + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return -ENOMEM; } #endif - printk(KERN_NOTICE "Bridge firewalling registered\n"); - return 0; } void br_netfilter_fini(void) { - int i; - - for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--) - nf_unregister_hook(&br_nf_ops[i]); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL unregister_sysctl_table(brnf_sysctl_header); #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a913968..7d68b24 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -45,7 +45,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; hdr = nlmsg_data(nlh); hdr->ifi_family = AF_BRIDGE; @@ -72,7 +72,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* @@ -89,9 +90,12 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) goto errout; err = br_fill_ifinfo(skb, port, 0, 0, event, 0); - /* failure implies BUG in br_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in br_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index e4c6424..6afa4d0 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -93,6 +93,7 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask, return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index a184f87..985df82 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -96,6 +96,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, NIPQUAD(ih->daddr), ih->tos, ih->protocol); if (ih->protocol == IPPROTO_TCP || ih->protocol == IPPROTO_UDP || + ih->protocol == IPPROTO_UDPLITE || ih->protocol == IPPROTO_SCTP || ih->protocol == IPPROTO_DCCP) { struct tcpudphdr _ports, *pptr; diff --git a/net/core/dev.c b/net/core/dev.c index 455d589..1e94a1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3247,7 +3247,7 @@ void synchronize_net(void) * unregister_netdev() instead of this. */ -int unregister_netdevice(struct net_device *dev) +void unregister_netdevice(struct net_device *dev) { struct net_device *d, **dp; @@ -3258,7 +3258,9 @@ int unregister_netdevice(struct net_device *dev) if (dev->reg_state == NETREG_UNINITIALIZED) { printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " "was registered\n", dev->name, dev); - return -ENODEV; + + WARN_ON(1); + return; } BUG_ON(dev->reg_state != NETREG_REGISTERED); @@ -3280,11 +3282,7 @@ int unregister_netdevice(struct net_device *dev) break; } } - if (!d) { - printk(KERN_ERR "unregister net_device: '%s' not found\n", - dev->name); - return -ENODEV; - } + BUG_ON(!d); dev->reg_state = NETREG_UNREGISTERING; @@ -3316,7 +3314,6 @@ int unregister_netdevice(struct net_device *dev) synchronize_net(); dev_put(dev); - return 0; } /** diff --git a/net/core/dst.c b/net/core/dst.c index 836ec66..1a53fb3 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -99,7 +99,14 @@ static void dst_run_gc(unsigned long dummy) printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + /* if the next desired timer is more than 4 seconds in the future + * then round the timer to whole seconds + */ + if (dst_gc_timer_expires > 4*HZ) + mod_timer(&dst_gc_timer, + round_jiffies(jiffies + dst_gc_timer_expires)); + else + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1df6cd45..215f1bff 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -331,7 +331,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); if (nlh == NULL) - return -1; + return -EMSGSIZE; frh = nlmsg_data(nlh); frh->table = rule->table; @@ -359,7 +359,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) @@ -405,9 +406,12 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - /* failure implies BUG in fib_rule_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7300b6..054d464 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -696,7 +696,10 @@ next_elt: if (!expire) expire = 1; - mod_timer(&tbl->gc_timer, now + expire); + if (expire>HZ) + mod_timer(&tbl->gc_timer, round_jiffies(now + expire)); + else + mod_timer(&tbl->gc_timer, now + expire); write_unlock(&tbl->lock); } @@ -1637,7 +1640,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); @@ -1706,7 +1709,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, nla_put_failure: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int neightbl_fill_param_info(struct sk_buff *skb, @@ -1720,7 +1724,7 @@ static int neightbl_fill_param_info(struct sk_buff *skb, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); @@ -1737,7 +1741,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, return nlmsg_end(skb, nlh); errout: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, @@ -1955,7 +1960,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; @@ -1987,7 +1992,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } @@ -2429,9 +2435,12 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) goto errout; err = neigh_fill_info(skb, n, 0, 0, type, flags); - /* failure implies BUG in neigh_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e76539a..9bf9ae0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -320,7 +320,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; @@ -384,7 +384,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -633,9 +634,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(nskb); + goto errout; + } err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); @@ -678,9 +682,12 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) goto errout; err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - /* failure implies BUG in if_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 40402c5..5c452a3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -479,7 +479,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " "R_sample=%dus, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_s, w_init, + sk, hctx->ccid3hctx_s, + (unsigned long long)w_init, (int)r_sample, (unsigned)(hctx->ccid3hctx_x >> 6)); @@ -1005,7 +1006,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) DCCP_BUG_ON(r_sample < 0); if (unlikely(r_sample <= t_elapsed)) DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n", - r_sample, t_elapsed); + (long)r_sample, (long)t_elapsed); else r_sample -= t_elapsed; CCID3_RTT_SANITY_CHECK(r_sample); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 90c74b4..fa2c982 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -72,7 +72,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6b91a9d..79140b3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1041,7 +1041,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(&dst, &fl, sk, 1); if (err < 0) goto failure; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 63b3fa2..4843856 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1024,7 +1024,6 @@ static int __init dccp_init(void) do { dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); - dccp_hashinfo.ehash_size >>= 1; while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; @@ -1037,9 +1036,10 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { + for (i = 0; i < dccp_hashinfo.ehash_size; i++) { rwlock_init(&dccp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); } bhash_order = ehash_order; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ed083ab..90b3dfd 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -749,7 +749,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_DECnet; @@ -768,7 +768,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) @@ -781,9 +782,12 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) goto errout; err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0); - /* failure implies BUG in dn_ifaddr_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); errout: if (err < 0) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 13b2421..c1f0cc1 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -350,7 +350,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlmsg_failure: rtattr_failure: skb_trim(skb, b - skb->data); - return -1; + return -EMSGSIZE; } @@ -368,9 +368,12 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); - /* failure implies BUG in dn_fib_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: if (err < 0) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8640096..5750a2b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1007,7 +1007,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->sport, inet->dport, sk); + inet->sport, inet->dport, sk, 0); if (err) return err; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 7b068a8..0072d79 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (err) return err; if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 480ace9..c402036 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1140,7 +1140,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; @@ -1167,7 +1167,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1225,9 +1226,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, goto errout; err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); - /* failure implies BUG in inet_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63b8a9..be1028c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -314,9 +314,12 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, err = fib_dump_info(skb, info->pid, seq, event, tb_id, fa->fa_type, fa->fa_scope, key, dst_len, fa->fa_tos, fa->fa_info, 0); - /* failure implies BUG in fib_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); errout: @@ -960,7 +963,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; @@ -1031,7 +1034,8 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0017ccb..024ae56 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -455,6 +455,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } + if (!skb) + return NULL; psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 77761ac..8aa7d51 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -153,7 +153,7 @@ static int inet_csk_diag_fill(struct sock *sk, rtattr_failure: nlmsg_failure: skb_trim(skb, b - skb->data); - return -1; + return -EMSGSIZE; } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, @@ -209,7 +209,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, return skb->len; nlmsg_failure: skb_trim(skb, previous_tail - skb->data); - return -1; + return -EMSGSIZE; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -274,11 +274,14 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, if (!rep) goto out; - if (sk_diag_fill(sk, rep, req->idiag_ext, - NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, nlh) <= 0) - BUG(); - + err = sk_diag_fill(sk, rep, req->idiag_ext, + NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, nlh); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) @@ -775,7 +778,7 @@ next_normal: struct inet_timewait_sock *tw; inet_twsk_for_each(tw, node, - &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + &head->twchain) { if (num < s_num) goto next_dying; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8c79c8a..150ace1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -212,7 +212,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 9f414e3..a73cf93 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -78,8 +78,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_del_node_init(sk)) sock_prot_dec_use(sk->sk_prot); - /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain); + /* Step 3: Hash TW into TIMEWAIT chain. */ + inet_twsk_add_node(tw, &ehead->twchain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 476cb60..51c8350 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1008,7 +1008,8 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9d719d6..da8bbd2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -754,7 +754,8 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 47bd3ad1..9b08e7a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -361,32 +361,6 @@ config IP_NF_TARGET_ULOG To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TCPMSS - tristate "TCPMSS target support" - depends on IP_NF_IPTABLES - ---help--- - This option adds a `TCPMSS' target, which allows you to alter the - MSS value of TCP SYN packets, to control the maximum size for that - connection (usually limiting it to your outgoing interface's MTU - minus 40). - - This is used to overcome criminally braindead ISPs or servers which - block ICMP Fragmentation Needed packets. The symptoms of this - problem are that everything works fine from your Linux - firewall/router, but machines behind it can never exchange large - packets: - 1) Web browsers connect, then hang with no data received. - 2) Small mail works fine, but large emails hang. - 3) ssh works fine, but scp hangs after initial handshaking. - - Workaround: activate this option and add a rule to your firewall - configuration like: - - iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ - -j TCPMSS --clamp-mss-to-pmtu - - To compile it as a module, choose M here. If unsure, say N. - # NAT + specific targets: ip_conntrack config IP_NF_NAT tristate "Full NAT" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 16d177b..6625ec6 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -103,7 +103,6 @@ obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 06e4e8a..c34f48f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -50,12 +50,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int ip_ct_tcp_be_liberal __read_mostly = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose __read_mostly = 3; +int ip_ct_tcp_loose __read_mostly = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -694,11 +691,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). */ @@ -743,15 +739,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + ip_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -762,8 +756,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = ip_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1105,8 +1097,6 @@ static int tcp_new(struct ip_conntrack *conntrack, tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (ip_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1127,11 +1117,11 @@ static int tcp_new(struct ip_conntrack *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? */ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } conntrack->proto.tcp.seen[1].td_end = 0; diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 9d1a517..5e08c2b 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -246,8 +246,9 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { DEBUGP("get_unique_tuple: Found current src map\n"); - if (!ip_nat_used_tuple(tuple, conntrack)) - return; + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!ip_nat_used_tuple(tuple, conntrack)) + return; } } @@ -261,6 +262,13 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Change protocol info to have some randomization */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { + proto->unique_tuple(tuple, range, maniptype, conntrack); + ip_nat_proto_put(proto); + return; + } + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index ee80feb..2e5c4bc 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -183,7 +183,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, + tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, csum_partial((char *)tcph, datalen, 0)); diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index b586d18..14ff24f 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -75,6 +76,10 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.tcp.port) - min + 1; } + /* Start from random port to avoid prediction */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!ip_nat_used_tuple(tuple, conntrack)) { diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 5ced087..dfd5216 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/udp.h> @@ -74,6 +75,10 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.udp.port) - min + 1; } + /* Start from random port to avoid prediction */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!ip_nat_used_tuple(tuple, conntrack)) diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index a176aa3..e1c8a05 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -86,7 +86,7 @@ static struct } }; -static struct ipt_table nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -99,7 +99,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, - const struct ipt_target *target, + const struct xt_target *target, const void *targinfo) { struct ip_conntrack *ct; @@ -141,7 +141,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, - const struct ipt_target *target, + const struct xt_target *target, const void *targinfo) { struct ip_conntrack *ct; @@ -166,7 +166,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, static int ipt_snat_checkentry(const char *tablename, const void *entry, - const struct ipt_target *target, + const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -182,7 +182,7 @@ static int ipt_snat_checkentry(const char *tablename, static int ipt_dnat_checkentry(const char *tablename, const void *entry, - const struct ipt_target *target, + const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -193,6 +193,10 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } + if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { + printk("DNAT: port randomization not supported\n"); + return 0; + } return 1; } @@ -257,8 +261,9 @@ int ip_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct ipt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg = { .name = "SNAT", + .family = AF_INET, .target = ipt_snat_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -266,8 +271,9 @@ static struct ipt_target ipt_snat_reg = { .checkentry = ipt_snat_checkentry, }; -static struct ipt_target ipt_dnat_reg = { +static struct xt_target ipt_dnat_reg = { .name = "DNAT", + .family = AF_INET, .target = ipt_dnat_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -282,27 +288,27 @@ int __init ip_nat_rule_init(void) ret = ipt_register_table(&nat_table, &nat_initial_table.repl); if (ret != 0) return ret; - ret = ipt_register_target(&ipt_snat_reg); + ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; - ret = ipt_register_target(&ipt_dnat_reg); + ret = xt_register_target(&ipt_dnat_reg); if (ret != 0) goto unregister_snat; return ret; unregister_snat: - ipt_unregister_target(&ipt_snat_reg); + xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(&nat_table); + xt_unregister_table(&nat_table); return ret; } void ip_nat_rule_cleanup(void) { - ipt_unregister_target(&ipt_dnat_reg); - ipt_unregister_target(&ipt_snat_reg); + xt_unregister_target(&ipt_dnat_reg); + xt_unregister_target(&ipt_snat_reg); ipt_unregister_table(&nat_table); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fc1f153..5a7b3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -216,7 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -507,7 +507,7 @@ check_entry(struct ipt_entry *e, const char *name) static inline int check_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask) { - struct ipt_match *match; + struct xt_match *match; int ret; match = m->u.kernel.match; @@ -531,7 +531,7 @@ find_check_match(struct ipt_entry_match *m, unsigned int hookmask, unsigned int *i) { - struct ipt_match *match; + struct xt_match *match; int ret; match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, @@ -557,7 +557,7 @@ err: static inline int check_target(struct ipt_entry *e, const char *name) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; int ret; t = ipt_get_target(e); @@ -580,7 +580,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; int ret; unsigned int j; @@ -818,7 +818,7 @@ get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters * alloc_counters(struct ipt_table *table) +static inline struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; @@ -843,7 +843,7 @@ static inline struct xt_counters * alloc_counters(struct ipt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct ipt_table *table, + struct xt_table *table, void __user *userptr) { unsigned int off, num; @@ -1046,7 +1046,7 @@ static int compat_table_info(struct xt_table_info *info, static int get_info(void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; - struct ipt_table *t; + struct xt_table *t; int ret; if (*len != sizeof(struct ipt_getinfo)) { @@ -1107,7 +1107,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; - struct ipt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { duprintf("get_entries: %u < %d\n", *len, @@ -1151,7 +1151,7 @@ __do_replace(const char *name, unsigned int valid_hooks, void __user *counters_ptr) { int ret; - struct ipt_table *t; + struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; @@ -1302,7 +1302,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) char *name; int size; void *ptmp; - struct ipt_table *t; + struct xt_table *t; struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; @@ -1437,7 +1437,7 @@ compat_check_calc_match(struct ipt_entry_match *m, unsigned int hookmask, int *size, int *i) { - struct ipt_match *match; + struct xt_match *match; match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, m->u.user.revision), @@ -1466,7 +1466,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, const char *name) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; unsigned int entry_offset; int ret, off, h, j; @@ -1550,7 +1550,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; struct ipt_entry *de; unsigned int origsize; int ret, h; @@ -1795,7 +1795,7 @@ struct compat_ipt_get_entries }; static int compat_copy_entries_to_user(unsigned int total_size, - struct ipt_table *table, void __user *userptr) + struct xt_table *table, void __user *userptr) { unsigned int off, num; struct compat_ipt_entry e; @@ -1869,7 +1869,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; - struct ipt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { @@ -2052,7 +2052,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return 0; } -void ipt_unregister_table(struct ipt_table *table) +void ipt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2124,7 +2124,7 @@ icmp_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct ipt_target ipt_standard_target = { +static struct xt_target ipt_standard_target = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET, @@ -2135,7 +2135,7 @@ static struct ipt_target ipt_standard_target = { #endif }; -static struct ipt_target ipt_error_target = { +static struct xt_target ipt_error_target = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, @@ -2158,7 +2158,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #endif }; -static struct ipt_match icmp_matchstruct = { +static struct xt_match icmp_matchstruct = { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b1c1116..343c2ab 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -26,6 +26,7 @@ #include <linux/netfilter_arp.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack_compat.h> @@ -247,6 +248,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) switch (iph->protocol) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: case IPPROTO_DCCP: case IPPROTO_ICMP: @@ -329,7 +331,7 @@ target(struct sk_buff **pskb, if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) - return IPT_CONTINUE; + return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here @@ -367,7 +369,7 @@ target(struct sk_buff **pskb, * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ (*pskb)->pkt_type = PACKET_HOST; - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -470,8 +472,9 @@ static void destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } -static struct ipt_target clusterip_tgt = { +static struct xt_target clusterip_tgt = { .name = "CLUSTERIP", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_clusterip_tgt_info), .checkentry = checkentry, @@ -727,7 +730,7 @@ static int __init ipt_clusterip_init(void) { int ret; - ret = ipt_register_target(&clusterip_tgt); + ret = xt_register_target(&clusterip_tgt); if (ret < 0) return ret; @@ -753,7 +756,7 @@ cleanup_hook: nf_unregister_hook(&cip_arp_ops); #endif /* CONFIG_PROC_FS */ cleanup_target: - ipt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tgt); return ret; } @@ -765,7 +768,7 @@ static void __exit ipt_clusterip_fini(void) remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); #endif nf_unregister_hook(&cip_arp_ops); - ipt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tgt); } module_init(ipt_clusterip_init); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index b55d670..b5ca593 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -9,12 +9,14 @@ * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp */ +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/tcp.h> #include <net/checksum.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_ECN.h> @@ -95,7 +97,7 @@ target(struct sk_buff **pskb, if (!set_ect_tcp(pskb, einfo)) return NF_DROP; - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -119,7 +121,7 @@ checkentry(const char *tablename, return 0; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) - && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) { + && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); return 0; @@ -127,8 +129,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ecn_reg = { +static struct xt_target ipt_ecn_reg = { .name = "ECN", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", @@ -138,12 +141,12 @@ static struct ipt_target ipt_ecn_reg = { static int __init ipt_ecn_init(void) { - return ipt_register_target(&ipt_ecn_reg); + return xt_register_target(&ipt_ecn_reg); } static void __exit ipt_ecn_fini(void) { - ipt_unregister_target(&ipt_ecn_reg); + xt_unregister_target(&ipt_ecn_reg); } module_init(ipt_ecn_init); diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index c96de16..f68370f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -20,7 +20,7 @@ #include <net/route.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_LOG.h> MODULE_LICENSE("GPL"); @@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb, ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); - return IPT_CONTINUE; + return XT_CONTINUE; } static int ipt_log_checkentry(const char *tablename, @@ -455,8 +455,9 @@ static int ipt_log_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_log_reg = { +static struct xt_target ipt_log_reg = { .name = "LOG", + .family = AF_INET, .target = ipt_log_target, .targetsize = sizeof(struct ipt_log_info), .checkentry = ipt_log_checkentry, @@ -471,8 +472,11 @@ static struct nf_logger ipt_log_logger ={ static int __init ipt_log_init(void) { - if (ipt_register_target(&ipt_log_reg)) - return -EINVAL; + int ret; + + ret = xt_register_target(&ipt_log_reg); + if (ret < 0) + return ret; if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { printk(KERN_WARNING "ipt_LOG: not logging via system console " "since somebody else already registered for PF_INET\n"); @@ -486,7 +490,7 @@ static int __init ipt_log_init(void) static void __exit ipt_log_fini(void) { nf_log_unregister_logger(&ipt_log_logger); - ipt_unregister_target(&ipt_log_reg); + xt_unregister_target(&ipt_log_reg); } module_init(ipt_log_init); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d669685..91c42ef 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -25,7 +25,7 @@ #else #include <linux/netfilter_ipv4/ip_nat_rule.h> #endif -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -190,8 +190,9 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static struct ipt_target masquerade = { +static struct xt_target masquerade = { .name = "MASQUERADE", + .family = AF_INET, .target = masquerade_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -204,7 +205,7 @@ static int __init ipt_masquerade_init(void) { int ret; - ret = ipt_register_target(&masquerade); + ret = xt_register_target(&masquerade); if (ret == 0) { /* Register for device down reports */ @@ -218,7 +219,7 @@ static int __init ipt_masquerade_init(void) static void __exit ipt_masquerade_fini(void) { - ipt_unregister_target(&masquerade); + xt_unregister_target(&masquerade); unregister_netdevice_notifier(&masq_dev_notifier); unregister_inetaddr_notifier(&masq_inet_notifier); } diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 9390e90..b4acc24 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -15,6 +15,7 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -88,8 +89,9 @@ target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target target_module = { +static struct xt_target target_module = { .name = MODULENAME, + .family = AF_INET, .target = target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -101,12 +103,12 @@ static struct ipt_target target_module = { static int __init ipt_netmap_init(void) { - return ipt_register_target(&target_module); + return xt_register_target(&target_module); } static void __exit ipt_netmap_fini(void) { - ipt_unregister_target(&target_module); + xt_unregister_target(&target_module); } module_init(ipt_netmap_init); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 462eceb..54cd021 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -18,6 +18,7 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -104,8 +105,9 @@ redirect_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target redirect_reg = { +static struct xt_target redirect_reg = { .name = "REDIRECT", + .family = AF_INET, .target = redirect_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -116,12 +118,12 @@ static struct ipt_target redirect_reg = { static int __init ipt_redirect_init(void) { - return ipt_register_target(&redirect_reg); + return xt_register_target(&redirect_reg); } static void __exit ipt_redirect_fini(void) { - ipt_unregister_target(&redirect_reg); + xt_unregister_target(&redirect_reg); } module_init(ipt_redirect_init); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f0319e5..e4a1ddb 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -22,6 +22,7 @@ #include <net/tcp.h> #include <net/route.h> #include <net/dst.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> #ifdef CONFIG_BRIDGE_NETFILTER @@ -116,7 +117,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* Adjust TCP checksum */ tcph->check = 0; - tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), + tcph->check = tcp_v4_check(sizeof(struct tcphdr), nskb->nh.iph->saddr, nskb->nh.iph->daddr, csum_partial((char *)tcph, @@ -230,7 +231,7 @@ static int check(const char *tablename, } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP - || (e->ip.invflags & IPT_INV_PROTO)) { + || (e->ip.invflags & XT_INV_PROTO)) { DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); return 0; } @@ -238,8 +239,9 @@ static int check(const char *tablename, return 1; } -static struct ipt_target ipt_reject_reg = { +static struct xt_target ipt_reject_reg = { .name = "REJECT", + .family = AF_INET, .target = reject, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", @@ -251,12 +253,12 @@ static struct ipt_target ipt_reject_reg = { static int __init ipt_reject_init(void) { - return ipt_register_target(&ipt_reject_reg); + return xt_register_target(&ipt_reject_reg); } static void __exit ipt_reject_fini(void) { - ipt_unregister_target(&ipt_reject_reg); + xt_unregister_target(&ipt_reject_reg); } module_init(ipt_reject_init); diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 3dcf294..a1cdd12 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -34,6 +34,7 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -186,8 +187,9 @@ same_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target same_reg = { +static struct xt_target same_reg = { .name = "SAME", + .family = AF_INET, .target = same_target, .targetsize = sizeof(struct ipt_same_info), .table = "nat", @@ -199,12 +201,12 @@ static struct ipt_target same_reg = { static int __init ipt_same_init(void) { - return ipt_register_target(&same_reg); + return xt_register_target(&same_reg); } static void __exit ipt_same_fini(void) { - ipt_unregister_target(&same_reg); + xt_unregister_target(&same_reg); } module_init(ipt_same_init); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c deleted file mode 100644 index 93eb5c3..0000000 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * This is a module which is used for setting the MSS option in TCP packets. - * - * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/ip.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_TCPMSS.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("iptables TCP MSS modification module"); - -static inline unsigned int -optlen(const u_int8_t *opt, unsigned int offset) -{ - /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) - return 1; - else - return opt[offset+1]; -} - -static unsigned int -ipt_tcpmss_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - struct tcphdr *tcph; - struct iphdr *iph; - u_int16_t tcplen, newmss; - __be16 newtotlen, oldval; - unsigned int i; - u_int8_t *opt; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return NF_DROP; - - iph = (*pskb)->nh.iph; - tcplen = (*pskb)->len - iph->ihl*4; - tcph = (void *)iph + iph->ihl*4; - - /* Since it passed flags test in tcp match, we know it is is - not a fragment, and has data >= tcp header length. SYN - packets should not contain data: if they did, then we risk - running over MTU, sending Frag Needed and breaking things - badly. --RR */ - if (tcplen != tcph->doff*4) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: bad length (%d bytes)\n", - (*pskb)->len); - return NF_DROP; - } - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + - sizeof(struct tcphdr)) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target: " - "unknown or invalid path-MTU (%d)\n", - dst_mtu((*pskb)->dst)); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - - sizeof(struct tcphdr); - } else - newmss = tcpmssinfo->mss; - - opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { - if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && - opt[i+1] == TCPOLEN_MSS) { - u_int16_t oldmss; - - oldmss = (opt[i+2] << 8) | opt[i+3]; - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - oldmss <= newmss) - return IPT_CONTINUE; - - opt[i+2] = (newmss & 0xff00) >> 8; - opt[i+3] = (newmss & 0x00ff); - - nf_proto_csum_replace2(&tcph->check, *pskb, - htons(oldmss), htons(newmss), 0); - return IPT_CONTINUE; - } - } - - /* - * MSS Option not found ?! add it.. - */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) - return NF_DROP; - kfree_skb(*pskb); - *pskb = newskb; - iph = (*pskb)->nh.iph; - tcph = (void *)iph + iph->ihl*4; - } - - skb_put((*pskb), TCPOLEN_MSS); - - opt = (u_int8_t *)tcph + sizeof(struct tcphdr); - memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - - nf_proto_csum_replace2(&tcph->check, *pskb, - htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); - opt[0] = TCPOPT_MSS; - opt[1] = TCPOLEN_MSS; - opt[2] = (newmss & 0xff00) >> 8; - opt[3] = (newmss & 0x00ff); - - nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); - - oldval = ((__be16 *)tcph)[6]; - tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, *pskb, - oldval, ((__be16 *)tcph)[6], 0); - - newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - nf_csum_replace2(&iph->check, iph->tot_len, newtotlen); - iph->tot_len = newtotlen; - return IPT_CONTINUE; -} - -#define TH_SYN 0x02 - -static inline int find_syn_match(const struct ipt_entry_match *m) -{ - const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - - if (strcmp(m->u.kernel.match->name, "tcp") == 0 && - tcpinfo->flg_cmp & TH_SYN && - !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) - return 1; - - return 0; -} - -/* Must specify -p tcp --syn/--tcp-flags SYN */ -static int -ipt_tcpmss_checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - const struct ipt_entry *e = e_void; - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_IP_FORWARD) | - (1 << NF_IP_LOCAL_OUT) | - (1 << NF_IP_POST_ROUTING))) != 0) { - printk("TCPMSS: path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; - } - - if (IPT_MATCH_ITERATE(e, find_syn_match)) - return 1; - printk("TCPMSS: Only works on TCP SYN packets\n"); - return 0; -} - -static struct ipt_target ipt_tcpmss_reg = { - .name = "TCPMSS", - .target = ipt_tcpmss_target, - .targetsize = sizeof(struct ipt_tcpmss_info), - .proto = IPPROTO_TCP, - .checkentry = ipt_tcpmss_checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_tcpmss_init(void) -{ - return ipt_register_target(&ipt_tcpmss_reg); -} - -static void __exit ipt_tcpmss_fini(void) -{ - ipt_unregister_target(&ipt_tcpmss_reg); -} - -module_init(ipt_tcpmss_init); -module_exit(ipt_tcpmss_fini); diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 18e74ac..29b05a6 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -13,7 +13,7 @@ #include <linux/ip.h> #include <net/checksum.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_TOS.h> MODULE_LICENSE("GPL"); @@ -40,7 +40,7 @@ target(struct sk_buff **pskb, iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -63,8 +63,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_tos_reg = { +static struct xt_target ipt_tos_reg = { .name = "TOS", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_tos_target_info), .table = "mangle", @@ -74,12 +75,12 @@ static struct ipt_target ipt_tos_reg = { static int __init ipt_tos_init(void) { - return ipt_register_target(&ipt_tos_reg); + return xt_register_target(&ipt_tos_reg); } static void __exit ipt_tos_fini(void) { - ipt_unregister_target(&ipt_tos_reg); + xt_unregister_target(&ipt_tos_reg); } module_init(ipt_tos_init); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index fffe5ca..d2b6fa3 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -12,7 +12,7 @@ #include <linux/ip.h> #include <net/checksum.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_TTL.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -59,7 +59,7 @@ ipt_ttl_target(struct sk_buff **pskb, iph->ttl = new_ttl; } - return IPT_CONTINUE; + return XT_CONTINUE; } static int ipt_ttl_checkentry(const char *tablename, @@ -80,8 +80,9 @@ static int ipt_ttl_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_TTL = { +static struct xt_target ipt_TTL = { .name = "TTL", + .family = AF_INET, .target = ipt_ttl_target, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", @@ -91,12 +92,12 @@ static struct ipt_target ipt_TTL = { static int __init ipt_ttl_init(void) { - return ipt_register_target(&ipt_TTL); + return xt_register_target(&ipt_TTL); } static void __exit ipt_ttl_fini(void) { - ipt_unregister_target(&ipt_TTL); + xt_unregister_target(&ipt_TTL); } module_init(ipt_ttl_init); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index dbd3478..7af57a3 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -57,7 +57,7 @@ #include <linux/mm.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> #include <net/sock.h> #include <linux/bitops.h> @@ -132,7 +132,6 @@ static void ulog_send(unsigned int nlgroupnum) ub->qlen = 0; ub->skb = NULL; ub->lastnlh = NULL; - } @@ -314,7 +313,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); - return IPT_CONTINUE; + return XT_CONTINUE; } static void ipt_logfn(unsigned int pf, @@ -363,8 +362,9 @@ static int ipt_ulog_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ulog_reg = { +static struct xt_target ipt_ulog_reg = { .name = "ULOG", + .family = AF_INET, .target = ipt_ulog_target, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ipt_ulog_checkentry, @@ -379,7 +379,7 @@ static struct nf_logger ipt_ulog_logger = { static int __init ipt_ulog_init(void) { - int i; + int ret, i; DEBUGP("ipt_ULOG: init module\n"); @@ -400,9 +400,10 @@ static int __init ipt_ulog_init(void) if (!nflognl) return -ENOMEM; - if (ipt_register_target(&ipt_ulog_reg) != 0) { + ret = xt_register_target(&ipt_ulog_reg); + if (ret < 0) { sock_release(nflognl->sk_socket); - return -EINVAL; + return ret; } if (nflog) nf_log_register(PF_INET, &ipt_ulog_logger); @@ -419,7 +420,7 @@ static void __exit ipt_ulog_fini(void) if (nflog) nf_log_unregister_logger(&ipt_ulog_logger); - ipt_unregister_target(&ipt_ulog_reg); + xt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); /* remove pending timers and free allocated skb's */ @@ -435,7 +436,6 @@ static void __exit ipt_ulog_fini(void) ub->skb = NULL; } } - } module_init(ipt_ulog_init); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 7b60eb7..648f555 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -16,7 +16,7 @@ #include <net/route.h> #include <linux/netfilter_ipv4/ipt_addrtype.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -44,8 +44,9 @@ static int match(const struct sk_buff *skb, return ret; } -static struct ipt_match addrtype_match = { +static struct xt_match addrtype_match = { .name = "addrtype", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE @@ -53,12 +54,12 @@ static struct ipt_match addrtype_match = { static int __init ipt_addrtype_init(void) { - return ipt_register_match(&addrtype_match); + return xt_register_match(&addrtype_match); } static void __exit ipt_addrtype_fini(void) { - ipt_unregister_match(&addrtype_match); + xt_unregister_match(&addrtype_match); } module_init(ipt_addrtype_init); diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 1798f86..42f4122 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -6,12 +6,13 @@ * published by the Free Software Foundation. */ +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ipt_ah.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); @@ -86,8 +87,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match ah_match = { +static struct xt_match ah_match = { .name = "ah", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, @@ -97,12 +99,12 @@ static struct ipt_match ah_match = { static int __init ipt_ah_init(void) { - return ipt_register_match(&ah_match); + return xt_register_match(&ah_match); } static void __exit ipt_ah_fini(void) { - ipt_unregister_match(&ah_match); + xt_unregister_match(&ah_match); } module_init(ipt_ah_init); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index dafbdec..37508b2 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -9,10 +9,13 @@ * published by the Free Software Foundation. */ +#include <linux/in.h> +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/tcp.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_ecn.h> @@ -109,8 +112,9 @@ static int checkentry(const char *tablename, const void *ip_void, return 1; } -static struct ipt_match ecn_match = { +static struct xt_match ecn_match = { .name = "ecn", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = checkentry, @@ -119,12 +123,12 @@ static struct ipt_match ecn_match = { static int __init ipt_ecn_init(void) { - return ipt_register_match(&ecn_match); + return xt_register_match(&ecn_match); } static void __exit ipt_ecn_fini(void) { - ipt_unregister_match(&ecn_match); + xt_unregister_match(&ecn_match); } module_init(ipt_ecn_init); diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 5202edd..05de593 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_iprange.h> MODULE_LICENSE("GPL"); @@ -63,22 +63,22 @@ match(const struct sk_buff *skb, return 1; } -static struct ipt_match iprange_match = { +static struct xt_match iprange_match = { .name = "iprange", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_iprange_info), - .destroy = NULL, .me = THIS_MODULE }; static int __init ipt_iprange_init(void) { - return ipt_register_match(&iprange_match); + return xt_register_match(&iprange_match); } static void __exit ipt_iprange_fini(void) { - ipt_unregister_match(&iprange_match); + xt_unregister_match(&iprange_match); } module_init(ipt_iprange_init); diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 78c336f1..9f496ac 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -15,7 +15,7 @@ #include <net/sock.h> #include <linux/netfilter_ipv4/ipt_owner.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); @@ -68,8 +68,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match owner_match = { +static struct xt_match owner_match = { .name = "owner", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_owner_info), .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING), @@ -79,12 +80,12 @@ static struct ipt_match owner_match = { static int __init ipt_owner_init(void) { - return ipt_register_match(&owner_match); + return xt_register_match(&owner_match); } static void __exit ipt_owner_fini(void) { - ipt_unregister_match(&owner_match); + xt_unregister_match(&owner_match); } module_init(ipt_owner_init); diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 4db0e73..6b97b67 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -12,6 +12,7 @@ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ #include <linux/init.h> +#include <linux/ip.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -24,7 +25,7 @@ #include <linux/skbuff.h> #include <linux/inet.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -462,8 +463,9 @@ static struct file_operations recent_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct ipt_match recent_match = { +static struct xt_match recent_match = { .name = "recent", + .family = AF_INET, .match = ipt_recent_match, .matchsize = sizeof(struct ipt_recent_info), .checkentry = ipt_recent_checkentry, @@ -479,13 +481,13 @@ static int __init ipt_recent_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = ipt_register_match(&recent_match); + err = xt_register_match(&recent_match); #ifdef CONFIG_PROC_FS if (err) return err; proc_dir = proc_mkdir("ipt_recent", proc_net); if (proc_dir == NULL) { - ipt_unregister_match(&recent_match); + xt_unregister_match(&recent_match); err = -ENOMEM; } #endif @@ -495,7 +497,7 @@ static int __init ipt_recent_init(void) static void __exit ipt_recent_exit(void) { BUG_ON(!list_empty(&tables)); - ipt_unregister_match(&recent_match); + xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS remove_proc_entry("ipt_recent", proc_net); #endif diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 5549c39..5d33b51 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -8,11 +8,12 @@ * published by the Free Software Foundation. */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv4/ipt_tos.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables TOS match module"); @@ -32,8 +33,9 @@ match(const struct sk_buff *skb, return (skb->nh.iph->tos == info->tos) ^ info->invert; } -static struct ipt_match tos_match = { +static struct xt_match tos_match = { .name = "tos", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_tos_info), .me = THIS_MODULE, @@ -41,12 +43,12 @@ static struct ipt_match tos_match = { static int __init ipt_multiport_init(void) { - return ipt_register_match(&tos_match); + return xt_register_match(&tos_match); } static void __exit ipt_multiport_fini(void) { - ipt_unregister_match(&tos_match); + xt_unregister_match(&tos_match); } module_init(ipt_multiport_init); diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index a5243bd..d5cd984e 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -9,11 +9,12 @@ * published by the Free Software Foundation. */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv4/ipt_ttl.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IP tables TTL matching module"); @@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb, return 0; } -static struct ipt_match ttl_match = { +static struct xt_match ttl_match = { .name = "ttl", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, @@ -57,13 +59,12 @@ static struct ipt_match ttl_match = { static int __init ipt_ttl_init(void) { - return ipt_register_match(&ttl_match); + return xt_register_match(&ttl_match); } static void __exit ipt_ttl_fini(void) { - ipt_unregister_match(&ttl_match); - + xt_unregister_match(&ttl_match); } module_init(ipt_ttl_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index e2e7dd8..51053cb 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -74,7 +74,7 @@ static struct } }; -static struct ipt_table packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index af29398..a532e4d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -103,7 +103,7 @@ static struct } }; -static struct ipt_table packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index bcbeb4a..5277550 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -79,7 +79,7 @@ static struct } }; -static struct ipt_table packet_raw = { +static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 86a9227..998b255 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -254,8 +254,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { DEBUGP("get_unique_tuple: Found current src map\n"); - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!nf_nat_used_tuple(tuple, ct)) + return; } } @@ -269,6 +270,13 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, proto = nf_nat_proto_find_get(orig_tuple->dst.protonum); + /* Change protocol info to have some randomization */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { + proto->unique_tuple(tuple, range, maniptype, ct); + nf_nat_proto_put(proto); + return; + } + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) && diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 98fbfc8..dc6738b 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -176,7 +176,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, + tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, csum_partial((char *)tcph, datalen, 0)); diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 7e26a7e..439164c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -75,6 +76,9 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.tcp.port) - min + 1; } + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!nf_nat_used_tuple(tuple, ct)) diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index ab0ce4c..8cae6e0 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/ip.h> #include <linux/udp.h> @@ -73,6 +74,9 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.udp.port) - min + 1; } + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!nf_nat_used_tuple(tuple, ct)) diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index b868ee0..7f95b4e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -119,7 +119,7 @@ static struct } }; -static struct ipt_table nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -226,6 +226,10 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } + if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { + printk("DNAT: port randomization not supported\n"); + return 0; + } return 1; } @@ -290,7 +294,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct ipt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg = { .name = "SNAT", .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 00d6dea..5a964a1 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -32,12 +32,6 @@ #define DEBUGP(format, args...) #endif -#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ - : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ - : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \ - : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ - : "*ERROR*"))) - #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a6c63bb..fed6a1e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -489,7 +489,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); + err = ip_route_output_flow(&rt, &fl, sk, 1); } if (err) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2daa0dc..baee304 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2635,7 +2635,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; @@ -2718,7 +2718,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b67e0dd..5bd43d7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2415,10 +2415,11 @@ void __init tcp_init(void) &tcp_hashinfo.ehash_size, NULL, 0); - tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1; - for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) { + tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; + for (i = 0; i < tcp_hashinfo.ehash_size; i++) { rwlock_init(&tcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); + INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); } tcp_hashinfo.bhash = @@ -2475,7 +2476,7 @@ void __init tcp_init(void) printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", - tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size); + tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c26076f..c610989 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -936,28 +936,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); + struct sk_buff *cached_skb; int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; u32 lost_retrans = 0; int flag = 0; int dup_sack = 0; + int cached_fack_count; int i; + int first_sack_index; if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; + /* Check for D-SACK. */ + if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + dup_sack = 1; + tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1 && + !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && + !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { + dup_sack = 1; + tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + + /* D-SACK for already forgotten data... + * Do dumb counting. */ + if (dup_sack && + !after(ntohl(sp[0].end_seq), prior_snd_una) && + after(ntohl(sp[0].end_seq), tp->undo_marker)) + tp->undo_retrans--; + + /* Eliminate too old ACKs, but take into + * account more or less fresh ones, they can + * contain valid SACK info. + */ + if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) + return 0; + /* SACK fastpath: * if the only SACK change is the increase of the end_seq of * the first block then only apply that SACK block * and use retrans queue hinting otherwise slowpath */ flag = 1; - for (i = 0; i< num_sacks; i++) { - __u32 start_seq = ntohl(sp[i].start_seq); - __u32 end_seq = ntohl(sp[i].end_seq); + for (i = 0; i < num_sacks; i++) { + __be32 start_seq = sp[i].start_seq; + __be32 end_seq = sp[i].end_seq; - if (i == 0){ + if (i == 0) { if (tp->recv_sack_cache[i].start_seq != start_seq) flag = 0; } else { @@ -967,39 +997,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } tp->recv_sack_cache[i].start_seq = start_seq; tp->recv_sack_cache[i].end_seq = end_seq; - - /* Check for D-SACK. */ - if (i == 0) { - u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; - - if (before(start_seq, ack)) { - dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); - } else if (num_sacks > 1 && - !after(end_seq, ntohl(sp[1].end_seq)) && - !before(start_seq, ntohl(sp[1].start_seq))) { - dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (dup_sack && - !after(end_seq, prior_snd_una) && - after(end_seq, tp->undo_marker)) - tp->undo_retrans--; - - /* Eliminate too old ACKs, but take into - * account more or less fresh ones, they can - * contain valid SACK info. - */ - if (before(ack, prior_snd_una - tp->max_window)) - return 0; - } + } + /* Clear the rest of the cache sack blocks so they won't match mistakenly. */ + for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) { + tp->recv_sack_cache[i].start_seq = 0; + tp->recv_sack_cache[i].end_seq = 0; } + first_sack_index = 0; if (flag) num_sacks = 1; else { @@ -1016,6 +1021,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tmp = sp[j]; sp[j] = sp[j+1]; sp[j+1] = tmp; + + /* Track where the first SACK block goes to */ + if (j == first_sack_index) + first_sack_index = j+1; } } @@ -1025,20 +1034,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* clear flag as used for different purpose in following code */ flag = 0; + /* Use SACK fastpath hint if valid */ + cached_skb = tp->fastpath_skb_hint; + cached_fack_count = tp->fastpath_cnt_hint; + if (!cached_skb) { + cached_skb = sk->sk_write_queue.next; + cached_fack_count = 0; + } + for (i=0; i<num_sacks; i++, sp++) { struct sk_buff *skb; __u32 start_seq = ntohl(sp->start_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count; - /* Use SACK fastpath hint if valid */ - if (tp->fastpath_skb_hint) { - skb = tp->fastpath_skb_hint; - fack_count = tp->fastpath_cnt_hint; - } else { - skb = sk->sk_write_queue.next; - fack_count = 0; - } + skb = cached_skb; + fack_count = cached_fack_count; /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) @@ -1048,8 +1059,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int in_sack, pcount; u8 sacked; - tp->fastpath_skb_hint = skb; - tp->fastpath_cnt_hint = fack_count; + cached_skb = skb; + cached_fack_count = fack_count; + if (i == first_sack_index) { + tp->fastpath_skb_hint = skb; + tp->fastpath_cnt_hint = fack_count; + } /* The retransmission queue is always in order, so * we can short-circuit the walk early. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12de90a..f51d640 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -191,7 +191,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -502,11 +502,11 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(th, len, - inet->saddr, inet->daddr, 0); + th->check = ~tcp_v4_check(len, inet->saddr, + inet->daddr, 0); skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, + th->check = tcp_v4_check(len, inet->saddr, inet->daddr, csum_partial((char *)th, th->doff << 2, skb->csum)); @@ -525,7 +525,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th = skb->h.th; th->check = 0; - th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); + th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; @@ -747,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, if (skb) { struct tcphdr *th = skb->h.th; - th->check = tcp_v4_check(th, skb->len, + th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, csum_partial((char *)th, skb->len, @@ -1514,7 +1514,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, + if (!tcp_v4_check(skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; @@ -2051,7 +2051,7 @@ static void *established_get_first(struct seq_file *seq) } st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { + &tcp_hashinfo.ehash[st->bucket].twchain) { if (tw->tw_family != st->family) { continue; } @@ -2107,7 +2107,7 @@ get_tw: } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); + tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); goto get_tw; found: cur = sk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 975f447..58b7111 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -965,7 +965,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. */ - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cfff930..8b54c68 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -629,7 +629,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); + err = ip_route_output_flow(&rt, &fl, sk, 1); if (err) goto out; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index e23c21d..e54c549 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -23,6 +23,12 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) IP_ECN_set_ce(inner_iph); } +static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos)) + IP6_ECN_set_ce(skb->nh.ipv6h); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -36,6 +42,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct iphdr *iph, *top_iph; int flags; @@ -48,15 +55,27 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; + flags = x->props.flags; + /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + if (xdst->route->ops->family == AF_INET) { + top_iph->protocol = IPPROTO_IPIP; + top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (iph->frag_off & htons(IP_DF)); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph; + top_iph->protocol = IPPROTO_IPV6; + top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h)); + top_iph->frag_off = 0; + } +#endif - flags = x->props.flags; if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? - 0 : (iph->frag_off & htons(IP_DF)); if (!top_iph->frag_off) __ip_select_ident(top_iph, dst->child, 0); @@ -64,7 +83,6 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - top_iph->protocol = IPPROTO_IPIP; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); return 0; @@ -75,8 +93,16 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; int err = -EINVAL; - if (iph->protocol != IPPROTO_IPIP) - goto out; + switch(iph->protocol){ + case IPPROTO_IPIP: +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case IPPROTO_IPV6: + break; +#endif + default: + goto out; + } + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -84,10 +110,19 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(iph, skb->h.ipiph); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); + if (iph->protocol == IPPROTO_IPIP) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(iph, skb->h.ipiph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(iph, skb); + skb->protocol = htons(ETH_P_IPV6); + } +#endif skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fb9f69c..699f27c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -72,13 +72,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; - __be32 remote = fl->fl4_dst; - __be32 local = fl->fl4_src; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { - .saddr = local, - .daddr = remote, + .saddr = fl->fl4_src, + .daddr = fl->fl4_dst, .tos = fl->fl4_tos } } @@ -94,7 +92,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -116,19 +113,28 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = xfrm[i]->id.daddr.a4; - local = xfrm[i]->props.saddr.a4; - tunnel = 1; - } + header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - fl_tunnel.fl4_src = local; - fl_tunnel.fl4_dst = remote; + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + unsigned short encap_family = xfrm[i]->props.family; + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; + fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; + break; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); + break; +#endif + default: + BUG_ON(1); + } err = xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET); + &fl_tunnel, encap_family); if (err) goto error; } else @@ -145,6 +151,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; + struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -162,8 +169,18 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = xfrm4_output; - if (rt->peer) + /* XXX: When IPv6 module can be unloaded, we should manage reference + * to xfrm6_output in afinfo->output. Miyazawa + * */ + afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); + if (!afinfo) { + dst = *dst_p; + err = -EAFNOSUPPORT; + goto error; + } + dst_prev->output = afinfo->output; + xfrm_state_put_afinfo(afinfo); + if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; /* Sheit... I remember I did this right. Apparently, @@ -274,7 +291,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); - if (likely(xdst->u.rt.peer)) + if (dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 3cc3df0..93e2c06 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -51,6 +51,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .output = xfrm4_output, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e385469..fe5e1d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3117,7 +3117,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); @@ -3137,8 +3137,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) - return nlmsg_cancel(skb, nlh); + put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3155,13 +3157,15 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3178,13 +3182,15 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3334,9 +3340,12 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWADDR, 0); - /* failure implies BUG in inet6_ifaddr_msgsize() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout_ifa; + } err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); @@ -3354,9 +3363,12 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); - /* failure implies BUG in inet6_ifaddr_msgsize() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) @@ -3426,7 +3438,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; hdr = nlmsg_data(nlh); hdr->ifi_family = AF_INET6; @@ -3469,7 +3481,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -3507,9 +3520,12 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); - /* failure implies BUG in inet6_if_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) @@ -3533,7 +3549,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; pmsg = nlmsg_data(nlh); pmsg->prefix_family = AF_INET6; @@ -3558,7 +3574,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static void inet6_prefix_notify(int event, struct inet6_dev *idev, @@ -3572,9 +3589,12 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); - /* failure implies BUG in inet6_prefix_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c94fea..ecde301 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -178,7 +178,7 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; /* source address lookup done in ip6_dst_lookup */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b7e5bae..e611169 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -79,7 +79,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + sk_for_each(sk, node, &head->twchain) { const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__portpair *)&(tw->tw_dport)) == ports && @@ -183,7 +183,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + sk_for_each(sk2, node, &head->twchain) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); tw = inet_twsk(sk2); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8d91834..2b9e3bb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -999,7 +999,8 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; dev = t->dev; } - err = unregister_netdevice(dev); + err = 0; + unregister_netdevice(dev); break; default: err = -EINVAL; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 882cde4..e3ec216 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1582,6 +1582,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } + if (!skb) + return NULL; psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc)); *psrc = psf->sf_addr; scount++; stotal++; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index be7dd7d..681bb07 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -89,7 +89,6 @@ static int mip6_mh_len(int type) int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh *mh; - int mhlen; if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) @@ -103,31 +102,6 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); return -1; } - mhlen = (mh->ip6mh_hdrlen + 1) << 3; - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } - if (skb->ip_summed == CHECKSUM_NONE) { - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb_checksum(skb, 0, mhlen, 0))) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " - "[" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(skb->nh.ipv6h->saddr), - NIP6(skb->nh.ipv6h->daddr)); - return -1; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; - } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index adcd613..cd549ae 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -114,6 +114,14 @@ config IP6_NF_MATCH_AH To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_MH + tristate "MH match support" + depends on IP6_NF_IPTABLES + help + This module allows one to match MH packets. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_MATCH_EUI64 tristate "EUI64 address check" depends on IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index ac1dfeb..4513eab 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o +obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 99502c5..7083e1c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -530,7 +530,7 @@ check_match(struct ip6t_entry_match *m, unsigned int hookmask, unsigned int *i) { - struct ip6t_match *match; + struct xt_match *match; int ret; match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, @@ -564,14 +564,14 @@ err: return ret; } -static struct ip6t_target ip6t_standard_target; +static struct xt_target ip6t_standard_target; static inline int check_entry(struct ip6t_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ip6t_entry_target *t; - struct ip6t_target *target; + struct xt_target *target; int ret; unsigned int j; @@ -1348,13 +1348,13 @@ icmp6_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct ip6t_target ip6t_standard_target = { +static struct xt_target ip6t_standard_target = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, }; -static struct ip6t_target ip6t_error_target = { +static struct xt_target ip6t_error_target = { .name = IP6T_ERROR_TARGET, .target = ip6t_error, .targetsize = IP6T_FUNCTION_MAXNAMELEN, @@ -1371,7 +1371,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get = do_ip6t_get_ctl, }; -static struct ip6t_match icmp6_matchstruct = { +static struct xt_match icmp6_matchstruct = { .name = "icmp6", .match = &icmp6_match, .matchsize = sizeof(struct ip6t_icmp), diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 435750f..04e5001 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -9,12 +9,13 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> +#include <linux/ipv6.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_HL.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); static unsigned int ip6t_hl_target(struct sk_buff **pskb, @@ -52,10 +53,9 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, break; } - if (new_hl != ip6h->hop_limit) - ip6h->hop_limit = new_hl; + ip6h->hop_limit = new_hl; - return IP6T_CONTINUE; + return XT_CONTINUE; } static int ip6t_hl_checkentry(const char *tablename, @@ -79,8 +79,9 @@ static int ip6t_hl_checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_HL = { +static struct xt_target ip6t_HL = { .name = "HL", + .family = AF_INET6, .target = ip6t_hl_target, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", @@ -90,12 +91,12 @@ static struct ip6t_target ip6t_HL = { static int __init ip6t_hl_init(void) { - return ip6t_register_target(&ip6t_HL); + return xt_register_target(&ip6t_HL); } static void __exit ip6t_hl_fini(void) { - ip6t_unregister_target(&ip6t_HL); + xt_unregister_target(&ip6t_HL); } module_init(ip6t_hl_init); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 33b1faa..5587a77 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -21,6 +21,7 @@ #include <net/tcp.h> #include <net/ipv6.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); @@ -442,7 +443,7 @@ ip6t_log_target(struct sk_buff **pskb, ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); - return IP6T_CONTINUE; + return XT_CONTINUE; } @@ -466,8 +467,9 @@ static int ip6t_log_checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_log_reg = { +static struct xt_target ip6t_log_reg = { .name = "LOG", + .family = AF_INET6, .target = ip6t_log_target, .targetsize = sizeof(struct ip6t_log_info), .checkentry = ip6t_log_checkentry, @@ -482,8 +484,11 @@ static struct nf_logger ip6t_logger = { static int __init ip6t_log_init(void) { - if (ip6t_register_target(&ip6t_log_reg)) - return -EINVAL; + int ret; + + ret = xt_register_target(&ip6t_log_reg); + if (ret < 0) + return ret; if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { printk(KERN_WARNING "ip6t_LOG: not logging via system console " "since somebody else already registered for PF_INET6\n"); @@ -497,7 +502,7 @@ static int __init ip6t_log_init(void) static void __exit ip6t_log_fini(void) { nf_log_unregister_logger(&ip6t_logger); - ip6t_unregister_target(&ip6t_log_reg); + xt_unregister_target(&ip6t_log_reg); } module_init(ip6t_log_init); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 311eae8..278349c 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -26,6 +26,7 @@ #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/flow.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> @@ -234,7 +235,7 @@ static int check(const char *tablename, } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP - || (e->ipv6.invflags & IP6T_INV_PROTO)) { + || (e->ipv6.invflags & XT_INV_PROTO)) { DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); return 0; } @@ -242,8 +243,9 @@ static int check(const char *tablename, return 1; } -static struct ip6t_target ip6t_reject_reg = { +static struct xt_target ip6t_reject_reg = { .name = "REJECT", + .family = AF_INET6, .target = reject6_target, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", @@ -255,12 +257,12 @@ static struct ip6t_target ip6t_reject_reg = { static int __init ip6t_reject_init(void) { - return ip6t_register_target(&ip6t_reject_reg); + return xt_register_target(&ip6t_reject_reg); } static void __exit ip6t_reject_fini(void) { - ip6t_unregister_target(&ip6t_reject_reg); + xt_unregister_target(&ip6t_reject_reg); } module_init(ip6t_reject_init); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 4648664..456c76a 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -15,6 +15,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_ah.h> @@ -118,8 +119,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match ah_match = { +static struct xt_match ah_match = { .name = "ah", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_ah), .checkentry = checkentry, @@ -128,12 +130,12 @@ static struct ip6t_match ah_match = { static int __init ip6t_ah_init(void) { - return ip6t_register_match(&ah_match); + return xt_register_match(&ah_match); } static void __exit ip6t_ah_fini(void) { - ip6t_unregister_match(&ah_match); + xt_unregister_match(&ah_match); } module_init(ip6t_ah_init); diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 4f6b84c..967bed7 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -12,6 +12,7 @@ #include <linux/ipv6.h> #include <linux/if_ether.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); @@ -61,8 +62,9 @@ match(const struct sk_buff *skb, return 0; } -static struct ip6t_match eui64_match = { +static struct xt_match eui64_match = { .name = "eui64", + .family = AF_INET6, .match = match, .matchsize = sizeof(int), .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | @@ -72,12 +74,12 @@ static struct ip6t_match eui64_match = { static int __init ip6t_eui64_init(void) { - return ip6t_register_match(&eui64_match); + return xt_register_match(&eui64_match); } static void __exit ip6t_eui64_fini(void) { - ip6t_unregister_match(&eui64_match); + xt_unregister_match(&eui64_match); } module_init(ip6t_eui64_init); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index cd22eaa..5a5da71 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -14,6 +14,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_frag.h> @@ -135,8 +136,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match frag_match = { +static struct xt_match frag_match = { .name = "frag", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_frag), .checkentry = checkentry, @@ -145,12 +147,12 @@ static struct ip6t_match frag_match = { static int __init ip6t_frag_init(void) { - return ip6t_register_match(&frag_match); + return xt_register_match(&frag_match); } static void __exit ip6t_frag_fini(void) { - ip6t_unregister_match(&frag_match); + xt_unregister_match(&frag_match); } module_init(ip6t_frag_init); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 3f25bab..d2373c7 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -16,6 +16,7 @@ #include <asm/byteorder.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_opts.h> diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 44a729e..601cc12 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -8,11 +8,12 @@ * published by the Free Software Foundation. */ +#include <linux/ipv6.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv6/ip6t_hl.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP tables Hop Limit matching module"); @@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb, return 0; } -static struct ip6t_match hl_match = { +static struct xt_match hl_match = { .name = "hl", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, @@ -57,13 +59,12 @@ static struct ip6t_match hl_match = { static int __init ip6t_hl_init(void) { - return ip6t_register_match(&hl_match); + return xt_register_match(&hl_match); } static void __exit ip6t_hl_fini(void) { - ip6t_unregister_match(&hl_match); - + xt_unregister_match(&hl_match); } module_init(ip6t_hl_init); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 3093c39..26ac084 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -18,6 +18,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> @@ -140,8 +141,9 @@ ipv6header_checkentry(const char *tablename, return 1; } -static struct ip6t_match ip6t_ipv6header_match = { +static struct xt_match ip6t_ipv6header_match = { .name = "ipv6header", + .family = AF_INET6, .match = &ipv6header_match, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = &ipv6header_checkentry, @@ -151,12 +153,12 @@ static struct ip6t_match ip6t_ipv6header_match = { static int __init ipv6header_init(void) { - return ip6t_register_match(&ip6t_ipv6header_match); + return xt_register_match(&ip6t_ipv6header_match); } static void __exit ipv6header_exit(void) { - ip6t_unregister_match(&ip6t_ipv6header_match); + xt_unregister_match(&ip6t_ipv6header_match); } module_init(ipv6header_init); diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c new file mode 100644 index 0000000..2c7efc6 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -0,0 +1,108 @@ +/* + * Copyright (C)2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> + * + * Based on net/netfilter/xt_tcpudp.c + * + */ +#include <linux/types.h> +#include <linux/module.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/mip6.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv6/ip6t_mh.h> + +MODULE_DESCRIPTION("ip6t_tables match for MH"); +MODULE_LICENSE("GPL"); + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +/* Returns 1 if the type is matched by the range, 0 otherwise */ +static inline int +type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert) +{ + int ret; + + ret = (type >= min && type <= max) ^ invert; + return ret; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + struct ip6_mh _mh, *mh; + const struct ip6t_mh *mhinfo = matchinfo; + + /* Must not be a fragment. */ + if (offset) + return 0; + + mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); + if (mh == NULL) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. */ + duprintf("Dropping evil MH tinygram.\n"); + *hotdrop = 1; + return 0; + } + + return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, + !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +mh_checkentry(const char *tablename, + const void *entry, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) +{ + const struct ip6t_mh *mhinfo = matchinfo; + + /* Must specify no unknown invflags */ + return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); +} + +static struct xt_match mh_match = { + .name = "mh", + .family = AF_INET6, + .checkentry = mh_checkentry, + .match = match, + .matchsize = sizeof(struct ip6t_mh), + .proto = IPPROTO_MH, + .me = THIS_MODULE, +}; + +static int __init ip6t_mh_init(void) +{ + return xt_register_match(&mh_match); +} + +static void __exit ip6t_mh_fini(void) +{ + xt_unregister_match(&mh_match); +} + +module_init(ip6t_mh_init); +module_exit(ip6t_mh_fini); diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 4eb9bbc..43738bb 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -16,6 +16,7 @@ #include <linux/netfilter_ipv6/ip6t_owner.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("IP6 tables owner matching module"); @@ -69,8 +70,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match owner_match = { +static struct xt_match owner_match = { .name = "owner", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_owner_info), .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING), @@ -80,12 +82,12 @@ static struct ip6t_match owner_match = { static int __init ip6t_owner_init(void) { - return ip6t_register_match(&owner_match); + return xt_register_match(&owner_match); } static void __exit ip6t_owner_fini(void) { - ip6t_unregister_match(&owner_match); + xt_unregister_match(&owner_match); } module_init(ip6t_owner_init); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 54d7d14..81ab00d 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -16,6 +16,7 @@ #include <asm/byteorder.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_rt.h> @@ -221,8 +222,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match rt_match = { +static struct xt_match rt_match = { .name = "rt", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_rt), .checkentry = checkentry, @@ -231,12 +233,12 @@ static struct ip6t_match rt_match = { static int __init ip6t_rt_init(void) { - return ip6t_register_match(&rt_match); + return xt_register_match(&rt_match); } static void __exit ip6t_rt_fini(void) { - ip6t_unregister_match(&rt_match); + xt_unregister_match(&rt_match); } module_init(ip6t_rt_init); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 2fc07c7..112a21d 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -19,25 +19,6 @@ MODULE_DESCRIPTION("ip6tables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; @@ -92,7 +73,7 @@ static struct } }; -static struct ip6t_table packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 6250e86..5f5aa0e 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -29,25 +29,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); #define DEBUGP(x, args...) #endif -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; @@ -122,7 +103,7 @@ static struct } }; -static struct ip6t_table packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index b4154da..277bf34 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -14,25 +14,6 @@ #define DEBUGP(x, args...) #endif -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4ae1b19..c2d8059 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -815,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; if (hlimit < 0) { @@ -1094,10 +1094,19 @@ static void rawv6_close(struct sock *sk, long timeout) static int rawv6_init_sk(struct sock *sk) { - if (inet_sk(sk)->num == IPPROTO_ICMPV6) { - struct raw6_sock *rp = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); + + switch (inet_sk(sk)->num) { + case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; + break; + case IPPROTO_MH: + rp->checksum = 1; + rp->offset = 4; + break; + default: + break; } return(0); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5f0043c..19c906f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -311,12 +311,21 @@ static inline void rt6_probe(struct rt6_info *rt) static int inline rt6_check_dev(struct rt6_info *rt, int oif) { struct net_device *dev = rt->rt6i_dev; - if (!oif || dev->ifindex == oif) + int ret = 0; + + if (!oif) return 2; - if ((dev->flags & IFF_LOOPBACK) && - rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) - return 1; - return 0; + if (dev->flags & IFF_LOOPBACK) { + if (!WARN_ON(rt->rt6i_idev == NULL) && + rt->rt6i_idev->dev->ifindex == oif) + ret = 1; + else + return 0; + } + if (dev->ifindex == oif) + return 2; + + return ret; } static int inline rt6_check_neigh(struct rt6_info *rt) @@ -2040,7 +2049,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; @@ -2111,7 +2120,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int rt6_dump_route(struct rt6_info *rt, void *p_arg) @@ -2222,9 +2232,12 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); - /* failure implies BUG in rt6_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 77b7b09..47cfead 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -686,7 +686,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c25e930..dcb7b00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto failure; if (saddr == NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f52a5c3..15e5195 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -736,7 +736,7 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; if (hlimit < 0) { diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 5e7d8a7..0bc866c 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -25,6 +25,12 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } +static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) +{ + if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h))) + IP_ECN_set_ce(skb->h.ipiph); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -40,6 +46,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct ipv6hdr *iph, *top_iph; int dsfield; @@ -52,16 +59,24 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) skb->h.ipv6h = top_iph + 1; top_iph->version = 6; - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; + if (xdst->route->ops->family == AF_INET6) { + top_iph->priority = iph->priority; + top_iph->flow_lbl[0] = iph->flow_lbl[0]; + top_iph->flow_lbl[1] = iph->flow_lbl[1]; + top_iph->flow_lbl[2] = iph->flow_lbl[2]; + top_iph->nexthdr = IPPROTO_IPV6; + } else { + top_iph->priority = 0; + top_iph->flow_lbl[0] = 0; + top_iph->flow_lbl[1] = 0; + top_iph->flow_lbl[2] = 0; + top_iph->nexthdr = IPPROTO_IPIP; + } dsfield = ipv6_get_dsfield(top_iph); dsfield = INET_ECN_encapsulate(dsfield, dsfield); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->nexthdr = IPPROTO_IPV6; top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); @@ -72,7 +87,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6) + if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6 + && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP) goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -81,10 +97,16 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); + if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + } else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ip6ip_ecn_decapsulate(skb); + skb->protocol = htons(ETH_P_IP); + } skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8dffd4d..59480e9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -131,13 +131,11 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; - struct in6_addr *remote = &fl->fl6_dst; - struct in6_addr *local = &fl->fl6_src; struct flowi fl_tunnel = { .nl_u = { .ip6_u = { - .saddr = *local, - .daddr = *remote + .saddr = fl->fl6_src, + .daddr = fl->fl6_dst, } } }; @@ -153,7 +151,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -177,19 +174,27 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); - local = __xfrm6_bundle_addr_local(xfrm[i], local); - tunnel = 1; - } + __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - ipv6_addr_copy(&fl_tunnel.fl6_dst, remote); - ipv6_addr_copy(&fl_tunnel.fl6_src, local); + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + unsigned short encap_family = xfrm[i]->props.family; + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; + fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; + break; + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); + break; + default: + BUG_ON(1); + } + err = xfrm_dst_lookup((struct xfrm_dst **) &rt, - &fl_tunnel, AF_INET6); + &fl_tunnel, encap_family); if (err) goto error; } else @@ -208,6 +213,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; + struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -224,7 +230,17 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = xfrm6_output; + /* XXX: When IPv4 is implemented as module and can be unloaded, + * we should manage reference to xfrm4_output in afinfo->output. + * Miyazawa + */ + afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); + if (!afinfo) { + dst = *dst_p; + goto error; + }; + dst_prev->output = afinfo->output; + xfrm_state_put_afinfo(afinfo); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 9ddaa9d..60ad5f0 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -171,6 +171,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, + .output = xfrm6_output, }; void __init xfrm6_state_init(void) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 76c6615..89f283c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -2035,19 +2035,27 @@ static void __exit ipx_proto_finito(void) ipxitf_cleanup(); - unregister_snap_client(pSNAP_datalink); - pSNAP_datalink = NULL; + if (pSNAP_datalink) { + unregister_snap_client(pSNAP_datalink); + pSNAP_datalink = NULL; + } - unregister_8022_client(p8022_datalink); - p8022_datalink = NULL; + if (p8022_datalink) { + unregister_8022_client(p8022_datalink); + p8022_datalink = NULL; + } dev_remove_pack(&ipx_8023_packet_type); - destroy_8023_client(p8023_datalink); - p8023_datalink = NULL; + if (p8023_datalink) { + destroy_8023_client(p8023_datalink); + p8023_datalink = NULL; + } dev_remove_pack(&ipx_dix_packet_type); - destroy_EII_client(pEII_datalink); - pEII_datalink = NULL; + if (pEII_datalink) { + destroy_EII_client(pEII_datalink); + pEII_datalink = NULL; + } proto_unregister(&ipx_proto); sock_unregister(ipx_family_ops.family); diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index b1ee99a..2a571b4 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -91,6 +91,12 @@ struct ias_object *irias_new_object( char *name, int id) obj->magic = IAS_OBJECT_MAGIC; obj->name = strndup(name, IAS_MAX_CLASSNAME); + if (!obj->name) { + IRDA_WARNING("%s(), Unable to allocate name!\n", + __FUNCTION__); + kfree(obj); + return NULL; + } obj->id = id; /* Locking notes : the attrib spinlock has lower precendence @@ -101,6 +107,7 @@ struct ias_object *irias_new_object( char *name, int id) if (obj->attribs == NULL) { IRDA_WARNING("%s(), Unable to allocate attribs!\n", __FUNCTION__); + kfree(obj->name); kfree(obj); return NULL; } @@ -357,6 +364,15 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, /* Insert value */ attrib->value = irias_new_integer_value(value); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -391,6 +407,15 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); attrib->value = irias_new_octseq_value( octets, len); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -424,6 +449,15 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); attrib->value = irias_new_string_value(value); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -473,6 +507,12 @@ struct ias_value *irias_new_string_value(char *string) value->type = IAS_STRING; value->charset = CS_ASCII; value->t.string = strndup(string, IAS_MAX_STRING); + if (!value->t.string) { + IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); + kfree(value); + return NULL; + } + value->len = strlen(value->t.string); return value; diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 2bb04ac..310776d 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -144,12 +144,18 @@ static int __init irlan_init(void) /* Register with IrLMP as a client */ ckey = irlmp_register_client(hints, &irlan_client_discovery_indication, NULL, NULL); - + if (!ckey) + goto err_ckey; + /* Register with IrLMP as a service */ - skey = irlmp_register_service(hints); + skey = irlmp_register_service(hints); + if (!skey) + goto err_skey; /* Start the master IrLAN instance (the only one for now) */ - new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY); + new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY); + if (!new) + goto err_open; /* The master will only open its (listen) control TSAP */ irlan_provider_open_ctrl_tsap(new); @@ -158,6 +164,17 @@ static int __init irlan_init(void) irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS); return 0; + +err_open: + irlmp_unregister_service(skey); +err_skey: + irlmp_unregister_client(ckey); +err_ckey: +#ifdef CONFIG_PROC_FS + remove_proc_entry("irlan", proc_irda); +#endif /* CONFIG_PROC_FS */ + + return -ENOMEM; } static void __exit irlan_cleanup(void) diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig new file mode 100644 index 0000000..f8fcc3d --- /dev/null +++ b/net/iucv/Kconfig @@ -0,0 +1,15 @@ +config IUCV + tristate "IUCV support (VM only)" + depends on S390 + help + Select this option if you want to use inter-user communication under + VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + communication link between VM guests. + +config AFIUCV + tristate "AF_IUCV support (VM only)" + depends on IUCV + help + Select this option if you want to use inter-user communication under + VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + communication link between VM guests. diff --git a/net/iucv/Makefile b/net/iucv/Makefile new file mode 100644 index 0000000..7bfdc85 --- /dev/null +++ b/net/iucv/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for IUCV +# + +obj-$(CONFIG_IUCV) += iucv.o +obj-$(CONFIG_AFIUCV) += af_iucv.o diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c new file mode 100644 index 0000000..acc9421 --- /dev/null +++ b/net/iucv/af_iucv.c @@ -0,0 +1,1077 @@ +/* + * linux/net/iucv/af_iucv.c + * + * IUCV protocol stack for Linux on zSeries + * + * Copyright 2006 IBM Corporation + * + * Author(s): Jennifer Hunt <jenhunt@us.ibm.com> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <net/sock.h> +#include <asm/ebcdic.h> +#include <asm/cpcmd.h> +#include <linux/kmod.h> + +#include <net/iucv/iucv.h> +#include <net/iucv/af_iucv.h> + +#define CONFIG_IUCV_SOCK_DEBUG 1 + +#define IPRMDATA 0x80 +#define VERSION "1.0" + +static char iucv_userid[80]; + +static struct proto_ops iucv_sock_ops; + +static struct proto iucv_proto = { + .name = "AF_IUCV", + .owner = THIS_MODULE, + .obj_size = sizeof(struct iucv_sock), +}; + +/* Call Back functions */ +static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); +static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); +static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); +static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); +static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); + +static struct iucv_sock_list iucv_sk_list = { + .lock = RW_LOCK_UNLOCKED, + .autobind_name = ATOMIC_INIT(0) +}; + +static struct iucv_handler af_iucv_handler = { + .path_pending = iucv_callback_connreq, + .path_complete = iucv_callback_connack, + .path_severed = iucv_callback_connrej, + .message_pending = iucv_callback_rx, + .message_complete = iucv_callback_txdone +}; + +static inline void high_nmcpy(unsigned char *dst, char *src) +{ + memcpy(dst, src, 8); +} + +static inline void low_nmcpy(unsigned char *dst, char *src) +{ + memcpy(&dst[8], src, 8); +} + +/* Timers */ +static void iucv_sock_timeout(unsigned long arg) +{ + struct sock *sk = (struct sock *)arg; + + bh_lock_sock(sk); + sk->sk_err = ETIMEDOUT; + sk->sk_state_change(sk); + bh_unlock_sock(sk); + + iucv_sock_kill(sk); + sock_put(sk); +} + +static void iucv_sock_clear_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} + +static void iucv_sock_init_timer(struct sock *sk) +{ + init_timer(&sk->sk_timer); + sk->sk_timer.function = iucv_sock_timeout; + sk->sk_timer.data = (unsigned long)sk; +} + +static struct sock *__iucv_get_sock_by_name(char *nm) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &iucv_sk_list.head) + if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) + return sk; + + return NULL; +} + +static void iucv_sock_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +/* Cleanup Listen */ +static void iucv_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + /* Close non-accepted connections */ + while ((sk = iucv_accept_dequeue(parent, NULL))) { + iucv_sock_close(sk); + iucv_sock_kill(sk); + } + + parent->sk_state = IUCV_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +/* Kill socket */ +static void iucv_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + return; + + iucv_sock_unlink(&iucv_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + +/* Close an IUCV socket */ +static void iucv_sock_close(struct sock *sk) +{ + unsigned char user_data[16]; + struct iucv_sock *iucv = iucv_sk(sk); + int err; + + iucv_sock_clear_timer(sk); + lock_sock(sk); + + switch(sk->sk_state) { + case IUCV_LISTEN: + iucv_sock_cleanup_listen(sk); + break; + + case IUCV_CONNECTED: + case IUCV_DISCONN: + err = 0; + if (iucv->path) { + low_nmcpy(user_data, iucv->src_name); + high_nmcpy(user_data, iucv->dst_name); + ASCEBC(user_data, sizeof(user_data)); + err = iucv_path_sever(iucv->path, user_data); + iucv_path_free(iucv->path); + iucv->path = NULL; + } + + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + sk->sk_err = ECONNRESET; + sk->sk_state_change(sk); + + skb_queue_purge(&iucv->send_skb_q); + + sock_set_flag(sk, SOCK_ZAPPED); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + }; + + release_sock(sk); + iucv_sock_kill(sk); +} + +static void iucv_sock_init(struct sock *sk, struct sock *parent) +{ + if (parent) + sk->sk_type = parent->sk_type; +} + +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +{ + struct sock *sk; + + sk = sk_alloc(PF_IUCV, prio, &iucv_proto, 1); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); + skb_queue_head_init(&iucv_sk(sk)->send_skb_q); + iucv_sk(sk)->send_tag = 0; + + sk->sk_destruct = iucv_sock_destruct; + sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; + sk->sk_allocation = GFP_DMA; + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = IUCV_OPEN; + + iucv_sock_init_timer(sk); + + iucv_sock_link(&iucv_sk_list, sk); + return sk; +} + +/* Create an IUCV socket */ +static int iucv_sock_create(struct socket *sock, int protocol) +{ + struct sock *sk; + + if (sock->type != SOCK_STREAM) + return -ESOCKTNOSUPPORT; + + sock->state = SS_UNCONNECTED; + sock->ops = &iucv_sock_ops; + + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + if (!sk) + return -ENOMEM; + + iucv_sock_init(sk, NULL); + + return 0; +} + +void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + +void iucv_accept_enqueue(struct sock *parent, struct sock *sk) +{ + sock_hold(sk); + list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q); + iucv_sk(sk)->parent = parent; + parent->sk_ack_backlog++; +} + +void iucv_accept_unlink(struct sock *sk) +{ + list_del_init(&iucv_sk(sk)->accept_q); + iucv_sk(sk)->parent->sk_ack_backlog--; + iucv_sk(sk)->parent = NULL; + sock_put(sk); +} + +struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) +{ + struct iucv_sock *isk, *n; + struct sock *sk; + + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + sk = (struct sock *) isk; + lock_sock(sk); + + if (sk->sk_state == IUCV_CLOSED) { + release_sock(sk); + iucv_accept_unlink(sk); + continue; + } + + if (sk->sk_state == IUCV_CONNECTED || + sk->sk_state == IUCV_SEVERED || + !newsock) { + iucv_accept_unlink(sk); + if (newsock) + sock_graft(sk, newsock); + + if (sk->sk_state == IUCV_SEVERED) + sk->sk_state = IUCV_DISCONN; + + release_sock(sk); + return sk; + } + + release_sock(sk); + } + return NULL; +} + +int iucv_sock_wait_state(struct sock *sk, int state, int state2, + unsigned long timeo) +{ + DECLARE_WAITQUEUE(wait, current); + int err = 0; + + add_wait_queue(sk->sk_sleep, &wait); + while (sk->sk_state != state && sk->sk_state != state2) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!timeo) { + err = -EAGAIN; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + break; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + return err; +} + +/* Bind an unbound socket */ +static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, + int addr_len) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv; + int err; + + /* Verify the input sockaddr */ + if (!addr || addr->sa_family != AF_IUCV) + return -EINVAL; + + lock_sock(sk); + if (sk->sk_state != IUCV_OPEN) { + err = -EBADFD; + goto done; + } + + write_lock_bh(&iucv_sk_list.lock); + + iucv = iucv_sk(sk); + if (__iucv_get_sock_by_name(sa->siucv_name)) { + err = -EADDRINUSE; + goto done_unlock; + } + if (iucv->path) { + err = 0; + goto done_unlock; + } + + /* Bind the socket */ + memcpy(iucv->src_name, sa->siucv_name, 8); + + /* Copy the user id */ + memcpy(iucv->src_user_id, iucv_userid, 8); + sk->sk_state = IUCV_BOUND; + err = 0; + +done_unlock: + /* Release the socket list lock */ + write_unlock_bh(&iucv_sk_list.lock); +done: + release_sock(sk); + return err; +} + +/* Automatically bind an unbound socket */ +static int iucv_sock_autobind(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + char query_buffer[80]; + char name[12]; + int err = 0; + + /* Set the userid and name */ + cpcmd("QUERY USERID", query_buffer, sizeof(query_buffer), &err); + if (unlikely(err)) + return -EPROTO; + + memcpy(iucv->src_user_id, query_buffer, 8); + + write_lock_bh(&iucv_sk_list.lock); + + sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); + while (__iucv_get_sock_by_name(name)) { + sprintf(name, "%08x", + atomic_inc_return(&iucv_sk_list.autobind_name)); + } + + write_unlock_bh(&iucv_sk_list.lock); + + memcpy(&iucv->src_name, name, 8); + + return err; +} + +/* Connect an unconnected socket */ +static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv; + unsigned char user_data[16]; + int err; + + if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + return -EINVAL; + + if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) + return -EBADFD; + + if (sk->sk_type != SOCK_STREAM) + return -EINVAL; + + iucv = iucv_sk(sk); + + if (sk->sk_state == IUCV_OPEN) { + err = iucv_sock_autobind(sk); + if (unlikely(err)) + return err; + } + + lock_sock(sk); + + /* Set the destination information */ + memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); + memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); + + high_nmcpy(user_data, sa->siucv_name); + low_nmcpy(user_data, iucv_sk(sk)->src_name); + ASCEBC(user_data, sizeof(user_data)); + + iucv = iucv_sk(sk); + /* Create path. */ + iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, + IPRMDATA, GFP_KERNEL); + err = iucv_path_connect(iucv->path, &af_iucv_handler, + sa->siucv_user_id, NULL, user_data, sk); + if (err) { + iucv_path_free(iucv->path); + iucv->path = NULL; + err = -ECONNREFUSED; + goto done; + } + + if (sk->sk_state != IUCV_CONNECTED) { + err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, + sock_sndtimeo(sk, flags & O_NONBLOCK)); + } + + if (sk->sk_state == IUCV_DISCONN) { + release_sock(sk); + return -ECONNREFUSED; + } +done: + release_sock(sk); + return err; +} + +/* Move a socket into listening state. */ +static int iucv_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM) + goto done; + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + sk->sk_state = IUCV_LISTEN; + err = 0; + +done: + release_sock(sk); + return err; +} + +/* Accept a pending connection */ +static int iucv_sock_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; + long timeo; + int err = 0; + + lock_sock(sk); + + if (sk->sk_state != IUCV_LISTEN) { + err = -EBADFD; + goto done; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* Wait for an incoming connection */ + add_wait_queue_exclusive(sk->sk_sleep, &wait); + while (!(nsk = iucv_accept_dequeue(sk, newsock))){ + set_current_state(TASK_INTERRUPTIBLE); + if (!timeo) { + err = -EAGAIN; + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + if (sk->sk_state != IUCV_LISTEN) { + err = -EBADFD; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + +done: + release_sock(sk); + return err; +} + +static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, + int *len, int peer) +{ + struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + + addr->sa_family = AF_IUCV; + *len = sizeof(struct sockaddr_iucv); + + if (peer) { + memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); + memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); + } else { + memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); + memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); + } + memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); + memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); + memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); + + return 0; +} + +static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb; + struct iucv_message txmsg; + int err; + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (sk->sk_shutdown & SEND_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state == IUCV_CONNECTED){ + if(!(skb = sock_alloc_send_skb(sk, len, + msg->msg_flags & MSG_DONTWAIT, + &err))) + return err; + + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){ + err = -EFAULT; + goto fail; + } + + txmsg.class = 0; + txmsg.tag = iucv->send_tag++; + memcpy(skb->cb, &txmsg.tag, 4); + skb_queue_tail(&iucv->send_skb_q, skb); + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) + printk(KERN_ERR "AF_IUCV msg limit exceeded\n"); + skb_unlink(skb, &iucv->send_skb_q); + err = -EPIPE; + goto fail; + } + + } else { + err = -ENOTCONN; + goto out; + } + + release_sock(sk); + return len; + +fail: + kfree_skb(skb); +out: + release_sock(sk); + return err; +} + +static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + int target, copied = 0; + struct sk_buff *skb; + int err = 0; + + if (flags & (MSG_OOB)) + return -EOPNOTSUPP; + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 0; + return err; + } + + copied = min_t(unsigned int, skb->len, len); + + if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (copied == 0) + return -EFAULT; + } + + len -= copied; + + /* Mark read part of skb as used */ + if (!(flags & MSG_PEEK)) { + skb_pull(skb, copied); + + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + goto done; + } + + kfree_skb(skb); + } else + skb_queue_head(&sk->sk_receive_queue, skb); + +done: + return err ? : copied; +} + +static inline unsigned int iucv_accept_poll(struct sock *parent) +{ + struct iucv_sock *isk, *n; + struct sock *sk; + + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + sk = (struct sock *) isk; + + if (sk->sk_state == IUCV_CONNECTED) + return POLLIN | POLLRDNORM; + } + + return 0; +} + +unsigned int iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask = 0; + + poll_wait(file, sk->sk_sleep, wait); + + if (sk->sk_state == IUCV_LISTEN) + return iucv_accept_poll(sk); + + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; + + if (sk->sk_state == IUCV_CLOSED) + mask |= POLLHUP; + + if (sock_writeable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + return mask; +} + +static int iucv_sock_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + struct iucv_message txmsg; + int err = 0; + u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; + + how++; + + if ((how & ~SHUTDOWN_MASK) || !how) + return -EINVAL; + + lock_sock(sk); + switch(sk->sk_state) { + case IUCV_CLOSED: + err = -ENOTCONN; + goto fail; + + default: + sk->sk_shutdown |= how; + break; + } + + if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + txmsg.class = 0; + txmsg.tag = 0; + err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, + (void *) prmmsg, 8); + if (err) { + switch(err) { + case 1: + err = -ENOTCONN; + break; + case 2: + err = -ECONNRESET; + break; + default: + err = -ENOTCONN; + break; + } + } + } + + if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { + err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); + if (err) + err = -ENOTCONN; + + skb_queue_purge(&sk->sk_receive_queue); + } + + /* Wake up anyone sleeping in poll */ + sk->sk_state_change(sk); + +fail: + release_sock(sk); + return err; +} + +static int iucv_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (!sk) + return 0; + + iucv_sock_close(sk); + + /* Unregister with IUCV base support */ + if (iucv_sk(sk)->path) { + iucv_path_sever(iucv_sk(sk)->path, NULL); + iucv_path_free(iucv_sk(sk)->path); + iucv_sk(sk)->path = NULL; + } + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){ + lock_sock(sk); + err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, + sk->sk_lingertime); + release_sock(sk); + } + + sock_orphan(sk); + iucv_sock_kill(sk); + return err; +} + +/* Callback wrappers - called from iucv base support */ +static int iucv_callback_connreq(struct iucv_path *path, + u8 ipvmid[8], u8 ipuser[16]) +{ + unsigned char user_data[16]; + unsigned char nuser_data[16]; + unsigned char src_name[8]; + struct hlist_node *node; + struct sock *sk, *nsk; + struct iucv_sock *iucv, *niucv; + int err; + + memcpy(src_name, ipuser, 8); + EBCASC(src_name, 8); + /* Find out if this path belongs to af_iucv. */ + read_lock(&iucv_sk_list.lock); + iucv = NULL; + sk_for_each(sk, node, &iucv_sk_list.head) + if (sk->sk_state == IUCV_LISTEN && + !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { + /* + * Found a listening socket with + * src_name == ipuser[0-7]. + */ + iucv = iucv_sk(sk); + break; + } + read_unlock(&iucv_sk_list.lock); + if (!iucv) + /* No socket found, not one of our paths. */ + return -EINVAL; + + bh_lock_sock(sk); + + /* Check if parent socket is listening */ + low_nmcpy(user_data, iucv->src_name); + high_nmcpy(user_data, iucv->dst_name); + ASCEBC(user_data, sizeof(user_data)); + if (sk->sk_state != IUCV_LISTEN) { + err = iucv_path_sever(path, user_data); + goto fail; + } + + /* Check for backlog size */ + if (sk_acceptq_is_full(sk)) { + err = iucv_path_sever(path, user_data); + goto fail; + } + + /* Create the new socket */ + nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); + if (!nsk){ + err = iucv_path_sever(path, user_data); + goto fail; + } + + niucv = iucv_sk(nsk); + iucv_sock_init(nsk, sk); + + /* Set the new iucv_sock */ + memcpy(niucv->dst_name, ipuser + 8, 8); + EBCASC(niucv->dst_name, 8); + memcpy(niucv->dst_user_id, ipvmid, 8); + memcpy(niucv->src_name, iucv->src_name, 8); + memcpy(niucv->src_user_id, iucv->src_user_id, 8); + niucv->path = path; + + /* Call iucv_accept */ + high_nmcpy(nuser_data, ipuser + 8); + memcpy(nuser_data + 8, niucv->src_name, 8); + ASCEBC(nuser_data + 8, 8); + + path->msglim = IUCV_QUEUELEN_DEFAULT; + err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); + if (err){ + err = iucv_path_sever(path, user_data); + goto fail; + } + + iucv_accept_enqueue(sk, nsk); + + /* Wake up accept */ + nsk->sk_state = IUCV_CONNECTED; + sk->sk_data_ready(sk, 1); + err = 0; +fail: + bh_unlock_sock(sk); + return 0; +} + +static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + sk->sk_state = IUCV_CONNECTED; + sk->sk_state_change(sk); +} + +static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) +{ + struct sock *sk = path->private; + struct sk_buff *skb; + int rc; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return; + + skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); + if (!skb) { + iucv_message_reject(path, msg); + return; + } + + if (msg->flags & IPRMDATA) { + skb->data = NULL; + skb->len = 0; + } else { + rc = iucv_message_receive(path, msg, 0, skb->data, + msg->length, NULL); + if (rc) { + kfree_skb(skb); + return; + } + + skb->h.raw = skb->data; + skb->nh.raw = skb->data; + skb->len = msg->length; + } + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); +} + +static void iucv_callback_txdone(struct iucv_path *path, + struct iucv_message *msg) +{ + struct sock *sk = path->private; + struct sk_buff *this; + struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q; + struct sk_buff *list_skb = list->next; + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + + do { + this = list_skb; + list_skb = list_skb->next; + } while (memcmp(&msg->tag, this->cb, 4)); + + spin_unlock_irqrestore(&list->lock, flags); + + skb_unlink(this, &iucv_sk(sk)->send_skb_q); + kfree_skb(this); +} + +static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + if (!list_empty(&iucv_sk(sk)->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + + sk->sk_state_change(sk); +} + +static struct proto_ops iucv_sock_ops = { + .family = PF_IUCV, + .owner = THIS_MODULE, + .release = iucv_sock_release, + .bind = iucv_sock_bind, + .connect = iucv_sock_connect, + .listen = iucv_sock_listen, + .accept = iucv_sock_accept, + .getname = iucv_sock_getname, + .sendmsg = iucv_sock_sendmsg, + .recvmsg = iucv_sock_recvmsg, + .poll = iucv_sock_poll, + .ioctl = sock_no_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = iucv_sock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt +}; + +static struct net_proto_family iucv_sock_family_ops = { + .family = AF_IUCV, + .owner = THIS_MODULE, + .create = iucv_sock_create, +}; + +static int afiucv_init(void) +{ + int err; + + if (!MACHINE_IS_VM) { + printk(KERN_ERR "AF_IUCV connection needs VM as base\n"); + err = -EPROTONOSUPPORT; + goto out; + } + cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); + if (unlikely(err)) { + printk(KERN_ERR "AF_IUCV needs the VM userid\n"); + err = -EPROTONOSUPPORT; + goto out; + } + + err = iucv_register(&af_iucv_handler, 0); + if (err) + goto out; + err = proto_register(&iucv_proto, 0); + if (err) + goto out_iucv; + err = sock_register(&iucv_sock_family_ops); + if (err) + goto out_proto; + printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n"); + return 0; + +out_proto: + proto_unregister(&iucv_proto); +out_iucv: + iucv_unregister(&af_iucv_handler, 0); +out: + return err; +} + +static void __exit afiucv_exit(void) +{ + sock_unregister(PF_IUCV); + proto_unregister(&iucv_proto); + iucv_unregister(&af_iucv_handler, 0); + + printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n"); +} + +module_init(afiucv_init); +module_exit(afiucv_exit); + +MODULE_AUTHOR("Jennifer Hunt <jenhunt@us.ibm.com>"); +MODULE_DESCRIPTION("IUCV Sockets ver " VERSION); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IUCV); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c new file mode 100644 index 0000000..1b10d57 --- /dev/null +++ b/net/iucv/iucv.c @@ -0,0 +1,1619 @@ +/* + * IUCV base infrastructure. + * + * Copyright 2001, 2006 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): + * Original source: + * Alan Altmark (Alan_Altmark@us.ibm.com) Sept. 2000 + * Xenia Tkatschow (xenia@us.ibm.com) + * 2Gb awareness and general cleanup: + * Fritz Elfert (elfert@de.ibm.com, felfert@millenux.com) + * Rewritten for af_iucv: + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * + * Documentation used: + * The original source + * CP Programming Service, IBM document # SC24-5760 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> + +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <net/iucv/iucv.h> +#include <asm/atomic.h> +#include <asm/ebcdic.h> +#include <asm/io.h> +#include <asm/s390_ext.h> +#include <asm/s390_rdev.h> +#include <asm/smp.h> + +/* + * FLAGS: + * All flags are defined in the field IPFLAGS1 of each function + * and can be found in CP Programming Services. + * IPSRCCLS - Indicates you have specified a source class. + * IPTRGCLS - Indicates you have specified a target class. + * IPFGPID - Indicates you have specified a pathid. + * IPFGMID - Indicates you have specified a message ID. + * IPNORPY - Indicates a one-way message. No reply expected. + * IPALL - Indicates that all paths are affected. + */ +#define IUCV_IPSRCCLS 0x01 +#define IUCV_IPTRGCLS 0x01 +#define IUCV_IPFGPID 0x02 +#define IUCV_IPFGMID 0x04 +#define IUCV_IPNORPY 0x10 +#define IUCV_IPALL 0x80 + +static int iucv_bus_match (struct device *dev, struct device_driver *drv) +{ + return 0; +} + +struct bus_type iucv_bus = { + .name = "iucv", + .match = iucv_bus_match, +}; + +struct device *iucv_root; +static int iucv_available; + +/* General IUCV interrupt structure */ +struct iucv_irq_data { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 res2[8]; +}; + +struct iucv_work { + struct list_head list; + struct iucv_irq_data data; +}; + +static LIST_HEAD(iucv_work_queue); +static DEFINE_SPINLOCK(iucv_work_lock); + +static struct iucv_irq_data *iucv_irq_data; +static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; +static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; + +static void iucv_tasklet_handler(unsigned long); +static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0); + +enum iucv_command_codes { + IUCV_QUERY = 0, + IUCV_RETRIEVE_BUFFER = 2, + IUCV_SEND = 4, + IUCV_RECEIVE = 5, + IUCV_REPLY = 6, + IUCV_REJECT = 8, + IUCV_PURGE = 9, + IUCV_ACCEPT = 10, + IUCV_CONNECT = 11, + IUCV_DECLARE_BUFFER = 12, + IUCV_QUIESCE = 13, + IUCV_RESUME = 14, + IUCV_SEVER = 15, + IUCV_SETMASK = 16, +}; + +/* + * Error messages that are used with the iucv_sever function. They get + * converted to EBCDIC. + */ +static char iucv_error_no_listener[16] = "NO LISTENER"; +static char iucv_error_no_memory[16] = "NO MEMORY"; +static char iucv_error_pathid[16] = "INVALID PATHID"; + +/* + * iucv_handler_list: List of registered handlers. + */ +static LIST_HEAD(iucv_handler_list); + +/* + * iucv_path_table: an array of iucv_path structures. + */ +static struct iucv_path **iucv_path_table; +static unsigned long iucv_max_pathid; + +/* + * iucv_lock: spinlock protecting iucv_handler_list and iucv_pathid_table + */ +static DEFINE_SPINLOCK(iucv_table_lock); + +/* + * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet. + * Needed for iucv_path_sever called from tasklet. + */ +static int iucv_tasklet_cpu = -1; + +/* + * Mutex and wait queue for iucv_register/iucv_unregister. + */ +static DEFINE_MUTEX(iucv_register_mutex); + +/* + * Counter for number of non-smp capable handlers. + */ +static int iucv_nonsmp_handler; + +/* + * IUCV control data structure. Used by iucv_path_accept, iucv_path_connect, + * iucv_path_quiesce and iucv_path_sever. + */ +struct iucv_cmd_control { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u16 ipmsglim; + u16 res1; + u8 ipvmid[8]; + u8 ipuser[16]; + u8 iptarget[8]; +} __attribute__ ((packed,aligned(8))); + +/* + * Data in parameter list iucv structure. Used by iucv_message_send, + * iucv_message_send2way and iucv_message_reply. + */ +struct iucv_cmd_dpl { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u32 iptrgcls; + u8 iprmmsg[8]; + u32 ipsrccls; + u32 ipmsgtag; + u32 ipbfadr2; + u32 ipbfln2f; + u32 res; +} __attribute__ ((packed,aligned(8))); + +/* + * Data in buffer iucv structure. Used by iucv_message_receive, + * iucv_message_reject, iucv_message_send, iucv_message_send2way + * and iucv_declare_cpu. + */ +struct iucv_cmd_db { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u32 iptrgcls; + u32 ipbfadr1; + u32 ipbfln1f; + u32 ipsrccls; + u32 ipmsgtag; + u32 ipbfadr2; + u32 ipbfln2f; + u32 res; +} __attribute__ ((packed,aligned(8))); + +/* + * Purge message iucv structure. Used by iucv_message_purge. + */ +struct iucv_cmd_purge { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u8 ipaudit[3]; + u8 res1[5]; + u32 res2; + u32 ipsrccls; + u32 ipmsgtag; + u32 res3[3]; +} __attribute__ ((packed,aligned(8))); + +/* + * Set mask iucv structure. Used by iucv_enable_cpu. + */ +struct iucv_cmd_set_mask { + u8 ipmask; + u8 res1[2]; + u8 iprcode; + u32 res2[9]; +} __attribute__ ((packed,aligned(8))); + +union iucv_param { + struct iucv_cmd_control ctrl; + struct iucv_cmd_dpl dpl; + struct iucv_cmd_db db; + struct iucv_cmd_purge purge; + struct iucv_cmd_set_mask set_mask; +}; + +/* + * Anchor for per-cpu IUCV command parameter block. + */ +static union iucv_param *iucv_param; + +/** + * iucv_call_b2f0 + * @code: identifier of IUCV call to CP. + * @parm: pointer to a struct iucv_parm block + * + * Calls CP to execute IUCV commands. + * + * Returns the result of the CP IUCV call. + */ +static inline int iucv_call_b2f0(int command, union iucv_param *parm) +{ + register unsigned long reg0 asm ("0"); + register unsigned long reg1 asm ("1"); + int ccode; + + reg0 = command; + reg1 = virt_to_phys(parm); + asm volatile( + " .long 0xb2f01000\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) + : "m" (*parm) : "cc"); + return (ccode == 1) ? parm->ctrl.iprcode : ccode; +} + +/** + * iucv_query_maxconn + * + * Determines the maximum number of connections that may be established. + * + * Returns the maximum number of connections or -EPERM is IUCV is not + * available. + */ +static int iucv_query_maxconn(void) +{ + register unsigned long reg0 asm ("0"); + register unsigned long reg1 asm ("1"); + void *param; + int ccode; + + param = kzalloc(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA); + if (!param) + return -ENOMEM; + reg0 = IUCV_QUERY; + reg1 = (unsigned long) param; + asm volatile ( + " .long 0xb2f01000\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + if (ccode == 0) + iucv_max_pathid = reg0; + kfree(param); + return ccode ? -EPERM : 0; +} + +/** + * iucv_allow_cpu + * @data: unused + * + * Allow iucv interrupts on this cpu. + */ +static void iucv_allow_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + /* + * Enable all iucv interrupts. + * ipmask contains bits for the different interrupts + * 0x80 - Flag to allow nonpriority message pending interrupts + * 0x40 - Flag to allow priority message pending interrupts + * 0x20 - Flag to allow nonpriority message completion interrupts + * 0x10 - Flag to allow priority message completion interrupts + * 0x08 - Flag to allow IUCV control interrupts + */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->set_mask.ipmask = 0xf8; + iucv_call_b2f0(IUCV_SETMASK, parm); + + /* Set indication that iucv interrupts are allowed for this cpu. */ + cpu_set(cpu, iucv_irq_cpumask); +} + +/** + * iucv_block_cpu + * @data: unused + * + * Block iucv interrupts on this cpu. + */ +static void iucv_block_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + /* Disable all iucv interrupts. */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + iucv_call_b2f0(IUCV_SETMASK, parm); + + /* Clear indication that iucv interrupts are allowed for this cpu. */ + cpu_clear(cpu, iucv_irq_cpumask); +} + +/** + * iucv_declare_cpu + * @data: unused + * + * Declare a interupt buffer on this cpu. + */ +static void iucv_declare_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + int rc; + + if (cpu_isset(cpu, iucv_buffer_cpumask)) + return; + + /* Declare interrupt buffer. */ + parm = percpu_ptr(iucv_param, cpu); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ipbfadr1 = virt_to_phys(percpu_ptr(iucv_irq_data, cpu)); + rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); + if (rc) { + char *err = "Unknown"; + switch(rc) { + case 0x03: + err = "Directory error"; + break; + case 0x0a: + err = "Invalid length"; + break; + case 0x13: + err = "Buffer already exists"; + break; + case 0x3e: + err = "Buffer overlap"; + break; + case 0x5c: + err = "Paging or storage error"; + break; + } + printk(KERN_WARNING "iucv_register: iucv_declare_buffer " + "on cpu %i returned error 0x%02x (%s)\n", cpu, rc, err); + return; + } + + /* Set indication that an iucv buffer exists for this cpu. */ + cpu_set(cpu, iucv_buffer_cpumask); + + if (iucv_nonsmp_handler == 0 || cpus_empty(iucv_irq_cpumask)) + /* Enable iucv interrupts on this cpu. */ + iucv_allow_cpu(NULL); + else + /* Disable iucv interrupts on this cpu. */ + iucv_block_cpu(NULL); +} + +/** + * iucv_retrieve_cpu + * @data: unused + * + * Retrieve interrupt buffer on this cpu. + */ +static void iucv_retrieve_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + if (!cpu_isset(cpu, iucv_buffer_cpumask)) + return; + + /* Block iucv interrupts. */ + iucv_block_cpu(NULL); + + /* Retrieve interrupt buffer. */ + parm = percpu_ptr(iucv_param, cpu); + iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); + + /* Clear indication that an iucv buffer exists for this cpu. */ + cpu_clear(cpu, iucv_buffer_cpumask); +} + +/** + * iucv_setmask_smp + * + * Allow iucv interrupts on all cpus. + */ +static void iucv_setmask_mp(void) +{ + int cpu; + + for_each_online_cpu(cpu) + /* Enable all cpus with a declared buffer. */ + if (cpu_isset(cpu, iucv_buffer_cpumask) && + !cpu_isset(cpu, iucv_irq_cpumask)) + smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu); +} + +/** + * iucv_setmask_up + * + * Allow iucv interrupts on a single cpus. + */ +static void iucv_setmask_up(void) +{ + cpumask_t cpumask; + int cpu; + + /* Disable all cpu but the first in cpu_irq_cpumask. */ + cpumask = iucv_irq_cpumask; + cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_call_function_on(iucv_block_cpu, NULL, 0, 1, cpu); +} + +/** + * iucv_enable + * + * This function makes iucv ready for use. It allocates the pathid + * table, declares an iucv interrupt buffer and enables the iucv + * interrupts. Called when the first user has registered an iucv + * handler. + */ +static int iucv_enable(void) +{ + size_t alloc_size; + int cpu, rc; + + rc = -ENOMEM; + alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); + if (!iucv_path_table) + goto out; + /* Declare per cpu buffers. */ + rc = -EIO; + for_each_online_cpu(cpu) + smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + if (cpus_empty(iucv_buffer_cpumask)) + /* No cpu could declare an iucv buffer. */ + goto out_path; + return 0; + +out_path: + kfree(iucv_path_table); +out: + return rc; +} + +/** + * iucv_disable + * + * This function shuts down iucv. It disables iucv interrupts, retrieves + * the iucv interrupt buffer and frees the pathid table. Called after the + * last user unregister its iucv handler. + */ +static void iucv_disable(void) +{ + on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + kfree(iucv_path_table); +} + +#ifdef CONFIG_HOTPLUG_CPU +static int __cpuinit iucv_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + cpumask_t cpumask; + long cpu = (long) hcpu; + + switch (action) { + case CPU_UP_PREPARE: + if (!percpu_populate(iucv_irq_data, + sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA, cpu)) + return NOTIFY_BAD; + if (!percpu_populate(iucv_param, sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu)) { + percpu_depopulate(iucv_irq_data, cpu); + return NOTIFY_BAD; + } + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + percpu_depopulate(iucv_param, cpu); + percpu_depopulate(iucv_irq_data, cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + break; + case CPU_DOWN_PREPARE: + cpumask = iucv_buffer_cpumask; + cpu_clear(cpu, cpumask); + if (cpus_empty(cpumask)) + /* Can't offline last IUCV enabled cpu. */ + return NOTIFY_BAD; + smp_call_function_on(iucv_retrieve_cpu, NULL, 0, 1, cpu); + if (cpus_empty(iucv_irq_cpumask)) + smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, + first_cpu(iucv_buffer_cpumask)); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block iucv_cpu_notifier = { + .notifier_call = iucv_cpu_notify, +}; +#endif + +/** + * iucv_sever_pathid + * @pathid: path identification number. + * @userdata: 16-bytes of user data. + * + * Sever an iucv path to free up the pathid. Used internally. + */ +static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) +{ + union iucv_param *parm; + + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = pathid; + return iucv_call_b2f0(IUCV_SEVER, parm); +} + +/** + * __iucv_cleanup_pathid + * @dummy: unused dummy argument + * + * Nop function called via smp_call_function to force work items from + * pending external iucv interrupts to the work queue. + */ +static void __iucv_cleanup_pathid(void *dummy) +{ +} + +/** + * iucv_cleanup_pathid + * @pathid: 16 bit pathid + * + * Function called after a path has been severed to find all remaining + * work items for the now stale pathid. The caller needs to hold the + * iucv_table_lock. + */ +static void iucv_cleanup_pathid(u16 pathid) +{ + struct iucv_work *p, *n; + + /* + * Path is severed, the pathid can be reused immediatly on + * a iucv connect or a connection pending interrupt. + * iucv_path_connect and connection pending interrupt will + * wait until the iucv_table_lock is released before the + * recycled pathid enters the system. + * Force remaining interrupts to the work queue, then + * scan the work queue for items of this path. + */ + smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1); + spin_lock_irq(&iucv_work_lock); + list_for_each_entry_safe(p, n, &iucv_work_queue, list) { + /* Remove work items for pathid except connection pending */ + if (p->data.ippathid == pathid && p->data.iptype != 0x01) { + list_del(&p->list); + kfree(p); + } + } + spin_unlock_irq(&iucv_work_lock); +} + +/** + * iucv_register: + * @handler: address of iucv handler structure + * @smp: != 0 indicates that the handler can deal with out of order messages + * + * Registers a driver with IUCV. + * + * Returns 0 on success, -ENOMEM if the memory allocation for the pathid + * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. + */ +int iucv_register(struct iucv_handler *handler, int smp) +{ + int rc; + + if (!iucv_available) + return -ENOSYS; + mutex_lock(&iucv_register_mutex); + if (!smp) + iucv_nonsmp_handler++; + if (list_empty(&iucv_handler_list)) { + rc = iucv_enable(); + if (rc) + goto out_mutex; + } else if (!smp && iucv_nonsmp_handler == 1) + iucv_setmask_up(); + INIT_LIST_HEAD(&handler->paths); + + spin_lock_irq(&iucv_table_lock); + list_add_tail(&handler->list, &iucv_handler_list); + spin_unlock_irq(&iucv_table_lock); + rc = 0; +out_mutex: + mutex_unlock(&iucv_register_mutex); + return rc; +} + +/** + * iucv_unregister + * @handler: address of iucv handler structure + * @smp: != 0 indicates that the handler can deal with out of order messages + * + * Unregister driver from IUCV. + */ +void iucv_unregister(struct iucv_handler *handler, int smp) +{ + struct iucv_path *p, *n; + + mutex_lock(&iucv_register_mutex); + spin_lock_bh(&iucv_table_lock); + /* Remove handler from the iucv_handler_list. */ + list_del_init(&handler->list); + /* Sever all pathids still refering to the handler. */ + list_for_each_entry_safe(p, n, &handler->paths, list) { + iucv_sever_pathid(p->pathid, NULL); + iucv_path_table[p->pathid] = NULL; + list_del(&p->list); + iucv_cleanup_pathid(p->pathid); + iucv_path_free(p); + } + spin_unlock_bh(&iucv_table_lock); + if (!smp) + iucv_nonsmp_handler--; + if (list_empty(&iucv_handler_list)) + iucv_disable(); + else if (!smp && iucv_nonsmp_handler == 0) + iucv_setmask_mp(); + mutex_unlock(&iucv_register_mutex); +} + +/** + * iucv_path_accept + * @path: address of iucv path structure + * @handler: address of iucv handler structure + * @userdata: 16 bytes of data reflected to the communication partner + * @private: private data passed to interrupt handlers for this path + * + * This function is issued after the user received a connection pending + * external interrupt and now wishes to complete the IUCV communication path. + * + * Returns the result of the CP IUCV call. + */ +int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, + u8 userdata[16], void *private) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + /* Prepare parameter block. */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->ctrl.ippathid = path->pathid; + parm->ctrl.ipmsglim = path->msglim; + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ipflags1 = path->flags; + + rc = iucv_call_b2f0(IUCV_ACCEPT, parm); + if (!rc) { + path->private = private; + path->msglim = parm->ctrl.ipmsglim; + path->flags = parm->ctrl.ipflags1; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_path_connect + * @path: address of iucv path structure + * @handler: address of iucv handler structure + * @userid: 8-byte user identification + * @system: 8-byte target system identification + * @userdata: 16 bytes of data reflected to the communication partner + * @private: private data passed to interrupt handlers for this path + * + * This function establishes an IUCV path. Although the connect may complete + * successfully, you are not able to use the path until you receive an IUCV + * Connection Complete external interrupt. + * + * Returns the result of the CP IUCV call. + */ +int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, + u8 userid[8], u8 system[8], u8 userdata[16], + void *private) +{ + union iucv_param *parm; + int rc; + + preempt_disable(); + if (iucv_tasklet_cpu != smp_processor_id()) + spin_lock_bh(&iucv_table_lock); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->ctrl.ipmsglim = path->msglim; + parm->ctrl.ipflags1 = path->flags; + if (userid) { + memcpy(parm->ctrl.ipvmid, userid, sizeof(parm->ctrl.ipvmid)); + ASCEBC(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid)); + EBC_TOUPPER(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid)); + } + if (system) { + memcpy(parm->ctrl.iptarget, system, + sizeof(parm->ctrl.iptarget)); + ASCEBC(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget)); + EBC_TOUPPER(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget)); + } + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + + rc = iucv_call_b2f0(IUCV_CONNECT, parm); + if (!rc) { + if (parm->ctrl.ippathid < iucv_max_pathid) { + path->pathid = parm->ctrl.ippathid; + path->msglim = parm->ctrl.ipmsglim; + path->flags = parm->ctrl.ipflags1; + path->handler = handler; + path->private = private; + list_add_tail(&path->list, &handler->paths); + iucv_path_table[path->pathid] = path; + } else { + iucv_sever_pathid(parm->ctrl.ippathid, + iucv_error_pathid); + rc = -EIO; + } + } + if (iucv_tasklet_cpu != smp_processor_id()) + spin_unlock_bh(&iucv_table_lock); + preempt_enable(); + return rc; +} + +/** + * iucv_path_quiesce: + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function temporarily suspends incoming messages on an IUCV path. + * You can later reactivate the path by invoking the iucv_resume function. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = path->pathid; + rc = iucv_call_b2f0(IUCV_QUIESCE, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_path_resume: + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function resumes incoming messages on an IUCV path that has + * been stopped with iucv_path_quiesce. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = path->pathid; + rc = iucv_call_b2f0(IUCV_RESUME, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_path_sever + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function terminates an IUCV path. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) +{ + int rc; + + + preempt_disable(); + if (iucv_tasklet_cpu != smp_processor_id()) + spin_lock_bh(&iucv_table_lock); + rc = iucv_sever_pathid(path->pathid, userdata); + if (!rc) { + iucv_path_table[path->pathid] = NULL; + list_del_init(&path->list); + iucv_cleanup_pathid(path->pathid); + } + if (iucv_tasklet_cpu != smp_processor_id()) + spin_unlock_bh(&iucv_table_lock); + preempt_enable(); + return rc; +} + +/** + * iucv_message_purge + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @srccls: source class of message + * + * Cancels a message you have sent. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, + u32 srccls) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->purge.ippathid = path->pathid; + parm->purge.ipmsgid = msg->id; + parm->purge.ipsrccls = srccls; + parm->purge.ipflags1 = IUCV_IPSRCCLS | IUCV_IPFGMID | IUCV_IPFGPID; + rc = iucv_call_b2f0(IUCV_PURGE, parm); + if (!rc) { + msg->audit = (*(u32 *) &parm->purge.ipaudit) >> 8; + msg->tag = parm->purge.ipmsgtag; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_message_receive + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is received (IUCV_IPBUFLST) + * @buffer: address of data buffer or address of struct iucv_array + * @size: length of data buffer + * @residual: + * + * This function receives messages that are being sent to you over + * established paths. This function will deal with RMDATA messages + * embedded in struct iucv_message as well. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *buffer, size_t size, size_t *residual) +{ + union iucv_param *parm; + struct iucv_array *array; + u8 *rmmsg; + size_t copy; + int rc; + + if (msg->flags & IUCV_IPRMDATA) { + /* + * Message is 8 bytes long and has been stored to the + * message descriptor itself. + */ + rc = (size < 8) ? 5 : 0; + if (residual) + *residual = abs(size - 8); + rmmsg = msg->rmmsg; + if (flags & IUCV_IPBUFLST) { + /* Copy to struct iucv_array. */ + size = (size < 8) ? size : 8; + for (array = buffer; size > 0; array++) { + copy = min_t(size_t, size, array->length); + memcpy((u8 *)(addr_t) array->address, + rmmsg, copy); + rmmsg += copy; + size -= copy; + } + } else { + /* Copy to direct buffer. */ + memcpy(buffer, rmmsg, min_t(size_t, size, 8)); + } + return 0; + } + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ipmsgid = msg->id; + parm->db.ippathid = path->pathid; + parm->db.iptrgcls = msg->class; + parm->db.ipflags1 = (flags | IUCV_IPFGPID | + IUCV_IPFGMID | IUCV_IPTRGCLS); + rc = iucv_call_b2f0(IUCV_RECEIVE, parm); + if (!rc || rc == 5) { + msg->flags = parm->db.ipflags1; + if (residual) + *residual = parm->db.ipbfln1f; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_message_reject + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * + * The reject function refuses a specified message. Between the time you + * are notified of a message and the time that you complete the message, + * the message may be rejected. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ippathid = path->pathid; + parm->db.ipmsgid = msg->id; + parm->db.iptrgcls = msg->class; + parm->db.ipflags1 = (IUCV_IPTRGCLS | IUCV_IPFGMID | IUCV_IPFGPID); + rc = iucv_call_b2f0(IUCV_REJECT, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_message_reply + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) + * @reply: address of reply data buffer or address of struct iucv_array + * @size: length of reply data buffer + * + * This function responds to the two-way messages that you receive. You + * must identify completely the message to which you wish to reply. ie, + * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into + * the parameter list. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *reply, size_t size) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = flags; + parm->dpl.ipmsgid = msg->id; + parm->dpl.iptrgcls = msg->class; + memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8)); + } else { + parm->db.ipbfadr1 = (u32)(addr_t) reply; + parm->db.ipbfln1f = (u32) size; + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = flags; + parm->db.ipmsgid = msg->id; + parm->db.iptrgcls = msg->class; + } + rc = iucv_call_b2f0(IUCV_REPLY, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_message_send + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) + * @srccls: source class of message + * @buffer: address of send buffer or address of struct iucv_array + * @size: length of send buffer + * + * This function transmits data to another application. Data to be + * transmitted is in a buffer and this is a one-way message and the + * receiver will not reply to the message. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, + u8 flags, u32 srccls, void *buffer, size_t size) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + /* Message of 8 bytes can be placed into the parameter list. */ + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = flags | IUCV_IPNORPY; + parm->dpl.iptrgcls = msg->class; + parm->dpl.ipsrccls = srccls; + parm->dpl.ipmsgtag = msg->tag; + memcpy(parm->dpl.iprmmsg, buffer, 8); + } else { + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = flags | IUCV_IPNORPY; + parm->db.iptrgcls = msg->class; + parm->db.ipsrccls = srccls; + parm->db.ipmsgtag = msg->tag; + } + rc = iucv_call_b2f0(IUCV_SEND, parm); + if (!rc) + msg->id = parm->db.ipmsgid; + local_bh_enable(); + return rc; +} + +/** + * iucv_message_send2way + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is sent and the reply is received + * (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST) + * @srccls: source class of message + * @buffer: address of send buffer or address of struct iucv_array + * @size: length of send buffer + * @ansbuf: address of answer buffer or address of struct iucv_array + * @asize: size of reply buffer + * + * This function transmits data to another application. Data to be + * transmitted is in a buffer. The receiver of the send is expected to + * reply to the message and a buffer is provided into which IUCV moves + * the reply to this message. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, + u8 flags, u32 srccls, void *buffer, size_t size, + void *answer, size_t asize, size_t *residual) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = path->flags; /* priority message */ + parm->dpl.iptrgcls = msg->class; + parm->dpl.ipsrccls = srccls; + parm->dpl.ipmsgtag = msg->tag; + parm->dpl.ipbfadr2 = (u32)(addr_t) answer; + parm->dpl.ipbfln2f = (u32) asize; + memcpy(parm->dpl.iprmmsg, buffer, 8); + } else { + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = path->flags; /* priority message */ + parm->db.iptrgcls = msg->class; + parm->db.ipsrccls = srccls; + parm->db.ipmsgtag = msg->tag; + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ipbfadr2 = (u32)(addr_t) answer; + parm->db.ipbfln2f = (u32) asize; + } + rc = iucv_call_b2f0(IUCV_SEND, parm); + if (!rc) + msg->id = parm->db.ipmsgid; + local_bh_enable(); + return rc; +} + +/** + * iucv_path_pending + * @data: Pointer to external interrupt buffer + * + * Process connection pending work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_pending { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u16 ipmsglim; + u16 res1; + u8 ipvmid[8]; + u8 ipuser[16]; + u32 res3; + u8 ippollfg; + u8 res4[3]; +} __attribute__ ((packed)); + +static void iucv_path_pending(struct iucv_irq_data *data) +{ + struct iucv_path_pending *ipp = (void *) data; + struct iucv_handler *handler; + struct iucv_path *path; + char *error; + + BUG_ON(iucv_path_table[ipp->ippathid]); + /* New pathid, handler found. Create a new path struct. */ + error = iucv_error_no_memory; + path = iucv_path_alloc(ipp->ipmsglim, ipp->ipflags1, GFP_ATOMIC); + if (!path) + goto out_sever; + path->pathid = ipp->ippathid; + iucv_path_table[path->pathid] = path; + EBCASC(ipp->ipvmid, 8); + + /* Call registered handler until one is found that wants the path. */ + list_for_each_entry(handler, &iucv_handler_list, list) { + if (!handler->path_pending) + continue; + /* + * Add path to handler to allow a call to iucv_path_sever + * inside the path_pending function. If the handler returns + * an error remove the path from the handler again. + */ + list_add(&path->list, &handler->paths); + path->handler = handler; + if (!handler->path_pending(path, ipp->ipvmid, ipp->ipuser)) + return; + list_del(&path->list); + path->handler = NULL; + } + /* No handler wanted the path. */ + iucv_path_table[path->pathid] = NULL; + iucv_path_free(path); + error = iucv_error_no_listener; +out_sever: + iucv_sever_pathid(ipp->ippathid, error); +} + +/** + * iucv_path_complete + * @data: Pointer to external interrupt buffer + * + * Process connection complete work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_complete { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u16 ipmsglim; + u16 res1; + u8 res2[8]; + u8 ipuser[16]; + u32 res3; + u8 ippollfg; + u8 res4[3]; +} __attribute__ ((packed)); + +static void iucv_path_complete(struct iucv_irq_data *data) +{ + struct iucv_path_complete *ipc = (void *) data; + struct iucv_path *path = iucv_path_table[ipc->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_complete) + path->handler->path_complete(path, ipc->ipuser); +} + +/** + * iucv_path_severed + * @data: Pointer to external interrupt buffer + * + * Process connection severed work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_severed { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_severed(struct iucv_irq_data *data) +{ + struct iucv_path_severed *ips = (void *) data; + struct iucv_path *path = iucv_path_table[ips->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_severed) + path->handler->path_severed(path, ips->ipuser); + else { + iucv_sever_pathid(path->pathid, NULL); + iucv_path_table[path->pathid] = NULL; + list_del_init(&path->list); + iucv_cleanup_pathid(path->pathid); + iucv_path_free(path); + } +} + +/** + * iucv_path_quiesced + * @data: Pointer to external interrupt buffer + * + * Process connection quiesced work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_quiesced { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_quiesced(struct iucv_irq_data *data) +{ + struct iucv_path_quiesced *ipq = (void *) data; + struct iucv_path *path = iucv_path_table[ipq->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_quiesced) + path->handler->path_quiesced(path, ipq->ipuser); +} + +/** + * iucv_path_resumed + * @data: Pointer to external interrupt buffer + * + * Process connection resumed work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_resumed { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_resumed(struct iucv_irq_data *data) +{ + struct iucv_path_resumed *ipr = (void *) data; + struct iucv_path *path = iucv_path_table[ipr->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_resumed) + path->handler->path_resumed(path, ipr->ipuser); +} + +/** + * iucv_message_complete + * @data: Pointer to external interrupt buffer + * + * Process message complete work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_message_complete { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 ipmsgid; + u32 ipaudit; + u8 iprmmsg[8]; + u32 ipsrccls; + u32 ipmsgtag; + u32 res; + u32 ipbfln2f; + u8 ippollfg; + u8 res2[3]; +} __attribute__ ((packed)); + +static void iucv_message_complete(struct iucv_irq_data *data) +{ + struct iucv_message_complete *imc = (void *) data; + struct iucv_path *path = iucv_path_table[imc->ippathid]; + struct iucv_message msg; + + BUG_ON(!path || !path->handler); + if (path->handler->message_complete) { + msg.flags = imc->ipflags1; + msg.id = imc->ipmsgid; + msg.audit = imc->ipaudit; + memcpy(msg.rmmsg, imc->iprmmsg, 8); + msg.class = imc->ipsrccls; + msg.tag = imc->ipmsgtag; + msg.length = imc->ipbfln2f; + path->handler->message_complete(path, &msg); + } +} + +/** + * iucv_message_pending + * @data: Pointer to external interrupt buffer + * + * Process message pending work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_message_pending { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 ipmsgid; + u32 iptrgcls; + union { + u32 iprmmsg1_u32; + u8 iprmmsg1[4]; + } ln1msg1; + union { + u32 ipbfln1f; + u8 iprmmsg2[4]; + } ln1msg2; + u32 res1[3]; + u32 ipbfln2f; + u8 ippollfg; + u8 res2[3]; +} __attribute__ ((packed)); + +static void iucv_message_pending(struct iucv_irq_data *data) +{ + struct iucv_message_pending *imp = (void *) data; + struct iucv_path *path = iucv_path_table[imp->ippathid]; + struct iucv_message msg; + + BUG_ON(!path || !path->handler); + if (path->handler->message_pending) { + msg.flags = imp->ipflags1; + msg.id = imp->ipmsgid; + msg.class = imp->iptrgcls; + if (imp->ipflags1 & IUCV_IPRMDATA) { + memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8); + msg.length = 8; + } else + msg.length = imp->ln1msg2.ipbfln1f; + msg.reply_size = imp->ipbfln2f; + path->handler->message_pending(path, &msg); + } +} + +/** + * iucv_tasklet_handler: + * + * This tasklet loops over the queue of irq buffers created by + * iucv_external_interrupt, calls the appropriate action handler + * and then frees the buffer. + */ +static void iucv_tasklet_handler(unsigned long ignored) +{ + typedef void iucv_irq_fn(struct iucv_irq_data *); + static iucv_irq_fn *irq_fn[] = { + [0x01] = iucv_path_pending, + [0x02] = iucv_path_complete, + [0x03] = iucv_path_severed, + [0x04] = iucv_path_quiesced, + [0x05] = iucv_path_resumed, + [0x06] = iucv_message_complete, + [0x07] = iucv_message_complete, + [0x08] = iucv_message_pending, + [0x09] = iucv_message_pending, + }; + struct iucv_work *p; + + /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ + spin_lock(&iucv_table_lock); + iucv_tasklet_cpu = smp_processor_id(); + + spin_lock_irq(&iucv_work_lock); + while (!list_empty(&iucv_work_queue)) { + p = list_entry(iucv_work_queue.next, struct iucv_work, list); + list_del_init(&p->list); + spin_unlock_irq(&iucv_work_lock); + irq_fn[p->data.iptype](&p->data); + kfree(p); + spin_lock_irq(&iucv_work_lock); + } + spin_unlock_irq(&iucv_work_lock); + + iucv_tasklet_cpu = -1; + spin_unlock(&iucv_table_lock); +} + +/** + * iucv_external_interrupt + * @code: irq code + * + * Handles external interrupts coming in from CP. + * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler(). + */ +static void iucv_external_interrupt(u16 code) +{ + struct iucv_irq_data *p; + struct iucv_work *work; + + p = percpu_ptr(iucv_irq_data, smp_processor_id()); + if (p->ippathid >= iucv_max_pathid) { + printk(KERN_WARNING "iucv_do_int: Got interrupt with " + "pathid %d > max_connections (%ld)\n", + p->ippathid, iucv_max_pathid - 1); + iucv_sever_pathid(p->ippathid, iucv_error_no_listener); + return; + } + if (p->iptype < 0x01 || p->iptype > 0x09) { + printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); + return; + } + work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC); + if (!work) { + printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); + return; + } + memcpy(&work->data, p, sizeof(work->data)); + spin_lock(&iucv_work_lock); + list_add_tail(&work->list, &iucv_work_queue); + spin_unlock(&iucv_work_lock); + tasklet_schedule(&iucv_tasklet); +} + +/** + * iucv_init + * + * Allocates and initializes various data structures. + */ +static int iucv_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) { + rc = -EPROTONOSUPPORT; + goto out; + } + rc = iucv_query_maxconn(); + if (rc) + goto out; + rc = register_external_interrupt (0x4000, iucv_external_interrupt); + if (rc) + goto out; + rc = bus_register(&iucv_bus); + if (rc) + goto out_int; + iucv_root = s390_root_dev_register("iucv"); + if (IS_ERR(iucv_root)) { + rc = PTR_ERR(iucv_root); + goto out_bus; + } + /* Note: GFP_DMA used used to get memory below 2G */ + iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA); + if (!iucv_irq_data) { + rc = -ENOMEM; + goto out_root; + } + /* Allocate parameter blocks. */ + iucv_param = percpu_alloc(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA); + if (!iucv_param) { + rc = -ENOMEM; + goto out_extint; + } + register_hotcpu_notifier(&iucv_cpu_notifier); + ASCEBC(iucv_error_no_listener, 16); + ASCEBC(iucv_error_no_memory, 16); + ASCEBC(iucv_error_pathid, 16); + iucv_available = 1; + return 0; + +out_extint: + percpu_free(iucv_irq_data); +out_root: + s390_root_dev_unregister(iucv_root); +out_bus: + bus_unregister(&iucv_bus); +out_int: + unregister_external_interrupt(0x4000, iucv_external_interrupt); +out: + return rc; +} + +/** + * iucv_exit + * + * Frees everything allocated from iucv_init. + */ +static void iucv_exit(void) +{ + struct iucv_work *p, *n; + + spin_lock_irq(&iucv_work_lock); + list_for_each_entry_safe(p, n, &iucv_work_queue, list) + kfree(p); + spin_unlock_irq(&iucv_work_lock); + unregister_hotcpu_notifier(&iucv_cpu_notifier); + percpu_free(iucv_param); + percpu_free(iucv_irq_data); + s390_root_dev_unregister(iucv_root); + bus_unregister(&iucv_bus); + unregister_external_interrupt(0x4000, iucv_external_interrupt); +} + +subsys_initcall(iucv_init); +module_exit(iucv_exit); + +/** + * Export all public stuff + */ +EXPORT_SYMBOL (iucv_bus); +EXPORT_SYMBOL (iucv_root); +EXPORT_SYMBOL (iucv_register); +EXPORT_SYMBOL (iucv_unregister); +EXPORT_SYMBOL (iucv_path_accept); +EXPORT_SYMBOL (iucv_path_connect); +EXPORT_SYMBOL (iucv_path_quiesce); +EXPORT_SYMBOL (iucv_path_sever); +EXPORT_SYMBOL (iucv_message_purge); +EXPORT_SYMBOL (iucv_message_receive); +EXPORT_SYMBOL (iucv_message_reject); +EXPORT_SYMBOL (iucv_message_reply); +EXPORT_SYMBOL (iucv_message_send); +EXPORT_SYMBOL (iucv_message_send2way); + +MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)"); +MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver"); +MODULE_LICENSE("GPL"); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5dd5094..b4e4440 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2345,6 +2345,196 @@ out: return err; } +#ifdef CONFIG_NET_KEY_MIGRATE +static int pfkey_sockaddr_pair_size(sa_family_t family) +{ + switch (family) { + case AF_INET: + return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2); +#endif + default: + return 0; + } + /* NOTREACHED */ +} + +static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, + xfrm_address_t *saddr, xfrm_address_t *daddr, + u16 *family) +{ + struct sockaddr *sa = (struct sockaddr *)(rq + 1); + if (rq->sadb_x_ipsecrequest_len < + pfkey_sockaddr_pair_size(sa->sa_family)) + return -EINVAL; + + switch (sa->sa_family) { + case AF_INET: + { + struct sockaddr_in *sin; + sin = (struct sockaddr_in *)sa; + if ((sin+1)->sin_family != AF_INET) + return -EINVAL; + memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4)); + sin++; + memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4)); + *family = AF_INET; + break; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + { + struct sockaddr_in6 *sin6; + sin6 = (struct sockaddr_in6 *)sa; + if ((sin6+1)->sin6_family != AF_INET6) + return -EINVAL; + memcpy(&saddr->a6, &sin6->sin6_addr, + sizeof(saddr->a6)); + sin6++; + memcpy(&daddr->a6, &sin6->sin6_addr, + sizeof(daddr->a6)); + *family = AF_INET6; + break; + } +#endif + default: + return -EINVAL; + } + + return 0; +} + +static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, + struct xfrm_migrate *m) +{ + int err; + struct sadb_x_ipsecrequest *rq2; + + if (len <= sizeof(struct sadb_x_ipsecrequest) || + len < rq1->sadb_x_ipsecrequest_len) + return -EINVAL; + + /* old endoints */ + err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + &m->old_family); + if (err) + return err; + + rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); + len -= rq1->sadb_x_ipsecrequest_len; + + if (len <= sizeof(struct sadb_x_ipsecrequest) || + len < rq2->sadb_x_ipsecrequest_len) + return -EINVAL; + + /* new endpoints */ + err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + &m->new_family); + if (err) + return err; + + if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto || + rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode || + rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid) + return -EINVAL; + + m->proto = rq1->sadb_x_ipsecrequest_proto; + m->mode = rq1->sadb_x_ipsecrequest_mode - 1; + m->reqid = rq1->sadb_x_ipsecrequest_reqid; + + return ((int)(rq1->sadb_x_ipsecrequest_len + + rq2->sadb_x_ipsecrequest_len)); +} + +static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, + struct sadb_msg *hdr, void **ext_hdrs) +{ + int i, len, ret, err = -EINVAL; + u8 dir; + struct sadb_address *sa; + struct sadb_x_policy *pol; + struct sadb_x_ipsecrequest *rq; + struct xfrm_selector sel; + struct xfrm_migrate m[XFRM_MAX_DEPTH]; + + if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + !ext_hdrs[SADB_X_EXT_POLICY - 1]) { + err = -EINVAL; + goto out; + } + + pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; + if (!pol) { + err = -EINVAL; + goto out; + } + + if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { + err = -EINVAL; + goto out; + } + + dir = pol->sadb_x_policy_dir - 1; + memset(&sel, 0, sizeof(sel)); + + /* set source address info of selector */ + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; + sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); + sel.prefixlen_s = sa->sadb_address_prefixlen; + sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); + sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; + if (sel.sport) + sel.sport_mask = ~0; + + /* set destination address info of selector */ + sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1], + pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); + sel.prefixlen_d = sa->sadb_address_prefixlen; + sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); + sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; + if (sel.dport) + sel.dport_mask = ~0; + + rq = (struct sadb_x_ipsecrequest *)(pol + 1); + + /* extract ipsecrequests */ + i = 0; + len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); + + while (len > 0 && i < XFRM_MAX_DEPTH) { + ret = ipsecrequests_to_migrate(rq, len, &m[i]); + if (ret < 0) { + err = ret; + goto out; + } else { + rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); + len -= ret; + i++; + } + } + + if (!i || len > 0) { + err = -EINVAL; + goto out; + } + + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + + out: + return err; +} +#else +static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, + struct sadb_msg *hdr, void **ext_hdrs) +{ + return -ENOPROTOOPT; +} +#endif + + static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { unsigned int dir; @@ -2473,6 +2663,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, + [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) @@ -3118,6 +3309,236 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } +#ifdef CONFIG_NET_KEY_MIGRATE +static int set_sadb_address(struct sk_buff *skb, int sasize, int type, + struct xfrm_selector *sel) +{ + struct sadb_address *addr; + struct sockaddr_in *sin; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; +#endif + addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); + addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; + addr->sadb_address_exttype = type; + addr->sadb_address_proto = sel->proto; + addr->sadb_address_reserved = 0; + + switch (type) { + case SADB_EXT_ADDRESS_SRC: + if (sel->family == AF_INET) { + addr->sadb_address_prefixlen = sel->prefixlen_s; + sin = (struct sockaddr_in *)(addr + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, &sel->saddr, + sizeof(sin->sin_addr.s_addr)); + sin->sin_port = 0; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (sel->family == AF_INET6) { + addr->sadb_address_prefixlen = sel->prefixlen_s; + sin6 = (struct sockaddr_in6 *)(addr + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr, + sizeof(sin6->sin6_addr.s6_addr)); + } +#endif + break; + case SADB_EXT_ADDRESS_DST: + if (sel->family == AF_INET) { + addr->sadb_address_prefixlen = sel->prefixlen_d; + sin = (struct sockaddr_in *)(addr + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, &sel->daddr, + sizeof(sin->sin_addr.s_addr)); + sin->sin_port = 0; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (sel->family == AF_INET6) { + addr->sadb_address_prefixlen = sel->prefixlen_d; + sin6 = (struct sockaddr_in6 *)(addr + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr, + sizeof(sin6->sin6_addr.s6_addr)); + } +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int set_ipsecrequest(struct sk_buff *skb, + uint8_t proto, uint8_t mode, int level, + uint32_t reqid, uint8_t family, + xfrm_address_t *src, xfrm_address_t *dst) +{ + struct sadb_x_ipsecrequest *rq; + struct sockaddr_in *sin; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; +#endif + int size_req; + + size_req = sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(family); + + rq = (struct sadb_x_ipsecrequest *)skb_put(skb, size_req); + memset(rq, 0, size_req); + rq->sadb_x_ipsecrequest_len = size_req; + rq->sadb_x_ipsecrequest_proto = proto; + rq->sadb_x_ipsecrequest_mode = mode; + rq->sadb_x_ipsecrequest_level = level; + rq->sadb_x_ipsecrequest_reqid = reqid; + + switch (family) { + case AF_INET: + sin = (struct sockaddr_in *)(rq + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, src, + sizeof(sin->sin_addr.s_addr)); + sin++; + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, dst, + sizeof(sin->sin_addr.s_addr)); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + sin6 = (struct sockaddr_in6 *)(rq + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, src, + sizeof(sin6->sin6_addr.s6_addr)); + sin6++; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, dst, + sizeof(sin6->sin6_addr.s6_addr)); + break; +#endif + default: + return -EINVAL; + } + + return 0; +} +#endif + +#ifdef CONFIG_NET_KEY_MIGRATE +static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_bundles) +{ + int i; + int sasize_sel; + int size = 0; + int size_pol = 0; + struct sk_buff *skb; + struct sadb_msg *hdr; + struct sadb_x_policy *pol; + struct xfrm_migrate *mp; + + if (type != XFRM_POLICY_TYPE_MAIN) + return 0; + + if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) + return -EINVAL; + + /* selector */ + sasize_sel = pfkey_sockaddr_size(sel->family); + if (!sasize_sel) + return -EINVAL; + size += (sizeof(struct sadb_address) + sasize_sel) * 2; + + /* policy info */ + size_pol += sizeof(struct sadb_x_policy); + + /* ipsecrequests */ + for (i = 0, mp = m; i < num_bundles; i++, mp++) { + /* old locator pair */ + size_pol += sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(mp->old_family); + /* new locator pair */ + size_pol += sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(mp->new_family); + } + + size += sizeof(struct sadb_msg) + size_pol; + + /* alloc buffer */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + hdr = (struct sadb_msg *)skb_put(skb, sizeof(struct sadb_msg)); + hdr->sadb_msg_version = PF_KEY_V2; + hdr->sadb_msg_type = SADB_X_MIGRATE; + hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); + hdr->sadb_msg_len = size / 8; + hdr->sadb_msg_errno = 0; + hdr->sadb_msg_reserved = 0; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_pid = 0; + + /* selector src */ + set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); + + /* selector dst */ + set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); + + /* policy information */ + pol = (struct sadb_x_policy *)skb_put(skb, sizeof(struct sadb_x_policy)); + pol->sadb_x_policy_len = size_pol / 8; + pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; + pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_id = 0; + pol->sadb_x_policy_priority = 0; + + for (i = 0, mp = m; i < num_bundles; i++, mp++) { + /* old ipsecrequest */ + if (set_ipsecrequest(skb, mp->proto, mp->mode + 1, + (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), + mp->reqid, mp->old_family, + &mp->old_saddr, &mp->old_daddr) < 0) { + return -EINVAL; + } + + /* new ipsecrequest */ + if (set_ipsecrequest(skb, mp->proto, mp->mode + 1, + (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), + mp->reqid, mp->new_family, + &mp->new_saddr, &mp->new_daddr) < 0) { + return -EINVAL; + } + } + + /* broadcast migrate message to sockets */ + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + + return 0; +} +#else +static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_bundles) +{ + return -ENOPROTOOPT; +} +#endif + static int pfkey_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -3287,6 +3708,7 @@ static struct xfrm_mgr pfkeyv2_mgr = .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, + .migrate = pfkey_send_migrate, }; static void __exit ipsec_pfkey_exit(void) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 80107d4..748f7f0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -235,6 +235,19 @@ config NF_CONNTRACK_PPTP To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_SANE + tristate "SANE protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + SANE is a protocol for remote access to scanners as implemented + by the 'saned' daemon. Like FTP, it uses separate control and + data connections. + + With this module you can support SANE on a connection tracking + firewall. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CONNTRACK_SIP tristate "SIP protocol support (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK @@ -382,6 +395,32 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TCPMSS + tristate '"TCPMSS" target support' + depends on NETFILTER_XTABLES && (IPV6 || IPV6=n) + ---help--- + This option adds a `TCPMSS' target, which allows you to alter the + MSS value of TCP SYN packets, to control the maximum size for that + connection (usually limiting it to your outgoing interface's MTU + minus 40). + + This is used to overcome criminally braindead ISPs or servers which + block ICMP Fragmentation Needed packets. The symptoms of this + problem are that everything works fine from your Linux + firewall/router, but machines behind it can never exchange large + packets: + 1) Web browsers connect, then hang with no data received. + 2) Small mail works fine, but large emails hang. + 3) ssh works fine, but scp hangs after initial handshaking. + + Workaround: activate this option and add a rule to your firewall + configuration like: + + iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ + -j TCPMSS --clamp-mss-to-pmtu + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5dc5574..b2b5c75 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o +obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o @@ -44,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 626b001..6fccdcf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -60,12 +60,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int nf_ct_tcp_be_liberal __read_mostly = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose __read_mostly = 3; +int nf_ct_tcp_loose __read_mostly = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -650,11 +647,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). */ @@ -699,15 +695,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + nf_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -718,8 +712,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = nf_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1063,8 +1055,6 @@ static int tcp_new(struct nf_conn *conntrack, tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (nf_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1085,11 +1075,11 @@ static int tcp_new(struct nf_conn *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? */ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } conntrack->proto.tcp.seen[1].td_end = 0; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c new file mode 100644 index 0000000..eb2d1dc --- /dev/null +++ b/net/netfilter/nf_conntrack_sane.c @@ -0,0 +1,242 @@ +/* SANE connection tracking helper + * (SANE = Scanner Access Now Easy) + * For documentation about the SANE network protocol see + * http://www.sane-project.org/html/doc015.html + */ + +/* Copyright (C) 2007 Red Hat, Inc. + * Author: Michal Schmidt <mschmidt@redhat.com> + * Based on the FTP conntrack helper (net/netfilter/nf_conntrack_ftp.c): + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_sane.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>"); +MODULE_DESCRIPTION("SANE connection tracking helper"); + +static char *sane_buffer; + +static DEFINE_SPINLOCK(nf_sane_lock); + +#define MAX_PORTS 8 +static u_int16_t ports[MAX_PORTS]; +static unsigned int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct sane_request { + __be32 RPC_code; +#define SANE_NET_START 7 /* RPC code */ + + __be32 handle; +}; + +struct sane_reply_net_start { + __be32 status; +#define SANE_STATUS_SUCCESS 0 + + __be16 zero; + __be16 port; + /* other fields aren't interesting for conntrack */ +}; + +static int help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + struct tcphdr _tcph, *th; + char *sb_ptr; + int ret = NF_ACCEPT; + int dir = CTINFO2DIR(ctinfo); + struct nf_ct_sane_master *ct_sane_info; + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple *tuple; + struct sane_request *req; + struct sane_reply_net_start *reply; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + + ct_sane_info = &nfct_help(ct)->help.ct_sane_info; + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + return NF_ACCEPT; + + /* Not a full tcp header? */ + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + + /* No data? */ + dataoff = protoff + th->doff * 4; + if (dataoff >= (*pskb)->len) + return NF_ACCEPT; + + datalen = (*pskb)->len - dataoff; + + spin_lock_bh(&nf_sane_lock); + sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); + BUG_ON(sb_ptr == NULL); + + if (dir == IP_CT_DIR_ORIGINAL) { + if (datalen != sizeof(struct sane_request)) + goto out; + + req = (struct sane_request *)sb_ptr; + if (req->RPC_code != htonl(SANE_NET_START)) { + /* Not an interesting command */ + ct_sane_info->state = SANE_STATE_NORMAL; + goto out; + } + + /* We're interested in the next reply */ + ct_sane_info->state = SANE_STATE_START_REQUESTED; + goto out; + } + + /* Is it a reply to an uninteresting command? */ + if (ct_sane_info->state != SANE_STATE_START_REQUESTED) + goto out; + + /* It's a reply to SANE_NET_START. */ + ct_sane_info->state = SANE_STATE_NORMAL; + + if (datalen < sizeof(struct sane_reply_net_start)) { + DEBUGP("nf_ct_sane: NET_START reply too short\n"); + goto out; + } + + reply = (struct sane_reply_net_start *)sb_ptr; + if (reply->status != htonl(SANE_STATUS_SUCCESS)) { + /* saned refused the command */ + DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + ntohl(reply->status)); + goto out; + } + + /* Invalid saned reply? Ignore it. */ + if (reply->zero != 0) + goto out; + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + nf_conntrack_expect_init(exp, family, + &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, + NULL, &reply->port); + + DEBUGP("nf_ct_sane: expect: "); + NF_CT_DUMP_TUPLE(&exp->tuple); + NF_CT_DUMP_TUPLE(&exp->mask); + + /* Can't expect this? Best to drop packet now. */ + if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + + nf_conntrack_expect_put(exp); + +out: + spin_unlock_bh(&nf_sane_lock); + return ret; +} + +static struct nf_conntrack_helper sane[MAX_PORTS][2]; +static char sane_names[MAX_PORTS][2][sizeof("sane-65535")]; + +/* don't make this __exit, since it's called from __init ! */ +static void nf_conntrack_sane_fini(void) +{ + int i, j; + + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) { + DEBUGP("nf_ct_sane: unregistering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_helper_unregister(&sane[i][j]); + } + } + + kfree(sane_buffer); +} + +static int __init nf_conntrack_sane_init(void) +{ + int i, j = -1, ret = 0; + char *tmpname; + + sane_buffer = kmalloc(65536, GFP_KERNEL); + if (!sane_buffer) + return -ENOMEM; + + if (ports_c == 0) + ports[ports_c++] = SANE_PORT; + + /* FIXME should be configurable whether IPv4 and IPv6 connections + are tracked or not - YK */ + for (i = 0; i < ports_c; i++) { + sane[i][0].tuple.src.l3num = PF_INET; + sane[i][1].tuple.src.l3num = PF_INET6; + for (j = 0; j < 2; j++) { + sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); + sane[i][j].tuple.dst.protonum = IPPROTO_TCP; + sane[i][j].mask.src.u.tcp.port = 0xFFFF; + sane[i][j].mask.dst.protonum = 0xFF; + sane[i][j].max_expected = 1; + sane[i][j].timeout = 5 * 60; /* 5 Minutes */ + sane[i][j].me = THIS_MODULE; + sane[i][j].help = help; + tmpname = &sane_names[i][j][0]; + if (ports[i] == SANE_PORT) + sprintf(tmpname, "sane"); + else + sprintf(tmpname, "sane-%d", ports[i]); + sane[i][j].name = tmpname; + + DEBUGP("nf_ct_sane: registering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + ret = nf_conntrack_helper_register(&sane[i][j]); + if (ret) { + printk(KERN_ERR "nf_ct_sane: failed to " + "register helper for pf: %d port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_sane_fini(); + return ret; + } + } + } + + return 0; +} + +module_init(nf_conntrack_sane_init); +module_exit(nf_conntrack_sane_fini); diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 50de965..195e929 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -33,9 +33,7 @@ target(struct sk_buff **pskb, { const struct xt_classify_target_info *clinfo = targinfo; - if ((*pskb)->priority != clinfo->priority) - (*pskb)->priority = clinfo->priority; - + (*pskb)->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 0534bfa..795c058 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -61,7 +61,7 @@ target(struct sk_buff **pskb, #else nf_conntrack_event_cache(IPCT_MARK, *pskb); #endif - } + } break; case XT_CONNMARK_SAVE: newmark = (*ctmark & ~markinfo->mask) | @@ -78,8 +78,7 @@ target(struct sk_buff **pskb, case XT_CONNMARK_RESTORE: mark = (*pskb)->mark; diff = (*ctmark ^ mark) & markinfo->mask; - if (diff != 0) - (*pskb)->mark = mark ^ diff; + (*pskb)->mark = mark ^ diff; break; } } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index a3fe3c3..1ab0db6 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -41,8 +41,7 @@ static void secmark_save(struct sk_buff *skb) connsecmark = nf_ct_get_secmark(skb, &ctinfo); if (connsecmark && !*connsecmark) - if (*connsecmark != skb->secmark) - *connsecmark = skb->secmark; + *connsecmark = skb->secmark; } } @@ -58,8 +57,7 @@ static void secmark_restore(struct sk_buff *skb) connsecmark = nf_ct_get_secmark(skb, &ctinfo); if (connsecmark && *connsecmark) - if (skb->secmark != *connsecmark) - skb->secmark = *connsecmark; + skb->secmark = *connsecmark; } } diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 0b48547..cfc45af 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -31,9 +31,7 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - if((*pskb)->mark != markinfo->mark) - (*pskb)->mark = markinfo->mark; - + (*pskb)->mark = markinfo->mark; return XT_CONTINUE; } @@ -62,9 +60,7 @@ target_v1(struct sk_buff **pskb, break; } - if((*pskb)->mark != mark) - (*pskb)->mark = mark; - + (*pskb)->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index add7521..f1131c3 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -47,9 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, BUG(); } - if ((*pskb)->secmark != secmark) - (*pskb)->secmark = secmark; - + (*pskb)->secmark = secmark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c new file mode 100644 index 0000000..db7e38c --- /dev/null +++ b/net/netfilter/xt_TCPMSS.c @@ -0,0 +1,296 @@ +/* + * This is a module which is used for setting the MSS option in TCP packets. + * + * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <net/ipv6.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_tcpudp.h> +#include <linux/netfilter/xt_TCPMSS.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); +MODULE_DESCRIPTION("x_tables TCP MSS modification module"); +MODULE_ALIAS("ipt_TCPMSS"); +MODULE_ALIAS("ip6t_TCPMSS"); + +static inline unsigned int +optlen(const u_int8_t *opt, unsigned int offset) +{ + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; +} + +static int +tcpmss_mangle_packet(struct sk_buff **pskb, + const struct xt_tcpmss_info *info, + unsigned int tcphoff, + unsigned int minlen) +{ + struct tcphdr *tcph; + unsigned int tcplen, i; + __be16 oldval; + u16 newmss; + u8 *opt; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return -1; + + tcplen = (*pskb)->len - tcphoff; + tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); + + /* Since it passed flags test in tcp match, we know it is is + not a fragment, and has data >= tcp header length. SYN + packets should not contain data: if they did, then we risk + running over MTU, sending Frag Needed and breaking things + badly. --RR */ + if (tcplen != tcph->doff*4) { + if (net_ratelimit()) + printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", + (*pskb)->len); + return -1; + } + + if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + if (dst_mtu((*pskb)->dst) <= minlen) { + if (net_ratelimit()) + printk(KERN_ERR "xt_TCPMSS: " + "unknown or invalid path-MTU (%u)\n", + dst_mtu((*pskb)->dst)); + return -1; + } + newmss = dst_mtu((*pskb)->dst) - minlen; + } else + newmss = info->mss; + + opt = (u_int8_t *)tcph; + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && + opt[i+1] == TCPOLEN_MSS) { + u_int16_t oldmss; + + oldmss = (opt[i+2] << 8) | opt[i+3]; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + oldmss <= newmss) + return 0; + + opt[i+2] = (newmss & 0xff00) >> 8; + opt[i+3] = (newmss & 0x00ff); + + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldmss), htons(newmss), 0); + return 0; + } + } + + /* + * MSS Option not found ?! add it.. + */ + if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), + TCPOLEN_MSS, GFP_ATOMIC); + if (!newskb) + return -1; + kfree_skb(*pskb); + *pskb = newskb; + tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); + } + + skb_put((*pskb), TCPOLEN_MSS); + + opt = (u_int8_t *)tcph + sizeof(struct tcphdr); + memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); + + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); + opt[0] = TCPOPT_MSS; + opt[1] = TCPOLEN_MSS; + opt[2] = (newmss & 0xff00) >> 8; + opt[3] = (newmss & 0x00ff); + + nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); + + oldval = ((__be16 *)tcph)[6]; + tcph->doff += TCPOLEN_MSS/4; + nf_proto_csum_replace2(&tcph->check, *pskb, + oldval, ((__be16 *)tcph)[6], 0); + return TCPOLEN_MSS; +} + +static unsigned int +xt_tcpmss_target4(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + __be16 newlen; + int ret; + + ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, + sizeof(*iph) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + iph = (*pskb)->nh.iph; + newlen = htons(ntohs(iph->tot_len) + ret); + nf_csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + } + return XT_CONTINUE; +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static unsigned int +xt_tcpmss_target6(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h; + u8 nexthdr; + int tcphoff; + int ret; + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); + if (tcphoff < 0) { + WARN_ON(1); + return NF_DROP; + } + ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + ipv6h = (*pskb)->nh.ipv6h; + ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); + } + return XT_CONTINUE; +} +#endif + +#define TH_SYN 0x02 + +/* Must specify -p tcp --syn */ +static inline int find_syn_match(const struct xt_entry_match *m) +{ + const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; + + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TH_SYN && + !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) + return 1; + + return 0; +} + +static int +xt_tcpmss_checkentry4(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + const struct xt_tcpmss_info *info = targinfo; + const struct ipt_entry *e = entry; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP_FORWARD) | + (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING))) != 0) { + printk("xt_TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return 0; + } + if (IPT_MATCH_ITERATE(e, find_syn_match)) + return 1; + printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + return 0; +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static int +xt_tcpmss_checkentry6(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + const struct xt_tcpmss_info *info = targinfo; + const struct ip6t_entry *e = entry; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP6_FORWARD) | + (1 << NF_IP6_LOCAL_OUT) | + (1 << NF_IP6_POST_ROUTING))) != 0) { + printk("xt_TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return 0; + } + if (IP6T_MATCH_ITERATE(e, find_syn_match)) + return 1; + printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + return 0; +} +#endif + +static struct xt_target xt_tcpmss_reg[] = { + { + .family = AF_INET, + .name = "TCPMSS", + .checkentry = xt_tcpmss_checkentry4, + .target = xt_tcpmss_target4, + .targetsize = sizeof(struct xt_tcpmss_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + { + .family = AF_INET6, + .name = "TCPMSS", + .checkentry = xt_tcpmss_checkentry6, + .target = xt_tcpmss_target6, + .targetsize = sizeof(struct xt_tcpmss_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init xt_tcpmss_init(void) +{ + return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); +} + +static void __exit xt_tcpmss_fini(void) +{ + xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); +} + +module_init(xt_tcpmss_init); +module_exit(xt_tcpmss_fini); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f28bf69..bd1f7a2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -414,6 +414,7 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, switch (nexthdr) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: case IPPROTO_DCCP: ports = skb_header_pointer(skb, protoff, sizeof(_ports), diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6dc01bd..a6fa487 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -60,6 +60,7 @@ #include <linux/netdevice.h> #include <linux/if_packet.h> #include <linux/wireless.h> +#include <linux/kernel.h> #include <linux/kmod.h> #include <net/ip.h> #include <net/protocol.h> @@ -200,7 +201,8 @@ struct packet_sock { #endif struct packet_type prot_hook; spinlock_t bind_lock; - char running; /* prot_hook is attached*/ + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1; int ifindex; /* bound device */ __be16 num; #ifdef CONFIG_PACKET_MULTICAST @@ -214,6 +216,16 @@ struct packet_sock { #endif }; +struct packet_skb_cb { + unsigned int origlen; + union { + struct sockaddr_pkt pkt; + struct sockaddr_ll ll; + } sa; +}; + +#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) + #ifdef CONFIG_PACKET_MMAP static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position) @@ -293,7 +305,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct /* drop conntrack reference */ nf_reset(skb); - spkt = (struct sockaddr_pkt*)skb->cb; + spkt = &PACKET_SKB_CB(skb)->sa.pkt; skb_push(skb, skb->data-skb->mac.raw); @@ -512,7 +524,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet skb = nskb; } - sll = (struct sockaddr_ll*)skb->cb; + BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > + sizeof(skb->cb)); + + sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; @@ -523,6 +538,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (dev->hard_header_parse) sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + PACKET_SKB_CB(skb)->origlen = skb->len; + if (pskb_trim(skb, snaplen)) goto drop_n_acct; @@ -582,11 +599,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_PARTIAL) - status |= TP_STATUS_CSUMNOTREADY; } } + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= TP_STATUS_CSUMNOTREADY; + snaplen = skb->len; res = run_filter(skb, sk, snaplen); @@ -1092,7 +1110,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * it in now. */ - sll = (struct sockaddr_ll*)skb->cb; + sll = &PACKET_SKB_CB(skb)->sa.ll; if (sock->type == SOCK_PACKET) msg->msg_namelen = sizeof(struct sockaddr_pkt); else @@ -1117,7 +1135,22 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_timestamp(msg, sk, skb); if (msg->msg_name) - memcpy(msg->msg_name, skb->cb, msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, + msg->msg_namelen); + + if (pkt_sk(sk)->auxdata) { + struct tpacket_auxdata aux; + + aux.tp_status = TP_STATUS_USER; + if (skb->ip_summed == CHECKSUM_PARTIAL) + aux.tp_status |= TP_STATUS_CSUMNOTREADY; + aux.tp_len = PACKET_SKB_CB(skb)->origlen; + aux.tp_snaplen = skb->len; + aux.tp_mac = 0; + aux.tp_net = skb->nh.raw - skb->data; + + put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); + } /* * Free or return the buffer as appropriate. Again this @@ -1317,6 +1350,7 @@ static int packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; + struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) @@ -1369,6 +1403,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return 0; } #endif + case PACKET_AUXDATA: + { + int val; + + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->auxdata = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1378,8 +1424,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; + int val; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); + void *data; + struct tpacket_stats st; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -1392,9 +1441,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch(optname) { case PACKET_STATISTICS: - { - struct tpacket_stats st; - if (len > sizeof(struct tpacket_stats)) len = sizeof(struct tpacket_stats); spin_lock_bh(&sk->sk_receive_queue.lock); @@ -1403,16 +1449,23 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, spin_unlock_bh(&sk->sk_receive_queue.lock); st.tp_packets += st.tp_drops; - if (copy_to_user(optval, &st, len)) - return -EFAULT; + data = &st; + break; + case PACKET_AUXDATA: + if (len > sizeof(int)) + len = sizeof(int); + val = po->auxdata; + + data = &val; break; - } default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; + if (copy_to_user(optval, data, len)) + return -EFAULT; return 0; } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 01e6913..4c68c71 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -52,7 +52,7 @@ static struct tcf_hashinfo ipt_hash_info = { static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { - struct ipt_target *target; + struct xt_target *target; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bc116bd..3b6e6a7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -209,7 +209,7 @@ static void dev_watchdog(unsigned long arg) dev->name); dev->tx_timeout(dev); } - if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) dev_hold(dev); } } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2567b4c..000e043 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -372,6 +372,20 @@ static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff * return 0; } +static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *cl_q; + + cl_q = q->queues[cl - 1]; + if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 || + gnet_stats_copy_queue(d, &cl_q->qstats) < 0) + return -1; + + return 0; +} + static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct prio_sched_data *q = qdisc_priv(sch); @@ -414,6 +428,7 @@ static struct Qdisc_class_ops prio_class_ops = { .bind_tcf = prio_bind, .unbind_tcf = prio_put, .dump = prio_dump_class, + .dump_stats = prio_dump_class_stats, }; static struct Qdisc_ops prio_qdisc_ops = { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 459cda2..8284480 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -143,6 +143,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_UDPLITE || iph->protocol == IPPROTO_SCTP || iph->protocol == IPPROTO_DCCP || iph->protocol == IPPROTO_ESP)) @@ -156,6 +157,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_UDPLITE || iph->nexthdr == IPPROTO_SCTP || iph->nexthdr == IPPROTO_DCCP || iph->nexthdr == IPPROTO_ESP) diff --git a/net/socket.c b/net/socket.c index 4e39631..5f374e1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -407,24 +407,11 @@ int sock_map_fd(struct socket *sock) static struct socket *sock_from_file(struct file *file, int *err) { - struct inode *inode; - struct socket *sock; - if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ - inode = file->f_path.dentry->d_inode; - if (!S_ISSOCK(inode->i_mode)) { - *err = -ENOTSOCK; - return NULL; - } - - sock = SOCKET_I(inode); - if (sock->file != file) { - printk(KERN_ERR "socki_lookup: socket file changed!\n"); - sock->file = file; - } - return sock; + *err = -ENOTSOCK; + return NULL; } /** @@ -1527,8 +1514,9 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, struct file *sock_file; sock_file = fget_light(fd, &fput_needed); + err = -EBADF; if (!sock_file) - return -EBADF; + goto out; sock = sock_from_file(sock_file, &err); if (!sock) @@ -1555,6 +1543,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, out_put: fput_light(sock_file, fput_needed); +out: return err; } @@ -1586,12 +1575,13 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, int fput_needed; sock_file = fget_light(fd, &fput_needed); + err = -EBADF; if (!sock_file) - return -EBADF; + goto out; sock = sock_from_file(sock_file, &err); if (!sock) - goto out; + goto out_put; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1610,8 +1600,9 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, if (err2 < 0) err = err2; } -out: +out_put: fput_light(sock_file, fput_needed); +out: return err; } diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 769cdd6..4d90a17 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -86,8 +86,8 @@ static int wanrouter_device_del_if(struct wan_device *wandev, static struct wan_device *wanrouter_find_device(char *name); static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); -void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); +static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); +static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); @@ -104,8 +104,8 @@ struct wan_device* wanrouter_router_devlist; /* list of registered devices */ * Organize Unique Identifiers for encapsulation/decapsulation */ -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; #if 0 +static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; #endif @@ -246,6 +246,8 @@ int unregister_wan_device(char *name) return 0; } +#if 0 + /* * Encapsulate packet. * @@ -341,6 +343,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) return ethertype; } +#endif /* 0 */ /* * WAN device IOCTL. @@ -799,23 +802,19 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) return 0; } -void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) +static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) { spin_lock_irqsave(lock, *smp_flags); } -void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) +static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) { spin_unlock_irqrestore(lock, *smp_flags); } EXPORT_SYMBOL(register_wan_device); EXPORT_SYMBOL(unregister_wan_device); -EXPORT_SYMBOL(wanrouter_encapsulate); -EXPORT_SYMBOL(wanrouter_type_trans); -EXPORT_SYMBOL(lock_adapter_irq); -EXPORT_SYMBOL(unlock_adapter_irq); MODULE_LICENSE("GPL"); diff --git a/net/x25/Makefile b/net/x25/Makefile index 587a71a..a2c34ab 100644 --- a/net/x25/Makefile +++ b/net/x25/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_X25) += x25.o x25-y := af_x25.o x25_dev.o x25_facilities.o x25_in.o \ x25_link.o x25_out.o x25_route.o x25_subr.o \ - x25_timer.o x25_proc.o + x25_timer.o x25_proc.o x25_forward.o x25-$(CONFIG_SYSCTL) += sysctl_net_x25.o diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index b5c80b1..b37d894 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -63,6 +63,7 @@ int sysctl_x25_call_request_timeout = X25_DEFAULT_T21; int sysctl_x25_reset_request_timeout = X25_DEFAULT_T22; int sysctl_x25_clear_request_timeout = X25_DEFAULT_T23; int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; +int sysctl_x25_forward = 0; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); @@ -846,7 +847,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, struct x25_address source_addr, dest_addr; struct x25_facilities facilities; struct x25_dte_facilities dte_facilities; - int len, rc; + int len, addr_len, rc; /* * Remove the LCI and frame type. @@ -857,7 +858,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. */ - skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); + addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr); + skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment @@ -873,11 +875,28 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, sk = x25_find_listener(&source_addr,skb); skb_push(skb,len); + if (sk != NULL && sk_acceptq_is_full(sk)) { + goto out_sock_put; + } + /* - * We can't accept the Call Request. + * We dont have any listeners for this incoming call. + * Try forwarding it. */ - if (sk == NULL || sk_acceptq_is_full(sk)) - goto out_clear_request; + if (sk == NULL) { + skb_push(skb, addr_len + X25_STD_MIN_LEN); + if (sysctl_x25_forward && + x25_forward_call(&dest_addr, nb, skb, lci) > 0) + { + /* Call was forwarded, dont process it any more */ + kfree_skb(skb); + rc = 1; + goto out; + } else { + /* No listeners, can't forward, clear the call */ + goto out_clear_request; + } + } /* * Try to reach a compromise on the requested facilities. @@ -1598,6 +1617,9 @@ void x25_kill_by_neigh(struct x25_neigh *nb) x25_disconnect(s, ENETUNREACH, 0, 0); write_unlock_bh(&x25_list_lock); + + /* Remove any related forwards */ + x25_clear_forward_by_dev(nb->dev); } static int __init x25_init(void) diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index aabda59..2b2e7fd 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,6 +73,14 @@ static struct ctl_table x25_table[] = { .extra1 = &min_timer, .extra2 = &max_timer, }, + { + .ctl_name = NET_X25_FORWARD, + .procname = "x25_forward", + .data = &sysctl_x25_forward, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { 0, }, }; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 328d80f..f099fd6 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -67,9 +67,18 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) return x25_rx_call_request(skb, nb, lci); /* - * Its not a Call Request, nor is it a control frame. - * Let caller throw it away. + * Its not a Call Request, nor is it a control frame. + * Can we forward it? */ + + if (x25_forward_data(lci, nb, skb)) { + if (frametype == X25_CLEAR_CONFIRMATION) { + x25_clear_forward_by_lci(lci); + } + kfree_skb(skb); + return 1; + } + /* x25_transmit_clear_request(nb, lci, 0x0D); */ diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c new file mode 100644 index 0000000..d339e0c --- /dev/null +++ b/net/x25/x25_forward.c @@ -0,0 +1,163 @@ +/* + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * 03-01-2007 Added forwarding for x.25 Andrew Hendry + */ +#include <linux/if_arp.h> +#include <linux/init.h> +#include <net/x25.h> + +struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list); +DEFINE_RWLOCK(x25_forward_list_lock); + +int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, + struct sk_buff *skb, int lci) +{ + struct x25_route *rt; + struct x25_neigh *neigh_new = NULL; + struct list_head *entry; + struct x25_forward *x25_frwd, *new_frwd; + struct sk_buff *skbn; + short same_lci = 0; + int rc = 0; + + if ((rt = x25_get_route(dest_addr)) != NULL) { + + if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { + /* This shouldnt happen, if it occurs somehow + * do something sensible + */ + goto out_put_route; + } + + /* Avoid a loop. This is the normal exit path for a + * system with only one x.25 iface and default route + */ + if (rt->dev == from->dev) { + goto out_put_nb; + } + + /* Remote end sending a call request on an already + * established LCI? It shouldnt happen, just in case.. + */ + read_lock_bh(&x25_forward_list_lock); + list_for_each(entry, &x25_forward_list) { + x25_frwd = list_entry(entry, struct x25_forward, node); + if (x25_frwd->lci == lci) { + printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n"); + same_lci = 1; + } + } + read_unlock_bh(&x25_forward_list_lock); + + /* Save the forwarding details for future traffic */ + if (!same_lci){ + if ((new_frwd = kmalloc(sizeof(struct x25_forward), + GFP_ATOMIC)) == NULL){ + rc = -ENOMEM; + goto out_put_nb; + } + new_frwd->lci = lci; + new_frwd->dev1 = rt->dev; + new_frwd->dev2 = from->dev; + write_lock_bh(&x25_forward_list_lock); + list_add(&new_frwd->node, &x25_forward_list); + write_unlock_bh(&x25_forward_list_lock); + } + + /* Forward the call request */ + if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){ + goto out_put_nb; + } + x25_transmit_link(skbn, neigh_new); + rc = 1; + } + + +out_put_nb: + x25_neigh_put(neigh_new); + +out_put_route: + x25_route_put(rt); + return rc; +} + + +int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) { + + struct x25_forward *frwd; + struct list_head *entry; + struct net_device *peer = NULL; + struct x25_neigh *nb; + struct sk_buff *skbn; + int rc = 0; + + read_lock_bh(&x25_forward_list_lock); + list_for_each(entry, &x25_forward_list) { + frwd = list_entry(entry, struct x25_forward, node); + if (frwd->lci == lci) { + /* The call is established, either side can send */ + if (from->dev == frwd->dev1) { + peer = frwd->dev2; + } else { + peer = frwd->dev1; + } + break; + } + } + read_unlock_bh(&x25_forward_list_lock); + + if ( (nb = x25_get_neigh(peer)) == NULL) + goto out; + + if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){ + goto out; + + } + x25_transmit_link(skbn, nb); + + x25_neigh_put(nb); + rc = 1; +out: + return rc; +} + +void x25_clear_forward_by_lci(unsigned int lci) +{ + struct x25_forward *fwd; + struct list_head *entry, *tmp; + + write_lock_bh(&x25_forward_list_lock); + + list_for_each_safe(entry, tmp, &x25_forward_list) { + fwd = list_entry(entry, struct x25_forward, node); + if (fwd->lci == lci) { + list_del(&fwd->node); + kfree(fwd); + } + } + write_unlock_bh(&x25_forward_list_lock); +} + + +void x25_clear_forward_by_dev(struct net_device *dev) +{ + struct x25_forward *fwd; + struct list_head *entry, *tmp; + + write_lock_bh(&x25_forward_list_lock); + + list_for_each_safe(entry, tmp, &x25_forward_list) { + fwd = list_entry(entry, struct x25_forward, node); + if ((fwd->dev1 == dev) || (fwd->dev2 == dev)){ + list_del(&fwd->node); + kfree(fwd); + } + } + write_unlock_bh(&x25_forward_list_lock); +} diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index a11837d..e0470bd 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -165,6 +165,75 @@ out: return 0; } +static __inline__ struct x25_forward *x25_get_forward_idx(loff_t pos) +{ + struct x25_forward *f; + struct list_head *entry; + + list_for_each(entry, &x25_forward_list) { + f = list_entry(entry, struct x25_forward, node); + if (!pos--) + goto found; + } + + f = NULL; +found: + return f; +} + +static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos) +{ + loff_t l = *pos; + + read_lock_bh(&x25_forward_list_lock); + return l ? x25_get_forward_idx(--l) : SEQ_START_TOKEN; +} + +static void *x25_seq_forward_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct x25_forward *f; + + ++*pos; + if (v == SEQ_START_TOKEN) { + f = NULL; + if (!list_empty(&x25_forward_list)) + f = list_entry(x25_forward_list.next, + struct x25_forward, node); + goto out; + } + f = v; + if (f->node.next != &x25_forward_list) + f = list_entry(f->node.next, struct x25_forward, node); + else + f = NULL; +out: + return f; + +} + +static void x25_seq_forward_stop(struct seq_file *seq, void *v) +{ + read_unlock_bh(&x25_forward_list_lock); +} + +static int x25_seq_forward_show(struct seq_file *seq, void *v) +{ + struct x25_forward *f; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "lci dev1 dev2\n"); + goto out; + } + + f = v; + + seq_printf(seq, "%d %-10s %-10s\n", + f->lci, f->dev1->name, f->dev2->name); + +out: + return 0; +} + static struct seq_operations x25_seq_route_ops = { .start = x25_seq_route_start, .next = x25_seq_route_next, @@ -179,6 +248,13 @@ static struct seq_operations x25_seq_socket_ops = { .show = x25_seq_socket_show, }; +static struct seq_operations x25_seq_forward_ops = { + .start = x25_seq_forward_start, + .next = x25_seq_forward_next, + .stop = x25_seq_forward_stop, + .show = x25_seq_forward_show, +}; + static int x25_seq_socket_open(struct inode *inode, struct file *file) { return seq_open(file, &x25_seq_socket_ops); @@ -189,6 +265,11 @@ static int x25_seq_route_open(struct inode *inode, struct file *file) return seq_open(file, &x25_seq_route_ops); } +static int x25_seq_forward_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &x25_seq_forward_ops); +} + static struct file_operations x25_seq_socket_fops = { .owner = THIS_MODULE, .open = x25_seq_socket_open, @@ -205,6 +286,14 @@ static struct file_operations x25_seq_route_fops = { .release = seq_release, }; +static struct file_operations x25_seq_forward_fops = { + .owner = THIS_MODULE, + .open = x25_seq_forward_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static struct proc_dir_entry *x25_proc_dir; int __init x25_proc_init(void) @@ -225,9 +314,17 @@ int __init x25_proc_init(void) if (!p) goto out_socket; p->proc_fops = &x25_seq_socket_fops; + + p = create_proc_entry("forward", S_IRUGO, x25_proc_dir); + if (!p) + goto out_forward; + p->proc_fops = &x25_seq_forward_fops; rc = 0; + out: return rc; +out_forward: + remove_proc_entry("socket", x25_proc_dir); out_socket: remove_proc_entry("route", x25_proc_dir); out_route: @@ -237,6 +334,7 @@ out_route: void __exit x25_proc_exit(void) { + remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); remove_proc_entry("x25", proc_net); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 2a3fe98..883a848 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -119,6 +119,9 @@ void x25_route_device_down(struct net_device *dev) __x25_remove_route(rt); } write_unlock_bh(&x25_route_list_lock); + + /* Remove any related forwarding */ + x25_clear_forward_by_dev(dev); } /* diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0faab63..577a4f82 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -24,6 +24,17 @@ config XFRM_SUB_POLICY If unsure, say N. +config XFRM_MIGRATE + bool "Transformation migrate database (EXPERIMENTAL)" + depends on XFRM && EXPERIMENTAL + ---help--- + A feature to update locator(s) of a given IPsec security + association dynamically. This feature is required, for + instance, in a Mobile IPv6 environment with IPsec configuration + where mobile nodes change their attachment point to the Internet. + + If unsure, say N. + config NET_KEY tristate "PF_KEY sockets" select XFRM @@ -34,4 +45,19 @@ config NET_KEY Say Y unless you know what you are doing. +config NET_KEY_MIGRATE + bool "PF_KEY MIGRATE (EXPERIMENTAL)" + depends on NET_KEY && EXPERIMENTAL + select XFRM_MIGRATE + ---help--- + Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. + The PF_KEY MIGRATE message is used to dynamically update + locator(s) of a given IPsec security association. + This feature is required, for instance, in a Mobile IPv6 + environment with IPsec configuration where mobile nodes + change their attachment point to the Internet. Detail + information can be found in the internet-draft + <draft-sugimoto-mip6-pfkey-migrate>. + + If unsure, say N. diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index f1cf340..248f948 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -266,6 +266,23 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { + .name = "cbc(camellia)", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ .name = "cbc(twofish)", .compat = "twofish", diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b7e537f..fa7ce06 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2236,3 +2236,234 @@ void __init xfrm_init(void) xfrm_input_init(); } +#ifdef CONFIG_XFRM_MIGRATE +static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, + struct xfrm_selector *sel_tgt) +{ + if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { + if (sel_tgt->family == sel_cmp->family && + xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) == 0 && + xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) == 0 && + sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && + sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { + return 1; + } + } else { + if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { + return 1; + } + } + return 0; +} + +static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, + u8 dir, u8 type) +{ + struct xfrm_policy *pol, *ret = NULL; + struct hlist_node *entry; + struct hlist_head *chain; + u32 priority = ~0U; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type) { + ret = pol; + priority = ret->priority; + break; + } + } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type && + pol->priority < priority) { + ret = pol; + break; + } + } + + if (ret) + xfrm_pol_hold(ret); + + read_unlock_bh(&xfrm_policy_lock); + + return ret; +} + +static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +{ + int match = 0; + + if (t->mode == m->mode && t->id.proto == m->proto && + (m->reqid == 0 || t->reqid == m->reqid)) { + switch (t->mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, + m->old_family) == 0 && + xfrm_addr_cmp(&t->saddr, &m->old_saddr, + m->old_family) == 0) { + match = 1; + } + break; + case XFRM_MODE_TRANSPORT: + /* in case of transport mode, template does not store + any IP addresses, hence we just compare mode and + protocol */ + match = 1; + break; + default: + break; + } + } + return match; +} + +/* update endpoint address(es) of template(s) */ +static int xfrm_policy_migrate(struct xfrm_policy *pol, + struct xfrm_migrate *m, int num_migrate) +{ + struct xfrm_migrate *mp; + struct dst_entry *dst; + int i, j, n = 0; + + write_lock_bh(&pol->lock); + if (unlikely(pol->dead)) { + /* target policy has been deleted */ + write_unlock_bh(&pol->lock); + return -ENOENT; + } + + for (i = 0; i < pol->xfrm_nr; i++) { + for (j = 0, mp = m; j < num_migrate; j++, mp++) { + if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) + continue; + n++; + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + continue; + /* update endpoints */ + memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, + sizeof(pol->xfrm_vec[i].id.daddr)); + memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, + sizeof(pol->xfrm_vec[i].saddr)); + pol->xfrm_vec[i].encap_family = mp->new_family; + /* flush bundles */ + while ((dst = pol->bundles) != NULL) { + pol->bundles = dst->next; + dst_free(dst); + } + } + } + + write_unlock_bh(&pol->lock); + + if (!n) + return -ENODATA; + + return 0; +} + +static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +{ + int i, j; + + if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++) { + if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) == 0) && + (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family) == 0)) + return -EINVAL; + if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || + xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) + return -EINVAL; + + /* check if there is any duplicated entry */ + for (j = i + 1; j < num_migrate; j++) { + if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, + sizeof(m[i].old_daddr)) && + !memcmp(&m[i].old_saddr, &m[j].old_saddr, + sizeof(m[i].old_saddr)) && + m[i].proto == m[j].proto && + m[i].mode == m[j].mode && + m[i].reqid == m[j].reqid && + m[i].old_family == m[j].old_family) + return -EINVAL; + } + } + + return 0; +} + +int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + int i, err, nx_cur = 0, nx_new = 0; + struct xfrm_policy *pol = NULL; + struct xfrm_state *x, *xc; + struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; + struct xfrm_state *x_new[XFRM_MAX_DEPTH]; + struct xfrm_migrate *mp; + + if ((err = xfrm_migrate_check(m, num_migrate)) < 0) + goto out; + + /* Stage 1 - find policy */ + if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + err = -ENOENT; + goto out; + } + + /* Stage 2 - find and update state(s) */ + for (i = 0, mp = m; i < num_migrate; i++, mp++) { + if ((x = xfrm_migrate_state_find(mp))) { + x_cur[nx_cur] = x; + nx_cur++; + if ((xc = xfrm_state_migrate(x, mp))) { + x_new[nx_new] = xc; + nx_new++; + } else { + err = -ENODATA; + goto restore_state; + } + } + } + + /* Stage 3 - update policy */ + if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) + goto restore_state; + + /* Stage 4 - delete old state(s) */ + if (nx_cur) { + xfrm_states_put(x_cur, nx_cur); + xfrm_states_delete(x_cur, nx_cur); + } + + /* Stage 5 - announce */ + km_migrate(sel, dir, type, m, num_migrate); + + xfrm_pol_put(pol); + + return 0; +out: + return err; + +restore_state: + if (pol) + xfrm_pol_put(pol); + if (nx_cur) + xfrm_states_put(x_cur, nx_cur); + if (nx_new) + xfrm_states_delete(x_new, nx_new); + + return err; +} +EXPORT_SYMBOL(xfrm_migrate); +#endif + diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fdb08d9..91b0268 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -183,9 +183,6 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); @@ -831,6 +828,160 @@ out: } EXPORT_SYMBOL(xfrm_state_add); +#ifdef CONFIG_XFRM_MIGRATE +struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +{ + int err = -ENOMEM; + struct xfrm_state *x = xfrm_state_alloc(); + if (!x) + goto error; + + memcpy(&x->id, &orig->id, sizeof(x->id)); + memcpy(&x->sel, &orig->sel, sizeof(x->sel)); + memcpy(&x->lft, &orig->lft, sizeof(x->lft)); + x->props.mode = orig->props.mode; + x->props.replay_window = orig->props.replay_window; + x->props.reqid = orig->props.reqid; + x->props.family = orig->props.family; + x->props.saddr = orig->props.saddr; + + if (orig->aalg) { + x->aalg = xfrm_algo_clone(orig->aalg); + if (!x->aalg) + goto error; + } + x->props.aalgo = orig->props.aalgo; + + if (orig->ealg) { + x->ealg = xfrm_algo_clone(orig->ealg); + if (!x->ealg) + goto error; + } + x->props.ealgo = orig->props.ealgo; + + if (orig->calg) { + x->calg = xfrm_algo_clone(orig->calg); + if (!x->calg) + goto error; + } + x->props.calgo = orig->props.calgo; + + if (orig->encap) { + x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL); + if (!x->encap) + goto error; + } + + if (orig->coaddr) { + x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), + GFP_KERNEL); + if (!x->coaddr) + goto error; + } + + err = xfrm_init_state(x); + if (err) + goto error; + + x->props.flags = orig->props.flags; + + x->curlft.add_time = orig->curlft.add_time; + x->km.state = orig->km.state; + x->km.seq = orig->km.seq; + + return x; + + error: + if (errp) + *errp = err; + if (x) { + kfree(x->aalg); + kfree(x->ealg); + kfree(x->calg); + kfree(x->encap); + kfree(x->coaddr); + } + kfree(x); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_clone); + +/* xfrm_state_lock is held */ +struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +{ + unsigned int h; + struct xfrm_state *x; + struct hlist_node *entry; + + if (m->reqid) { + h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, + m->reqid, m->old_family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (m->reqid && x->props.reqid != m->reqid) + continue; + if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, + m->old_family) || + xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + return x; + } + } else { + h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, + m->old_family); + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, + m->old_family) || + xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + return x; + } + } + + return NULL; +} +EXPORT_SYMBOL(xfrm_migrate_state_find); + +struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m) +{ + struct xfrm_state *xc; + int err; + + xc = xfrm_state_clone(x, &err); + if (!xc) + return NULL; + + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); + memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); + + /* add state */ + if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + /* a care is needed when the destination address of the + state is to be updated as it is a part of triplet */ + xfrm_state_insert(xc); + } else { + if ((err = xfrm_state_add(xc)) < 0) + goto error; + } + + return xc; +error: + kfree(xc); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_migrate); +#endif + int xfrm_state_update(struct xfrm_state *x) { struct xfrm_state *x1; @@ -1345,6 +1496,26 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->migrate) { + ret = km->migrate(sel, dir, type, m, num_migrate); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_migrate); + int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; @@ -1458,7 +1629,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1470,11 +1641,14 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } +EXPORT_SYMBOL(xfrm_state_get_afinfo); +EXPORT_SYMBOL(xfrm_state_put_afinfo); + /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 82f36d3..079a5d3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1632,6 +1632,176 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +#ifdef CONFIG_XFRM_MIGRATE +static int verify_user_migrate(struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct xfrm_user_migrate *um; + + if (!rt) + return -EINVAL; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(*um)) + return -EINVAL; + + return 0; +} + +static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct rtattr **xfrma, int *num) +{ + struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct xfrm_user_migrate *um; + int i, num_migrate; + + um = RTA_DATA(rt); + num_migrate = (rt->rta_len - sizeof(*rt)) / sizeof(*um); + + if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++, um++, ma++) { + memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr)); + memcpy(&ma->old_saddr, &um->old_saddr, sizeof(ma->old_saddr)); + memcpy(&ma->new_daddr, &um->new_daddr, sizeof(ma->new_daddr)); + memcpy(&ma->new_saddr, &um->new_saddr, sizeof(ma->new_saddr)); + + ma->proto = um->proto; + ma->mode = um->mode; + ma->reqid = um->reqid; + + ma->old_family = um->old_family; + ma->new_family = um->new_family; + } + + *num = i; + return 0; +} + +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) +{ + struct xfrm_userpolicy_id *pi = NLMSG_DATA(nlh); + struct xfrm_migrate m[XFRM_MAX_DEPTH]; + u8 type; + int err; + int n = 0; + + err = verify_user_migrate((struct rtattr **)xfrma); + if (err) + return err; + + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + + err = copy_from_user_migrate((struct xfrm_migrate *)m, + (struct rtattr **)xfrma, &n); + if (err) + return err; + + if (!n) + return 0; + + xfrm_migrate(&pi->sel, pi->dir, type, m, n); + + return 0; +} +#else +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) +{ + return -ENOPROTOOPT; +} +#endif + +#ifdef CONFIG_XFRM_MIGRATE +static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) +{ + struct xfrm_user_migrate um; + + memset(&um, 0, sizeof(um)); + um.proto = m->proto; + um.mode = m->mode; + um.reqid = m->reqid; + um.old_family = m->old_family; + memcpy(&um.old_daddr, &m->old_daddr, sizeof(um.old_daddr)); + memcpy(&um.old_saddr, &m->old_saddr, sizeof(um.old_saddr)); + um.new_family = m->new_family; + memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); + memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); + + RTA_PUT(skb, XFRMA_MIGRATE, sizeof(um), &um); + return 0; + +rtattr_failure: + return -1; +} + +static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, + int num_migrate, struct xfrm_selector *sel, + u8 dir, u8 type) +{ + struct xfrm_migrate *mp; + struct xfrm_userpolicy_id *pol_id; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + int i; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); + pol_id = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + /* copy data from selector, dir, and type to the pol_id */ + memset(pol_id, 0, sizeof(*pol_id)); + memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); + pol_id->dir = dir; + + if (copy_to_user_policy_type(type, skb) < 0) + goto nlmsg_failure; + + for (i = 0, mp = m ; i < num_migrate; i++, mp++) { + if (copy_to_user_migrate(mp, skb) < 0) + goto nlmsg_failure; + } + + nlh->nlmsg_len = skb->tail - b; + return skb->len; +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + struct sk_buff *skb; + size_t len; + + len = RTA_SPACE(sizeof(struct xfrm_user_migrate) * num_migrate); + len += NLMSG_SPACE(sizeof(struct xfrm_userpolicy_id)); +#ifdef CONFIG_XFRM_SUB_POLICY + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); +#endif + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + /* build migrate */ + if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_MIGRATE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, + GFP_ATOMIC); +} +#else +static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + return -ENOPROTOOPT; +} +#endif #define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) @@ -1653,6 +1823,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), }; #undef XMSGSIZE @@ -1679,6 +1850,7 @@ static struct xfrm_link { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae }, [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, }; static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) @@ -2285,6 +2457,7 @@ static struct xfrm_mgr netlink_mgr = { .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, + .migrate = xfrm_send_migrate, }; static int __init xfrm_user_init(void) |