diff options
Diffstat (limited to 'net/ipv4')
40 files changed, 1013 insertions, 345 deletions
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index d07b0c1..7ed3e4a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -444,7 +444,7 @@ static int ah_init_state(struct xfrm_state *x) } ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; + ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7620382..e312661 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1468,6 +1468,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), @@ -1605,7 +1606,7 @@ static __net_init int devinet_init_net(struct net *net) all = &ipv4_devconf; dflt = &ipv4_devconf_dflt; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); if (all == NULL) goto err_alloc_all; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 12f7287..1948895 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -530,7 +530,7 @@ static int esp_init_authenc(struct xfrm_state *x) } err = crypto_aead_setauthsize( - aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + aead, x->aalg->alg_trunc_len / 8); if (err) goto free_key; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6c1e56a..3323168 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -241,16 +241,17 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, .iif = oif }; struct fib_result res; - int no_addr, rpf; + int no_addr, rpf, accept_local; int ret; struct net *net; - no_addr = rpf = 0; + no_addr = rpf = accept_local = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); } rcu_read_unlock(); @@ -260,8 +261,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, net = dev_net(dev); if (fib_lookup(net, &fl, &res)) goto last_resort; - if (res.type != RTN_UNICAST) - goto e_inval_res; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !accept_local) + goto e_inval_res; + } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -959,9 +962,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - case NETDEV_UNREGISTER_PERNET: rt_cache_flush(dev_net(dev), 0); break; + case NETDEV_UNREGISTER_BATCH: + rt_cache_flush_batch(); + break; } return NOTIFY_DONE; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 835262c..ca2d07b 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -284,7 +284,7 @@ static int fib_default_rules_init(struct fib_rules_ops *ops) { int err; - err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT); + err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0); if (err < 0) return err; err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); @@ -301,13 +301,9 @@ int __net_init fib4_rules_init(struct net *net) int err; struct fib_rules_ops *ops; - ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL); - if (ops == NULL) - return -ENOMEM; - INIT_LIST_HEAD(&ops->rules_list); - ops->fro_net = net; - - fib_rules_register(ops); + ops = fib_rules_register(&fib4_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); err = fib_default_rules_init(ops); if (err < 0) @@ -318,12 +314,10 @@ int __net_init fib4_rules_init(struct net *net) fail: /* also cleans all rules already added */ fib_rules_unregister(ops); - kfree(ops); return err; } void __net_exit fib4_rules_exit(struct net *net) { fib_rules_unregister(net->ipv4.rules_ops); - kfree(net->ipv4.rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9b096d6..ed19aa6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -228,7 +228,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = &fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { - if (fi->fib_net != nfi->fib_net) + if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) continue; @@ -1047,7 +1047,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) return 0; hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi->fib_net != net) + if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6110c6d..76c0840 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1899,8 +1899,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct err = -EADDRNOTAVAIL; for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr - && pmc->multi.imr_ifindex == imr.imr_ifindex) + if ((pmc->multi.imr_multiaddr.s_addr == + imr.imr_multiaddr.s_addr) && + (pmc->multi.imr_ifindex == imr.imr_ifindex)) break; } if (!pmc) { /* must have a prior join */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 26fb50e..ee16475 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -112,7 +112,7 @@ again: hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == rover) { + if (net_eq(ib_net(tb), net) && tb->port == rover) { if (tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN && @@ -158,7 +158,7 @@ have_snum: hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == snum) + if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } tb = NULL; @@ -531,7 +531,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, &expire, &resend); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req) || + !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || inet_rsk(req)->acked)) { unsigned long timeo; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 47ad7aa..21e5e32 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -286,6 +286,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; + int twrefcnt = 0; spin_lock(lock); @@ -318,20 +319,23 @@ unique: sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + if (tw) { + twrefcnt = inet_twsk_unhash(tw); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + } spin_unlock(lock); + if (twrefcnt) + inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } - return 0; not_unique: @@ -454,7 +458,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (ib_net(tb) == net && tb->port == port) { + if (net_eq(ib_net(tb), net) && + tb->port == port) { if (tb->fastreuse >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1f5d508..0fdf45e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,22 +14,33 @@ #include <net/inet_timewait_sock.h> #include <net/ip.h> + +/* + * unhash a timewait socket from established hash + * lock must be hold by caller + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ @@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + if (tb) { + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt++; + } spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { @@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - inet_twsk_put(tw); + while (refcnt) { + inet_twsk_put(tw); + refcnt--; + } } static noinline void inet_twsk_free(struct inet_timewait_sock *tw) @@ -101,13 +118,22 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * Should be done before removing sk from established chain * because readers are lockless and search established first. */ - atomic_inc(&tw->tw_refcnt); inet_twsk_add_node_rcu(tw, &ehead->twchain); /* Step 3: Remove SK from established hash. */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + /* + * Notes : + * - We initially set tw_refcnt to 0 in inet_twsk_alloc() + * - We add one reference for the bhash link + * - We add one reference for the ehash link + * - We want this refcnt update done before allowing other + * threads to find this tw in ehash chain. + */ + atomic_add(1 + 1 + 1, &tw->tw_refcnt); + spin_unlock(lock); } @@ -139,7 +165,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + /* + * Because we use RCU lookups, we should not set tw_refcnt + * to a non null value before everything is setup for this + * timewait socket. + */ + atomic_set(&tw->tw_refcnt, 0); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } @@ -421,37 +452,46 @@ out: EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; struct sock *sk; struct hlist_nulls_node *node; - int h; + unsigned int slot; - local_bh_disable(); - for (h = 0; h <= hashinfo->ehash_mask; h++) { - struct inet_ehash_bucket *head = - inet_ehash_bucket(hashinfo, h); - spinlock_t *lock = inet_ehash_lockp(hashinfo, h); + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + rcu_read_lock(); restart: - spin_lock(lock); - sk_nulls_for_each(sk, node, &head->twchain) { - + sk_nulls_for_each_rcu(sk, node, &head->twchain) { tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) + continue; + + if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) continue; - atomic_inc(&tw->tw_refcnt); - spin_unlock(lock); + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); - - goto restart; + goto restart_rcu; } - spin_unlock(lock); + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); } - local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b007f8a..c473531 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -564,7 +564,7 @@ out_oversize: printk(KERN_INFO "Oversized IP packet from %pI4.\n", &qp->saddr); out_fail: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; } @@ -658,7 +658,7 @@ static int ip4_frags_ns_ctl_register(struct net *net) struct ctl_table_header *hdr; table = ip4_frags_ns_ctl_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -676,7 +676,7 @@ static int ip4_frags_ns_ctl_register(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c5f6af5..f36ce15 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1309,17 +1309,8 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) static int ipgre_init_net(struct net *net) { + struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; - struct ipgre_net *ign; - - err = -ENOMEM; - ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); - if (ign == NULL) - goto err_alloc; - - err = net_assign_generic(net, ipgre_net_id, ign); - if (err < 0) - goto err_assign; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", ipgre_tunnel_setup); @@ -1340,10 +1331,6 @@ static int ipgre_init_net(struct net *net) err_reg_dev: free_netdev(ign->fb_tunnel_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(ign); -err_alloc: return err; } @@ -1357,12 +1344,13 @@ static void ipgre_exit_net(struct net *net) ipgre_destroy_tunnels(ign, &list); unregister_netdevice_many(&list); rtnl_unlock(); - kfree(ign); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, + .id = &ipgre_net_id, + .size = sizeof(struct ipgre_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1682,7 +1670,7 @@ static int __init ipgre_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); + err = register_pernet_device(&ipgre_net_ops); if (err < 0) goto gen_device_failed; @@ -1700,7 +1688,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); goto out; @@ -1710,7 +1698,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fdf51ba..c29de98 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -164,7 +164,7 @@ int ip_call_ra_chain(struct sk_buff *skb) if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && - sock_net(sk) == dev_net(dev)) { + net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b78e615..e34013a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -503,8 +503,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + truesizes += frag->truesize; } /* Everything is OK. Generate! */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7242ffc..eda04fe 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -775,17 +775,8 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int ipip_init_net(struct net *net) { + struct ipip_net *ipn = net_generic(net, ipip_net_id); int err; - struct ipip_net *ipn; - - err = -ENOMEM; - ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL); - if (ipn == NULL) - goto err_alloc; - - err = net_assign_generic(net, ipip_net_id, ipn); - if (err < 0) - goto err_assign; ipn->tunnels[0] = ipn->tunnels_wc; ipn->tunnels[1] = ipn->tunnels_l; @@ -812,29 +803,26 @@ err_reg_dev: free_netdev(ipn->fb_tunnel_dev); err_alloc_dev: /* nothing */ -err_assign: - kfree(ipn); -err_alloc: return err; } static void ipip_exit_net(struct net *net) { - struct ipip_net *ipn; + struct ipip_net *ipn = net_generic(net, ipip_net_id); LIST_HEAD(list); - ipn = net_generic(net, ipip_net_id); rtnl_lock(); ipip_destroy_tunnels(ipn, &list); unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); - kfree(ipn); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, .exit = ipip_exit_net, + .id = &ipip_net_id, + .size = sizeof(struct ipip_net), }; static int __init ipip_init(void) @@ -848,7 +836,7 @@ static int __init ipip_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops); + err = register_pernet_device(&ipip_net_ops); if (err) xfrm4_tunnel_deregister(&ipip_handler, AF_INET); @@ -860,7 +848,7 @@ static void __exit ipip_fini(void) if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) printk(KERN_INFO "ipip close: can't deregister tunnel\n"); - unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops); + unregister_pernet_device(&ipip_net_ops); } module_init(ipip_init); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 1725dc0..f53cb8d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -155,10 +155,10 @@ static int nf_ip_reroute(struct sk_buff *skb, if (entry->hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); - if (!(iph->tos == rt_info->tos - && skb->mark == rt_info->mark - && iph->daddr == rt_info->daddr - && iph->saddr == rt_info->saddr)) + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 27774c9..0663276 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -384,11 +384,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) - && (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->arp)) || visited) { + if ((e->target_offset == sizeof(struct arpt_entry) && + (strcmp(t->target.u.user.name, + ARPT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->arp)) || + visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -427,8 +427,8 @@ static int mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + ARPT_STANDARD_TARGET) == 0 && + newpos >= 0) { if (newpos > newinfo->size - sizeof(struct arpt_entry)) { duprintf("mark_source_chains: " @@ -559,8 +559,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, { unsigned int h; - if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 - || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1251,8 +1251,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); - if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 - || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c156db2..49ad447 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -497,10 +497,9 @@ ipq_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_FIREWALL && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { write_lock_bh(&queue_lock); - if ((n->net == &init_net) && (n->pid == peer_pid)) + if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -622,7 +621,7 @@ cleanup_netlink_notifier: static void __exit ip_queue_fini(void) { nf_unregister_queue_handlers(&nfqh); - synchronize_net(); + ipq_flush(NULL, 0); #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cde755d..572330a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -89,9 +89,9 @@ ip_packet_match(const struct iphdr *ip, #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) - || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) { + IPT_INV_SRCIP) || + FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", @@ -122,8 +122,8 @@ ip_packet_match(const struct iphdr *ip, } /* Check specific protocol */ - if (ipinfo->proto - && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + if (ipinfo->proto && + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { dprintf("Packet protocol %hi does not match %hi.%s\n", ip->protocol, ipinfo->proto, ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); @@ -246,11 +246,11 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) - && strcmp(t->target.u.kernel.target->name, - IPT_STANDARD_TARGET) == 0 - && t->verdict < 0 - && unconditional(&s->ip)) { + if (s->target_offset == sizeof(struct ipt_entry) && + strcmp(t->target.u.kernel.target->name, + IPT_STANDARD_TARGET) == 0 && + t->verdict < 0 && + unconditional(&s->ip)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -388,8 +388,8 @@ ipt_do_table(struct sk_buff *skb, back = get_entry(table_base, back->comefrom); continue; } - if (table_base + v != ipt_next_entry(e) - && !(e->ip.flags & IPT_F_GOTO)) { + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = ipt_next_entry(e); next->comefrom = (void *)back - table_base; @@ -473,11 +473,11 @@ mark_source_chains(struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) - && (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->ip)) || visited) { + if ((e->target_offset == sizeof(struct ipt_entry) && + (strcmp(t->target.u.user.name, + IPT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->ip)) || + visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -524,8 +524,8 @@ mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + IPT_STANDARD_TARGET) == 0 && + newpos >= 0) { if (newpos > newinfo->size - sizeof(struct ipt_entry)) { duprintf("mark_source_chains: " @@ -735,8 +735,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, { unsigned int h; - if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 - || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1548,8 +1548,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); - if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 - || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2e4f98b..40ca2d2 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -303,9 +303,9 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(skb)->protocol == IPPROTO_ICMP - && (ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && + (ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -362,8 +362,8 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) return false; } - if (e->ip.dmsk.s_addr != htonl(0xffffffff) - || e->ip.dst.s_addr == 0) { + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || + e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return false; } @@ -495,14 +495,14 @@ arp_mangle(unsigned int hook, struct clusterip_config *c; /* we don't care about non-ethernet and non-ipv4 ARP */ - if (arp->ar_hrd != htons(ARPHRD_ETHER) - || arp->ar_pro != htons(ETH_P_IP) - || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) return NF_ACCEPT; /* we only want to mangle arp requests and replies */ - if (arp->ar_op != htons(ARPOP_REPLY) - && arp->ar_op != htons(ARPOP_REQUEST)) + if (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST)) return NF_ACCEPT; payload = (void *)(arp+1); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa0..549e206 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -85,8 +85,8 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) if (!set_ect_ip(skb, einfo)) return NF_DROP; - if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(skb)->protocol == IPPROTO_TCP) + if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && + ip_hdr(skb)->protocol == IPPROTO_TCP) if (!set_ect_tcp(skb, einfo)) return NF_DROP; @@ -108,8 +108,8 @@ static bool ecn_tg_check(const struct xt_tgchk_param *par) einfo->ip_ect); return false; } - if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) - && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && + (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); return false; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index acc44c6..ee128ef 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -74,8 +74,8 @@ static void dump_packet(const struct nf_loginfo *info, if (ntohs(ih->frag_off) & IP_OFFSET) printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((logflags & IPT_LOG_IPOPT) - && ih->ihl * 4 > sizeof(struct iphdr)) { + if ((logflags & IPT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { const unsigned char *op; unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; unsigned int i, optsize; @@ -146,8 +146,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & IPT_LOG_TCPOPT) - && th->doff * 4 > sizeof(struct tcphdr)) { + if ((logflags & IPT_LOG_TCPOPT) && + th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; const unsigned char *op; unsigned int i, optsize; @@ -238,9 +238,9 @@ static void dump_packet(const struct nf_loginfo *info, printk("TYPE=%u CODE=%u ", ich->type, ich->code); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES - && required_len[ich->type] - && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { printk("INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl*4); break; @@ -276,8 +276,8 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH - && ich->code == ICMP_FRAG_NEEDED) + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) printk("MTU=%u ", ntohs(ich->un.frag.mtu)); } break; @@ -407,8 +407,8 @@ ipt_log_packet(u_int8_t pf, if (in && !out) { /* MAC logging for input chain only. */ printk("MAC="); - if (skb->dev && skb->dev->hard_header_len - && skb->mac_header != skb->network_header) { + if (skb->dev && skb->dev->hard_header_len && + skb->mac_header != skb->network_header) { int i; const unsigned char *p = skb_mac_header(skb); for (i = 0; i < skb->dev->hard_header_len; i++,p++) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dada086..650b540 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -59,8 +59,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index c93ae44..5113b8f 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -184,8 +184,8 @@ static bool reject_tg_check(const struct xt_tgchk_param *par) return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ip.proto != IPPROTO_TCP - || (e->ip.invflags & XT_INV_PROTO)) { + if (e->ip.proto != IPPROTO_TCP || + (e->ip.invflags & XT_INV_PROTO)) { printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); return false; } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d32cc4b..399061c 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -226,9 +226,9 @@ static void ipt_ulog_packet(unsigned int hooknum, else *(pm->prefix) = '\0'; - if (in && in->hard_header_len > 0 - && skb->mac_header != skb->network_header - && in->hard_header_len <= ULOG_MAC_LEN) { + if (in && in->hard_header_len > 0 && + skb->mac_header != skb->network_header && + in->hard_header_len <= ULOG_MAC_LEN) { memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); pm->mac_len = in->hard_header_len; } else diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 6289b64..2a1e56b 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -96,8 +96,8 @@ static bool ecn_mt_check(const struct xt_mtchk_param *par) if (info->invert & IPT_ECN_OP_MATCH_MASK) return false; - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) - && ip->proto != IPPROTO_TCP) { + if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && + ip->proto != IPPROTO_TCP) { printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" " non-tcp packets\n"); return false; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 036047f..fae78c3 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -130,8 +130,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; /* Save things which could affect route */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 99eb76c6..3bd3d63 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -94,8 +94,8 @@ ipt_local_out_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { /* Somebody is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_security); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d71ba76..e3dd936 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -54,8 +54,8 @@ static const u_int8_t invmap[] = { static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - if (orig->dst.u.icmp.type >= sizeof(invmap) - || !invmap[orig->dst.u.icmp.type]) + if (orig->dst.u.icmp.type >= sizeof(invmap) || + !invmap[orig->dst.u.icmp.type]) return false; tuple->src.u.icmp.id = orig->src.u.icmp.id; @@ -101,8 +101,8 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, [ICMP_ADDRESS] = 1 }; - if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || + !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ pr_debug("icmp: can't create new conn with type %u\n", ct->tuplehash[0].tuple.dst.u.icmp.type); @@ -201,11 +201,11 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, } /* Need to track icmp error message? */ - if (icmph->type != ICMP_DEST_UNREACH - && icmph->type != ICMP_SOURCE_QUENCH - && icmph->type != ICMP_TIME_EXCEEDED - && icmph->type != ICMP_PARAMETERPROB - && icmph->type != ICMP_REDIRECT) + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) return NF_ACCEPT; return icmp_error_message(net, skb, ctinfo, hooknum); @@ -238,17 +238,17 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { static int icmp_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMP_TYPE] - || !tb[CTA_PROTO_ICMP_CODE] - || !tb[CTA_PROTO_ICMP_ID]) + if (!tb[CTA_PROTO_ICMP_TYPE] || + !tb[CTA_PROTO_ICMP_CODE] || + !tb[CTA_PROTO_ICMP_ID]) return -EINVAL; tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); - if (tuple->dst.u.icmp.type >= sizeof(invmap) - || !invmap[tuple->dst.u.icmp.type]) + if (tuple->dst.u.icmp.type >= sizeof(invmap) || + !invmap[tuple->dst.u.icmp.type]) return -EINVAL; return 0; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index f9520fa..7f10a6b 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -41,18 +41,14 @@ adjust_tcp_sequence(u32 seq, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - int dir; - struct nf_nat_seq *this_way, *other_way; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way = &nat->seq[dir]; - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq); - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + seq, sizediff); - pr_debug("nf_nat_resize_packet: Seq_offset before: "); + pr_debug("adjust_tcp_sequence: Seq_offset before: "); DUMP_OFFSET(this_way); spin_lock_bh(&nf_nat_seqofs_lock); @@ -63,13 +59,13 @@ adjust_tcp_sequence(u32 seq, * retransmit */ if (this_way->offset_before == this_way->offset_after || before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += sizediff; } spin_unlock_bh(&nf_nat_seqofs_lock); - pr_debug("nf_nat_resize_packet: Seq_offset after: "); + pr_debug("adjust_tcp_sequence: Seq_offset after: "); DUMP_OFFSET(this_way); } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5f41d01..5678e95 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -197,11 +197,11 @@ nf_nat_out(unsigned int hooknum, (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - if (ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip - || ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all - ) + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + ) return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; } #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7547944..90cdcfc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -513,43 +513,42 @@ static const struct file_operations rt_cpu_seq_fops = { }; #ifdef CONFIG_NET_CLS_ROUTE -static int ip_rt_acct_read(char *buffer, char **start, off_t offset, - int length, int *eof, void *data) -{ - unsigned int i; - - if ((offset & 3) || (length & 3)) - return -EIO; - - if (offset >= sizeof(struct ip_rt_acct) * 256) { - *eof = 1; - return 0; - } - - if (offset + length >= sizeof(struct ip_rt_acct) * 256) { - length = sizeof(struct ip_rt_acct) * 256 - offset; - *eof = 1; +static int rt_acct_proc_show(struct seq_file *m, void *v) +{ + struct ip_rt_acct *dst, *src; + unsigned int i, j; + + dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); + if (!dst) + return -ENOMEM; + + for_each_possible_cpu(i) { + src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); + for (j = 0; j < 256; j++) { + dst[j].o_bytes += src[j].o_bytes; + dst[j].o_packets += src[j].o_packets; + dst[j].i_bytes += src[j].i_bytes; + dst[j].i_packets += src[j].i_packets; + } } - offset /= sizeof(u32); - - if (length > 0) { - u32 *dst = (u32 *) buffer; - - *start = buffer; - memset(dst, 0, length); - - for_each_possible_cpu(i) { - unsigned int j; - u32 *src; + seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); + kfree(dst); + return 0; +} - src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset; - for (j = 0; j < length/4; j++) - dst[j] += src[j]; - } - } - return length; +static int rt_acct_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rt_acct_proc_show, NULL); } + +static const struct file_operations rt_acct_proc_fops = { + .owner = THIS_MODULE, + .open = rt_acct_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif static int __net_init ip_rt_do_proc_init(struct net *net) @@ -567,8 +566,7 @@ static int __net_init ip_rt_do_proc_init(struct net *net) goto err2; #ifdef CONFIG_NET_CLS_ROUTE - pde = create_proc_read_entry("rt_acct", 0, net->proc_net, - ip_rt_acct_read, NULL); + pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); if (!pde) goto err3; #endif @@ -703,7 +701,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); + return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); } static inline int rt_is_expired(struct rtable *rth) @@ -902,6 +900,12 @@ void rt_cache_flush(struct net *net, int delay) rt_do_flush(!in_softirq()); } +/* Flush previous cache invalidated entries from the cache */ +void rt_cache_flush_batch(void) +{ + rt_do_flush(!in_softirq()); +} + /* * We change rt_genid and let gc do the cleanup */ @@ -3310,7 +3314,7 @@ static __net_init int sysctl_route_net_init(struct net *net) struct ctl_table *tbl; tbl = ipv4_route_flush_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3146cc4..26399ad 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -341,7 +342,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2dcf04d..13f7ab6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -714,6 +714,14 @@ static struct ctl_table ipv4_table[] = { }, { .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_cookie_size", + .data = &sysctl_tcp_cookie_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), @@ -818,7 +826,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) struct ctl_table *table; table = ipv4_net_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -849,7 +857,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 524f976..c8666b7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -264,6 +264,7 @@ #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> +#include <linux/time.h> #include <net/icmp.h> #include <net/tcp.h> @@ -2059,6 +2060,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->bytes_acked = 0; + tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); @@ -2083,8 +2085,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int val; int err = 0; - /* This is a string value all the others are int's */ - if (optname == TCP_CONGESTION) { + /* These are data/string values, all the others are ints */ + switch (optname) { + case TCP_CONGESTION: { char name[TCP_CA_NAME_MAX]; if (optlen < 1) @@ -2101,6 +2104,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } + case TCP_COOKIE_TRANSACTIONS: { + struct tcp_cookie_transactions ctd; + struct tcp_cookie_values *cvp = NULL; + + if (sizeof(ctd) > optlen) + return -EINVAL; + if (copy_from_user(&ctd, optval, sizeof(ctd))) + return -EFAULT; + + if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || + ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) + return -EINVAL; + + if (ctd.tcpct_cookie_desired == 0) { + /* default to global value */ + } else if ((0x1 & ctd.tcpct_cookie_desired) || + ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || + ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { + return -EINVAL; + } + + if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { + /* Supercedes all other values */ + lock_sock(sk); + if (tp->cookie_values != NULL) { + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + tp->cookie_values = NULL; + } + tp->rx_opt.cookie_in_always = 0; /* false */ + tp->rx_opt.cookie_out_never = 1; /* true */ + release_sock(sk); + return err; + } + + /* Allocate ancillary memory before locking. + */ + if (ctd.tcpct_used > 0 || + (tp->cookie_values == NULL && + (sysctl_tcp_cookie_size > 0 || + ctd.tcpct_cookie_desired > 0 || + ctd.tcpct_s_data_desired > 0))) { + cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, + GFP_KERNEL); + if (cvp == NULL) + return -ENOMEM; + } + lock_sock(sk); + tp->rx_opt.cookie_in_always = + (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); + tp->rx_opt.cookie_out_never = 0; /* false */ + + if (tp->cookie_values != NULL) { + if (cvp != NULL) { + /* Changed values are recorded by a changed + * pointer, ensuring the cookie will differ, + * without separately hashing each value later. + */ + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + kref_init(&cvp->kref); + tp->cookie_values = cvp; + } else { + cvp = tp->cookie_values; + } + } + if (cvp != NULL) { + cvp->cookie_desired = ctd.tcpct_cookie_desired; + + if (ctd.tcpct_used > 0) { + memcpy(cvp->s_data_payload, ctd.tcpct_value, + ctd.tcpct_used); + cvp->s_data_desired = ctd.tcpct_used; + cvp->s_data_constant = 1; /* true */ + } else { + /* No constant payload data. */ + cvp->s_data_desired = ctd.tcpct_s_data_desired; + cvp->s_data_constant = 0; /* false */ + } + } + release_sock(sk); + return err; + } + default: + /* fallthru */ + break; + }; if (optlen < sizeof(int)) return -EINVAL; @@ -2425,6 +2515,47 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; + + case TCP_COOKIE_TRANSACTIONS: { + struct tcp_cookie_transactions ctd; + struct tcp_cookie_values *cvp = tp->cookie_values; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < sizeof(ctd)) + return -EINVAL; + + memset(&ctd, 0, sizeof(ctd)); + ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? + TCP_COOKIE_IN_ALWAYS : 0) + | (tp->rx_opt.cookie_out_never ? + TCP_COOKIE_OUT_NEVER : 0); + + if (cvp != NULL) { + ctd.tcpct_flags |= (cvp->s_data_in ? + TCP_S_DATA_IN : 0) + | (cvp->s_data_out ? + TCP_S_DATA_OUT : 0); + + ctd.tcpct_cookie_desired = cvp->cookie_desired; + ctd.tcpct_s_data_desired = cvp->s_data_desired; + + /* Cookie(s) saved, return as nonce */ + if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) { + /* impossible? */ + return -EINVAL; + } + memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], + cvp->cookie_pair_size); + ctd.tcpct_used = cvp->cookie_pair_size; + } + + if (put_user(sizeof(ctd), optlen)) + return -EFAULT; + if (copy_to_user(optval, &ctd, sizeof(ctd))) + return -EFAULT; + return 0; + } default: return -ENOPROTOOPT; } @@ -2847,6 +2978,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif +/** + * Each Responder maintains up to two secret values concurrently for + * efficient secret rollover. Each secret value has 4 states: + * + * Generating. (tcp_secret_generating != tcp_secret_primary) + * Generates new Responder-Cookies, but not yet used for primary + * verification. This is a short-term state, typically lasting only + * one round trip time (RTT). + * + * Primary. (tcp_secret_generating == tcp_secret_primary) + * Used both for generation and primary verification. + * + * Retiring. (tcp_secret_retiring != tcp_secret_secondary) + * Used for verification, until the first failure that can be + * verified by the newer Generating secret. At that time, this + * cookie's state is changed to Secondary, and the Generating + * cookie's state is changed to Primary. This is a short-term state, + * typically lasting only one round trip time (RTT). + * + * Secondary. (tcp_secret_retiring == tcp_secret_secondary) + * Used for secondary verification, after primary verification + * failures. This state lasts no more than twice the Maximum Segment + * Lifetime (2MSL). Then, the secret is discarded. + */ +struct tcp_cookie_secret { + /* The secret is divided into two parts. The digest part is the + * equivalent of previously hashing a secret and saving the state, + * and serves as an initialization vector (IV). The message part + * serves as the trailing secret. + */ + u32 secrets[COOKIE_WORKSPACE_WORDS]; + unsigned long expires; +}; + +#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) +#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) +#define TCP_SECRET_LIFE (HZ * 600) + +static struct tcp_cookie_secret tcp_secret_one; +static struct tcp_cookie_secret tcp_secret_two; + +/* Essentially a circular list, without dynamic allocation. */ +static struct tcp_cookie_secret *tcp_secret_generating; +static struct tcp_cookie_secret *tcp_secret_primary; +static struct tcp_cookie_secret *tcp_secret_retiring; +static struct tcp_cookie_secret *tcp_secret_secondary; + +static DEFINE_SPINLOCK(tcp_secret_locker); + +/* Select a pseudo-random word in the cookie workspace. + */ +static inline u32 tcp_cookie_work(const u32 *ws, const int n) +{ + return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; +} + +/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. + * Called in softirq context. + * Returns: 0 for success. + */ +int tcp_cookie_generator(u32 *bakery) +{ + unsigned long jiffy = jiffies; + + if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { + spin_lock_bh(&tcp_secret_locker); + if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { + /* refreshed by another */ + memcpy(bakery, + &tcp_secret_generating->secrets[0], + COOKIE_WORKSPACE_WORDS); + } else { + /* still needs refreshing */ + get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); + + /* The first time, paranoia assumes that the + * randomization function isn't as strong. But, + * this secret initialization is delayed until + * the last possible moment (packet arrival). + * Although that time is observable, it is + * unpredictably variable. Mash in the most + * volatile clock bits available, and expire the + * secret extra quickly. + */ + if (unlikely(tcp_secret_primary->expires == + tcp_secret_secondary->expires)) { + struct timespec tv; + + getnstimeofday(&tv); + bakery[COOKIE_DIGEST_WORDS+0] ^= + (u32)tv.tv_nsec; + + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_1MSL + + (0x0f & tcp_cookie_work(bakery, 0)); + } else { + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_LIFE + + (0xff & tcp_cookie_work(bakery, 1)); + tcp_secret_primary->expires = jiffy + + TCP_SECRET_2MSL + + (0x1f & tcp_cookie_work(bakery, 2)); + } + memcpy(&tcp_secret_secondary->secrets[0], + bakery, COOKIE_WORKSPACE_WORDS); + + rcu_assign_pointer(tcp_secret_generating, + tcp_secret_secondary); + rcu_assign_pointer(tcp_secret_retiring, + tcp_secret_primary); + /* + * Neither call_rcu() nor synchronize_rcu() needed. + * Retiring data is not freed. It is replaced after + * further (locked) pointer updates, and a quiet time + * (minimum 1MSL, maximum LIFE - 2MSL). + */ + } + spin_unlock_bh(&tcp_secret_locker); + } else { + rcu_read_lock_bh(); + memcpy(bakery, + &rcu_dereference(tcp_secret_generating)->secrets[0], + COOKIE_WORKSPACE_WORDS); + rcu_read_unlock_bh(); + } + return 0; +} +EXPORT_SYMBOL(tcp_cookie_generator); + void tcp_done(struct sock *sk) { if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) @@ -2881,6 +3141,7 @@ void __init tcp_init(void) struct sk_buff *skb = NULL; unsigned long nr_pages, limit; int order, i, max_share; + unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2974,6 +3235,15 @@ void __init tcp_init(void) tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); + + memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); + memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); + tcp_secret_one.expires = jiffy; /* past due */ + tcp_secret_two.expires = jiffy; /* past due */ + tcp_secret_generating = &tcp_secret_one; + tcp_secret_primary = &tcp_secret_one; + tcp_secret_retiring = &tcp_secret_two; + tcp_secret_secondary = &tcp_secret_two; } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index fcbcd4f..939edb3 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -27,7 +27,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; } else { - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } if (info != NULL) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be166e0..57ae96a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3698,7 +3698,7 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, struct dst_entry *dst) + u8 **hvpp, int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3785,7 +3785,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, */ break; #endif - } + case TCPOPT_COOKIE: + /* This option is variable length. + */ + switch (opsize) { + case TCPOLEN_COOKIE_BASE: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_PAIR: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_MIN+0: + case TCPOLEN_COOKIE_MIN+2: + case TCPOLEN_COOKIE_MIN+4: + case TCPOLEN_COOKIE_MIN+6: + case TCPOLEN_COOKIE_MAX: + /* 16-bit multiple */ + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + default: + /* ignore option */ + break; + }; + break; + }; ptr += opsize-2; length -= opsize; @@ -3813,17 +3836,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) * If it is wrong it falls back on tcp_parse_options(). */ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) + struct tcp_sock *tp, u8 **hvpp) { - if (th->doff == sizeof(struct tcphdr) >> 2) { + /* In the spirit of fast parsing, compare doff directly to constant + * values. Because equality is used, short doff can be ignored here. + */ + if (th->doff == (sizeof(*th) / 4)) { tp->rx_opt.saw_tstamp = 0; return 0; } else if (tp->rx_opt.tstamp_ok && - th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return 1; } @@ -5077,10 +5103,12 @@ out: static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, int syn_inerr) { + u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp, &hash_location) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5368,12 +5396,14 @@ discard: static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { - struct tcp_sock *tp = tcp_sk(sk); + u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); - int saved_clamp = tp->rx_opt.mss_clamp; + struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_cookie_values *cvp = tp->cookie_values; + int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); if (th->ack) { /* rfc793: @@ -5470,6 +5500,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * is initialized. */ tp->copied_seq = tp->rcv_nxt; + + if (cvp != NULL && + cvp->cookie_pair_size > 0 && + tp->rx_opt.cookie_plus > 0) { + int cookie_size = tp->rx_opt.cookie_plus + - TCPOLEN_COOKIE_BASE; + int cookie_pair_size = cookie_size + + cvp->cookie_desired; + + /* A cookie extension option was sent and returned. + * Note that each incoming SYNACK replaces the + * Responder cookie. The initial exchange is most + * fragile, as protection against spoofing relies + * entirely upon the sequence and timestamp (above). + * This replacement strategy allows the correct pair to + * pass through, while any others will be filtered via + * Responder verification later. + */ + if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { + memcpy(&cvp->cookie_pair[cvp->cookie_desired], + hash_location, cookie_size); + cvp->cookie_pair_size = cookie_pair_size; + } + } + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index df18ce0..29002ab 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -753,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -774,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { - return __tcp_v4_send_synack(sk, req, NULL); + return __tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1211,13 +1213,16 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct inet_request_sock *ireq; + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; + struct inet_request_sock *ireq; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1269,15 +1274,49 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_release; - tcp_parse_options(skb, &tmp_opt, 0, dst); + /* Secret recipe starts with IP addresses */ + *mess++ ^= daddr; + *mess++ ^= saddr; + + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; + +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_release; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); if (security_inet_conn_request(sk, skb, req)) @@ -1337,7 +1376,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) + if (__tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); @@ -1831,6 +1872,19 @@ static int tcp_v4_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv4_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1884,6 +1938,13 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } + /* TCP Cookie Transactions */ + if (tp->cookie_values != NULL) { + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + tp->cookie_values = NULL; + } + percpu_counter_dec(&tcp_sockets_allocated); } @@ -2257,6 +2318,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); + int rx_queue; if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; @@ -2272,12 +2334,19 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) timer_expires = jiffies; } + if (sk->sk_state == TCP_LISTEN) + rx_queue = sk->sk_ack_backlog; + else + /* + * because we dont lock socket, we might find a transient negative value + */ + rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, - sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : - (tp->rcv_nxt - tp->copied_seq), + rx_queue, timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -2468,12 +2537,17 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); +} + +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { - .init = tcp_sk_init, - .exit = tcp_sk_exit, + .init = tcp_sk_init, + .exit = tcp_sk_exit, + .exit_batch = tcp_sk_exit_batch, }; void __init tcp_v4_init(void) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ab32c18..87accec 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -90,13 +90,14 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { - struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; + u8 *hash_location; + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -383,14 +384,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); - struct tcp_sock *newtp; + struct tcp_sock *newtp = tcp_sk(newsk); + struct tcp_sock *oldtp = tcp_sk(sk); + struct tcp_cookie_values *oldcvp = oldtp->cookie_values; + + /* TCP Cookie Transactions require space for the cookie pair, + * as it differs for each connection. There is no need to + * copy any s_data_payload stored at the original socket. + * Failure will prevent resuming the connection. + * + * Presumed copied, in order of appearance: + * cookie_in_always, cookie_out_never + */ + if (oldcvp != NULL) { + struct tcp_cookie_values *newcvp = + kzalloc(sizeof(*newtp->cookie_values), + GFP_ATOMIC); + + if (newcvp != NULL) { + kref_init(&newcvp->kref); + newcvp->cookie_desired = + oldcvp->cookie_desired; + newtp->cookie_values = newcvp; + } else { + /* Not Yet Implemented */ + newtp->cookie_values = NULL; + } + } /* Now setup tcp_sock */ - newtp = tcp_sk(newsk); newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; - newtp->snd_up = treq->snt_isn + 1; + + newtp->rcv_wup = newtp->copied_seq = + newtp->rcv_nxt = treq->rcv_isn + 1; + + newtp->snd_sml = newtp->snd_una = + newtp->snd_nxt = newtp->snd_up = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); tcp_prequeue_init(newtp); @@ -423,8 +453,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = treq->snt_isn + 1; - newtp->pushed_seq = newtp->write_seq; + newtp->write_seq = newtp->pushed_seq = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); newtp->rx_opt.saw_tstamp = 0; @@ -489,15 +519,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - struct tcp_options_received tmp_opt; - struct sock *child; - if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) { + if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -531,7 +562,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. */ - req->rsk_ops->rtx_syn_ack(sk, req); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; } @@ -590,7 +621,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Invalid ACK: reset will be sent by listening socket */ if ((flg & TCP_FLAG_ACK) && - (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) + (TCP_SKB_CB(skb)->ack_seq != + tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 875bc6d..93316a9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,10 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ +EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); + + /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { @@ -362,15 +366,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) +#define OPTION_COOKIE_EXTENSION (1 << 4) struct tcp_out_options { u8 options; /* bit field of OPTION_* */ u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ + u8 hash_size; /* bytes in hash_location */ u16 mss; /* 0 to disable */ __u32 tsval, tsecr; /* need to include OPTION_TS */ + __u8 *hash_location; /* temporary pointer, overloaded */ }; +/* The sysctl int routines are generic, so check consistency here. + */ +static u8 tcp_cookie_size_check(u8 desired) +{ + if (desired > 0) { + /* previously specified */ + return desired; + } + if (sysctl_tcp_cookie_size <= 0) { + /* no default specified */ + return 0; + } + if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { + /* value too small, specify minimum */ + return TCP_COOKIE_MIN; + } + if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { + /* value too large, specify maximum */ + return TCP_COOKIE_MAX; + } + if (0x1 & sysctl_tcp_cookie_size) { + /* 8-bit multiple, illegal, fix it */ + return (u8)(sysctl_tcp_cookie_size + 0x1); + } + return (u8)sysctl_tcp_cookie_size; +} + /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -385,17 +419,34 @@ struct tcp_out_options { * (but it may well be that other scenarios fail similarly). */ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, - const struct tcp_out_options *opts, - __u8 **md5_hash) { - if (unlikely(OPTION_MD5 & opts->options)) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + struct tcp_out_options *opts) +{ + u8 options = opts->options; /* mungable copy */ + + /* Having both authentication and cookies for security is redundant, + * and there's certainly not enough room. Instead, the cookie-less + * extension variant is proposed. + * + * Consider the pessimal case with authentication. The options + * could look like: + * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 + */ + if (unlikely(OPTION_MD5 & options)) { + if (unlikely(OPTION_COOKIE_EXTENSION & options)) { + *ptr++ = htonl((TCPOPT_COOKIE << 24) | + (TCPOLEN_COOKIE_BASE << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + } else { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + } + options &= ~OPTION_COOKIE_EXTENSION; + /* overload cookie hash location */ + opts->hash_location = (__u8 *)ptr; ptr += 4; - } else { - *md5_hash = NULL; } if (unlikely(opts->mss)) { @@ -404,12 +455,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, opts->mss); } - if (likely(OPTION_TS & opts->options)) { - if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { + if (likely(OPTION_TS & options)) { + if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + options &= ~OPTION_SACK_ADVERTISE; } else { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -420,15 +472,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - if (unlikely(OPTION_SACK_ADVERTISE & opts->options && - !(OPTION_TS & opts->options))) { + /* Specification requires after timestamp, so do it now. + * + * Consider the pessimal case without authentication. The options + * could look like: + * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 + */ + if (unlikely(OPTION_COOKIE_EXTENSION & options)) { + __u8 *cookie_copy = opts->hash_location; + u8 cookie_size = opts->hash_size; + + /* 8-bit multiple handled in tcp_cookie_size_check() above, + * and elsewhere. + */ + if (0x2 & cookie_size) { + __u8 *p = (__u8 *)ptr; + + /* 16-bit multiple */ + *p++ = TCPOPT_COOKIE; + *p++ = TCPOLEN_COOKIE_BASE + cookie_size; + *p++ = *cookie_copy++; + *p++ = *cookie_copy++; + ptr++; + cookie_size -= 2; + } else { + /* 32-bit multiple */ + *ptr++ = htonl(((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_COOKIE << 8) | + TCPOLEN_COOKIE_BASE) + + cookie_size); + } + + if (cookie_size > 0) { + memcpy(ptr, cookie_copy, cookie_size); + ptr += (cookie_size / 4); + } + } + + if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); } - if (unlikely(OPTION_WSCALE & opts->options)) { + if (unlikely(OPTION_WSCALE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | @@ -463,14 +552,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - unsigned size = 0; + struct tcp_cookie_values *cvp = tp->cookie_values; struct dst_entry *dst = __sk_dst_get(sk); + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? + tcp_cookie_size_check(cvp->cookie_desired) : + 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; } #else *md5 = NULL; @@ -486,7 +579,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, * SACKs don't matter, we never delay an ACK when we have any of those * going out. */ opts->mss = tcp_advertise_mss(sk); - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; if (likely(sysctl_tcp_timestamps && !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && @@ -494,22 +587,68 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(sysctl_tcp_window_scaling && !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack && !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Note that timestamps are required by the specification. + * + * Odd numbers of bytes are prohibited by the specification, ensuring + * that the cookie is 16-bit aligned, and the resulting cookie pair is + * 32-bit aligned. + */ + if (*md5 == NULL && + (OPTION_TS & opts->options) && + cookie_size > 0) { + int need = TCPOLEN_COOKIE_BASE + cookie_size; + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + + if (need > remaining) { + /* try shrinking cookie to fit */ + cookie_size -= 2; + need -= 4; + } + } + while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { + cookie_size -= 4; + need -= 4; + } + if (TCP_COOKIE_MIN <= cookie_size) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; + opts->hash_size = cookie_size; + + /* Remember for future incarnations. */ + cvp->cookie_desired = cookie_size; + + if (cvp->cookie_desired != cvp->cookie_pair_size) { + /* Currently use random bytes as a nonce, + * assuming these are completely unpredictable + * by hostile users of the same system. + */ + get_random_bytes(&cvp->cookie_pair[0], + cookie_size); + cvp->cookie_pair_size = cookie_size; + } + + remaining -= need; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Set up TCP options for SYN-ACKs. */ @@ -517,48 +656,77 @@ static unsigned tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { - unsigned size = 0; + struct tcp_md5sig_key **md5, + struct tcp_extend_values *xvp) +{ struct inet_request_sock *ireq = inet_rsk(req); - char doing_ts; + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? + xvp->cookie_plus : + 0; + bool doing_ts = ireq->tstamp_ok; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + + /* We can't fit any SACK blocks in a packet with MD5 + TS + * options. There was discussion about disabling SACK + * rather than TS in order to fit in better with old, + * buggy kernels, but that was deemed to be unnecessary. + */ + doing_ts &= !ireq->sack_ok; } #else *md5 = NULL; #endif - /* we can't fit any SACK blocks in a packet with MD5 + TS - options. There was discussion about disabling SACK rather than TS in - order to fit in better with old, buggy kernels, but that was deemed - to be unnecessary. */ - doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); - + /* We always send an MSS option. */ opts->mss = mss; - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = req->ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!doing_ts)) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Similar rationale to tcp_syn_options() applies here, too. + * If the <SYN> options fit, the same options should fit now! + */ + if (*md5 == NULL && + doing_ts && + cookie_plus > TCPOLEN_COOKIE_BASE) { + int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + } + if (need <= remaining) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; + remaining -= need; + } else { + /* There's no error return, so flag it. */ + xvp->cookie_out_never = 1; /* true */ + opts->hash_size = 0; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Compute TCP options for ESTABLISHED sockets. This is not the @@ -624,7 +792,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_out_options opts; unsigned tcp_options_size, tcp_header_size; struct tcp_md5sig_key *md5; - __u8 *md5_hash_location; struct tcphdr *th; int err; @@ -695,7 +862,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } } - tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + tcp_options_write((__be32 *)(th + 1), tp, &opts); if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); @@ -703,7 +870,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* Calculate the MD5 hash, as we have all we need now */ if (md5) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - tp->af_specific->calc_md5_hash(md5_hash_location, + tp->af_specific->calc_md5_hash(opts.hash_location, md5, sk, NULL, skb); } #endif @@ -2224,16 +2391,17 @@ int tcp_send_synack(struct sock *sk) /* Prepare a SYN-ACK. */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req) + struct request_sock *req, + struct request_values *rvp) { + struct tcp_out_options opts; + struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; - int tcp_header_size; - struct tcp_out_options opts; struct sk_buff *skb; struct tcp_md5sig_key *md5; - __u8 *md5_hash_location; + int tcp_header_size; int mss; skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); @@ -2271,8 +2439,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5) + - sizeof(struct tcphdr); + skb, &opts, &md5, xvp) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2289,19 +2457,58 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + + if (OPTION_COOKIE_EXTENSION & opts.options) { + const struct tcp_cookie_values *cvp = tp->cookie_values; + + if (cvp != NULL && + cvp->s_data_constant && + cvp->s_data_desired > 0) { + u8 *buf = skb_put(skb, cvp->s_data_desired); + + /* copy data directly from the listening socket. */ + memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); + TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + } + + if (opts.hash_size > 0) { + __u32 workspace[SHA_WORKSPACE_WORDS]; + u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; + u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; + + /* Secret recipe depends on the Timestamp, (future) + * Sequence and Acknowledgment Numbers, Initiator + * Cookie, and others handled by IP variant caller. + */ + *tail-- ^= opts.tsval; + *tail-- ^= tcp_rsk(req)->rcv_isn + 1; + *tail-- ^= TCP_SKB_CB(skb)->seq + 1; + + /* recommended */ + *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ + + sha_transform((__u32 *)&xvp->cookie_bakery[0], + (char *)mess, + &workspace[0]); + opts.hash_location = + (__u8 *)&xvp->cookie_bakery[0]; + } + } + th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { - tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, + tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, md5, NULL, req, skb); } #endif |