diff options
Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 100 |
1 files changed, 70 insertions, 30 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1f5d508..0fdf45e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,22 +14,33 @@ #include <net/inet_timewait_sock.h> #include <net/ip.h> + +/* + * unhash a timewait socket from established hash + * lock must be hold by caller + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ @@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + if (tb) { + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt++; + } spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { @@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - inet_twsk_put(tw); + while (refcnt) { + inet_twsk_put(tw); + refcnt--; + } } static noinline void inet_twsk_free(struct inet_timewait_sock *tw) @@ -101,13 +118,22 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * Should be done before removing sk from established chain * because readers are lockless and search established first. */ - atomic_inc(&tw->tw_refcnt); inet_twsk_add_node_rcu(tw, &ehead->twchain); /* Step 3: Remove SK from established hash. */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + /* + * Notes : + * - We initially set tw_refcnt to 0 in inet_twsk_alloc() + * - We add one reference for the bhash link + * - We add one reference for the ehash link + * - We want this refcnt update done before allowing other + * threads to find this tw in ehash chain. + */ + atomic_add(1 + 1 + 1, &tw->tw_refcnt); + spin_unlock(lock); } @@ -139,7 +165,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + /* + * Because we use RCU lookups, we should not set tw_refcnt + * to a non null value before everything is setup for this + * timewait socket. + */ + atomic_set(&tw->tw_refcnt, 0); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } @@ -421,37 +452,46 @@ out: EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; struct sock *sk; struct hlist_nulls_node *node; - int h; + unsigned int slot; - local_bh_disable(); - for (h = 0; h <= hashinfo->ehash_mask; h++) { - struct inet_ehash_bucket *head = - inet_ehash_bucket(hashinfo, h); - spinlock_t *lock = inet_ehash_lockp(hashinfo, h); + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + rcu_read_lock(); restart: - spin_lock(lock); - sk_nulls_for_each(sk, node, &head->twchain) { - + sk_nulls_for_each_rcu(sk, node, &head->twchain) { tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) + continue; + + if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) continue; - atomic_inc(&tw->tw_refcnt); - spin_unlock(lock); + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); - - goto restart; + goto restart_rcu; } - spin_unlock(lock); + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); } - local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_purge); |