From 808f5114a9206fee855117d416440e1071ab375c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 22 Feb 2010 07:57:18 +0000 Subject: packet: convert socket list to RCU (v3) Convert AF_PACKET to use RCU, eliminating one more reader/writer lock. There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init is doing the equivalent thing to hlst_del_init_rcu already; but added some comments to try and make that obvious. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/netns/packet.h | 4 +-- include/net/sock.h | 10 ++++++++ net/packet/af_packet.c | 62 +++++++++++++++++++++++----------------------- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index 637daf6..cb4e894 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h @@ -4,11 +4,11 @@ #ifndef __NETNS_PACKET_H__ #define __NETNS_PACKET_H__ -#include +#include #include struct netns_packet { - rwlock_t sklist_lock; + spinlock_t sklist_lock; struct hlist_head sklist; }; diff --git a/include/net/sock.h b/include/net/sock.h index 580d51f..6cb1676 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -381,6 +381,7 @@ static __inline__ void __sk_del_node(struct sock *sk) __hlist_del(&sk->sk_node); } +/* NB: equivalent to hlist_del_init_rcu */ static __inline__ int __sk_del_node_init(struct sock *sk) { if (sk_hashed(sk)) { @@ -421,6 +422,7 @@ static __inline__ int sk_del_node_init(struct sock *sk) } return rc; } +#define sk_del_node_init_rcu(sk) sk_del_node_init(sk) static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) { @@ -454,6 +456,12 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_head_rcu(&sk->sk_node, list); +} + static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); @@ -478,6 +486,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295..2f03693 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&net->packet.sklist_lock); - sk_del_node_init(sk); + spin_lock_bh(&net->packet.sklist_lock); + sk_del_node_init_rcu(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); - write_unlock_bh(&net->packet.sklist_lock); - - /* - * Unhook packet receive handler. - */ + spin_unlock_bh(&net->packet.sklist_lock); + spin_lock(&po->bind_lock); if (po->running) { /* - * Remove the protocol hook + * Remove from protocol table */ - dev_remove_pack(&po->prot_hook); po->running = 0; po->num = 0; + __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + synchronize_net(); /* * Now the socket is dead. No more input will appear. */ - sock_orphan(sk); sock->sk = NULL; @@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->running = 1; } - write_lock_bh(&net->packet.sklist_lock); - sk_add_node(sk, &net->packet.sklist); + spin_lock_bh(&net->packet.sklist_lock); + sk_add_node_rcu(sk, &net->packet.sklist); sock_prot_inuse_add(net, &packet_proto, 1); - write_unlock_bh(&net->packet.sklist_lock); + spin_unlock_bh(&net->packet.sklist_lock); + return 0; out: return err; @@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = data; struct net *net = dev_net(dev); - read_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } break; case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + if (dev->ifindex == po->ifindex) { + spin_lock(&po->bind_lock); + if (po->num && !po->running) { + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } + spin_unlock(&po->bind_lock); } - spin_unlock(&po->bind_lock); break; } } - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); return NOTIFY_DONE; } @@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(seq_file_net(seq)->packet.sklist_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); - read_lock(&net->packet.sklist_lock); - return seq_hlist_start_head(&net->packet.sklist, *pos); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - return seq_hlist_next(v, &net->packet.sklist, pos); + return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) - __releases(seq_file_net(seq)->packet.sklist_lock) + __releases(RCU) { - struct net *net = seq_file_net(seq); - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - rwlock_init(&net->packet.sklist_lock); + spin_lock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) -- cgit v1.1