aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2007-11-05 23:38:39 -0800
committerDavid S. Miller <davem@sunset.davemloft.net>2007-11-07 04:08:57 -0800
commit286ab3d46058840d68e5d7d52e316c1f7e98c59f (patch)
tree1d70e7895c49d2b148e026aa047efe186697fff9 /net
parent91781004b9c029ee55b7aa9ef950a373ba865dc6 (diff)
downloadkernel_goldelico_gta04-286ab3d46058840d68e5d7d52e316c1f7e98c59f.zip
kernel_goldelico_gta04-286ab3d46058840d68e5d7d52e316c1f7e98c59f.tar.gz
kernel_goldelico_gta04-286ab3d46058840d68e5d7d52e316c1f7e98c59f.tar.bz2
[NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way.
"struct proto" currently uses an array stats[NR_CPUS] to track change on 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to : - Keep a fast !CONFIG_SMP implementation - Keep a fast CONFIG_SMP implementation for often used protocols (tcp,udp,raw,...) - Introduce a NUMA efficient implementation Some helper macros are defined in include/net/sock.h These macros take into account CONFIG_SMP If a "struct proto" is declared without using DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of current alloc_percpu() implementation. However it still should be better than previous implementation based on stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs, still preserving NUMA efficiency. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/sock.c48
-rw-r--r--net/ipv4/proc.c19
-rw-r--r--net/ipv6/proc.c19
3 files changed, 55 insertions, 31 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 12ad206..e077f26 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
+#ifdef CONFIG_SMP
+/*
+ * Define default functions to keep track of inuse sockets per protocol
+ * Note that often used protocols use dedicated functions to get a speed increase.
+ * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
+ */
+static void inuse_add(struct proto *prot, int inc)
+{
+ per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
+}
+
+static int inuse_get(const struct proto *prot)
+{
+ int res = 0, cpu;
+ for_each_possible_cpu(cpu)
+ res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
+ return res;
+}
+#endif
+
int proto_register(struct proto *prot, int alloc_slab)
{
char *request_sock_slab_name = NULL;
char *timewait_sock_slab_name;
int rc = -ENOBUFS;
+#ifdef CONFIG_SMP
+ if (!prot->inuse_getval || !prot->inuse_add) {
+ prot->inuse_ptr = alloc_percpu(int);
+ if (prot->inuse_ptr == NULL)
+ goto out;
+ prot->inuse_getval = inuse_get;
+ prot->inuse_add = inuse_add;
+ }
+#endif
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
SLAB_HWCACHE_ALIGN, NULL);
@@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab)
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
prot->name);
- goto out;
+ goto out_free_inuse;
}
if (prot->rsk_prot != NULL) {
@@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name:
out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
+out_free_inuse:
+#ifdef CONFIG_SMP
+ if (prot->inuse_ptr != NULL) {
+ free_percpu(prot->inuse_ptr);
+ prot->inuse_ptr = NULL;
+ prot->inuse_getval = NULL;
+ prot->inuse_add = NULL;
+ }
+#endif
goto out;
}
@@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot)
list_del(&prot->node);
write_unlock(&proto_list_lock);
+#ifdef CONFIG_SMP
+ if (prot->inuse_ptr != NULL) {
+ free_percpu(prot->inuse_ptr);
+ prot->inuse_ptr = NULL;
+ prot->inuse_getval = NULL;
+ prot->inuse_add = NULL;
+ }
+#endif
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ffdccc0..ce34b28 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,17 +46,6 @@
#include <net/sock.h>
#include <net/raw.h>
-static int fold_prot_inuse(struct proto *proto)
-{
- int res = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- res += proto->stats[cpu].inuse;
-
- return res;
-}
-
/*
* Report socket allocation statistics [mea@utu.fi]
*/
@@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
{
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
- fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+ sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
- seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
- seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+ seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
+ seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
ip_frag_nqueues(), ip_frag_mem());
return 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index be526ad..8631ed7 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -32,27 +32,16 @@
static struct proc_dir_entry *proc_net_devsnmp6;
-static int fold_prot_inuse(struct proto *proto)
-{
- int res = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- res += proto->stats[cpu].inuse;
-
- return res;
-}
-
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "TCP6: inuse %d\n",
- fold_prot_inuse(&tcpv6_prot));
+ sock_prot_inuse(&tcpv6_prot));
seq_printf(seq, "UDP6: inuse %d\n",
- fold_prot_inuse(&udpv6_prot));
+ sock_prot_inuse(&udpv6_prot));
seq_printf(seq, "UDPLITE6: inuse %d\n",
- fold_prot_inuse(&udplitev6_prot));
+ sock_prot_inuse(&udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
- fold_prot_inuse(&rawv6_prot));
+ sock_prot_inuse(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
ip6_frag_nqueues(), ip6_frag_mem());
return 0;