aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Matthew Wilcox <matthew@wil.cx> 2007-11-29 12:05:13 -0700
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-11-30 08:08:05 -0800
commit: 80cbd911ca25535f6bb66bbcbb98950ec328eb40 (patch)
tree: 5856978e32c675393bc2cb97e3647fc91e27f3bb
parent: e1cca7e8d484390169777b423a7fe46c7021fec1 (diff)
downloadkernel_goldelico_gta04-80cbd911ca25535f6bb66bbcbb98950ec328eb40.zip
kernel_goldelico_gta04-80cbd911ca25535f6bb66bbcbb98950ec328eb40.tar.gz
kernel_goldelico_gta04-80cbd911ca25535f6bb66bbcbb98950ec328eb40.tar.bz2
Fix kmem_cache_free performance regression in slab
The database performance group have found that half the cycles spent
in kmem_cache_free are spent in this one call to BUG_ON. Moving it
into the CONFIG_SLAB_DEBUG-only function cache_free_debugcheck() is a
performance win of almost 0.5% on their particular benchmark.

The call was added as part of commit ddc2e812d592457747c4367fb73edcaa8e1e49ff
with the comment that "overhead should be minimal". It may have been
minimal at the time, but it isn't now.

[ Quoth Pekka Enberg: "I don't think the BUG_ON per se caused the
  performance regression but rather the virt_to_head_page() changes to
  virt_to_cache() that were added later." ]

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Pekka J Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/slab.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/mm/slab.c b/mm/slab.c
index c31cd36..202465a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2881,6 +2881,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
unsigned int objnr;
struct slab *slabp;
+ BUG_ON(virt_to_cache(objp) != cachep);
+
objp -= obj_offset(cachep);
kfree_debugcheck(objp);
page = virt_to_head_page(objp);
@@ -3759,8 +3761,6 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
- BUG_ON(virt_to_cache(objp) != cachep);
-
local_irq_save(flags);
debug_check_no_locks_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);