From bdd779425e01c7247230b23051b1ab2144f9226d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 8 Oct 2012 16:29:14 -0700 Subject: revert "mm: mempolicy: Let vma_merge and vma_split handle vma->vm_policy linkages" commit 8d34694c1abf29df1f3c7317936b7e3e2e308d9b upstream. Commit 05f144a0d5c2 ("mm: mempolicy: Let vma_merge and vma_split handle vma->vm_policy linkages") removed vma->vm_policy updates code but it is the purpose of mbind_range(). Now, mbind_range() is virtually a no-op and while it does not allow memory corruption it is not the right fix. This patch is a revert. [mgorman@suse.de: Edited changelog] Signed-off-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Josh Boyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3f3cc56..464b844 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -606,6 +606,27 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return first; } +/* Apply policy to a single VMA */ +static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) +{ + int err = 0; + struct mempolicy *old = vma->vm_policy; + + pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ? vma->vm_ops->set_policy : NULL); + + if (vma->vm_ops && vma->vm_ops->set_policy) + err = vma->vm_ops->set_policy(vma, new); + if (!err) { + mpol_get(new); + vma->vm_policy = new; + mpol_put(old); + } + return err; +} + /* Step 2: apply policy to a range and do splits. */ static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) @@ -645,23 +666,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } - - /* - * Apply policy to a single VMA. The reference counting of - * policy for vma_policy linkages has already been handled by - * vma_merge and split_vma as necessary. If this is a shared - * policy then ->set_policy will increment the reference count - * for an sp node. - */ - pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff, - vma->vm_ops, vma->vm_file, - vma->vm_ops ? vma->vm_ops->set_policy : NULL); - if (vma->vm_ops && vma->vm_ops->set_policy) { - err = vma->vm_ops->set_policy(vma, new_pol); - if (err) - goto out; - } + err = policy_vma(vma, new_pol); + if (err) + goto out; } out: -- cgit v1.1 From e12681ffb14f5c3bcd25ace39b9fac3941ad6961 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 8 Oct 2012 16:29:16 -0700 Subject: mempolicy: remove mempolicy sharing commit 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 upstream. Dave Jones' system call fuzz testing tool "trinity" triggered the following bug error with slab debugging enabled ============================================================================= BUG numa_policy (Not tainted): Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880146498250-0xffff880146498250. First byte 0x6a instead of 0x6b INFO: Allocated in mpol_new+0xa3/0x140 age=46310 cpu=6 pid=32154 __slab_alloc+0x3d3/0x445 kmem_cache_alloc+0x29d/0x2b0 mpol_new+0xa3/0x140 sys_mbind+0x142/0x620 system_call_fastpath+0x16/0x1b INFO: Freed in __mpol_put+0x27/0x30 age=46268 cpu=6 pid=32154 __slab_free+0x2e/0x1de kmem_cache_free+0x25a/0x260 __mpol_put+0x27/0x30 remove_vma+0x68/0x90 exit_mmap+0x118/0x140 mmput+0x73/0x110 exit_mm+0x108/0x130 do_exit+0x162/0xb90 do_group_exit+0x4f/0xc0 sys_exit_group+0x17/0x20 system_call_fastpath+0x16/0x1b INFO: Slab 0xffffea0005192600 objects=27 used=27 fp=0x (null) flags=0x20000000004080 INFO: Object 0xffff880146498250 @offset=592 fp=0xffff88014649b9d0 The problem is that the structure is being prematurely freed due to a reference count imbalance. In the following case mbind(addr, len) should replace the memory policies of both vma1 and vma2 and thus they will become to share the same mempolicy and the new mempolicy will have the MPOL_F_SHARED flag. +-------------------+-------------------+ | vma1 | vma2(shmem) | +-------------------+-------------------+ | | addr addr+len alloc_pages_vma() uses get_vma_policy() and mpol_cond_put() pair for maintaining the mempolicy reference count. The current rule is that get_vma_policy() only increments refcount for shmem VMA and mpol_conf_put() only decrements refcount if the policy has MPOL_F_SHARED. In above case, vma1 is not shmem vma and vma->policy has MPOL_F_SHARED! The reference count will be decreased even though was not increased whenever alloc_page_vma() is called. This has been broken since commit [52cd3b07: mempolicy: rework mempolicy Reference Counting] in 2008. There is another serious bug with the sharing of memory policies. Currently, mempolicy rebind logic (it is called from cpuset rebinding) ignores a refcount of mempolicy and override it forcibly. Thus, any mempolicy sharing may cause mempolicy corruption. The bug was introduced by commit [68860ec1: cpusets: automatic numa mempolicy rebinding]. Ideally, the shared policy handling would be rewritten to either properly handle COW of the policy structures or at least reference count MPOL_F_SHARED based exclusively on information within the policy. However, this patch takes the easier approach of disabling any policy sharing between VMAs. Each new range allocated with sp_alloc will allocate a new policy, set the reference count to 1 and drop the reference count of the old policy. This increases the memory footprint but is not expected to be a major problem as mbind() is unlikely to be used for fine-grained ranges. It is also inefficient because it means we allocate a new policy even in cases where mbind_range() could use the new_policy passed to it. However, it is more straight-forward and the change should be invisible to the user. [mgorman@suse.de: Edited changelog] Reported-by: Dave Jones Cc: Christoph Lameter Reviewed-by: Christoph Lameter Signed-off-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Cc: Josh Boyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 464b844..52df0b5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -606,24 +606,39 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return first; } -/* Apply policy to a single VMA */ -static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) +/* + * Apply policy to a single VMA + * This must be called with the mmap_sem held for writing. + */ +static int vma_replace_policy(struct vm_area_struct *vma, + struct mempolicy *pol) { - int err = 0; - struct mempolicy *old = vma->vm_policy; + int err; + struct mempolicy *old; + struct mempolicy *new; pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_ops, vma->vm_file, vma->vm_ops ? vma->vm_ops->set_policy : NULL); - if (vma->vm_ops && vma->vm_ops->set_policy) + new = mpol_dup(pol); + if (IS_ERR(new)) + return PTR_ERR(new); + + if (vma->vm_ops && vma->vm_ops->set_policy) { err = vma->vm_ops->set_policy(vma, new); - if (!err) { - mpol_get(new); - vma->vm_policy = new; - mpol_put(old); + if (err) + goto err_out; } + + old = vma->vm_policy; + vma->vm_policy = new; /* protected by mmap_sem */ + mpol_put(old); + + return 0; + err_out: + mpol_put(new); return err; } @@ -666,7 +681,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } - err = policy_vma(vma, new_pol); + err = vma_replace_policy(vma, new_pol); if (err) goto out; } @@ -2091,15 +2106,24 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n) static struct sp_node *sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) { - struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL); + struct sp_node *n; + struct mempolicy *newpol; + n = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n) return NULL; + + newpol = mpol_dup(pol); + if (IS_ERR(newpol)) { + kmem_cache_free(sn_cache, n); + return NULL; + } + newpol->flags |= MPOL_F_SHARED; + n->start = start; n->end = end; - mpol_get(pol); - pol->flags |= MPOL_F_SHARED; /* for unref */ - n->policy = pol; + n->policy = newpol; + return n; } -- cgit v1.1 From cedd186e31dacfb400ec74e0cdd59b02c3d55da8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 8 Oct 2012 16:29:17 -0700 Subject: mempolicy: fix a race in shared_policy_replace() commit b22d127a39ddd10d93deee3d96e643657ad53a49 upstream. shared_policy_replace() use of sp_alloc() is unsafe. 1) sp_node cannot be dereferenced if sp->lock is not held and 2) another thread can modify sp_node between spin_unlock for allocating a new sp node and next spin_lock. The bug was introduced before 2.6.12-rc2. Kosaki's original patch for this problem was to allocate an sp node and policy within shared_policy_replace and initialise it when the lock is reacquired. I was not keen on this approach because it partially duplicates sp_alloc(). As the paths were sp->lock is taken are not that performance critical this patch converts sp->lock to sp->mutex so it can sleep when calling sp_alloc(). [kosaki.motohiro@jp.fujitsu.com: Original patch] Signed-off-by: Mel Gorman Acked-by: KOSAKI Motohiro Reviewed-by: Christoph Lameter Cc: Josh Boyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 52df0b5..a768692 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2021,7 +2021,7 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b) */ /* lookup first element intersecting start-end */ -/* Caller holds sp->lock */ +/* Caller holds sp->mutex */ static struct sp_node * sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) { @@ -2085,13 +2085,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) if (!sp->root.rb_node) return NULL; - spin_lock(&sp->lock); + mutex_lock(&sp->mutex); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } - spin_unlock(&sp->lock); + mutex_unlock(&sp->mutex); return pol; } @@ -2131,10 +2131,10 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end, static int shared_policy_replace(struct shared_policy *sp, unsigned long start, unsigned long end, struct sp_node *new) { - struct sp_node *n, *new2 = NULL; + struct sp_node *n; + int ret = 0; -restart: - spin_lock(&sp->lock); + mutex_lock(&sp->mutex); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { @@ -2147,16 +2147,14 @@ restart: } else { /* Old policy spanning whole new range. */ if (n->end > end) { + struct sp_node *new2; + new2 = sp_alloc(end, n->end, n->policy); if (!new2) { - spin_unlock(&sp->lock); - new2 = sp_alloc(end, n->end, n->policy); - if (!new2) - return -ENOMEM; - goto restart; + ret = -ENOMEM; + goto out; } n->end = start; sp_insert(sp, new2); - new2 = NULL; break; } else n->end = start; @@ -2167,12 +2165,9 @@ restart: } if (new) sp_insert(sp, new); - spin_unlock(&sp->lock); - if (new2) { - mpol_put(new2->policy); - kmem_cache_free(sn_cache, new2); - } - return 0; +out: + mutex_unlock(&sp->mutex); + return ret; } /** @@ -2190,7 +2185,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ - spin_lock_init(&sp->lock); + mutex_init(&sp->mutex); if (mpol) { struct vm_area_struct pvma; @@ -2256,7 +2251,7 @@ void mpol_free_shared_policy(struct shared_policy *p) if (!p->root.rb_node) return; - spin_lock(&p->lock); + mutex_lock(&p->mutex); next = rb_first(&p->root); while (next) { n = rb_entry(next, struct sp_node, nd); @@ -2265,7 +2260,7 @@ void mpol_free_shared_policy(struct shared_policy *p) mpol_put(n->policy); kmem_cache_free(sn_cache, n); } - spin_unlock(&p->lock); + mutex_unlock(&p->mutex); } /* assumes fs == KERNEL_DS */ -- cgit v1.1 From 29715fe22f6e7ea5d84c2872fd5dd2d407ed5083 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 8 Oct 2012 16:29:19 -0700 Subject: mempolicy: fix refcount leak in mpol_set_shared_policy() commit 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 upstream. When shared_policy_replace() fails to allocate new->policy is not freed correctly by mpol_set_shared_policy(). The problem is that shared mempolicy code directly call kmem_cache_free() in multiple places where it is easy to make a mistake. This patch creates an sp_free wrapper function and uses it. The bug was introduced pre-git age (IOW, before 2.6.12-rc2). [mgorman@suse.de: Editted changelog] Signed-off-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Reviewed-by: Christoph Lameter Cc: Josh Boyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a768692..6a569cc 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2095,12 +2095,17 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) return pol; } +static void sp_free(struct sp_node *n) +{ + mpol_put(n->policy); + kmem_cache_free(sn_cache, n); +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); rb_erase(&n->nd, &sp->root); - mpol_put(n->policy); - kmem_cache_free(sn_cache, n); + sp_free(n); } static struct sp_node *sp_alloc(unsigned long start, unsigned long end, @@ -2239,7 +2244,7 @@ int mpol_set_shared_policy(struct shared_policy *info, } err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new); if (err && new) - kmem_cache_free(sn_cache, new); + sp_free(new); return err; } @@ -2256,9 +2261,7 @@ void mpol_free_shared_policy(struct shared_policy *p) while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); - rb_erase(&n->nd, &p->root); - mpol_put(n->policy); - kmem_cache_free(sn_cache, n); + sp_delete(p, n); } mutex_unlock(&p->mutex); } -- cgit v1.1