Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c    |  47
-rw-r--r-- | mm/filemap.c        |  15
-rw-r--r-- | mm/fremap.c         |  26
-rw-r--r-- | mm/mmap.c           |   3
-rw-r--r-- | mm/nommu.c          |   1
-rw-r--r-- | mm/oom_kill.c       | 107
-rw-r--r-- | mm/page-writeback.c | 300
-rw-r--r-- | mm/page_alloc.c     |  23
-rw-r--r-- | mm/readahead.c      |   6
-rw-r--r-- | mm/rmap.c           |   4
-rw-r--r-- | mm/shmem.c          |  20
-rw-r--r-- | mm/slab.c           |  14
-rw-r--r-- | mm/slob.c           |   6
-rw-r--r-- | mm/slub.c           |  30
-rw-r--r-- | mm/swap.c           |   5
-rw-r--r-- | mm/tiny-shmem.c     |  19
-rw-r--r-- | mm/truncate.c       |   3
-rw-r--r-- | mm/vmscan.c         |  40
-rw-r--r-- | mm/vmstat.c         |   2
19 files changed, 484 insertions, 187 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f50a281..b0ceb29 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -5,6 +5,41 @@ #include <linux/sched.h> #include <linux/module.h> +int bdi_init(struct backing_dev_info *bdi) +{ + int i, j; + int err; + + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { + err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); + if (err) + goto err; + } + + bdi->dirty_exceeded = 0; + err = prop_local_init_percpu(&bdi->completions); + + if (err) { +err: + for (j = 0; j < i; j++) + percpu_counter_destroy(&bdi->bdi_stat[i]); + } + + return err; +} +EXPORT_SYMBOL(bdi_init); + +void bdi_destroy(struct backing_dev_info *bdi) +{ + int i; + + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) + percpu_counter_destroy(&bdi->bdi_stat[i]); + + prop_local_destroy_percpu(&bdi->completions); +} +EXPORT_SYMBOL(bdi_destroy); + static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) @@ -55,15 +90,3 @@ long congestion_wait(int rw, long timeout) } EXPORT_SYMBOL(congestion_wait); -/** - * congestion_end - wake up sleepers on a congested backing_dev_info - * @rw: READ or WRITE - */ -void congestion_end(int rw) -{ - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - if (waitqueue_active(wqh)) - wake_up(wqh); -} -EXPORT_SYMBOL(congestion_end); diff --git a/mm/filemap.c b/mm/filemap.c index c6049e9..79f24a9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -63,6 +63,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock + * ->zone.lock * * ->i_mutex * ->i_mmap_lock (truncate->unmap_mapping_range) @@ -1626,12 +1627,18 @@ int __remove_suid(struct dentry *dentry, int kill) int remove_suid(struct dentry *dentry) { - int kill = should_remove_suid(dentry); + int killsuid = should_remove_suid(dentry); + int killpriv = security_inode_need_killpriv(dentry); + int error = 0; - if (unlikely(kill)) - return __remove_suid(dentry, kill); + if (killpriv < 0) + return killpriv; + if (killpriv) + error = security_inode_killpriv(dentry); + if (!error && killsuid) + error = __remove_suid(dentry, killsuid); - return 0; + return error; } EXPORT_SYMBOL(remove_suid); diff --git a/mm/fremap.c b/mm/fremap.c index 95bcb56..14bd3bf 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -5,7 +5,7 @@ * * started by Ingo Molnar, Copyright (C) 2002, 2003 */ - +#include <linux/backing-dev.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/file.h> @@ -97,26 +97,28 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, } -/*** - * sys_remap_file_pages - remap arbitrary pages of a shared backing store - * file within an existing vma. +/** + * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma * @start: start of the remapped virtual memory range * @size: size of the remapped virtual memory range - * @prot: new protection bits of the range - * @pgoff: to be mapped page of the backing store file + * @prot: new protection bits of the range (see NOTE) + * @pgoff: to-be-mapped page of the backing store file * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO. * - * this syscall works purely via pagetables, so it's the most efficient + * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma + * (shared backing store file). 
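The bdi_init() error path above follows the usual partial-initialization rollback pattern: on failure, only the counters that were already set up are torn down again (the rollback loop iterates with j, so the destroy presumably wants bdi_stat[j] rather than bdi_stat[i]). A minimal, runnable userspace sketch of the same pattern, with hypothetical init_counter()/destroy_counter() helpers standing in for percpu_counter_init_irq()/percpu_counter_destroy():

    #include <stdio.h>

    #define NR_ITEMS 4

    /* Hypothetical stand-ins for percpu_counter_init_irq()/percpu_counter_destroy(). */
    static int init_counter(int idx)
    {
    	return idx == 2 ? -1 : 0;	/* simulate a failure on the third item */
    }

    static void destroy_counter(int idx)
    {
    	printf("destroy counter %d\n", idx);
    }

    static int obj_init(void)
    {
    	int i, j, err = 0;

    	for (i = 0; i < NR_ITEMS; i++) {
    		err = init_counter(i);
    		if (err)
    			break;
    	}
    	if (err) {
    		/* roll back only what was initialized: indexes 0..i-1 */
    		for (j = 0; j < i; j++)
    			destroy_counter(j);
    	}
    	return err;
    }

    int main(void)
    {
    	printf("obj_init() = %d\n", obj_init());
    	return 0;
    }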
+ * + * This syscall works purely via pagetables, so it's the most efficient * way to map the same (large) file into a given virtual window. Unlike * mmap()/mremap() it does not create any new vmas. The new mappings are * also safe across swapout. * - * NOTE: the 'prot' parameter right now is ignored, and the vma's default - * protection is used. Arbitrary protections might be implemented in the - * future. + * NOTE: the 'prot' parameter right now is ignored (but must be zero), + * and the vma's default protection is used. Arbitrary protections + * might be implemented in the future. */ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long __prot, unsigned long pgoff, unsigned long flags) + unsigned long prot, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct address_space *mapping; @@ -125,7 +127,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, int err = -EINVAL; int has_write_lock = 0; - if (__prot) + if (prot) return err; /* * Sanitize the syscall parameters: @@ -7,6 +7,7 @@ */ #include <linux/slab.h> +#include <linux/backing-dev.h> #include <linux/mm.h> #include <linux/shm.h> #include <linux/mman.h> @@ -180,8 +181,6 @@ error: return -ENOMEM; } -EXPORT_SYMBOL(__vm_enough_memory); - /* * Requires inode->i_mapping->i_mmap_lock */ @@ -44,7 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int heap_stack_gap = 0; EXPORT_SYMBOL(mem_map); -EXPORT_SYMBOL(__vm_enough_memory); EXPORT_SYMBOL(num_physpages); /* list of shareable VMAs */ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 41b4e36..a64decb 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -27,6 +27,8 @@ #include <linux/notifier.h> int sysctl_panic_on_oom; +int sysctl_oom_kill_allocating_task; +static DEFINE_SPINLOCK(zone_scan_mutex); /* #define DEBUG */ /** @@ -141,7 +143,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * because p may have allocated or otherwise mapped memory on * this node before. However it will be less likely. */ - if (!cpuset_excl_nodes_overlap(p)) + if (!cpuset_mems_allowed_intersects(current, p)) points /= 8; /* @@ -164,16 +166,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) } /* - * Types of limitations to the nodes from which allocations may occur - */ -#define CONSTRAINT_NONE 1 -#define CONSTRAINT_MEMORY_POLICY 2 -#define CONSTRAINT_CPUSET 3 - -/* * Determine the type of allocation constraint. 
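For reference, a minimal userspace sketch of using the syscall documented above: map a one-page window of a file MAP_SHARED, then rebind that window to a different file page without creating a new vma. It assumes a scratch file ./data of at least two pages; per the NOTE above, prot is passed as 0.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
    	long psz = sysconf(_SC_PAGESIZE);
    	int fd = open("./data", O_RDWR);	/* assumed scratch file, >= 2 pages */
    	char *win;

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}

    	/* One-page VM_SHARED window over file page 0. */
    	win = mmap(NULL, psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    	if (win == MAP_FAILED) {
    		perror("mmap");
    		return 1;
    	}

    	/* Rebind the same window to file page 1; no new vma is created.
    	 * prot must be 0 - the vma's existing protection is reused. */
    	if (remap_file_pages(win, psz, 0, 1, 0)) {
    		perror("remap_file_pages");
    		return 1;
    	}

    	printf("first byte of file page 1: 0x%02x\n", (unsigned char)win[0]);
    	munmap(win, psz);
    	close(fd);
    	return 0;
    }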
*/ -static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) +static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, + gfp_t gfp_mask) { #ifdef CONFIG_NUMA struct zone **z; @@ -337,12 +333,20 @@ static int oom_kill_task(struct task_struct *p) return 0; } -static int oom_kill_process(struct task_struct *p, unsigned long points, - const char *message) +static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, + unsigned long points, const char *message) { struct task_struct *c; struct list_head *tsk; + if (printk_ratelimit()) { + printk(KERN_WARNING "%s invoked oom-killer: " + "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", + current->comm, gfp_mask, order, current->oomkilladj); + dump_stack(); + show_mem(); + } + /* * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just set TIF_MEMDIE so it can die quickly @@ -380,6 +384,57 @@ int unregister_oom_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_oom_notifier); +/* + * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero + * if a parallel OOM killing is already taking place that includes a zone in + * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. + */ +int try_set_zone_oom(struct zonelist *zonelist) +{ + struct zone **z; + int ret = 1; + + z = zonelist->zones; + + spin_lock(&zone_scan_mutex); + do { + if (zone_is_oom_locked(*z)) { + ret = 0; + goto out; + } + } while (*(++z) != NULL); + + /* + * Lock each zone in the zonelist under zone_scan_mutex so a parallel + * invocation of try_set_zone_oom() doesn't succeed when it shouldn't. + */ + z = zonelist->zones; + do { + zone_set_flag(*z, ZONE_OOM_LOCKED); + } while (*(++z) != NULL); +out: + spin_unlock(&zone_scan_mutex); + return ret; +} + +/* + * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed + * allocation attempts with zonelists containing them may now recall the OOM + * killer, if necessary. + */ +void clear_zonelist_oom(struct zonelist *zonelist) +{ + struct zone **z; + + z = zonelist->zones; + + spin_lock(&zone_scan_mutex); + do { + zone_clear_flag(*z, ZONE_OOM_LOCKED); + } while (*(++z) != NULL); + spin_unlock(&zone_scan_mutex); +} + /** * out_of_memory - kill the "best" process when we run out of memory * @@ -393,21 +448,13 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) struct task_struct *p; unsigned long points = 0; unsigned long freed = 0; - int constraint; + enum oom_constraint constraint; blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) /* Got some memory back in the last second. */ return; - if (printk_ratelimit()) { - printk(KERN_WARNING "%s invoked oom-killer: " - "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", - current->comm, gfp_mask, order, current->oomkilladj); - dump_stack(); - show_mem(); - } - if (sysctl_panic_on_oom == 2) panic("out of memory. Compulsory panic_on_oom is selected.\n"); @@ -416,23 +463,24 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) * NUMA) that may require different handling. 
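try_set_zone_oom()/clear_zonelist_oom() above implement an all-or-nothing lock over a set of zones: scan for any already-locked member under zone_scan_mutex, and only if none is found mark them all ZONE_OOM_LOCKED. A small userspace sketch of the same protocol, with a pthread mutex and a flag array standing in for the spinlock and the zone flag:

    #include <pthread.h>
    #include <stdio.h>

    #define NR_ZONES 3

    static pthread_mutex_t scan_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int oom_locked[NR_ZONES];	/* stand-in for ZONE_OOM_LOCKED */

    /* Return 1 and mark every zone in 'set' iff none of them is already locked. */
    static int try_set_oom(const int *set, int n)
    {
    	int i, ret = 1;

    	pthread_mutex_lock(&scan_mutex);
    	for (i = 0; i < n; i++)
    		if (oom_locked[set[i]]) {
    			ret = 0;
    			goto out;
    		}
    	for (i = 0; i < n; i++)
    		oom_locked[set[i]] = 1;
    out:
    	pthread_mutex_unlock(&scan_mutex);
    	return ret;
    }

    static void clear_oom(const int *set, int n)
    {
    	int i;

    	pthread_mutex_lock(&scan_mutex);
    	for (i = 0; i < n; i++)
    		oom_locked[set[i]] = 0;
    	pthread_mutex_unlock(&scan_mutex);
    }

    int main(void)
    {
    	int zones[] = { 0, 2 };

    	printf("first try:   %d\n", try_set_oom(zones, 2));	/* 1: acquired */
    	printf("second try:  %d\n", try_set_oom(zones, 2));	/* 0: already locked */
    	clear_oom(zones, 2);
    	printf("after clear: %d\n", try_set_oom(zones, 2));	/* 1 again */
    	return 0;
    }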
*/ constraint = constrained_alloc(zonelist, gfp_mask); - cpuset_lock(); read_lock(&tasklist_lock); switch (constraint) { case CONSTRAINT_MEMORY_POLICY: - oom_kill_process(current, points, + oom_kill_process(current, gfp_mask, order, points, "No available memory (MPOL_BIND)"); break; - case CONSTRAINT_CPUSET: - oom_kill_process(current, points, - "No available memory in cpuset"); - break; - case CONSTRAINT_NONE: if (sysctl_panic_on_oom) panic("out of memory. panic_on_oom is selected\n"); + /* Fall-through */ + case CONSTRAINT_CPUSET: + if (sysctl_oom_kill_allocating_task) { + oom_kill_process(current, gfp_mask, order, points, + "Out of memory (oom_kill_allocating_task)"); + break; + } retry: /* * Rambo mode: Shoot down a process and hope it solves whatever @@ -446,11 +494,11 @@ retry: /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { read_unlock(&tasklist_lock); - cpuset_unlock(); panic("Out of memory and no killable processes...\n"); } - if (oom_kill_process(p, points, "Out of memory")) + if (oom_kill_process(p, points, gfp_mask, order, + "Out of memory")) goto retry; break; @@ -458,7 +506,6 @@ retry: out: read_unlock(&tasklist_lock); - cpuset_unlock(); /* * Give "p" a good chance of killing itself before we diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d821321..7845462 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2,6 +2,7 @@ * mm/page-writeback.c * * Copyright (C) 2002, Linus Torvalds. + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * Contains functions related to writing back dirty pages at the * address_space level. @@ -36,7 +37,7 @@ /* * The maximum number of pages to writeout in a single bdflush/kupdate - * operation. We do this so we don't hold I_LOCK against an inode for + * operation. We do this so we don't hold I_SYNC against an inode for * enormous amounts of time, which would block a userspace task which has * been forced to throttle against that inode. Also, the code reevaluates * the dirty each time it has written this many pages. @@ -49,8 +50,6 @@ */ static long ratelimit_pages = 32; -static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ - /* * When balance_dirty_pages decides that the caller needs to perform some * non-background writeback, this is how many pages it will attempt to write. @@ -103,6 +102,141 @@ EXPORT_SYMBOL(laptop_mode); static void background_writeout(unsigned long _min_pages); /* + * Scale the writeback cache size proportional to the relative writeout speeds. + * + * We do this by keeping a floating proportion between BDIs, based on page + * writeback completions [end_page_writeback()]. Those devices that write out + * pages fastest will get the larger share, while the slower will get a smaller + * share. + * + * We use page writeout completions because we are interested in getting rid of + * dirty pages. Having them written out is the primary goal. + * + * We introduce a concept of time, a period over which we measure these events, + * because demand can/will vary over time. The length of this period itself is + * measured in page writeback completions. 
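A toy model of the idea sketched in the comment above: count writeout completions per device and hand each device that fraction of the dirty limit. The real code keeps a floating proportion (the prop_* helpers below) that additionally ages the counts over a period measured in completions; this sketch only shows the share arithmetic.

    #include <stdio.h>

    #define NR_BDIS 2

    static unsigned long completions[NR_BDIS];

    static void writeout_inc(int bdi)	/* cf. __bdi_writeout_inc() */
    {
    	completions[bdi]++;
    }

    /* Each device's share of dirty_thresh is its fraction of total completions. */
    static long bdi_share(int bdi, long dirty_thresh)
    {
    	unsigned long total = 0;
    	int i;

    	for (i = 0; i < NR_BDIS; i++)
    		total += completions[i];
    	if (!total)
    		return dirty_thresh / NR_BDIS;	/* no history yet: split evenly */
    	return (long)((unsigned long long)dirty_thresh * completions[bdi] / total);
    }

    int main(void)
    {
    	int i;

    	for (i = 0; i < 300; i++)	/* the fast device completes 3x as often */
    		writeout_inc(0);
    	for (i = 0; i < 100; i++)
    		writeout_inc(1);

    	printf("bdi0 gets %ld of 1000 dirty pages\n", bdi_share(0, 1000));
    	printf("bdi1 gets %ld of 1000 dirty pages\n", bdi_share(1, 1000));
    	return 0;
    }

With the counts above the fast device ends up with 750 of the 1000 allowed dirty pages and the slow one with 250, which is the intended effect: devices that complete writeback faster earn a larger slice of the writeback cache.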
+ * + */ +static struct prop_descriptor vm_completions; +static struct prop_descriptor vm_dirties; + +static unsigned long determine_dirtyable_memory(void); + +/* + * couple the period to the dirty_ratio: + * + * period/2 ~ roundup_pow_of_two(dirty limit) + */ +static int calc_period_shift(void) +{ + unsigned long dirty_total; + + dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; + return 2 + ilog2(dirty_total - 1); +} + +/* + * update the period when the dirty ratio changes. + */ +int dirty_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old_ratio = vm_dirty_ratio; + int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_ratio != old_ratio) { + int shift = calc_period_shift(); + prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); + } + return ret; +} + +/* + * Increment the BDI's writeout completion count and the global writeout + * completion count. Called from test_clear_page_writeback(). + */ +static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) +{ + __prop_inc_percpu(&vm_completions, &bdi->completions); +} + +static inline void task_dirty_inc(struct task_struct *tsk) +{ + prop_inc_single(&vm_dirties, &tsk->dirties); +} + +/* + * Obtain an accurate fraction of the BDI's portion. + */ +static void bdi_writeout_fraction(struct backing_dev_info *bdi, + long *numerator, long *denominator) +{ + if (bdi_cap_writeback_dirty(bdi)) { + prop_fraction_percpu(&vm_completions, &bdi->completions, + numerator, denominator); + } else { + *numerator = 0; + *denominator = 1; + } +} + +/* + * Clip the earned share of dirty pages to that which is actually available. + * This avoids exceeding the total dirty_limit when the floating averages + * fluctuate too quickly. + */ +static void +clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty) +{ + long avail_dirty; + + avail_dirty = dirty - + (global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_WRITEBACK) + + global_page_state(NR_UNSTABLE_NFS)); + + if (avail_dirty < 0) + avail_dirty = 0; + + avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) + + bdi_stat(bdi, BDI_WRITEBACK); + + *pbdi_dirty = min(*pbdi_dirty, avail_dirty); +} + +static inline void task_dirties_fraction(struct task_struct *tsk, + long *numerator, long *denominator) +{ + prop_fraction_single(&vm_dirties, &tsk->dirties, + numerator, denominator); +} + +/* + * scale the dirty limit + * + * task specific dirty limit: + * + * dirty -= (dirty/8) * p_{t} + */ +void task_dirty_limit(struct task_struct *tsk, long *pdirty) +{ + long numerator, denominator; + long dirty = *pdirty; + u64 inv = dirty >> 3; + + task_dirties_fraction(tsk, &numerator, &denominator); + inv *= numerator; + do_div(inv, denominator); + + dirty -= inv; + if (dirty < *pdirty/2) + dirty = *pdirty/2; + + *pdirty = dirty; +} + +/* * Work out the current dirty-memory clamping and background writeout * thresholds. 
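task_dirty_limit() above lowers the per-BDI limit by at most one eighth, in proportion to the task's share of recently dirtied pages, and clamps the result at half the original value. The same arithmetic in a runnable form, with the task's fraction supplied directly as numerator/denominator:

    #include <stdio.h>

    /* cf. task_dirty_limit(): dirty -= (dirty / 8) * p_task, clamped to dirty / 2 */
    static long task_dirty_limit(long dirty, long numerator, long denominator)
    {
    	long orig = dirty;
    	unsigned long long inv = (unsigned long long)(dirty >> 3);

    	inv = inv * numerator / denominator;	/* do_div() equivalent */
    	dirty -= (long)inv;
    	if (dirty < orig / 2)
    		dirty = orig / 2;
    	return dirty;
    }

    int main(void)
    {
    	/* a task responsible for half of the recently dirtied pages */
    	printf("limit 1000 -> %ld\n", task_dirty_limit(1000, 1, 2));	/* 938 */
    	/* a task responsible for all of them */
    	printf("limit 1000 -> %ld\n", task_dirty_limit(1000, 1, 1));	/* 875 */
    	return 0;
    }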
* @@ -158,8 +292,8 @@ static unsigned long determine_dirtyable_memory(void) } static void -get_dirty_limits(long *pbackground, long *pdirty, - struct address_space *mapping) +get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, + struct backing_dev_info *bdi) { int background_ratio; /* Percentages */ int dirty_ratio; @@ -193,6 +327,23 @@ get_dirty_limits(long *pbackground, long *pdirty, } *pbackground = background; *pdirty = dirty; + + if (bdi) { + u64 bdi_dirty = dirty; + long numerator, denominator; + + /* + * Calculate this BDI's share of the dirty ratio. + */ + bdi_writeout_fraction(bdi, &numerator, &denominator); + + bdi_dirty *= numerator; + do_div(bdi_dirty, denominator); + + *pbdi_dirty = bdi_dirty; + clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); + task_dirty_limit(current, pbdi_dirty); + } } /* @@ -204,9 +355,11 @@ get_dirty_limits(long *pbackground, long *pdirty, */ static void balance_dirty_pages(struct address_space *mapping) { - long nr_reclaimable; + long bdi_nr_reclaimable; + long bdi_nr_writeback; long background_thresh; long dirty_thresh; + long bdi_thresh; unsigned long pages_written = 0; unsigned long write_chunk = sync_writeback_pages(); @@ -221,15 +374,15 @@ static void balance_dirty_pages(struct address_space *mapping) .range_cyclic = 1, }; - get_dirty_limits(&background_thresh, &dirty_thresh, mapping); - nr_reclaimable = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - if (nr_reclaimable + global_page_state(NR_WRITEBACK) <= - dirty_thresh) - break; + get_dirty_limits(&background_thresh, &dirty_thresh, + &bdi_thresh, bdi); + bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); + bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); + if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) + break; - if (!dirty_exceeded) - dirty_exceeded = 1; + if (!bdi->dirty_exceeded) + bdi->dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked @@ -237,26 +390,42 @@ static void balance_dirty_pages(struct address_space *mapping) * written to the server's write cache, but has not yet * been flushed to permanent storage. */ - if (nr_reclaimable) { + if (bdi_nr_reclaimable) { writeback_inodes(&wbc); - get_dirty_limits(&background_thresh, - &dirty_thresh, mapping); - nr_reclaimable = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - if (nr_reclaimable + - global_page_state(NR_WRITEBACK) - <= dirty_thresh) - break; pages_written += write_chunk - wbc.nr_to_write; - if (pages_written >= write_chunk) - break; /* We've done our duty */ + get_dirty_limits(&background_thresh, &dirty_thresh, + &bdi_thresh, bdi); + } + + /* + * In order to avoid the stacked BDI deadlock we need + * to ensure we accurately count the 'dirty' pages when + * the threshold is low. + * + * Otherwise it would be possible to get thresh+n pages + * reported dirty, even though there are thresh-m pages + * actually dirty; with m+n sitting in the percpu + * deltas. 
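The deadlock-avoidance comment above is about counter error: bdi_stat() reads only the cheaply maintained global value, which per-CPU batching can leave off by up to roughly the number of CPUs times the batch size, so the code below switches to the exact but expensive bdi_stat_sum() once bdi_thresh drops within 2*bdi_stat_error(). A small simulation of that error, assuming a batch size of 32:

    #include <stdio.h>

    #define NR_CPUS	4
    #define BATCH	32

    /* Simplified percpu_counter: per-CPU deltas are only folded into the
     * global count once they exceed +/- BATCH. */
    struct pcounter {
    	long global;
    	long delta[NR_CPUS];
    };

    static void pc_add(struct pcounter *pc, int cpu, long amount)
    {
    	pc->delta[cpu] += amount;
    	if (pc->delta[cpu] >= BATCH || pc->delta[cpu] <= -BATCH) {
    		pc->global += pc->delta[cpu];
    		pc->delta[cpu] = 0;
    	}
    }

    static long pc_read(const struct pcounter *pc)	/* cf. bdi_stat(): cheap, approximate */
    {
    	return pc->global;
    }

    static long pc_sum(const struct pcounter *pc)	/* cf. bdi_stat_sum(): exact */
    {
    	long sum = pc->global;
    	int cpu;

    	for (cpu = 0; cpu < NR_CPUS; cpu++)
    		sum += pc->delta[cpu];
    	return sum;
    }

    int main(void)
    {
    	struct pcounter pc = { 0 };
    	int cpu;

    	for (cpu = 0; cpu < NR_CPUS; cpu++)
    		pc_add(&pc, cpu, BATCH - 1);	/* park just under the fold threshold */

    	printf("approximate: %ld, exact: %ld, max error: %d\n",
    	       pc_read(&pc), pc_sum(&pc), NR_CPUS * (BATCH - 1));
    	return 0;
    }

With four CPUs parked just under the batch, the approximate read is 0 while the exact sum is 124; when the per-BDI threshold itself is that small, only the exact sum gives a meaningful comparison, which is exactly what the 2*bdi_stat_error() check guards.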
+ */ + if (bdi_thresh < 2*bdi_stat_error(bdi)) { + bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); + bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); + } else if (bdi_nr_reclaimable) { + bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); + bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); } + + if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) + break; + if (pages_written >= write_chunk) + break; /* We've done our duty */ + congestion_wait(WRITE, HZ/10); } - if (nr_reclaimable + global_page_state(NR_WRITEBACK) - <= dirty_thresh && dirty_exceeded) - dirty_exceeded = 0; + if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && + bdi->dirty_exceeded) + bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* pdflush is already working this queue */ @@ -270,7 +439,9 @@ static void balance_dirty_pages(struct address_space *mapping) * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || - (!laptop_mode && (nr_reclaimable > background_thresh))) + (!laptop_mode && (global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) + > background_thresh))) pdflush_operation(background_writeout, 0); } @@ -306,7 +477,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long *p; ratelimit = ratelimit_pages; - if (dirty_exceeded) + if (mapping->backing_dev_info->dirty_exceeded) ratelimit = 8; /* @@ -331,18 +502,8 @@ void throttle_vm_writeout(gfp_t gfp_mask) long background_thresh; long dirty_thresh; - if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) { - /* - * The caller might hold locks which can prevent IO completion - * or progress in the filesystem. So we cannot just sit here - * waiting for IO to complete. - */ - congestion_wait(WRITE, HZ/10); - return; - } - for ( ; ; ) { - get_dirty_limits(&background_thresh, &dirty_thresh, NULL); + get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); /* * Boost the allowable dirty threshold a bit for page @@ -354,6 +515,14 @@ void throttle_vm_writeout(gfp_t gfp_mask) global_page_state(NR_WRITEBACK) <= dirty_thresh) break; congestion_wait(WRITE, HZ/10); + + /* + * The caller might hold locks which can prevent IO completion + * or progress in the filesystem. So we cannot just sit here + * waiting for IO to complete. 
+ */ + if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) + break; } } @@ -377,11 +546,12 @@ static void background_writeout(unsigned long _min_pages) long background_thresh; long dirty_thresh; - get_dirty_limits(&background_thresh, &dirty_thresh, NULL); + get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); if (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) < background_thresh && min_pages <= 0) break; + wbc.more_io = 0; wbc.encountered_congestion = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; @@ -389,8 +559,9 @@ static void background_writeout(unsigned long _min_pages) min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ - congestion_wait(WRITE, HZ/10); - if (!wbc.encountered_congestion) + if (wbc.encountered_congestion || wbc.more_io) + congestion_wait(WRITE, HZ/10); + else break; } } @@ -455,11 +626,12 @@ static void wb_kupdate(unsigned long arg) global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); while (nr_to_write > 0) { + wbc.more_io = 0; wbc.encountered_congestion = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { - if (wbc.encountered_congestion) + if (wbc.encountered_congestion || wbc.more_io) congestion_wait(WRITE, HZ/10); else break; /* All the old data is written */ @@ -580,9 +752,15 @@ static struct notifier_block __cpuinitdata ratelimit_nb = { */ void __init page_writeback_init(void) { + int shift; + mod_timer(&wb_timer, jiffies + dirty_writeback_interval); writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); + + shift = calc_period_shift(); + prop_descriptor_init(&vm_completions, shift); + prop_descriptor_init(&vm_dirties, shift); } /** @@ -672,8 +850,10 @@ retry: ret = (*writepage)(page, wbc, data); - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { unlock_page(page); + ret = 0; + } if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { @@ -827,6 +1007,8 @@ int __set_page_dirty_nobuffers(struct page *page) WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, + BDI_RECLAIMABLE); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, @@ -859,7 +1041,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage); * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. 
*/ -int fastcall set_page_dirty(struct page *page) +static int __set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -877,6 +1059,14 @@ int fastcall set_page_dirty(struct page *page) } return 0; } + +int fastcall set_page_dirty(struct page *page) +{ + int ret = __set_page_dirty(page); + if (ret) + task_dirty_inc(current); + return ret; +} EXPORT_SYMBOL(set_page_dirty); /* @@ -961,6 +1151,8 @@ int clear_page_dirty_for_io(struct page *page) */ if (TestClearPageDirty(page)) { dec_zone_page_state(page, NR_FILE_DIRTY); + dec_bdi_stat(mapping->backing_dev_info, + BDI_RECLAIMABLE); return 1; } return 0; @@ -975,14 +1167,20 @@ int test_clear_page_writeback(struct page *page) int ret; if (mapping) { + struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; write_lock_irqsave(&mapping->tree_lock, flags); ret = TestClearPageWriteback(page); - if (ret) + if (ret) { radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); + if (bdi_cap_writeback_dirty(bdi)) { + __dec_bdi_stat(bdi, BDI_WRITEBACK); + __bdi_writeout_inc(bdi); + } + } write_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestClearPageWriteback(page); @@ -998,14 +1196,18 @@ int test_set_page_writeback(struct page *page) int ret; if (mapping) { + struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; write_lock_irqsave(&mapping->tree_lock, flags); ret = TestSetPageWriteback(page); - if (!ret) + if (!ret) { radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); + if (bdi_cap_writeback_dirty(bdi)) + __inc_bdi_stat(bdi, BDI_WRITEBACK); + } if (!PageDirty(page)) radix_tree_tag_clear(&mapping->page_tree, page_index(page), diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d315e11..43f757f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -27,6 +27,7 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/slab.h> +#include <linux/oom.h> #include <linux/notifier.h> #include <linux/topology.h> #include <linux/sysctl.h> @@ -489,7 +490,7 @@ static void free_pages_bulk(struct zone *zone, int count, struct list_head *list, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; while (count--) { struct page *page; @@ -506,7 +507,7 @@ static void free_pages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; __free_one_page(page, zone, order); spin_unlock(&zone->lock); @@ -1586,6 +1587,11 @@ nofail_alloc: if (page) goto got_pg; } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { + if (!try_set_zone_oom(zonelist)) { + schedule_timeout_uninterruptible(1); + goto restart; + } + /* * Go through the zonelist yet one more time, keep * very high watermark here, this is only to catch @@ -1594,14 +1600,19 @@ nofail_alloc: */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); - if (page) + if (page) { + clear_zonelist_oom(zonelist); goto got_pg; + } /* The OOM killer will not help higher order allocs so fail */ - if (order > PAGE_ALLOC_COSTLY_ORDER) + if (order > PAGE_ALLOC_COSTLY_ORDER) { + clear_zonelist_oom(zonelist); goto nopage; + } out_of_memory(zonelist, gfp_mask, order); + clear_zonelist_oom(zonelist); goto restart; } @@ -1850,7 +1861,7 @@ 
void show_free_areas(void) K(zone_page_state(zone, NR_INACTIVE)), K(zone->present_pages), zone->pages_scanned, - (zone->all_unreclaimable ? "yes" : "no") + (zone_is_all_unreclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) @@ -3371,7 +3382,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->nr_scan_active = 0; zone->nr_scan_inactive = 0; zap_zone_vm_stats(zone); - atomic_set(&zone->reclaim_in_progress, 0); + zone->flags = 0; if (!size) continue; diff --git a/mm/readahead.c b/mm/readahead.c index 2297888..c9c50ca 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -233,6 +233,12 @@ unsigned long max_sane_readahead(unsigned long nr) + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); } +static int __init readahead_init(void) +{ + return bdi_init(&default_backing_dev_info); +} +subsys_initcall(readahead_init); + /* * Submit IO for the read-ahead request in file_ra_state. */ @@ -36,6 +36,7 @@ * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within inode_lock in __sync_single_inode) + * zone->lock (within radix tree node alloc) */ #include <linux/mm.h> @@ -137,8 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma) anon_vma_free(anon_vma); } -static void anon_vma_ctor(void *data, struct kmem_cache *cachep, - unsigned long flags) +static void anon_vma_ctor(struct kmem_cache *cachep, void *data) { struct anon_vma *anon_vma = data; @@ -2328,8 +2328,7 @@ static void shmem_destroy_inode(struct inode *inode) kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } -static void init_once(void *foo, struct kmem_cache *cachep, - unsigned long flags) +static void init_once(struct kmem_cache *cachep, void *foo) { struct shmem_inode_info *p = (struct shmem_inode_info *) foo; @@ -2344,9 +2343,7 @@ static int init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, 0, init_once); - if (shmem_inode_cachep == NULL) - return -ENOMEM; + 0, SLAB_PANIC, init_once); return 0; } @@ -2464,6 +2461,10 @@ static int __init init_tmpfs(void) { int error; + error = bdi_init(&shmem_backing_dev_info); + if (error) + goto out4; + error = init_inodecache(); if (error) goto out3; @@ -2488,6 +2489,8 @@ out1: out2: destroy_inodecache(); out3: + bdi_destroy(&shmem_backing_dev_info); +out4: shm_mnt = ERR_PTR(error); return error; } @@ -2540,11 +2543,8 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) d_instantiate(dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ - file->f_path.mnt = mntget(shm_mnt); - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = &shmem_file_operations; - file->f_mode = FMODE_WRITE | FMODE_READ; + init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, + &shmem_file_operations); return file; close_file: @@ -267,11 +267,10 @@ struct array_cache { unsigned int batchcount; unsigned int touched; spinlock_t lock; - void *entry[0]; /* + void *entry[]; /* * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. - * [0] is for gcc 2.95. It should really be []. 
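The array_cache change above replaces the old zero-length-array idiom (entry[0], kept for gcc 2.95) with a C99 flexible array member; the trailing array is then sized explicitly at allocation time. A reduced userspace example of the idiom, keeping only the fields needed to show it:

    #include <stdio.h>
    #include <stdlib.h>

    struct array_cache {
    	unsigned int avail;
    	unsigned int limit;
    	void *entry[];		/* C99 flexible array member, sized at allocation */
    };

    static struct array_cache *alloc_array_cache(unsigned int limit)
    {
    	struct array_cache *ac;

    	ac = malloc(sizeof(*ac) + limit * sizeof(void *));
    	if (ac) {
    		ac->avail = 0;
    		ac->limit = limit;
    	}
    	return ac;
    }

    int main(void)
    {
    	struct array_cache *ac = alloc_array_cache(16);

    	if (!ac)
    		return 1;
    	ac->entry[ac->avail++] = &ac;	/* store something in the trailing array */
    	printf("avail=%u limit=%u\n", ac->avail, ac->limit);
    	free(ac);
    	return 0;
    }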
*/ }; @@ -408,7 +407,7 @@ struct kmem_cache { unsigned int dflags; /* dynamic flags */ /* constructor func */ - void (*ctor) (void *, struct kmem_cache *, unsigned long); + void (*ctor)(struct kmem_cache *, void *); /* 5) cache creation/removal */ const char *name; @@ -2129,7 +2128,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; @@ -2636,8 +2635,7 @@ static void cache_init_objs(struct kmem_cache *cachep, * They must also be threaded. */ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(objp + obj_offset(cachep), cachep, - 0); + cachep->ctor(cachep, objp + obj_offset(cachep)); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) @@ -2653,7 +2651,7 @@ static void cache_init_objs(struct kmem_cache *cachep, cachep->buffer_size / PAGE_SIZE, 0); #else if (cachep->ctor) - cachep->ctor(objp, cachep, 0); + cachep->ctor(cachep, objp); #endif slab_bufctl(slabp)[i] = i + 1; } @@ -3078,7 +3076,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) - cachep->ctor(objp, cachep, 0); + cachep->ctor(cachep, objp); #if ARCH_SLAB_MINALIGN if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", @@ -499,12 +499,12 @@ struct kmem_cache { unsigned int size, align; unsigned long flags; const char *name; - void (*ctor)(void *, struct kmem_cache *, unsigned long); + void (*ctor)(struct kmem_cache *, void *); }; struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { struct kmem_cache *c; @@ -548,7 +548,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) b = slob_new_page(flags, get_order(c->size), node); if (c->ctor) - c->ctor(b, c, 0); + c->ctor(c, b); return b; } @@ -980,7 +980,7 @@ __setup("slub_debug", setup_slub_debug); static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { /* * The page->offset field is only 16 bit wide. 
This is an offset @@ -1027,7 +1027,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { return flags; } @@ -1071,7 +1071,7 @@ static void setup_object(struct kmem_cache *s, struct page *page, { setup_object_debug(s, page, object); if (unlikely(s->ctor)) - s->ctor(object, s, 0); + s->ctor(s, object); } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1085,9 +1085,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) BUG_ON(flags & GFP_SLAB_BUG_MASK); - if (flags & __GFP_WAIT) - local_irq_enable(); - page = allocate_slab(s, flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); if (!page) @@ -1120,8 +1117,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) page->freelist = start; page->inuse = 0; out: - if (flags & __GFP_WAIT) - local_irq_disable(); return page; } @@ -1505,7 +1500,14 @@ new_slab: goto load_freelist; } + if (gfpflags & __GFP_WAIT) + local_irq_enable(); + new = new_slab(s, gfpflags, node); + + if (gfpflags & __GFP_WAIT) + local_irq_disable(); + if (new) { c = get_cpu_slab(s, smp_processor_id()); if (c->page) { @@ -2039,12 +2041,6 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); add_partial(n, page); - - /* - * new_slab() disables interupts. If we do not reenable interrupts here - * then bootup would continue with interrupts disabled. - */ - local_irq_enable(); return n; } @@ -2215,7 +2211,7 @@ static int calculate_sizes(struct kmem_cache *s) static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { memset(s, 0, kmem_size); s->name = name; @@ -2805,7 +2801,7 @@ static int slab_unmergeable(struct kmem_cache *s) static struct kmem_cache *find_mergeable(size_t size, size_t align, unsigned long flags, const char *name, - void (*ctor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { struct kmem_cache *s; @@ -2846,7 +2842,7 @@ static struct kmem_cache *find_mergeable(size_t size, struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(struct kmem_cache *, void *)) { struct kmem_cache *s; @@ -28,6 +28,7 @@ #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/backing-dev.h> /* How many pages do we try to swap or page in/out together? 
*/ int page_cluster; @@ -547,6 +548,10 @@ void __init swap_setup(void) { unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); +#ifdef CONFIG_SWAP + bdi_init(swapper_space.backing_dev_info); +#endif + /* Use a smaller cluster for small-memory machines */ if (megs < 16) page_cluster = 2; diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c index 8803471..d436a9c 100644 --- a/mm/tiny-shmem.c +++ b/mm/tiny-shmem.c @@ -66,24 +66,19 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (!dentry) goto put_memory; - error = -ENFILE; - file = get_empty_filp(); - if (!file) - goto put_dentry; - error = -ENOSPC; inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); if (!inode) - goto close_file; + goto put_dentry; d_instantiate(dentry, inode); - inode->i_nlink = 0; /* It is unlinked */ + error = -ENFILE; + file = alloc_file(shm_mnt, dentry, FMODE_WRITE | FMODE_READ, + &ramfs_file_operations); + if (!file) + goto put_dentry; - file->f_path.mnt = mntget(shm_mnt); - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = &ramfs_file_operations; - file->f_mode = FMODE_WRITE | FMODE_READ; + inode->i_nlink = 0; /* It is unlinked */ /* notify everyone as to the change of file size */ error = do_truncate(dentry, size, 0, file); diff --git a/mm/truncate.c b/mm/truncate.c index 5cdfbc1..cadc156 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -8,6 +8,7 @@ */ #include <linux/kernel.h> +#include <linux/backing-dev.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/module.h> @@ -72,6 +73,8 @@ void cancel_dirty_page(struct page *page, unsigned int account_size) struct address_space *mapping = page->mapping; if (mapping && mapping_cap_account_dirty(mapping)) { dec_zone_page_state(page, NR_FILE_DIRTY); + dec_bdi_stat(mapping->backing_dev_info, + BDI_RECLAIMABLE); if (account_size) task_io_account_cancelled_write(account_size); } diff --git a/mm/vmscan.c b/mm/vmscan.c index bbd1946..e147138 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1108,8 +1108,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone, unsigned long nr_to_scan; unsigned long nr_reclaimed = 0; - atomic_inc(&zone->reclaim_in_progress); - /* * Add one to `nr_to_scan' just to make sure that the kernel will * slowly sift through the active list. 
@@ -1148,8 +1146,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone, } throttle_vm_writeout(sc->gfp_mask); - - atomic_dec(&zone->reclaim_in_progress); return nr_reclaimed; } @@ -1187,7 +1183,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, note_zone_scanning_priority(zone, priority); - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ sc->all_unreclaimable = 0; @@ -1368,7 +1364,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone_is_all_unreclaimable(zone) && + priority != DEF_PRIORITY) continue; if (!zone_watermark_ok(zone, order, zone->pages_high, @@ -1403,7 +1400,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone_is_all_unreclaimable(zone) && + priority != DEF_PRIORITY) continue; if (!zone_watermark_ok(zone, order, zone->pages_high, @@ -1424,12 +1422,13 @@ loop_again: lru_pages); nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; - if (zone->all_unreclaimable) + if (zone_is_all_unreclaimable(zone)) continue; if (nr_slab == 0 && zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) + zone_page_state(zone, NR_INACTIVE)) * 6) - zone->all_unreclaimable = 1; + zone_set_flag(zone, + ZONE_ALL_UNRECLAIMABLE); /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage @@ -1595,7 +1594,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && prio != DEF_PRIORITY) + if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; /* For pass = 0 we don't shrink the active list */ @@ -1897,6 +1896,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) { int node_id; + int ret; /* * Zone reclaim reclaims unmapped file backed pages and @@ -1914,15 +1914,13 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) <= zone->min_slab_pages) return 0; + if (zone_is_all_unreclaimable(zone)) + return 0; + /* - * Avoid concurrent zone reclaims, do not reclaim in a zone that does - * not have reclaimable pages and if we should not delay the allocation - * then do not scan. + * Do not scan if the allocation should not be delayed. 
*/ - if (!(gfp_mask & __GFP_WAIT) || - zone->all_unreclaimable || - atomic_read(&zone->reclaim_in_progress) > 0 || - (current->flags & PF_MEMALLOC)) + if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC)) return 0; /* @@ -1934,6 +1932,12 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) node_id = zone_to_nid(zone); if (node_state(node_id, N_CPU) && node_id != numa_node_id()) return 0; - return __zone_reclaim(zone, gfp_mask, order); + + if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED)) + return 0; + ret = __zone_reclaim(zone, gfp_mask, order); + zone_clear_flag(zone, ZONE_RECLAIM_LOCKED); + + return ret; } #endif diff --git a/mm/vmstat.c b/mm/vmstat.c index 3b5e904..4651bf1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -704,7 +704,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n all_unreclaimable: %u" "\n prev_priority: %i" "\n start_pfn: %lu", - zone->all_unreclaimable, + zone_is_all_unreclaimable(zone), zone->prev_priority, zone->zone_start_pfn); seq_putc(m, '\n'); |
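zone_reclaim() above is now serialized by a per-zone ZONE_RECLAIM_LOCKED flag instead of the old reclaim_in_progress counter: only the caller that wins the test-and-set runs __zone_reclaim(), everyone else backs off immediately. The same guard expressed with portable C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Per-zone guard, cf. zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED). */
    static atomic_flag reclaim_locked = ATOMIC_FLAG_INIT;

    static int zone_reclaim(void)
    {
    	int ret;

    	if (atomic_flag_test_and_set(&reclaim_locked))
    		return 0;		/* someone else is already reclaiming */

    	ret = 1;			/* stand-in for __zone_reclaim() */

    	atomic_flag_clear(&reclaim_locked);
    	return ret;
    }

    int main(void)
    {
    	printf("zone_reclaim() = %d\n", zone_reclaim());
    	return 0;
    }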