From c37650161a53c01ddd88587675f9a4adc909a73e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Oct 2010 10:48:20 +0200
Subject: fs: add sync_inode_metadata

Add a new helper to write out the inode using the writeback code,
that is including the correct dirty bit and list manipulation.  A few
of filesystems already opencode this, and a lot of others should be
using it instead of using write_inode_now which also writes out the
data.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f34ff6..0b03f49 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1734,6 +1734,7 @@ static inline void file_accessed(struct file *file)
 }
 
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
+int sync_inode_metadata(struct inode *inode, int wait);
 
 struct file_system_type {
 	const char *name;
-- 
cgit v1.1


From 56b0dacfa2b8416815a2f2a5f4f51e46be4cf14c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Oct 2010 10:48:55 +0200
Subject: fs: mark destroy_inode static

Hugetlbfs used to need it, but after the destroy_inode and evict_inode
changes it's not required anymore.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b03f49..0a5d836 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2187,7 +2187,6 @@ extern void unlock_new_inode(struct inode *);
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
 extern void end_writeback(struct inode *);
-extern void destroy_inode(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
 extern int should_remove_suid(struct dentry *);
-- 
cgit v1.1


From 7e360c38abe2c70eae3ba5a8a17f17671d8b77c5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 5 Oct 2010 09:32:55 +0200
Subject: fs: allow for more than 2^31 files

Andrew,

Could you please review this patch, you probably are the right guy to
take it, because it crosses fs and net trees.

Note : /proc/sys/fs/file-nr is a read-only file, so this patch doesnt
depend on previous patch (sysctl: fix min/max handling in
__do_proc_doulongvec_minmax())

Thanks !

[PATCH V4] fs: allow for more than 2^31 files

Robin Holt tried to boot a 16TB system and found af_unix was overflowing
a 32bit value :

<quote>

We were seeing a failure which prevented boot.  The kernel was incapable
of creating either a named pipe or unix domain socket.  This comes down
to a common kernel function called unix_create1() which does:

        atomic_inc(&unix_nr_socks);
        if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().

        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = n;

In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000).  That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.

</quote>

Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
integers, and change af_unix to use an atomic_long_t instead of
atomic_t.

get_max_files() is changed to return an unsigned long.
get_nr_files() is changed to return a long.

unix_nr_socks is changed from atomic_t to atomic_long_t, while not
strictly needed to address Robin problem.

Before patch (on a 64bit kernel) :
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968

After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704     0       2147483648

Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0a5d836..0cd6821 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -34,9 +34,9 @@
 
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
-	int nr_files;		/* read only */
-	int nr_free_files;	/* read only */
-	int max_files;		/* tunable */
+	unsigned long nr_files;		/* read only */
+	unsigned long nr_free_files;	/* read only */
+	unsigned long max_files;		/* tunable */
 };
 
 struct inodes_stat_t {
@@ -400,7 +400,7 @@ extern void __init inode_init_early(void);
 extern void __init files_init(unsigned long);
 
 extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
+extern unsigned long get_max_files(void);
 extern int sysctl_nr_open;
 extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
-- 
cgit v1.1


From 4a3956c790290efeb647bbb0c3a90476bb57800e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 1 Oct 2010 14:20:22 -0700
Subject: vfs: introduce FMODE_UNSIGNED_OFFSET for allowing negative f_pos

Now, rw_verify_area() checsk f_pos is negative or not.  And if negative,
returns -EINVAL.

But, some special files as /dev/(k)mem and /proc/<pid>/mem etc..  has
negative offsets.  And we can't do any access via read/write to the
file(device).

So introduce FMODE_UNSIGNED_OFFSET to allow negative file offsets.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cd6821..7fc126d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,6 +92,9 @@ struct inodes_stat_t {
 /* Expect random access pattern */
 #define FMODE_RANDOM		((__force fmode_t)0x1000)
 
+/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
+#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
 
-- 
cgit v1.1


From a8dade34e3df581bc36ca2afe6e27055e178801c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Oct 2010 11:13:10 -0400
Subject: unexport invalidate_inodes

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7fc126d..c3f6daf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2082,7 +2082,6 @@ extern int check_disk_change(struct block_device *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 #endif
-extern int invalidate_inodes(struct super_block *);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
-- 
cgit v1.1


From 1d3382cbf02986e4833849f528d451367ea0b4cb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Oct 2010 15:19:20 -0400
Subject: new helper: inode_unhashed()

note: for race-free uses you inode_lock held

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3f6daf..78043da 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -786,6 +786,11 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 };
 
+static inline int inode_unhashed(struct inode *inode)
+{
+	return hlist_unhashed(&inode->i_hash);
+}
+
 /*
  * inode->i_mutex nesting subclasses for the lock validator:
  *
-- 
cgit v1.1


From cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Sat, 23 Oct 2010 05:03:02 -0400
Subject: fs: Convert nr_inodes and nr_unused to per-cpu counters

The number of inodes allocated does not need to be tied to the
addition or removal of an inode to/from a list. If we are not tied
to a list lock, we could update the counters when inodes are
initialised or destroyed, but to do that we need to convert the
counters to be per-cpu (i.e. independent of a lock). This means that
we have the freedom to change the list/locking implementation
without needing to care about the counters.

Based on a patch originally from Eric Dumazet.

[AV: cleaned up a bit, fixed build breakage on weird configs

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78043da..a3937a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 struct ctl_table;
 int proc_nr_files(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_inodes(struct ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
 
 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
-- 
cgit v1.1


From 9e38d86ff2d8a8db99570e982230861046df32b5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 23 Oct 2010 06:55:17 -0400
Subject: fs: Implement lazy LRU updates for inodes

Convert the inode LRU to use lazy updates to reduce lock and
cacheline traffic.  We avoid moving inodes around in the LRU list
during iget/iput operations so these frequent operations don't need
to access the LRUs. Instead, we defer the refcount checks to
reclaim-time and use a per-inode state flag, I_REFERENCED, to tell
reclaim that iget has touched the inode in the past. This means that
only reclaim should be touching the LRU with any frequency, hence
significantly reducing lock acquisitions and the amount contention
on LRU updates.

This also removes the inode_in_use list, which means we now only
have one list for tracking the inode LRU status. This makes it much
simpler to split out the LRU list operations under it's own lock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a3937a8..876275f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1641,16 +1641,17 @@ struct super_operations {
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
-#define I_DIRTY_SYNC		1
-#define I_DIRTY_DATASYNC	2
-#define I_DIRTY_PAGES		4
+#define I_DIRTY_SYNC		(1 << 0)
+#define I_DIRTY_DATASYNC	(1 << 1)
+#define I_DIRTY_PAGES		(1 << 2)
 #define __I_NEW			3
 #define I_NEW			(1 << __I_NEW)
-#define I_WILL_FREE		16
-#define I_FREEING		32
-#define I_CLEAR			64
+#define I_WILL_FREE		(1 << 4)
+#define I_FREEING		(1 << 5)
+#define I_CLEAR			(1 << 6)
 #define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
+#define I_REFERENCED		(1 << 8)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 
-- 
cgit v1.1


From 646ec4615cd05972581c9c5342ed7a1e77df17bb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 23 Oct 2010 07:15:32 -0400
Subject: fs: remove inode_add_to_list/__inode_add_to_list

Split up inode_add_to_list/__inode_add_to_list.  Locking for the two
lists will be split soon so these helpers really don't buy us much
anymore.

The __ prefixes for the sb list helpers will go away soon, but until
inode_lock is gone we'll need them to distinguish between the locked
and unlocked variants.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 876275f..d43e8b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2171,7 +2171,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
-extern void inode_add_to_lists(struct super_block *, struct inode *);
 extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
@@ -2202,9 +2201,11 @@ extern int file_remove_suid(struct file *);
 
 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
 extern void remove_inode_hash(struct inode *);
-static inline void insert_inode_hash(struct inode *inode) {
+static inline void insert_inode_hash(struct inode *inode)
+{
 	__insert_inode_hash(inode, inode->i_ino);
 }
+extern void inode_sb_list_add(struct inode *inode);
 
 #ifdef CONFIG_BLOCK
 extern void submit_bio(int, struct bio *);
-- 
cgit v1.1


From 7de9c6ee3ecffd99e1628e81a5ea5468f7581a1f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Oct 2010 11:11:40 -0400
Subject: new helper: ihold()

Clones an existing reference to inode; caller must already hold one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d43e8b6..bd6ae6c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2171,6 +2171,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
+extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
-- 
cgit v1.1


From 85fe4025c616a7c0ed07bc2fc8c5371b07f3888c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 23 Oct 2010 11:19:54 -0400
Subject: fs: do not assign default i_ino in new_inode

Instead of always assigning an increasing inode number in new_inode
move the call to assign it into those callers that actually need it.
For now callers that need it is estimated conservatively, that is
the call is added to all filesystems that do not assign an i_ino
by themselves.  For a few more filesystems we can avoid assigning
any inode number given that they aren't user visible, and for others
it could be done lazily when an inode number is actually needed,
but that's left for later patches.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bd6ae6c..4a573cf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2191,6 +2191,7 @@ extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
 extern void unlock_new_inode(struct inode *);
+extern unsigned int get_next_ino(void);
 
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
-- 
cgit v1.1


From 312d3ca856d369bb04d0443846b85b4cdde6fa8a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 10 Oct 2010 05:36:23 -0400
Subject: fs: use percpu counter for nr_dentry and nr_dentry_unused

The nr_dentry stat is a globally touched cacheline and atomic operation
twice over the lifetime of a dentry. It is used for the benfit of userspace
only. Turn it into a per-cpu counter and always decrement it in d_free instead
of doing various batching operations to reduce lock hold times in the callers.

Based on an earlier patch from Nick Piggin <npiggin@suse.de>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a573cf..d580599 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2490,6 +2490,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 struct ctl_table;
 int proc_nr_files(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
+int proc_nr_dentry(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_inodes(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
-- 
cgit v1.1


From 7ccf19a8042e343f8159f8a5fdd6a9422aa90c78 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 21 Oct 2010 11:49:30 +1100
Subject: fs: inode split IO and LRU lists

The use of the same inode list structure (inode->i_list) for two
different list constructs with different lifecycles and purposes
makes it impossible to separate the locking of the different
operations. Therefore, to enable the separation of the locking of
the writeback and reclaim lists, split the inode->i_list into two
separate lists dedicated to their specific tracking functions.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d580599..f300a65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -723,7 +723,8 @@ struct posix_acl;
 
 struct inode {
 	struct hlist_node	i_hash;
-	struct list_head	i_list;		/* backing dev IO list */
+	struct list_head	i_wb_list;	/* backing dev IO list */
+	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
-- 
cgit v1.1