From 581bb050941b4f220f84d3e5ed6dace3d42dd382 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:06:11 +0800 Subject: Btrfs: Cache free inode numbers in memory Currently btrfs stores the highest objectid of the fs tree, and it always returns (highest+1) inode number when we create a file, so inode numbers won't be reclaimed when we delete files, so we'll run out of inode numbers as we keep create/delete files in 32bits machines. This fixes it, and it works similarly to how we cache free space in block cgroups. We start a kernel thread to read the file tree. By scanning inode items, we know which chunks of inode numbers are free, and we cache them in an rb-tree. Because we are searching the commit root, we have to carefully handle the cross-transaction case. The rb-tree is a hybrid extent+bitmap tree, so if we have too many small chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram of extents, and a bitmap will be used if we exceed this threshold. The extents threshold is adjusted in runtime. Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6865c..d02683b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" +#include "inode-map.h" static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -1327,6 +1328,19 @@ again: if (IS_ERR(root)) return root; + root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); + if (!root->free_ino_ctl) + goto fail; + root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), + GFP_NOFS); + if (!root->free_ino_pinned) + goto fail; + + btrfs_init_free_ino_ctl(root); + mutex_init(&root->fs_commit_mutex); + spin_lock_init(&root->cache_lock); + init_waitqueue_head(&root->cache_wait); + set_anon_super(&root->anon_super, NULL); if (btrfs_root_refs(&root->root_item) == 0) { @@ -2483,6 +2497,8 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); + __btrfs_remove_free_space_cache(root->free_ino_pinned); + __btrfs_remove_free_space_cache(root->free_ino_ctl); free_fs_root(root); return 0; } @@ -2496,6 +2512,8 @@ static void free_fs_root(struct btrfs_root *root) } free_extent_buffer(root->node); free_extent_buffer(root->commit_root); + kfree(root->free_ino_ctl); + kfree(root->free_ino_pinned); kfree(root->name); kfree(root); } -- cgit v1.1 From 82d5902d9c681be37ffa9d70482907f9f0b7ec1f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:33:24 +0800 Subject: Btrfs: Support reading/writing on disk free ino cache This is similar to block group caching. We dedicate a special inode in fs tree to save free ino cache. At the very first time we create/delete a file after mount, the free ino cache will be loaded from disk into memory. When the fs tree is commited, the cache will be written back to disk. To keep compatibility, we check the root generation against the generation of the special inode when loading the cache, so the loading will fail if the btrfs filesystem was mounted in an older kernel before. Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d02683b..4f12c30 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2505,6 +2505,7 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) static void free_fs_root(struct btrfs_root *root) { + iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_super.s_dev) { down_write(&root->anon_super.s_umount); -- cgit v1.1