aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ioctl.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-12 09:58:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-12 09:58:51 -0700
commitb7c09ad4014e3678e8cc01fdf663c9f43b272dc6 (patch)
tree1edb073b0a76ce1530cb31c113f9e741e33ece0e /fs/btrfs/ioctl.c
parent1812997720ab90d029548778c55d7315555e1fef (diff)
parentd7396f07358a7c6e22c238d36d1d85f9d652a414 (diff)
downloadkernel_goldelico_gta04-b7c09ad4014e3678e8cc01fdf663c9f43b272dc6.zip
kernel_goldelico_gta04-b7c09ad4014e3678e8cc01fdf663c9f43b272dc6.tar.gz
kernel_goldelico_gta04-b7c09ad4014e3678e8cc01fdf663c9f43b272dc6.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "This is against 3.11-rc7, but was pulled and tested against your tree as of yesterday. We do have two small incrementals queued up, but I wanted to get this bunch out the door before I hop on an airplane. This is a fairly large batch of fixes, performance improvements, and cleanups from the usual Btrfs suspects. We've included Stefan Behren's work to index subvolume UUIDs, which is targeted at speeding up send/receive with many subvolumes or snapshots in place. It closes a long standing performance issue that was built in to the disk format. Mark Fasheh's offline dedup work is also here. In this case offline means the FS is mounted and active, but the dedup work is not done inline during file IO. This is a building block where utilities are able to ask the FS to dedup a series of extents. The kernel takes care of verifying the data involved really is the same. Today this involves reading both extents, but we'll continue to evolve the patches" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) Btrfs: optimize key searches in btrfs_search_slot Btrfs: don't use an async starter for most of our workers Btrfs: only update disk_i_size as we remove extents Btrfs: fix deadlock in uuid scan kthread Btrfs: stop refusing the relocation of chunk 0 Btrfs: fix memory leak of uuid_root in free_fs_info btrfs: reuse kbasename helper btrfs: return btrfs error code for dev excl ops err Btrfs: allow partial ordered extent completion Btrfs: convert all bug_ons in free-space-cache.c Btrfs: add support for asserts Btrfs: adjust the fs_devices->missing count on unmount Btrf: cleanup: don't check for root_refs == 0 twice Btrfs: fix for patch "cleanup: don't check the same thing twice" Btrfs: get rid of one BUG() in write_all_supers() Btrfs: allocate prelim_ref with a slab allocater Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl Btrfs: fix race between removing a dev and writing sbs Btrfs: remove ourselves from the cluster list under lock ...
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r--fs/btrfs/ioctl.c745
1 files changed, 564 insertions, 181 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 238a055..1a5b946 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,7 @@
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
+#include <linux/uaccess.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -57,6 +58,9 @@
#include "send.h"
#include "dev-replace.h"
+static int btrfs_clone(struct inode *src, struct inode *inode,
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
{
@@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return 0;
}
+int btrfs_is_empty_uuid(u8 *uuid)
+{
+ static char empty_uuid[BTRFS_UUID_SIZE] = {0};
+
+ return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
+}
+
static noinline int create_subvol(struct inode *dir,
struct dentry *dentry,
char *name, int namelen,
@@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir,
* of create_snapshot().
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 7, &qgroup_reserved);
+ 8, &qgroup_reserved, false);
if (ret)
return ret;
@@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir,
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
- write_extent_buffer(leaf, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(leaf),
+ write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+ btrfs_header_chunk_tree_uuid(leaf),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
memset(&root_item, 0, sizeof(root_item));
inode_item = &root_item.inode;
- inode_item->generation = cpu_to_le64(1);
- inode_item->size = cpu_to_le64(3);
- inode_item->nlink = cpu_to_le32(1);
- inode_item->nbytes = cpu_to_le64(root->leafsize);
- inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+ btrfs_set_stack_inode_generation(inode_item, 1);
+ btrfs_set_stack_inode_size(inode_item, 3);
+ btrfs_set_stack_inode_nlink(inode_item, 1);
+ btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+ btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
- root_item.flags = 0;
- root_item.byte_limit = 0;
- inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
+ btrfs_set_root_flags(&root_item, 0);
+ btrfs_set_root_limit(&root_item, 0);
+ btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
btrfs_set_root_bytenr(&root_item, leaf->start);
btrfs_set_root_generation(&root_item, trans->transid);
@@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir,
btrfs_root_generation(&root_item));
uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
- root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
- root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
+ btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
+ btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
root_item.ctime = root_item.otime;
btrfs_set_root_ctransid(&root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid);
@@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
objectid, root->root_key.objectid,
btrfs_ino(dir), index, name, namelen);
-
BUG_ON(ret);
+ ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+ root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+ objectid);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+
fail:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
@@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
* 1 - root item
* 2 - root ref/backref
* 1 - root of snapshot
+ * 1 - UUID item
*/
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv, 7,
- &pending_snapshot->qgroup_reserved);
+ &pending_snapshot->block_rsv, 8,
+ &pending_snapshot->qgroup_reserved,
+ false);
if (ret)
goto out;
@@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
cluster = max_cluster;
}
- if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
- BTRFS_I(inode)->force_compress = compress_type;
-
if (i + cluster > ra_index) {
ra_index = max(i, ra_index);
btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
@@ -1278,6 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
}
mutex_lock(&inode->i_mutex);
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
+ BTRFS_I(inode)->force_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
if (ret < 0) {
mutex_unlock(&inode->i_mutex);
@@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
-
- mutex_lock(&inode->i_mutex);
- BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) {
@@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ret = defrag_count;
out_ra:
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+ mutex_lock(&inode->i_mutex);
+ BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
+ mutex_unlock(&inode->i_mutex);
+ }
if (!file)
kfree(ra);
kfree(pages);
@@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
- return -EINVAL;
+ return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL;
goto out_free;
}
- printk(KERN_INFO "btrfs: resizing devid %llu\n",
- (unsigned long long)devid);
+ printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
}
device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
- (unsigned long long)devid);
+ devid);
ret = -ENODEV;
goto out_free;
}
@@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (!device->writeable) {
printk(KERN_INFO "btrfs: resizer unable to apply on "
"readonly device %llu\n",
- (unsigned long long)devid);
+ devid);
ret = -EPERM;
goto out_free;
}
@@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
new_size *= root->sectorsize;
printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
- rcu_str_deref(device->name),
- (unsigned long long)new_size);
+ rcu_str_deref(device->name), new_size);
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
@@ -1721,13 +1735,28 @@ out:
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
struct btrfs_path *path;
+ struct btrfs_dir_item *di;
struct btrfs_key key;
+ u64 dir_id;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ /* Make sure this root isn't set as the default subvol */
+ dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
+ di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
+ dir_id, "default", 7, 0);
+ if (di && !IS_ERR(di)) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.objectid == root->root_key.objectid) {
+ ret = -ENOTEMPTY;
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = (u64)-1;
@@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
+ else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
l = path->nodes[0];
slot = path->slots[0];
- if (ret > 0 && slot > 0)
- slot--;
btrfs_item_key_to_cpu(l, &key, slot);
- if (ret > 0 && (key.objectid != dirid ||
- key.type != BTRFS_INODE_REF_KEY)) {
- ret = -ENOENT;
- goto out;
- }
-
iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(l, iref);
ptr -= len + 1;
total_len += len + 1;
- if (ptr < name)
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
goto out;
+ }
*(ptr + len) = '/';
read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
@@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
dirid = key.objectid;
}
- if (ptr < name)
- goto out;
memmove(name, ptr, total_len);
name[total_len]='\0';
ret = 0;
@@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* ref/backref.
*/
err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 5, &qgroup_reserved);
+ 5, &qgroup_reserved, true);
if (err)
goto out_up_write;
@@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_end_trans;
}
}
+
+ ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ dest->root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
@@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- return -EINVAL;
+ return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
if (!fi_args)
return -ENOMEM;
+ mutex_lock(&fs_devices->device_list_mutex);
fi_args->num_devices = fs_devices->num_devices;
memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
- mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id)
fi_args->max_id = device->devid;
@@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
int ret = 0;
char *s_uuid = NULL;
- char empty_uuid[BTRFS_UUID_SIZE] = {0};
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
if (IS_ERR(di_args))
return PTR_ERR(di_args);
- if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
+ if (!btrfs_is_empty_uuid(di_args->uuid))
s_uuid = di_args->uuid;
mutex_lock(&fs_devices->device_list_mutex);
@@ -2469,150 +2519,336 @@ out:
return ret;
}
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static struct page *extent_same_get_page(struct inode *inode, u64 off)
+{
+ struct page *page;
+ pgoff_t index;
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+
+ index = off >> PAGE_CACHE_SHIFT;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return NULL;
+
+ if (!PageUptodate(page)) {
+ if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
+ 0))
+ return NULL;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ return NULL;
+ }
+ }
+ unlock_page(page);
+
+ return page;
+}
+
+static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+{
+ /* do any pending delalloc/csum calc on src, one way or
+ another, and lock file content */
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ off + len - 1);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(inode)->io_tree, off,
+ off + len - 1, EXTENT_DELALLOC, 0, NULL))
+ break;
+ unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ btrfs_wait_ordered_range(inode, off, len);
+ }
+}
+
+static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+
+ mutex_unlock(&inode1->i_mutex);
+ mutex_unlock(&inode2->i_mutex);
+}
+
+static void btrfs_double_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ if (inode1 < inode2) {
+ swap(inode1, inode2);
+ swap(loff1, loff2);
+ }
+
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ lock_extent_range(inode1, loff1, len);
+ if (inode1 != inode2) {
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ lock_extent_range(inode2, loff2, len);
+ }
+}
+
+static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
+ u64 dst_loff, u64 len)
+{
+ int ret = 0;
+ struct page *src_page, *dst_page;
+ unsigned int cmp_len = PAGE_CACHE_SIZE;
+ void *addr, *dst_addr;
+
+ while (len) {
+ if (len < PAGE_CACHE_SIZE)
+ cmp_len = len;
+
+ src_page = extent_same_get_page(src, loff);
+ if (!src_page)
+ return -EINVAL;
+ dst_page = extent_same_get_page(dst, dst_loff);
+ if (!dst_page) {
+ page_cache_release(src_page);
+ return -EINVAL;
+ }
+ addr = kmap_atomic(src_page);
+ dst_addr = kmap_atomic(dst_page);
+
+ flush_dcache_page(src_page);
+ flush_dcache_page(dst_page);
+
+ if (memcmp(addr, dst_addr, cmp_len))
+ ret = BTRFS_SAME_DATA_DIFFERS;
+
+ kunmap_atomic(addr);
+ kunmap_atomic(dst_addr);
+ page_cache_release(src_page);
+ page_cache_release(dst_page);
+
+ if (ret)
+ break;
+
+ loff += cmp_len;
+ dst_loff += cmp_len;
+ len -= cmp_len;
+ }
+
+ return ret;
+}
+
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+{
+ u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
+
+ if (off + len > inode->i_size || off + len < off)
+ return -EINVAL;
+ /* Check that we are block aligned - btrfs_clone() requires this */
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+ struct inode *dst, u64 dst_loff)
{
- struct inode *inode = file_inode(file);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct fd src_file;
- struct inode *src;
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- char *buf;
- struct btrfs_key key;
- u32 nritems;
- int slot;
int ret;
- u64 len = olen;
- u64 bs = root->fs_info->sb->s_blocksize;
- int same_inode = 0;
/*
- * TODO:
- * - split compressed inline extents. annoying: we need to
- * decompress into destination's address_space (the file offset
- * may change, so source mapping won't do), then recompress (or
- * otherwise reinsert) a subrange.
- * - allow ranges within the same file to be cloned (provided
- * they don't overlap)?
+ * btrfs_clone() can't handle extents in the same file
+ * yet. Once that works, we can drop this check and replace it
+ * with a check for the same inode, but overlapping extents.
*/
-
- /* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+ if (src == dst)
return -EINVAL;
- if (btrfs_root_readonly(root))
- return -EROFS;
+ btrfs_double_lock(src, loff, dst, dst_loff, len);
+
+ ret = extent_same_check_offsets(src, loff, len);
+ if (ret)
+ goto out_unlock;
+
+ ret = extent_same_check_offsets(dst, dst_loff, len);
+ if (ret)
+ goto out_unlock;
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+ if (ret == 0)
+ ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+
+out_unlock:
+ btrfs_double_unlock(src, loff, dst, dst_loff, len);
+
+ return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
+
+static long btrfs_ioctl_file_extent_same(struct file *file,
+ void __user *argp)
+{
+ struct btrfs_ioctl_same_args *args = argp;
+ struct btrfs_ioctl_same_args same;
+ struct btrfs_ioctl_same_extent_info info;
+ struct inode *src = file->f_dentry->d_inode;
+ struct file *dst_file = NULL;
+ struct inode *dst;
+ u64 off;
+ u64 len;
+ int i;
+ int ret;
+ u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+ bool is_admin = capable(CAP_SYS_ADMIN);
+
+ if (!(file->f_mode & FMODE_READ))
+ return -EINVAL;
ret = mnt_want_write_file(file);
if (ret)
return ret;
- src_file = fdget(srcfd);
- if (!src_file.file) {
- ret = -EBADF;
- goto out_drop_write;
+ if (copy_from_user(&same,
+ (struct btrfs_ioctl_same_args __user *)argp,
+ sizeof(same))) {
+ ret = -EFAULT;
+ goto out;
}
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != file->f_path.mnt)
- goto out_fput;
+ off = same.logical_offset;
+ len = same.length;
- src = file_inode(src_file.file);
+ /*
+ * Limit the total length we will dedupe for each operation.
+ * This is intended to bound the total time spent in this
+ * ioctl to something sane.
+ */
+ if (len > BTRFS_MAX_DEDUPE_LEN)
+ len = BTRFS_MAX_DEDUPE_LEN;
- ret = -EINVAL;
- if (src == inode)
- same_inode = 1;
+ if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
+ /*
+ * Btrfs does not support blocksize < page_size. As a
+ * result, btrfs_cmp_data() won't correctly handle
+ * this situation without an update.
+ */
+ ret = -EINVAL;
+ goto out;
+ }
- /* the src must be open for reading */
- if (!(src_file.file->f_mode & FMODE_READ))
- goto out_fput;
+ ret = -EISDIR;
+ if (S_ISDIR(src->i_mode))
+ goto out;
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- goto out_fput;
+ ret = -EACCES;
+ if (!S_ISREG(src->i_mode))
+ goto out;
- ret = -EISDIR;
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- goto out_fput;
+ ret = 0;
+ for (i = 0; i < same.dest_count; i++) {
+ if (copy_from_user(&info, &args->info[i], sizeof(info))) {
+ ret = -EFAULT;
+ goto out;
+ }
- ret = -EXDEV;
- if (src->i_sb != inode->i_sb)
- goto out_fput;
+ info.bytes_deduped = 0;
- ret = -ENOMEM;
- buf = vmalloc(btrfs_level_size(root, 0));
- if (!buf)
- goto out_fput;
+ dst_file = fget(info.fd);
+ if (!dst_file) {
+ info.status = -EBADF;
+ goto next;
+ }
- path = btrfs_alloc_path();
- if (!path) {
- vfree(buf);
- goto out_fput;
- }
- path->reada = 2;
+ if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+ info.status = -EINVAL;
+ goto next;
+ }
- if (!same_inode) {
- if (inode < src) {
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ info.status = -EXDEV;
+ if (file->f_path.mnt != dst_file->f_path.mnt)
+ goto next;
+
+ dst = dst_file->f_dentry->d_inode;
+ if (src->i_sb != dst->i_sb)
+ goto next;
+
+ if (S_ISDIR(dst->i_mode)) {
+ info.status = -EISDIR;
+ goto next;
}
- } else {
- mutex_lock(&src->i_mutex);
- }
- /* determine range to clone */
- ret = -EINVAL;
- if (off + len > src->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- olen = len = src->i_size - off;
- /* if we extend to eof, continue to block boundary */
- if (off + len == src->i_size)
- len = ALIGN(src->i_size, bs) - off;
+ if (!S_ISREG(dst->i_mode)) {
+ info.status = -EACCES;
+ goto next;
+ }
- /* verify the end result is block aligned */
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
- !IS_ALIGNED(destoff, bs))
- goto out_unlock;
+ info.status = btrfs_extent_same(src, off, len, dst,
+ info.logical_offset);
+ if (info.status == 0)
+ info.bytes_deduped += len;
- /* verify if ranges are overlapped within the same file */
- if (same_inode) {
- if (destoff + len > off && destoff < off + len)
- goto out_unlock;
- }
+next:
+ if (dst_file)
+ fput(dst_file);
- if (destoff > inode->i_size) {
- ret = btrfs_cont_expand(inode, inode->i_size, destoff);
- if (ret)
- goto out_unlock;
+ if (__put_user_unaligned(info.status, &args->info[i].status) ||
+ __put_user_unaligned(info.bytes_deduped,
+ &args->info[i].bytes_deduped)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
- /* truncate page cache pages from target inode range */
- truncate_inode_pages_range(&inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len) - 1);
+out:
+ mnt_drop_write_file(file);
+ return ret;
+}
- /* do any pending delalloc/csum calc on src, one way or
- another, and lock file content */
- while (1) {
- struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
- ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1);
- if (!ordered &&
- !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1,
- EXTENT_DELALLOC, 0, NULL))
- break;
- unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, len);
+/**
+ * btrfs_clone() - clone a range from inode file to another
+ *
+ * @src: Inode to clone from
+ * @inode: Inode to clone to
+ * @off: Offset within source to start clone from
+ * @olen: Original length, passed by user, of range to clone
+ * @olen_aligned: Block-aligned value of olen, extent_same uses
+ * identical values here
+ * @destoff: Offset within @inode to start clone
+ */
+static int btrfs_clone(struct inode *src, struct inode *inode,
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path = NULL;
+ struct extent_buffer *leaf;
+ struct btrfs_trans_handle *trans;
+ char *buf = NULL;
+ struct btrfs_key key;
+ u32 nritems;
+ int slot;
+ int ret;
+ u64 len = olen_aligned;
+
+ ret = -ENOMEM;
+ buf = vmalloc(btrfs_level_size(root, 0));
+ if (!buf)
+ return ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ vfree(buf);
+ return ret;
}
+ path->reada = 2;
/* clone data */
key.objectid = btrfs_ino(src);
key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2858,15 +3094,132 @@ next:
key.offset++;
}
ret = 0;
+
out:
btrfs_release_path(path);
+ btrfs_free_path(path);
+ vfree(buf);
+ return ret;
+}
+
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct fd src_file;
+ struct inode *src;
+ int ret;
+ u64 len = olen;
+ u64 bs = root->fs_info->sb->s_blocksize;
+ int same_inode = 0;
+
+ /*
+ * TODO:
+ * - split compressed inline extents. annoying: we need to
+ * decompress into destination's address_space (the file offset
+ * may change, so source mapping won't do), then recompress (or
+ * otherwise reinsert) a subrange.
+ * - allow ranges within the same file to be cloned (provided
+ * they don't overlap)?
+ */
+
+ /* the destination must be opened for writing */
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+ return -EINVAL;
+
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
+ ret = -EBADF;
+ goto out_drop_write;
+ }
+
+ ret = -EXDEV;
+ if (src_file.file->f_path.mnt != file->f_path.mnt)
+ goto out_fput;
+
+ src = file_inode(src_file.file);
+
+ ret = -EINVAL;
+ if (src == inode)
+ same_inode = 1;
+
+ /* the src must be open for reading */
+ if (!(src_file.file->f_mode & FMODE_READ))
+ goto out_fput;
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
+ goto out_fput;
+
+ ret = -EISDIR;
+ if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
+ goto out_fput;
+
+ ret = -EXDEV;
+ if (src->i_sb != inode->i_sb)
+ goto out_fput;
+
+ if (!same_inode) {
+ if (inode < src) {
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ }
+ } else {
+ mutex_lock(&src->i_mutex);
+ }
+
+ /* determine range to clone */
+ ret = -EINVAL;
+ if (off + len > src->i_size || off + len < off)
+ goto out_unlock;
+ if (len == 0)
+ olen = len = src->i_size - off;
+ /* if we extend to eof, continue to block boundary */
+ if (off + len == src->i_size)
+ len = ALIGN(src->i_size, bs) - off;
+
+ /* verify the end result is block aligned */
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+ !IS_ALIGNED(destoff, bs))
+ goto out_unlock;
+
+ /* verify if ranges are overlapped within the same file */
+ if (same_inode) {
+ if (destoff + len > off && destoff < off + len)
+ goto out_unlock;
+ }
+
+ if (destoff > inode->i_size) {
+ ret = btrfs_cont_expand(inode, inode->i_size, destoff);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /* truncate page cache pages from target inode range */
+ truncate_inode_pages_range(&inode->i_data, destoff,
+ PAGE_CACHE_ALIGN(destoff + len) - 1);
+
+ lock_extent_range(src, off, len);
+
+ ret = btrfs_clone(src, inode, off, olen, len, destoff);
+
unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
out_unlock:
mutex_unlock(&src->i_mutex);
if (!same_inode)
mutex_unlock(&inode->i_mutex);
- vfree(buf);
- btrfs_free_path(path);
out_fput:
fdput(src_file);
out_drop_write:
@@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
if (atomic_xchg(
&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- ret = -EINPROGRESS;
+ ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
ret = btrfs_dev_replace_start(root, p);
atomic_set(
@@ -3560,8 +3915,7 @@ again:
} else {
/* this is (1) */
mutex_unlock(&fs_info->balance_mutex);
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- ret = -EINVAL;
+ ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
@@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_trans_handle *trans;
struct timespec ct = CURRENT_TIME;
int ret = 0;
+ int received_uuid_changed;
ret = mnt_want_write_file(file);
if (ret < 0)
@@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
goto out;
}
- trans = btrfs_start_transaction(root, 1);
+ /*
+ * 1 - root item
+ * 2 - uuid items (received uuid + subvol uuid)
+ */
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
@@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
sa->rtime.sec = ct.tv_sec;
sa->rtime.nsec = ct.tv_nsec;
+ received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
+ BTRFS_UUID_SIZE);
+ if (received_uuid_changed &&
+ !btrfs_is_empty_uuid(root_item->received_uuid))
+ btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ root_item->received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ root->root_key.objectid);
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
btrfs_set_root_stransid(root_item, sa->stransid);
btrfs_set_root_rtransid(root_item, sa->rtransid);
- root_item->stime.sec = cpu_to_le64(sa->stime.sec);
- root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
- root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
- root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
+ btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
+ btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
+ btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
+ btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
btrfs_end_transaction(trans, root);
- trans = NULL;
goto out;
- } else {
- ret = btrfs_commit_transaction(trans, root);
- if (ret < 0)
+ }
+ if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
+ ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+ sa->uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ root->root_key.objectid);
+ if (ret < 0 && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, root, ret);
goto out;
+ }
+ }
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
}
ret = copy_to_user(arg, sa, sizeof(*sa));
@@ -4041,18 +4418,22 @@ out:
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
- const char *label = root->fs_info->super_copy->label;
- size_t len = strnlen(label, BTRFS_LABEL_SIZE);
+ size_t len;
int ret;
+ char label[BTRFS_LABEL_SIZE];
+
+ spin_lock(&root->fs_info->super_lock);
+ memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
+ spin_unlock(&root->fs_info->super_lock);
+
+ len = strnlen(label, BTRFS_LABEL_SIZE);
if (len == BTRFS_LABEL_SIZE) {
pr_warn("btrfs: label is too long, return the first %zu bytes\n",
--len);
}
- mutex_lock(&root->fs_info->volume_mutex);
ret = copy_to_user(arg, label, len);
- mutex_unlock(&root->fs_info->volume_mutex);
return ret ? -EFAULT : 0;
}
@@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
if (ret)
return ret;
- mutex_lock(&root->fs_info->volume_mutex);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
}
+ spin_lock(&root->fs_info->super_lock);
strcpy(super_block->label, label);
+ spin_unlock(&root->fs_info->super_lock);
ret = btrfs_end_transaction(trans, root);
out_unlock:
- mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
return ret;
}
@@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
+ case BTRFS_IOC_FILE_EXTENT_SAME:
+ return btrfs_ioctl_file_extent_same(file, argp);
}
return -ENOTTY;