aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c4
-rw-r--r--fs/nfs/idmap.c13
-rw-r--r--fs/nfs/nfs4filelayout.c1
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/pnfs.c81
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfsd/nfscache.c11
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/pnode.c6
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/splice.c4
-rw-r--r--fs/sysfs/mount.c4
17 files changed, 212 insertions, 45 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index fbfae008..e8bc342 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path,
bool slash = false;
int error = 0;
- br_read_lock(&vfsmount_lock);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
@@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path,
if (!error && !slash)
error = prepend(buffer, buflen, "/", 1);
-out:
- br_read_unlock(&vfsmount_lock);
return error;
global_root:
@@ -2590,7 +2587,7 @@ global_root:
error = prepend(buffer, buflen, "/", 1);
if (!error)
error = is_mounted(vfsmnt) ? 1 : 2;
- goto out;
+ return error;
}
/**
@@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
+ br_read_lock(&vfsmount_lock);
write_seqlock(&rename_lock);
error = prepend_path(path, root, &res, &buflen);
write_sequnlock(&rename_lock);
+ br_read_unlock(&vfsmount_lock);
if (error < 0)
return ERR_PTR(error);
@@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
+ br_read_lock(&vfsmount_lock);
write_seqlock(&rename_lock);
error = prepend_path(path, &root, &res, &buflen);
write_sequnlock(&rename_lock);
+ br_read_unlock(&vfsmount_lock);
if (error > 1)
error = -EINVAL;
@@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
+ br_read_lock(&vfsmount_lock);
write_seqlock(&rename_lock);
error = path_with_deleted(path, &root, &res, &buflen);
+ write_sequnlock(&rename_lock);
+ br_read_unlock(&vfsmount_lock);
if (error < 0)
res = ERR_PTR(error);
- write_sequnlock(&rename_lock);
path_put(&root);
return res;
}
@@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
get_fs_root_and_pwd(current->fs, &root, &pwd);
error = -ENOENT;
+ br_read_lock(&vfsmount_lock);
write_seqlock(&rename_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
@@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&cwd, &buflen, "\0", 1);
error = prepend_path(&pwd, &root, &cwd, &buflen);
write_sequnlock(&rename_lock);
+ br_read_unlock(&vfsmount_lock);
if (error < 0)
goto out;
@@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
}
} else {
write_sequnlock(&rename_lock);
+ br_read_unlock(&vfsmount_lock);
}
out:
diff --git a/fs/internal.h b/fs/internal.h
index 507141f..4be7823 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool);
* dcache.c
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+
+/*
+ * read_write.c
+ */
+extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d..d581e45 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+ /* Don't allow unprivileged users to change mount flags */
+ if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
if (readonly_request == __mnt_is_readonly(mnt))
return 0;
+ if (mnt->mnt_flags & MNT_LOCK_READONLY)
+ return -EPERM;
+
if (readonly_request)
error = mnt_make_readonly(real_mount(mnt));
else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
/* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
- copy_flags |= CL_SHARED_TO_SLAVE;
+ copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
return check_mnt(real_mount(mnt));
}
+bool current_chrooted(void)
+{
+ /* Does the current process have a non-standard root */
+ struct path ns_root;
+ struct path fs_root;
+ bool chrooted;
+
+ /* Find the namespace root */
+ ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
+ ns_root.dentry = ns_root.mnt->mnt_root;
+ path_get(&ns_root);
+ while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
+ ;
+
+ get_fs_root(current->fs, &fs_root);
+
+ chrooted = !path_equal(&fs_root, &ns_root);
+
+ path_put(&fs_root);
+ path_put(&ns_root);
+
+ return chrooted;
+}
+
+void update_mnt_policy(struct user_namespace *userns)
+{
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ struct mount *mnt;
+
+ down_read(&namespace_sem);
+ list_for_each_entry(mnt, &ns->list, mnt_list) {
+ switch (mnt->mnt.mnt_sb->s_magic) {
+ case SYSFS_MAGIC:
+ userns->may_mount_sysfs = true;
+ break;
+ case PROC_SUPER_MAGIC:
+ userns->may_mount_proc = true;
+ break;
+ }
+ if (userns->may_mount_sysfs && userns->may_mount_proc)
+ break;
+ }
+ up_read(&namespace_sem);
+}
+
static void *mntns_get(struct task_struct *task)
{
struct mnt_namespace *ns = NULL;
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839..6fc7b5c 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
bl_pipe_msg.bl_wq = &nn->bl_wq;
memset(msg, 0, sizeof(*msg));
- msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+ msg->len = sizeof(bl_msg) + bl_msg.totallen;
+ msg->data = kzalloc(msg->len, GFP_NOFS);
if (!msg->data)
goto out;
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
memcpy(msg->data, &bl_msg, sizeof(bl_msg));
dataptr = (uint8_t *) msg->data;
memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
- msg->len = sizeof(bl_msg) + bl_msg.totallen;
add_wait_queue(&nn->bl_wq, &wq);
if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98d..c516da5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
return ret;
}
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
{
- return key_instantiate_and_link(key, data, strlen(data) + 1,
+ return key_instantiate_and_link(key, data, datalen,
id_resolver_cache->thread_keyring,
authkey);
}
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
struct key *key, struct key *authkey)
{
char id_str[NFS_UINT_MAXLEN];
+ size_t len;
int ret = -ENOKEY;
/* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
case IDMAP_CONV_NAMETOID:
if (strcmp(upcall->im_name, im->im_name) != 0)
break;
- sprintf(id_str, "%d", im->im_id);
- ret = nfs_idmap_instantiate(key, authkey, id_str);
+ /* Note: here we store the NUL terminator too */
+ len = sprintf(id_str, "%d", im->im_id) + 1;
+ ret = nfs_idmap_instantiate(key, authkey, id_str, len);
break;
case IDMAP_CONV_IDTONAME:
if (upcall->im_id != im->im_id)
break;
- ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+ len = strlen(im->im_name);
+ ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
break;
default:
ret = -EINVAL;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb04..4fb234d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
{
if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return;
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
pnfs_return_layout(inode);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb..26431cf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
int status;
if (pnfs_ld_layoutret_on_setattr(inode))
- pnfs_return_layout(inode);
+ pnfs_commit_and_return_layout(inode);
nfs_fattr_init(fattr);
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
- struct pnfs_layout_segment *lseg, *tmp;
- unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
pnfs_cleanup_layoutcommit(data);
- /* Matched by references in pnfs_set_layoutcommit */
- list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
- list_del_init(&lseg->pls_lc_list);
- if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
- &lseg->pls_flags))
- pnfs_put_lseg(lseg);
- }
-
- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
- smp_mb__after_clear_bit();
- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
put_rpccred(data->cred);
kfree(data);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aa..4bdffe0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
lo_seg_intersecting(lseg_range, recall_range);
}
+static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ if (!atomic_dec_and_test(&lseg->pls_refcount))
+ return false;
+ pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+ list_add(&lseg->pls_list, tmp_list);
+ return true;
+}
+
/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
- list_add(&lseg->pls_list, tmp_list);
+ if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
- }
}
return rv;
}
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}
+static void pnfs_clear_layoutcommit(struct inode *inode,
+ struct list_head *head)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ return;
+ list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+ if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ continue;
+ pnfs_lseg_dec_and_remove_zero(lseg, head);
+ }
+}
+
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
+ pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
- WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
-
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
@@ -845,6 +866,33 @@ out:
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);
+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ int ret;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo == NULL) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ pnfs_get_layout_hdr(lo);
+ /* Block new layoutgets and read/write to ds */
+ lo->plh_block_lgets++;
+ spin_unlock(&inode->i_lock);
+ filemap_fdatawait(inode->i_mapping);
+ ret = pnfs_layoutcommit_inode(inode, true);
+ if (ret == 0)
+ ret = _pnfs_return_layout(inode);
+ spin_lock(&inode->i_lock);
+ lo->plh_block_lgets--;
+ spin_unlock(&inode->i_lock);
+ pnfs_put_layout_hdr(lo);
+ return ret;
+}
+
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
@@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
if (lseg->pls_range.iomode == IOMODE_RW &&
- test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
list_add(&lseg->pls_lc_list, listp);
}
}
+static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+ unsigned long *bitlock = &NFS_I(inode)->flags;
+
+ /* Matched by references in pnfs_set_layoutcommit */
+ list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+ list_del_init(&lseg->pls_lc_list);
+ pnfs_put_lseg(lseg);
+ }
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_clear_bit();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+ pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba804..f5f8a47 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_write_data *);
void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+ return 0;
+}
+
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 62c1ee1..ca05f6d 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
{
if (rp->c_type == RC_REPLBUFF)
kfree(rp->c_replvec.iov_base);
- hlist_del(&rp->c_hash);
+ if (!hlist_unhashed(&rp->c_hash))
+ hlist_del(&rp->c_hash);
list_del(&rp->c_lru);
--num_drc_entries;
kmem_cache_free(drc_slab, rp);
@@ -118,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp)
int nfsd_reply_cache_init(void)
{
+ INIT_LIST_HEAD(&lru_head);
+ max_drc_entries = nfsd_cache_size_limit();
+ num_drc_entries = 0;
+
register_shrinker(&nfsd_reply_cache_shrinker);
drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
0, 0, NULL);
@@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void)
if (!cache_hash)
goto out_nomem;
- INIT_LIST_HEAD(&lru_head);
- max_drc_entries = nfsd_cache_size_limit();
- num_drc_entries = 0;
-
return 0;
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2a7eb53..2b2e239 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int host_err;
int stable = *stablep;
int use_wgather;
+ loff_t pos = offset;
dentry = file->f_path.dentry;
inode = dentry->d_inode;
@@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
+ host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
set_fs(oldfs);
if (host_err < 0)
goto out_nfserr;
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a5..8b29d21 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/nsproxy.h>
#include "internal.h"
#include "pnode.h"
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
struct mount *source_mnt, struct list_head *tree_list)
{
+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct mount *m, *child;
int ret = 0;
struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+ /* Notice when we are propagating across user namespaces */
+ if (m->mnt_ns->user_ns != user_ns)
+ type |= CL_UNPRIVILEGED;
+
child = copy_tree(source, source->mnt.mnt_root, type);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3..a0493d5 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20
+#define CL_UNPRIVILEGED 0x40
static inline void set_mnt_shared(struct mount *mnt)
{
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac..9c7fab1 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/bitops.h>
+#include <linux/user_namespace.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
#include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
} else {
ns = task_active_pid_ns(current);
options = data;
+
+ if (!current_user_ns()->may_mount_proc)
+ return ERR_PTR(-EPERM);
}
sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/read_write.c b/fs/read_write.c
index a698eff..e6ddc8d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
#include <linux/splice.h>
#include <linux/compat.h>
#include "read_write.h"
+#include "internal.h"
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
EXPORT_SYMBOL(do_sync_write);
+ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
+{
+ mm_segment_t old_fs;
+ const char __user *p;
+ ssize_t ret;
+
+ if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+ return -EINVAL;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ p = (__force const char __user *)buf;
+ if (count > MAX_RW_COUNT)
+ count = MAX_RW_COUNT;
+ if (file->f_op->write)
+ ret = file->f_op->write(file, p, count, pos);
+ else
+ ret = do_sync_write(file, p, count, pos);
+ set_fs(old_fs);
+ if (ret > 0) {
+ fsnotify_modify(file);
+ add_wchar(current, ret);
+ }
+ inc_syscw(current);
+ return ret;
+}
+
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
diff --git a/fs/splice.c b/fs/splice.c
index 718bd00..29e394e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/gfp.h>
#include <linux/socket.h>
+#include "internal.h"
/*
* Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
{
int ret;
void *data;
+ loff_t tmp = sd->pos;
data = buf->ops->map(pipe, buf, 0);
- ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+ ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
buf->ops->unmap(pipe, buf, data);
return ret;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5..afd8327 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/user_namespace.h>
#include "sysfs.h"
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
struct super_block *sb;
int error;
+ if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+ return ERR_PTR(-EPERM);
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);