aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- fs/nfsd/export.c 149
-rw-r--r-- fs/nfsd/nfs2acl.c 3
-rw-r--r-- fs/nfsd/nfs3acl.c 3
-rw-r--r-- fs/nfsd/nfs3proc.c 18
-rw-r--r-- fs/nfsd/nfs3xdr.c 56
-rw-r--r-- fs/nfsd/nfs4acl.c 711
-rw-r--r-- fs/nfsd/nfs4proc.c 32
-rw-r--r-- fs/nfsd/nfs4xdr.c 231
-rw-r--r-- fs/nfsd/nfsctl.c 49
-rw-r--r-- fs/nfsd/nfsproc.c 12
-rw-r--r-- fs/nfsd/nfssvc.c 19
-rw-r--r-- fs/nfsd/nfsxdr.c 43
-rw-r--r-- fs/nfsd/vfs.c 86
13 files changed, 850 insertions, 562 deletions
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cfe141e..e13fa23 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
static struct cache_head *export_table[EXPORT_HASHMAX];
+/*
+ * Release everything hanging off @fsloc: each location's path and hosts
+ * strings, then the locations array itself. @fsloc is embedded in its
+ * owner and is not freed here.
+ *
+ * fsloc_parse() stores locations_count before it allocates the array, so
+ * a struct can arrive here with a non-zero count and a NULL array; that
+ * case must not be dereferenced. The struct is left empty afterwards so
+ * that a repeated call cannot double-free.
+ */
+static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
+{
+ struct nfsd4_fs_location *locations = fsloc->locations;
+ int i;
+
+ if (locations) {
+ for (i = 0; i < fsloc->locations_count; i++) {
+ kfree(locations[i].path);
+ kfree(locations[i].hosts);
+ }
+ kfree(locations);
+ }
+ /* Leave no dangling pointer or stale count behind. */
+ fsloc->locations = NULL;
+ fsloc->locations_count = 0;
+}
+
static void svc_export_put(struct kref *ref)
{
struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
dput(exp->ex_dentry);
mntput(exp->ex_mnt);
auth_domain_put(exp->ex_client);
+ kfree(exp->ex_path);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
kfree(exp);
}
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags)
}
+#ifdef CONFIG_NFSD_V4
+
+static int
+fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
+{
+ int len;
+ int migrated, i, err;
+
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len != 5 || memcmp(buf, "fsloc", 5))
+ return 0;
+
+ /* listsize */
+ err = get_int(mesg, &fsloc->locations_count);
+ if (err)
+ return err;
+ if (fsloc->locations_count > MAX_FS_LOCATIONS)
+ return -EINVAL;
+ if (fsloc->locations_count == 0)
+ return 0;
+
+ fsloc->locations = kzalloc(fsloc->locations_count
+ * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
+ if (!fsloc->locations)
+ return -ENOMEM;
+ for (i=0; i < fsloc->locations_count; i++) {
+ /* colon separated host list */
+ err = -EINVAL;
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].hosts)
+ goto out_free_all;
+ err = -EINVAL;
+ /* slash separated path component list */
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].path)
+ goto out_free_all;
+ }
+ /* migrated */
+ err = get_int(mesg, &migrated);
+ if (err)
+ goto out_free_all;
+ err = -EINVAL;
+ if (migrated < 0 || migrated > 1)
+ goto out_free_all;
+ fsloc->migrated = migrated;
+ return 0;
+out_free_all:
+ nfsd4_fslocs_free(fsloc);
+ return err;
+}
+
+#else /* CONFIG_NFSD_V4 */
+static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
+#endif
+
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
int an_int;
nd.dentry = NULL;
+ exp.ex_path = NULL;
if (mesg[mlen-1] != '\n')
return -EINVAL;
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.ex_mnt = nd.mnt;
exp.ex_dentry = nd.dentry;
+ exp.ex_path = kstrdup(buf, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!exp.ex_path)
+ goto out;
/* expiry */
err = -EINVAL;
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (exp.h.expiry_time == 0)
goto out;
+ /* fs locations */
+ exp.ex_fslocs.locations = NULL;
+ exp.ex_fslocs.locations_count = 0;
+ exp.ex_fslocs.migrated = 0;
+
/* flags */
err = get_int(&mesg, &an_int);
if (err == -ENOENT)
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = check_export(nd.dentry->d_inode, exp.ex_flags);
if (err) goto out;
+
+ err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
+ if (err)
+ goto out;
}
expp = svc_export_lookup(&exp);
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
else
exp_put(expp);
out:
+ kfree(exp.ex_path);
if (nd.dentry)
path_release(&nd);
out_no_path:
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
return err;
}
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong);
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
static int svc_export_show(struct seq_file *m,
struct cache_detail *cd,
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m,
seq_putc(m, '(');
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
- exp_flags(m, exp->ex_flags, exp->ex_fsid,
- exp->ex_anon_uid, exp->ex_anon_gid);
+ exp_flags(m, exp->ex_flags, exp->ex_fsid,
+ exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
seq_puts(m, ")\n");
return 0;
}
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_client = item->ex_client;
new->ex_dentry = dget(item->ex_dentry);
new->ex_mnt = mntget(item->ex_mnt);
+ new->ex_path = NULL;
+ new->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = 0;
}
static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
new->ex_anon_uid = item->ex_anon_uid;
new->ex_anon_gid = item->ex_anon_gid;
new->ex_fsid = item->ex_fsid;
+ new->ex_path = item->ex_path;
+ item->ex_path = NULL;
+ new->ex_fslocs.locations = item->ex_fslocs.locations;
+ item->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
+ item->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = item->ex_fslocs.migrated;
+ item->ex_fslocs.migrated = 0;
}
static struct cache_head *svc_export_alloc(void)
@@ -1048,30 +1152,21 @@ int
exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
struct cache_req *creq)
{
- struct svc_expkey *fsid_key;
struct svc_export *exp;
int rv;
u32 fsidv[2];
mk_fsid_v1(fsidv, 0);
- fsid_key = exp_find_key(clp, 1, fsidv, creq);
- if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN)
+ exp = exp_find(clp, 1, fsidv, creq);
+ if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
return nfserr_dropit;
- if (!fsid_key || IS_ERR(fsid_key))
- return nfserr_perm;
-
- exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
if (exp == NULL)
- rv = nfserr_perm;
+ return nfserr_perm;
else if (IS_ERR(exp))
- rv = nfserrno(PTR_ERR(exp));
- else {
- rv = fh_compose(fhp, exp,
- fsid_key->ek_dentry, NULL);
- exp_put(exp);
- }
- cache_put(&fsid_key->h, &svc_expkey_cache);
+ return nfserrno(PTR_ERR(exp));
+ rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
+ exp_put(exp);
return rv;
}
@@ -1158,7 +1253,8 @@ static struct flags {
{ 0, {"", ""}}
};
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong)
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
{
int first = 0;
struct flags *flg;
@@ -1174,6 +1270,21 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t
seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
if (anong != (gid_t)-2 && anong != (0x10000-2))
seq_printf(m, "%sanongid=%d", first++?",":"", anong);
+ if (fsloc && fsloc->locations_count > 0) {
+ char *loctype = (fsloc->migrated) ? "refer" : "replicas";
+ int i;
+
+ seq_printf(m, "%s%s=", first++?",":"", loctype);
+ seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
+ for (i = 1; i < fsloc->locations_count; i++) {
+ seq_putc(m, ';');
+ seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
+ }
+ }
}
static int e_show(struct seq_file *m, void *p)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fe56b38..9187755 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 16e10c1..d4bdc00 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f61142a..a5ebc7d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh),
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
*/
resp->count = argp->count;
- if (NFSSVC_MAXBLKSIZE < resp->count)
- resp->count = NFSSVC_MAXBLKSIZE;
+ if (max_blocksize < resp->count)
+ resp->count = max_blocksize;
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&resp->committed);
resp->count = argp->count;
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
struct nfsd3_fsinfores *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: FSINFO(3) %s\n",
SVCFH_fmt(&argp->fh));
- resp->f_rtmax = NFSSVC_MAXBLKSIZE;
- resp->f_rtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_rtmax = max_blocksize;
+ resp->f_rtpref = max_blocksize;
resp->f_rtmult = PAGE_SIZE;
- resp->f_wtmax = NFSSVC_MAXBLKSIZE;
- resp->f_wtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_wtmax = max_blocksize;
+ resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
resp->f_dtpref = PAGE_SIZE;
resp->f_maxfilesize = ~(u32) 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 243d94b..247d518 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
{
unsigned int len;
int v,pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
/* set up the kvec */
v=0;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args)
{
unsigned int len, v, hdr;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_arg.len - hdr < len)
return 0;
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
v= 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->count == args->len && args->vec[0].iov_len > 0;
+ return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
* This page appears in the rq_res.pages list, but as pages_len is always
* 0, it won't get in the way
*/
- svc_take_page(rqstp);
len = ntohl(*p++);
if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
return 0;
- args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->tname = new =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
args->tlen = len;
/* first copy and check from the first page */
old = (char*)p;
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readdirargs *args)
{
int len, pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
return 0;
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE :
+ len = (args->count > max_blocksize) ? max_blocksize :
args->count;
args->count = len;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
+ pn = rqstp->rq_resused++;
if (!args->buffer)
args->buffer = page_address(rqstp->rq_respages[pn]);
len -= PAGE_SIZE;
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = (resp->count) << 2;
/* add the 'tail' to the end of the 'head' page - page 0. */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p++ = 0; /* no more entries */
*p++ = htonl(resp->common.err == nfserr_eof);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index edb107e..5d94555 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -63,6 +63,8 @@
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
+
#define MASK_EQUAL(mask1, mask2) \
( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags)
/* XXX: modify functions to return NFS errors; they're only ever
* used by nfs code, after all.... */
-static int
-mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
+ * side of being more restrictive, so the mode bit mapping below is
+ * pessimistic. An optimistic version would be needed to handle DENY's,
+ * but we expect to coalesce all ALLOWs and DENYs before mapping to mode
+ * bits. */
+
+static void
+low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
{
- u32 ignore = 0;
+ u32 write_mode = NFS4_WRITE_MODE;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */
- perm |= ignore;
+ if (flags & NFS4_ACL_DIR)
+ write_mode |= NFS4_ACE_DELETE_CHILD;
*mode = 0;
if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
*mode |= ACL_READ;
- if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
+ if ((perm & write_mode) == write_mode)
*mode |= ACL_WRITE;
if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
*mode |= ACL_EXECUTE;
- if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
- return -EINVAL;
- return 0;
}
struct ace_container {
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl)
return;
}
-static int
-write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
- struct posix_acl_entry **pace, short tag, unsigned int flags)
-{
- struct posix_acl_entry *this = *pace;
-
- if (*pace == pacl->a_entries + pacl->a_count)
- return -EINVAL; /* fell off the end */
- (*pace)++;
- this->e_tag = tag;
- if (tag == ACL_USER_OBJ)
- flags |= NFS4_ACL_OWNER;
- if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
- return -EINVAL;
- this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
- ace->who : ACL_UNDEFINED_ID);
- return 0;
-}
-
-static struct nfs4_ace *
-get_next_v4_ace(struct list_head **p, struct list_head *head)
-{
- struct nfs4_ace *ace;
-
- *p = (*p)->next;
- if (*p == head)
- return NULL;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- return ace;
-}
-
int
nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
struct posix_acl **dpacl, unsigned int flags)
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
goto out;
error = nfs4_acl_split(acl, dacl);
- if (error < 0)
+ if (error)
goto out_acl;
- if (pacl != NULL) {
- if (acl->naces == 0) {
- error = -ENODATA;
- goto try_dpacl;
- }
-
- *pacl = _nfsv4_to_posix_one(acl, flags);
- if (IS_ERR(*pacl)) {
- error = PTR_ERR(*pacl);
- *pacl = NULL;
- goto out_acl;
- }
+ *pacl = _nfsv4_to_posix_one(acl, flags);
+ if (IS_ERR(*pacl)) {
+ error = PTR_ERR(*pacl);
+ *pacl = NULL;
+ goto out_acl;
}
-try_dpacl:
- if (dpacl != NULL) {
- if (dacl->naces == 0) {
- if (pacl == NULL || *pacl == NULL)
- error = -ENODATA;
- goto out_acl;
- }
-
- error = 0;
- *dpacl = _nfsv4_to_posix_one(dacl, flags);
- if (IS_ERR(*dpacl)) {
- error = PTR_ERR(*dpacl);
- *dpacl = NULL;
- goto out_acl;
- }
+ *dpacl = _nfsv4_to_posix_one(dacl, flags);
+ if (IS_ERR(*dpacl)) {
+ error = PTR_ERR(*dpacl);
+ *dpacl = NULL;
}
-
out_acl:
- if (error && pacl) {
+ if (error) {
posix_acl_release(*pacl);
*pacl = NULL;
}
@@ -429,349 +382,311 @@ out:
return error;
}
+/*
+ * While processing the NFSv4 ACE, this maintains bitmasks representing
+ * which permission bits have been allowed and which denied to a given
+ * entity: */
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ uid_t uid;
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array {
+ int n;
+ struct posix_user_ace_state aces[];
+};
+
+/*
+ * While processing the NFSv4 ACE, this maintains the partial permissions
+ * calculated so far: */
+
+struct posix_acl_state {
+ struct posix_ace_state owner;
+ struct posix_ace_state group;
+ struct posix_ace_state other;
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* Deny unused in this case */
+ struct posix_ace_state_array *users;
+ struct posix_ace_state_array *groups;
+};
+
+/*
+ * Zero @state and allocate its named-user and named-group arrays, each
+ * with room for @cnt entries.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing is left allocated.
+ * On success the arrays are released with free_state().
+ */
static int
-same_who(struct nfs4_ace *a, struct nfs4_ace *b)
+init_state(struct posix_acl_state *state, int cnt)
{
- return a->whotype == b->whotype &&
- (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who);
+ int alloc;
+
+ memset(state, 0, sizeof(struct posix_acl_state));
+ /*
+ * In the worst case, each individual acl could be for a distinct
+ * named user or group, but we don't know which, so we allocate
+ * enough space (cnt posix_user_ace_state slots) for either:
+ */
+ alloc = sizeof(struct posix_ace_state_array)
+ + cnt*sizeof(struct posix_user_ace_state);
+ state->users = kzalloc(alloc, GFP_KERNEL);
+ if (!state->users)
+ return -ENOMEM;
+ state->groups = kzalloc(alloc, GFP_KERNEL);
+ if (!state->groups) {
+ kfree(state->users);
+ return -ENOMEM;
+ }
+ return 0;
}
-static int
-complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny,
- unsigned int flags)
-{
- int ignore = 0;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD;
- return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
- ignore|deny->access_mask) &&
- allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
- deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
- allow->flag == deny->flag &&
- same_who(allow, deny);
+static void
+free_state(struct posix_acl_state *state) {
+ kfree(state->users);
+ kfree(state->groups);
}
-static inline int
-user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
-
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_USER_OBJ)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ state->mask.allow |= astate->allow;
}
-static inline int
-users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
-{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+/*
+ * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
+ * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
+ * to traditional read/write/execute permissions.
+ *
+ * It's problematic to reject acls that use certain mode bits, because it
+ * places the burden on users to learn the rules about which bits one
+ * particular server sets, without giving the user a lot of help--we return an
+ * error that could mean any number of different things. To make matters
+ * worse, the problematic bits might be introduced by some application that's
+ * automatically mapping from some other acl model.
+ *
+ * So wherever possible we accept anything, possibly erring on the side of
+ * denying more permissions than necessary.
+ *
+ * However we do reject *explicit* DENY's of a few bits representing
+ * permissions we could never deny:
+ */
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- while (ace2type(ace) == ACL_USER) {
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- if ((*mask_ace)->flag != ace2->flag ||
- !same_who(*mask_ace, ace2))
- goto out;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
- error = 0;
-out:
- return error;
+static inline int check_deny(u32 mask, int isowner)
+{
+ if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
+ return -EINVAL;
+ if (!isowner)
+ return 0;
+ if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
+ return -EINVAL;
+ return 0;
}
-static inline int
-group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static struct posix_acl *
+posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
- struct ace_container *ac;
- struct list_head group_l;
-
- INIT_LIST_HEAD(&group_l);
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- /* group owner (mask and allow aces) */
+ struct posix_acl_entry *pace;
+ struct posix_acl *pacl;
+ int nace;
+ int i, error = 0;
- if (pacl->a_count != 3) {
- /* then the group owner should be preceded by mask */
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
+ nace = 4 + state->users->n + state->groups->n;
+ pacl = posix_acl_alloc(nace, GFP_KERNEL);
+ if (!pacl)
+ return ERR_PTR(-ENOMEM);
- if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace))
- goto out;
+ pace = pacl->a_entries;
+ pace->e_tag = ACL_USER_OBJ;
+ error = check_deny(state->owner.deny, 1);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ for (i=0; i < state->users->n; i++) {
+ pace++;
+ pace->e_tag = ACL_USER;
+ error = check_deny(state->users->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->users->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->users->aces[i].uid;
+ add_to_mask(state, &state->users->aces[i].perms);
}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
-
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
-
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
-
- error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags);
- if (error < 0)
- goto out;
-
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
-
- /* groups (mask and allow aces) */
-
- while (ace2type(ace) == ACL_GROUP) {
- if (*mask_ace == NULL)
- goto out;
-
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
+ pace++;
+ pace->e_tag = ACL_GROUP_OBJ;
+ error = check_deny(state->group.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+ add_to_mask(state, &state->group);
+
+ for (i=0; i < state->groups->n; i++) {
+ pace++;
+ pace->e_tag = ACL_GROUP;
+ error = check_deny(state->groups->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->groups->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->groups->aces[i].uid;
+ add_to_mask(state, &state->groups->aces[i].perms);
+ }
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
- !same_who(ace, *mask_ace))
- goto out;
+ pace++;
+ pace->e_tag = ACL_MASK;
+ low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
+ pace++;
+ pace->e_tag = ACL_OTHER;
+ error = check_deny(state->other.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- error = write_pace(ace, pacl, pace, ACL_GROUP, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
+ return pacl;
+out_err:
+ posix_acl_release(pacl);
+ return ERR_PTR(error);
+}
- /* group owner (deny ace) */
+static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Allow all bits in the mask not already denied: */
+ astate->allow |= mask & ~astate->deny;
+}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
+static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Deny all bits in the mask not already allowed: */
+ astate->deny |= mask & ~astate->allow;
+}
- /* groups (deny aces) */
+static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
+{
+ int i;
- while (!list_empty(&group_l)) {
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_GROUP)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
- }
+ for (i = 0; i < a->n; i++)
+ if (a->aces[i].uid == uid)
+ return i;
+ /* Not found: */
+ a->n++;
+ a->aces[i].uid = uid;
+ a->aces[i].perms.allow = state->everyone.allow;
+ a->aces[i].perms.deny = state->everyone.deny;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_OTHER)
- goto out;
- error = 0;
-out:
- while (!list_empty(&group_l)) {
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- list_del(group_l.next);
- kfree(ac);
- }
- return error;
+ return i;
}
-static inline int
-mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (pacl->a_count != 3) {
- if (*mask_ace == NULL)
- goto out;
- (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
- write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
- }
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ deny_bits(&a->aces[i].perms, mask);
}
-static inline int
-other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ allow_bits(&a->aces[i].perms, mask);
}
-static int
-calculate_posix_ace_count(struct nfs4_acl *n4acl)
+static void process_one_v4_ace(struct posix_acl_state *state,
+ struct nfs4_ace *ace)
{
- if (n4acl->naces == 6) /* owner, owner group, and other only */
- return 3;
- else { /* Otherwise there must be a mask entry. */
- /* Also, the remaining entries are for named users and
- * groups, and come in threes (mask, allow, deny): */
- if (n4acl->naces < 7)
- return -EINVAL;
- if ((n4acl->naces - 7) % 3)
- return -EINVAL;
- return 4 + (n4acl->naces - 7)/3;
+ u32 mask = ace->access_mask;
+ int i;
+
+ switch (ace2type(ace)) {
+ case ACL_USER_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_USER:
+ i = find_uid(state, state->users, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->users->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->users->aces[i].perms, mask);
+ mask = state->users->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_GROUP_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->group, mask);
+ } else {
+ deny_bits(&state->group, mask);
+ mask = state->group.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_GROUP:
+ i = find_uid(state, state->groups, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->groups->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->groups->aces[i].perms, mask);
+ mask = state->groups->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_OTHER:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ allow_bits(&state->group, mask);
+ allow_bits(&state->other, mask);
+ allow_bits(&state->everyone, mask);
+ allow_bits_array(state->users, mask);
+ allow_bits_array(state->groups, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->other, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
}
}
-
static struct posix_acl *
_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
{
+ struct posix_acl_state state;
struct posix_acl *pacl;
- int error = -EINVAL, nace = 0;
- struct list_head *p;
- struct nfs4_ace *mask_ace = NULL;
- struct posix_acl_entry *pace;
-
- nace = calculate_posix_ace_count(n4acl);
- if (nace < 0)
- goto out_err;
-
- pacl = posix_acl_alloc(nace, GFP_KERNEL);
- error = -ENOMEM;
- if (pacl == NULL)
- goto out_err;
-
- pace = &pacl->a_entries[0];
- p = &n4acl->ace_head;
-
- error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
-
- error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ struct nfs4_ace *ace;
+ int ret;
- error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
- flags);
- if (error)
- goto out_acl;
+ ret = init_state(&state, n4acl->naces);
+ if (ret)
+ return ERR_PTR(ret);
- error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
- error = other_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ list_for_each_entry(ace, &n4acl->ace_head, l_ace)
+ process_one_v4_ace(&state, ace);
- error = -EINVAL;
- if (p->next != &n4acl->ace_head)
- goto out_acl;
- if (pace != pacl->a_entries + pacl->a_count)
- goto out_acl;
+ pacl = posix_state_to_acl(&state, flags);
- sort_pacl(pacl);
+ free_state(&state);
- return pacl;
-out_acl:
- posix_acl_release(pacl);
-out_err:
- pacl = ERR_PTR(error);
+ if (!IS_ERR(pacl))
+ sort_pacl(pacl);
return pacl;
}
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
list_for_each_safe(h, n, &acl->ace_head) {
ace = list_entry(h, struct nfs4_ace, l_ace);
- if ((ace->flag & NFS4_INHERITANCE_FLAGS)
- != NFS4_INHERITANCE_FLAGS)
- continue;
+ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
+ ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+ return -EINVAL;
- error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who);
- if (error < 0)
- goto out;
+ if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
+ return -EINVAL;
- list_del(h);
- kfree(ace);
- acl->naces--;
+ switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
+ case 0:
+ /* Leave this ace in the effective acl: */
+ continue;
+ case NFS4_INHERITANCE_FLAGS:
+ /* Add this ace to the default acl and remove it
+ * from the effective acl: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ list_del(h);
+ kfree(ace);
+ acl->naces--;
+ break;
+ case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
+ /* Add this ace to the default, but leave it in
+ * the effective acl as well: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ break;
+ default:
+ return -EINVAL;
+ }
}
-
-out:
- return error;
+ return 0;
}
static short
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p)
return -1;
}
-static inline int
-match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
-{
- switch (ace->whotype) {
- case NFS4_ACL_WHO_NAMED:
- return who == ace->who;
- case NFS4_ACL_WHO_OWNER:
- return who == owner;
- case NFS4_ACL_WHO_GROUP:
- return who == group;
- case NFS4_ACL_WHO_EVERYONE:
- return 1;
- default:
- return 0;
- }
-}
-
EXPORT_SYMBOL(nfs4_acl_new);
EXPORT_SYMBOL(nfs4_acl_free);
EXPORT_SYMBOL(nfs4_acl_add_ace);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 15ded7a..8333db1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
*p++ = nfssvc_boot.tv_usec;
status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
- write->wr_vec, write->wr_vlen, write->wr_buflen,
+ rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
&write->wr_how_written);
if (filp)
fput(filp);
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
* require a valid current filehandle
*/
- if ((!current_fh->fh_dentry) &&
- !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
- (op->opnum == OP_SETCLIENTID) ||
- (op->opnum == OP_SETCLIENTID_CONFIRM) ||
- (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
- (op->opnum == OP_RELEASE_LOCKOWNER))) {
- op->status = nfserr_nofilehandle;
+ if (!current_fh->fh_dentry) {
+ if (!((op->opnum == OP_PUTFH) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_SETCLIENTID_CONFIRM) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_RESTOREFH) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_nofilehandle;
+ goto encode_op;
+ }
+ }
+ /* Check must be done at start of each operation, except
+ * for GETATTR and ops not listed as returning NFS4ERR_MOVED
+ */
+ else if (current_fh->fh_export->ex_fslocs.migrated &&
+ !((op->opnum == OP_GETATTR) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_PUTPUBFH) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_moved;
goto encode_op;
}
switch (op->opnum) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5be0043..41fc241 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -60,6 +60,14 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
+/*
+ * As per the referral draft, the fsid for a referral MUST be different from the fsid of the containing
+ * directory in order to indicate to the client that a filesystem boundary is present.
+ * We use a fixed fsid for a referral.
+ */
+#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
+#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
+
static int
check_filename(char *str, int len, int err)
{
@@ -926,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
goto xdr_error;
}
- write->wr_vec[0].iov_base = p;
- write->wr_vec[0].iov_len = avail;
+ argp->rqstp->rq_vec[0].iov_base = p;
+ argp->rqstp->rq_vec[0].iov_len = avail;
v = 0;
len = write->wr_buflen;
- while (len > write->wr_vec[v].iov_len) {
- len -= write->wr_vec[v].iov_len;
+ while (len > argp->rqstp->rq_vec[v].iov_len) {
+ len -= argp->rqstp->rq_vec[v].iov_len;
v++;
- write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
+ argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen >= PAGE_SIZE) {
- write->wr_vec[v].iov_len = PAGE_SIZE;
+ argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
argp->pagelen -= PAGE_SIZE;
} else {
- write->wr_vec[v].iov_len = argp->pagelen;
+ argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
argp->pagelen -= len;
}
}
- argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
- argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
- write->wr_vec[v].iov_len = len;
+ argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
+ argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+ argp->rqstp->rq_vec[v].iov_len = len;
write->wr_vlen = v+1;
DECODE_TAIL;
@@ -1223,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
stateowner->so_replay.rp_buflen); \
} } while (0);
+/* Encode as an array of strings the string given with components
+ * separated by @sep.
+ */
+static int nfsd4_encode_components(char sep, char *components,
+ u32 **pp, int *buflen)
+{
+ u32 *p = *pp;
+ u32 *countp = p;
+ int strlen, count=0;
+ char *str, *end;
+
+ dprintk("nfsd4_encode_components(%s)\n", components);
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(0); /* We will fill this in with @count later */
+ end = str = components;
+ while (*end) {
+ for (; *end && (*end != sep); end++)
+ ; /* Point to end of component */
+ strlen = end - str;
+ if (strlen) {
+ if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+ return nfserr_resource;
+ WRITE32(strlen);
+ WRITEMEM(str, strlen);
+ count++;
+ }
+ else
+ end++;
+ str = end;
+ }
+ *pp = p;
+ p = countp;
+ WRITE32(count);
+ return 0;
+}
+
+/*
+ * encode a location element of a fs_locations structure
+ */
+static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
+ u32 **pp, int *buflen)
+{
+ int status;
+ u32 *p = *pp;
+
+ status = nfsd4_encode_components(':', location->hosts, &p, buflen);
+ if (status)
+ return status;
+ status = nfsd4_encode_components('/', location->path, &p, buflen);
+ if (status)
+ return status;
+ *pp = p;
+ return 0;
+}
+
+/*
+ * Return the path to an export point in the pseudo filesystem namespace.
+ * Returned string is safe to use as long as the caller holds a reference
+ * to @exp.
+ */
+static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+ struct svc_fh tmp_fh;
+ char *path, *rootpath;
+ int stat;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+ stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
+ if (stat)
+ return ERR_PTR(stat);
+ rootpath = tmp_fh.fh_export->ex_path;
+
+ path = exp->ex_path;
+
+ if (strncmp(path, rootpath, strlen(rootpath))) {
+ printk("nfsd: fs_locations failed;"
+ "%s is not contained in %s\n", path, rootpath);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ return path + strlen(rootpath);
+}
+
+/*
+ * encode a fs_locations structure
+ */
+static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
+ struct svc_export *exp,
+ u32 **pp, int *buflen)
+{
+ int status, i;
+ u32 *p = *pp;
+ struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
+ char *root = nfsd4_path(rqstp, exp);
+
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ status = nfsd4_encode_components('/', root, &p, buflen);
+ if (status)
+ return status;
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(fslocs->locations_count);
+ for (i=0; i<fslocs->locations_count; i++) {
+ status = nfsd4_encode_fs_location4(&fslocs->locations[i],
+ &p, buflen);
+ if (status)
+ return status;
+ }
+ *pp = p;
+ return 0;
+}
static u32 nfs4_ftypes[16] = {
NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
@@ -1272,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
}
+#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
+ FATTR4_WORD0_RDATTR_ERROR)
+#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
+
+static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+{
+ /* As per referral draft: */
+ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
+ *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
+ if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
+ *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
+ *rdattr_err = NFSERR_MOVED;
+ else
+ return nfserr_moved;
+ }
+ *bmval0 &= WORD0_ABSENT_FS_ATTRS;
+ *bmval1 &= WORD1_ABSENT_FS_ATTRS;
+ return 0;
+}
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
@@ -1294,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
u32 *attrlenp;
u32 dummy;
u64 dummy64;
+ u32 rdattr_err = 0;
u32 *p = buffer;
int status;
int aclsupport = 0;
@@ -1303,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+ if (exp->ex_fslocs.migrated) {
+ status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
+ if (status)
+ goto out;
+ }
+
status = vfs_getattr(exp->ex_mnt, dentry, &stat);
if (status)
goto out_nfserr;
@@ -1334,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_nfserr;
}
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ if (exp->ex_fslocs.locations == NULL) {
+ bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
+ }
+ }
if ((buflen -= 16) < 0)
goto out_resource;
@@ -1343,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
attrlenp = p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
if ((buflen -= 12) < 0)
goto out_resource;
+ if (!aclsupport)
+ word0 &= ~FATTR4_WORD0_ACL;
+ if (!exp->ex_fslocs.locations)
+ word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
WRITE32(2);
- WRITE32(aclsupport ?
- NFSD_SUPPORTED_ATTRS_WORD0 :
- NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
+ WRITE32(word0);
WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
}
if (bmval0 & FATTR4_WORD0_TYPE) {
@@ -1402,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_FSID) {
if ((buflen -= 16) < 0)
goto out_resource;
- if (is_fsid(fhp, rqstp->rq_reffh)) {
+ if (exp->ex_fslocs.migrated) {
+ WRITE64(NFS4_REFERRAL_FSID_MAJOR);
+ WRITE64(NFS4_REFERRAL_FSID_MINOR);
+ } else if (is_fsid(fhp, rqstp->rq_reffh)) {
WRITE64((u64)exp->ex_fsid);
WRITE64((u64)0);
} else {
@@ -1425,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(0);
+ WRITE32(rdattr_err);
}
if (bmval0 & FATTR4_WORD0_ACL) {
struct nfs4_ace *ace;
@@ -1513,6 +1671,13 @@ out_acl:
goto out_resource;
WRITE64((u64) statfs.f_files);
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
if ((buflen -= 4) < 0)
goto out_resource;
@@ -1536,12 +1701,12 @@ out_acl:
if (bmval0 & FATTR4_WORD0_MAXREAD) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval0 & FATTR4_WORD0_MAXWRITE) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval1 & FATTR4_WORD1_MODE) {
if ((buflen -= 4) < 0)
@@ -1845,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval,
resp->rqstp);
-
if (!nfserr)
resp->p += buflen;
return nfserr;
@@ -2039,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
}
static int
-nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read)
+nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_read *read)
{
u32 eof;
int v, pn;
@@ -2054,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
RESERVE_SPACE(8); /* eof flag and byte count */
- maxcount = NFSSVC_MAXBLKSIZE;
+ maxcount = svc_max_payload(resp->rqstp);
if (maxcount > read->rd_length)
maxcount = read->rd_length;
len = maxcount;
v = 0;
while (len > 0) {
- pn = resp->rqstp->rq_resused;
- svc_take_page(resp->rqstp);
- read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]);
- read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+ pn = resp->rqstp->rq_resused++;
+ resp->rqstp->rq_vec[v].iov_base =
+ page_address(resp->rqstp->rq_respages[pn]);
+ resp->rqstp->rq_vec[v].iov_len =
+ len < PAGE_SIZE ? len : PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
read->rd_vlen = v;
nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
- read->rd_offset, read->rd_iov, read->rd_vlen,
+ read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
&maxcount);
if (nfserr == nfserr_symlink)
nfserr = nfserr_inval;
if (nfserr)
return nfserr;
- eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
WRITE32(eof);
WRITE32(maxcount);
@@ -2088,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2113,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
if (resp->xbuf->page_len)
return nfserr_resource;
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
maxcount = PAGE_SIZE;
RESERVE_SPACE(4);
@@ -2138,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2189,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
goto err_no_verf;
}
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
readdir->common.err = 0;
readdir->buflen = maxcount;
readdir->buffer = page;
@@ -2215,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
p = readdir->buffer;
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
- resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->xbuf->page_len = ((char*)p) - (char*)page_address(
+ resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = tailbase;
resp->xbuf->tail[0].iov_len = 0;
resp->p = resp->xbuf->tail[0].iov_base;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c6a477..39aed90 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -57,6 +57,7 @@ enum {
NFSD_Pool_Threads,
NFSD_Versions,
NFSD_Ports,
+ NFSD_MaxBlkSize,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -82,6 +83,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size);
static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -100,6 +102,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Pool_Threads] = write_pool_threads,
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
+ [NFSD_MaxBlkSize] = write_maxblksize,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_RecoveryDir] = write_recoverydir,
@@ -523,18 +526,20 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
err = nfsd_create_serv();
if (!err) {
int proto = 0;
- err = lockd_up(proto);
- if (!err) {
- err = svc_addsock(nfsd_serv, fd, buf, &proto);
- if (err)
- lockd_down();
+ err = svc_addsock(nfsd_serv, fd, buf, &proto);
+ if (err >= 0) {
+ err = lockd_up(proto);
+ if (err < 0)
+ svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
}
/* Decrease the count, but don't shutdown the
* the service
*/
+ lock_kernel();
nfsd_serv->sv_nrthreads--;
+ unlock_kernel();
}
- return err;
+ return err < 0 ? err : 0;
}
if (buf[0] == '-') {
char *toclose = kstrdup(buf+1, GFP_KERNEL);
@@ -545,12 +550,43 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
unlock_kernel();
+ if (len >= 0)
+ lockd_down();
kfree(toclose);
return len;
}
return -EINVAL;
}
+int nfsd_max_blksize;
+
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ if (size > 0) {
+ int bsize;
+ int rv = get_int(&mesg, &bsize);
+ if (rv)
+ return rv;
+ /* force bsize into allowed range and
+ * required alignment.
+ */
+ if (bsize < 1024)
+ bsize = 1024;
+ if (bsize > NFSSVC_MAXBLKSIZE)
+ bsize = NFSSVC_MAXBLKSIZE;
+ bsize &= ~(1024-1);
+ lock_kernel();
+ if (nfsd_serv && nfsd_serv->sv_nrthreads) {
+ unlock_kernel();
+ return -EBUSY;
+ }
+ nfsd_max_blksize = bsize;
+ unlock_kernel();
+ }
+ return sprintf(buf, "%d\n", nfsd_max_blksize);
+}
+
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
@@ -616,6 +652,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 06cd0db..9ee1dab 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
* status, 17 words for fattr, and 1 word for the byte count.
*/
- if (NFSSVC_MAXBLKSIZE < argp->count) {
+ if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
printk(KERN_NOTICE
"oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port),
argp->count);
- argp->count = NFSSVC_MAXBLKSIZE;
+ argp->count = NFSSVC_MAXBLKSIZE_V2;
}
svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr) return nfserr;
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&stable);
return nfsd_return_attrs(nfserr, resp);
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
- fh_lock(dirfhp);
+ fh_lock_nested(dirfhp, I_MUTEX_PARENT);
dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
if (IS_ERR(dchild)) {
nfserr = nfserrno(PTR_ERR(dchild));
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
- PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1944305..6fa6340 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -198,9 +198,26 @@ int nfsd_create_serv(void)
unlock_kernel();
return 0;
}
+ if (nfsd_max_blksize == 0) {
+ /* choose a suitable default */
+ struct sysinfo i;
+ si_meminfo(&i);
+ /* Aim for 1/4096 of memory per thread
+ * This gives 1MB on 4Gig machines
+ * But only uses 32K on 128M machines.
+ * Bottom out at 8K on 32M and smaller.
+ * Of course, this is only a default.
+ */
+ nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
+ i.totalram <<= PAGE_SHIFT - 12;
+ while (nfsd_max_blksize > i.totalram &&
+ nfsd_max_blksize >= 8*1024*2)
+ nfsd_max_blksize /= 2;
+ }
atomic_set(&nfsd_busy, 0);
- nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE,
+ nfsd_serv = svc_create_pooled(&nfsd_program,
+ NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
nfsd_last_thread,
nfsd, SIG_NOCLEAN, THIS_MODULE);
if (nfsd_serv == NULL)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 3f14a17..1135c0d 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
*/
v=0;
while (len > 0) {
- pn=rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
len = args->len = ntohl(*p++);
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
v = 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->vec[0].iov_len > 0;
+ return rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
{
struct kstatfs *stat = &resp->stats;
- *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */
+ *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
*p++ = htonl(stat->f_bsize);
*p++ = htonl(stat->f_blocks);
*p++ = htonl(stat->f_bfree);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 443ebc5..1141bd2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
#include <linux/nfsd_idmap.h>
#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
+#include <linux/jhash.h>
#include <asm/uaccess.h>
@@ -81,10 +82,19 @@ struct raparms {
dev_t p_dev;
int p_set;
struct file_ra_state p_ra;
+ unsigned int p_hindex;
};
+struct raparm_hbucket {
+ struct raparms *pb_head;
+ spinlock_t pb_lock;
+} ____cacheline_aligned_in_smp;
+
static struct raparms * raparml;
-static struct raparms * raparm_cache;
+#define RAPARM_HASH_BITS 4
+#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
+static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
} else if (error < 0)
goto out_nfserr;
- if (pacl) {
- error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
- if (error < 0)
- goto out_nfserr;
- }
+ error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+ if (error < 0)
+ goto out_nfserr;
- if (dpacl) {
+ if (S_ISDIR(inode->i_mode)) {
error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
if (error < 0)
goto out_nfserr;
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp)
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
-static DEFINE_SPINLOCK(ra_lock);
static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
+ unsigned int hash;
+ struct raparm_hbucket *rab;
- spin_lock(&ra_lock);
- for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+ hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
+ rab = &raparm_hash[hash];
+
+ spin_lock(&rab->pb_lock);
+ for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev)
goto found;
depth++;
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
}
depth = nfsdstats.ra_size*11/10;
if (!frap) {
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return NULL;
}
rap = frap;
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
ra->p_dev = dev;
ra->p_ino = ino;
ra->p_set = 0;
+ ra->p_hindex = hash;
found:
- if (rap != &raparm_cache) {
+ if (rap != &rab->pb_head) {
*rap = ra->p_next;
- ra->p_next = raparm_cache;
- raparm_cache = ra;
+ ra->p_next = rab->pb_head;
+ rab->pb_head = ra;
}
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return ra;
}
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
{
unsigned long count = desc->count;
struct svc_rqst *rqstp = desc->arg.data;
+ struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
if (size > count)
size = count;
if (rqstp->rq_res.page_len == 0) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_base = offset;
rqstp->rq_res.page_len = size;
- } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+ } else if (page != pp[-1]) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_len += size;
- } else {
+ } else
rqstp->rq_res.page_len += size;
- }
desc->count = count - size;
desc->written += size;
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
file->f_ra = ra->p_ra;
if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
- svc_pushback_unused_pages(rqstp);
+ rqstp->rq_resused = 1;
err = file->f_op->sendfile(file, &offset, *count,
nfsd_read_actor, rqstp);
} else {
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write back readahead params */
if (ra) {
- spin_lock(&ra_lock);
+ struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+ spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
}
if (err >= 0) {
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
void
nfsd_racache_shutdown(void)
{
- if (!raparm_cache)
+ if (!raparml)
return;
dprintk("nfsd: freeing readahead buffers.\n");
kfree(raparml);
- raparm_cache = raparml = NULL;
+ raparml = NULL;
}
/*
* Initialize readahead param cache
@@ -1842,19 +1860,31 @@ int
nfsd_racache_init(int cache_size)
{
int i;
+ int j = 0;
+ int nperbucket;
+
- if (raparm_cache)
+ if (raparml)
return 0;
+ if (cache_size < 2*RAPARM_HASH_SIZE)
+ cache_size = 2*RAPARM_HASH_SIZE;
raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
if (raparml != NULL) {
dprintk("nfsd: allocating %d readahead buffers.\n",
cache_size);
+ for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+ raparm_hash[i].pb_head = NULL;
+ spin_lock_init(&raparm_hash[i].pb_lock);
+ }
+ nperbucket = cache_size >> RAPARM_HASH_BITS;
memset(raparml, 0, sizeof(struct raparms) * cache_size);
for (i = 0; i < cache_size - 1; i++) {
- raparml[i].p_next = raparml + i + 1;
+ if (i % nperbucket == 0)
+ raparm_hash[j++].pb_head = raparml + i;
+ if (i % nperbucket < nperbucket-1)
+ raparml[i].p_next = raparml + i + 1;
}
- raparm_cache = raparml;
} else {
printk(KERN_WARNING
"nfsd: Could not allocate memory read-ahead cache.\n");