diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bio.c | 2 | ||||
-rw-r--r-- | fs/block_dev.c | 7 | ||||
-rw-r--r-- | fs/coda/file.c | 9 | ||||
-rw-r--r-- | fs/exofs/osd.c | 4 | ||||
-rw-r--r-- | fs/pipe.c | 14 | ||||
-rw-r--r-- | fs/read_write.c | 7 | ||||
-rw-r--r-- | fs/splice.c | 338 |
7 files changed, 349 insertions, 32 deletions
@@ -1201,7 +1201,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) char *addr = page_address(bvec->bv_page); int len = bmd->iovecs[i].bv_len; - if (read && !err) + if (read) memcpy(p, addr, len); __free_page(bvec->bv_page); diff --git a/fs/block_dev.c b/fs/block_dev.c index f45dbc1..a85fe31 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,6 +331,12 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } +static int blkdev_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); +} + static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1399,6 +1405,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, + .readpages = blkdev_readpages, .writepage = blkdev_writepage, .sync_page = block_sync_page, .write_begin = blkdev_write_begin, diff --git a/fs/coda/file.c b/fs/coda/file.c index 6a347fb..ffd4281 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); struct coda_file_info *cfi; struct file *host_file; @@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->splice_read) - return -EINVAL; + splice_read = host_file->f_op->splice_read; + if (!splice_read) + splice_read = default_file_splice_read; - return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags); + return splice_read(host_file, ppos, pipe, count, flags); } static ssize_t diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b249ae9..06ca926 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -50,10 +50,10 @@ int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) /* FIXME: should be include in osd_sense_info */ if (in_resid) - *in_resid = or->in.req ? or->in.req->data_len : 0; + *in_resid = or->in.req ? or->in.req->resid_len : 0; if (out_resid) - *out_resid = or->out.req ? or->out.req->data_len : 0; + *out_resid = or->out.req ? or->out.req->resid_len : 0; return ret; } @@ -302,6 +302,20 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, return 0; } +/** + * generic_pipe_buf_release - put a reference to a &struct pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to put a reference to + * + * Description: + * This function releases a reference to @buf. + */ +void generic_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + page_cache_release(buf->page); +} + static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = generic_pipe_buf_map, diff --git a/fs/read_write.c b/fs/read_write.c index 9d1e76b..6c8c55d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -805,12 +805,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_path.dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->splice_read) - goto fput_in; retval = -ESPIPE; if (!ppos) ppos = &in_file->f_pos; @@ -834,6 +828,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, retval = -EINVAL; if (!out_file->f_op || !out_file->f_op->sendpage) goto fput_out; + in_inode = in_file->f_path.dentry->d_inode; out_inode = out_file->f_path.dentry->d_inode; retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); if (retval < 0) diff --git a/fs/splice.c b/fs/splice.c index 666953d5..73766d2 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -507,9 +507,131 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, return ret; } - EXPORT_SYMBOL(generic_file_splice_read); +static const struct pipe_buf_operations default_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t kernel_readv(struct file *file, const struct iovec *vec, + unsigned long vlen, loff_t offset) +{ + mm_segment_t old_fs; + loff_t pos = offset; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); + set_fs(old_fs); + + return res; +} + +static ssize_t kernel_write(struct file *file, const char *buf, size_t count, + loff_t pos) +{ + mm_segment_t old_fs; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_write(file, (const char __user *)buf, count, &pos); + set_fs(old_fs); + + return res; +} + +ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + unsigned int nr_pages; + unsigned int nr_freed; + size_t offset; + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct iovec vec[PIPE_BUFFERS]; + pgoff_t index; + ssize_t res; + size_t this_len; + int error; + int i; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &default_pipe_buf_ops, + .spd_release = spd_release_page, + }; + + index = *ppos >> PAGE_CACHE_SHIFT; + offset = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { + struct page *page; + + page = alloc_page(GFP_USER); + error = -ENOMEM; + if (!page) + goto err; + + this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); + vec[i].iov_base = (void __user *) page_address(page); + vec[i].iov_len = this_len; + pages[i] = page; + spd.nr_pages++; + len -= this_len; + offset = 0; + } + + res = kernel_readv(in, vec, spd.nr_pages, *ppos); + if (res < 0) { + error = res; + goto err; + } + + error = 0; + if (!res) + goto err; + + nr_freed = 0; + for (i = 0; i < spd.nr_pages; i++) { + this_len = min_t(size_t, vec[i].iov_len, res); + partial[i].offset = 0; + partial[i].len = this_len; + if (!this_len) { + __free_page(pages[i]); + pages[i] = NULL; + nr_freed++; + } + res -= this_len; + } + spd.nr_pages -= nr_freed; + + res = splice_to_pipe(pipe, &spd); + if (res > 0) + *ppos += res; + + return res; + +err: + for (i = 0; i < spd.nr_pages; i++) + __free_page(pages[i]); + + return error; +} +EXPORT_SYMBOL(default_file_splice_read); + /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). Return the number of bytes sent. @@ -881,6 +1003,36 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, EXPORT_SYMBOL(generic_file_splice_write); +static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + int ret; + void *data; + + ret = buf->ops->confirm(pipe, buf); + if (ret) + return ret; + + data = buf->ops->map(pipe, buf, 0); + ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); + buf->ops->unmap(pipe, buf, data); + + return ret; +} + +static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); + if (ret > 0) + *ppos += ret; + + return ret; +} + /** * generic_splice_sendpage - splice data from a pipe to a socket * @pipe: pipe to splice from @@ -908,11 +1060,10 @@ EXPORT_SYMBOL(generic_splice_sendpage); static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); int ret; - if (unlikely(!out->f_op || !out->f_op->splice_write)) - return -EINVAL; - if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; @@ -923,7 +1074,11 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (unlikely(ret < 0)) return ret; - return out->f_op->splice_write(pipe, out, ppos, len, flags); + splice_write = out->f_op->splice_write; + if (!splice_write) + splice_write = default_file_splice_write; + + return splice_write(pipe, out, ppos, len, flags); } /* @@ -933,11 +1088,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); int ret; - if (unlikely(!in->f_op || !in->f_op->splice_read)) - return -EINVAL; - if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; @@ -945,7 +1099,11 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - return in->f_op->splice_read(in, ppos, pipe, len, flags); + splice_read = in->f_op->splice_read; + if (!splice_read) + splice_read = default_file_splice_read; + + return splice_read(in, ppos, pipe, len, flags); } /** @@ -1112,6 +1270,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, return ret; } +static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags); /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a @@ -1132,12 +1293,32 @@ static long do_splice(struct file *in, loff_t __user *off_in, struct file *out, loff_t __user *off_out, size_t len, unsigned int flags) { - struct pipe_inode_info *pipe; + struct pipe_inode_info *ipipe; + struct pipe_inode_info *opipe; loff_t offset, *off; long ret; - pipe = pipe_info(in->f_path.dentry->d_inode); - if (pipe) { + ipipe = pipe_info(in->f_path.dentry->d_inode); + opipe = pipe_info(out->f_path.dentry->d_inode); + + if (ipipe && opipe) { + if (off_in || off_out) + return -ESPIPE; + + if (!(in->f_mode & FMODE_READ)) + return -EBADF; + + if (!(out->f_mode & FMODE_WRITE)) + return -EBADF; + + /* Splicing to self would be fun, but... */ + if (ipipe == opipe) + return -EINVAL; + + return splice_pipe_to_pipe(ipipe, opipe, len, flags); + } + + if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { @@ -1149,7 +1330,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &out->f_pos; - ret = do_splice_from(pipe, out, off, len, flags); + ret = do_splice_from(ipipe, out, off, len, flags); if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) ret = -EFAULT; @@ -1157,8 +1338,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; } - pipe = pipe_info(out->f_path.dentry->d_inode); - if (pipe) { + if (opipe) { if (off_out) return -ESPIPE; if (off_in) { @@ -1170,7 +1350,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &in->f_pos; - ret = do_splice_to(in, off, pipe, len, flags); + ret = do_splice_to(in, off, opipe, len, flags); if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) ret = -EFAULT; @@ -1511,7 +1691,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, * Make sure there's data to read. Wait for input if we can, otherwise * return an appropriate error. */ -static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; @@ -1549,7 +1729,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Make sure there's writeable room. Wait for room if we can, otherwise * return an appropriate error. */ -static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; @@ -1587,6 +1767,124 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) } /* + * Splice contents of ipipe to opipe. + */ +static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags) +{ + struct pipe_buffer *ibuf, *obuf; + int ret = 0, nbuf; + bool input_wakeup = false; + + +retry: + ret = ipipe_prep(ipipe, flags); + if (ret) + return ret; + + ret = opipe_prep(opipe, flags); + if (ret) + return ret; + + /* + * Potential ABBA deadlock, work around it by ordering lock + * grabbing by pipe info address. Otherwise two different processes + * could deadlock (one doing tee from A -> B, the other from B -> A). + */ + pipe_double_lock(ipipe, opipe); + + do { + if (!opipe->readers) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } + + if (!ipipe->nrbufs && !ipipe->writers) + break; + + /* + * Cannot make any progress, because either the input + * pipe is empty or the output pipe is full. + */ + if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { + /* Already processed some buffers, break */ + if (ret) + break; + + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + + /* + * We raced with another reader/writer and haven't + * managed to process any buffers. A zero return + * value means EOF, so retry instead. + */ + pipe_unlock(ipipe); + pipe_unlock(opipe); + goto retry; + } + + ibuf = ipipe->bufs + ipipe->curbuf; + nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; + obuf = opipe->bufs + nbuf; + + if (len >= ibuf->len) { + /* + * Simply move the whole buffer from ipipe to opipe + */ + *obuf = *ibuf; + ibuf->ops = NULL; + opipe->nrbufs++; + ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; + ipipe->nrbufs--; + input_wakeup = true; + } else { + /* + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ + ibuf->ops->get(ipipe, ibuf); + *obuf = *ibuf; + + /* + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. + */ + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + + obuf->len = len; + opipe->nrbufs++; + ibuf->offset += obuf->len; + ibuf->len -= obuf->len; + } + ret += obuf->len; + len -= obuf->len; + } while (len); + + pipe_unlock(ipipe); + pipe_unlock(opipe); + + /* + * If we put data in the output pipe, wakeup any potential readers. + */ + if (ret > 0) { + smp_mb(); + if (waitqueue_active(&opipe->wait)) + wake_up_interruptible(&opipe->wait); + kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); + } + if (input_wakeup) + wakeup_pipe_writers(ipipe); + + return ret; +} + +/* * Link contents of ipipe to opipe. */ static int link_pipe(struct pipe_inode_info *ipipe, @@ -1690,9 +1988,9 @@ static long do_tee(struct file *in, struct file *out, size_t len, * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter. */ - ret = link_ipipe_prep(ipipe, flags); + ret = ipipe_prep(ipipe, flags); if (!ret) { - ret = link_opipe_prep(opipe, flags); + ret = opipe_prep(opipe, flags); if (!ret) ret = link_pipe(ipipe, opipe, len, flags); } |