From 773e9b442693b250aa6c452cb0cf5a9343f51cef Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 7 Jun 2011 20:57:14 -0700 Subject: ceph: fix page alignment corrections dd if=/dev/urandom of=/mnt/fs_depot/dd10 bs=500 seek=8388 count=1 dd if=/mnt/fs_depot/dd10 of=/root/dd10out bs=500 skip=8388 count=1 Reported-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9542f07..2be0f35 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -290,7 +290,6 @@ static int striped_read(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int io_align, page_align; - int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; struct page **page_pos; @@ -326,12 +325,11 @@ more: ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { - int didpages = - ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; + int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_off + read, + ceph_zero_page_vector_range(page_align + read, pos - off - read, pages); } pos += ret; @@ -356,7 +354,7 @@ more: left = inode->i_size - pos; dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_off + read, left, + ceph_zero_page_vector_range(page_align + read, left, pages); read += left; } -- cgit v1.1 From 9bb0ce2b0b734f3325ea5cd6b351856eeac94f78 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Jun 2011 16:20:18 -0700 Subject: libceph: fix page calculation for non-page-aligned io Set the page count correctly for non-page-aligned IO. We were already doing this correctly for alignment, but not the page count. Fixes DIRECT_IO writes from unaligned pages. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 9cb627a..7330c27 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -477,8 +477,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ - /* in case it differs from natural alignment that calc_layout - filled in for us */ + /* in case it differs from natural (file) alignment that + calc_layout filled in for us */ + req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; ceph_osdc_build_request(req, off, plen, ops, @@ -2027,8 +2028,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int want = calc_pages_for(req->r_page_alignment, data_len); if (unlikely(req->r_num_pages < want)) { - pr_warning("tid %lld reply %d > expected %d pages\n", - tid, want, m->nr_pages); + pr_warning("tid %lld reply has %d bytes %d pages, we" + " had only %d pages ready\n", tid, data_len, + want, req->r_num_pages); *skip = 1; ceph_msg_put(m); m = NULL; -- cgit v1.1 From d7f124f129a6aea99938e0d4172c741b56fefeda Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Jun 2011 16:22:18 -0700 Subject: ceph: fix sync and dio writes across stripe boundaries We were iterating across stripe boundaries properly, but not moving the write buffer pointer forward. This caused us to rewrite the same data after the break. Fix by adjusting the data pointer forward, and recalculating the io and buffer alignment after the break. Signed-off-by: Sage Weil --- fs/ceph/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2be0f35..4698a5c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -476,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; - io_align = pos & ~PAGE_MASK; - buf_align = (unsigned long)data & ~PAGE_MASK; - ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -502,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, * boundary. this isn't atomic, unfortunately. :( */ more: + io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; len = left; if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of @@ -591,6 +590,7 @@ out: pos += len; written += len; left -= len; + data += written; if (left) goto more; -- cgit v1.1