Merge branch 'misc' into test
[cascardo/linux.git] / fs / ceph / file.c
index e77c28c..7d0e4a8 100644 (file)
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
        }
 
        /*
-        * No need to block if we have any caps.  Update wanted set
+        * No need to block if we have caps on the auth MDS (for
+        * write) or any MDS (for read).  Update wanted set
         * asynchronously.
         */
        spin_lock(&inode->i_lock);
-       if (__ceph_is_any_real_caps(ci)) {
+       if (__ceph_is_any_real_caps(ci) &&
+           (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
                int mds_wanted = __ceph_caps_mds_wanted(ci);
                int issued = __ceph_caps_issued(ci, NULL);
 
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file)
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
                        struct page **pages, int num_pages,
-                       int *checkeof)
+                       int *checkeof, bool align_to_pages,
+                       unsigned long buf_align)
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len;
+       int io_align, page_align;
        int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
        int left, pages_left;
        int read;
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode,
        page_pos = pages;
        pages_left = num_pages;
        read = 0;
+       io_align = off & ~PAGE_MASK;
 
 more:
+       if (align_to_pages)
+               page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+       else
+               page_align = pos & ~PAGE_MASK;
        this_len = left;
        ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
                                  ci->i_truncate_seq,
                                  ci->i_truncate_size,
-                                 page_pos, pages_left);
+                                 page_pos, pages_left, page_align);
        hit_stripe = this_len < left;
        was_short = ret >= 0 && ret < this_len;
        if (ret == -ENOENT)
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
        struct inode *inode = file->f_dentry->d_inode;
        struct page **pages;
        u64 off = *poff;
-       int num_pages = calc_pages_for(off, len);
-       int ret;
+       int num_pages, ret;
 
        dout("sync_read on file %p %llu~%u %s\n", file, off, len,
             (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
        if (file->f_flags & O_DIRECT) {
-               pages = ceph_get_direct_page_vector(data, num_pages, off, len);
-
-               /*
-                * flush any page cache pages in this range.  this
-                * will make concurrent normal and O_DIRECT io slow,
-                * but it will at least behave sensibly when they are
-                * in sequence.
-                */
+               num_pages = calc_pages_for((unsigned long)data, len);
+               pages = ceph_get_direct_page_vector(data, num_pages, true);
        } else {
+               num_pages = calc_pages_for(off, len);
                pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
        }
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
+       /*
+        * Flush any dirty page cache pages for this inode (the
+        * whole mapping, not just this range).  This will make
+        * concurrent normal and sync io slow, but it will at least
+        * behave sensibly when they are in sequence.
+        */
        ret = filemap_write_and_wait(inode->i_mapping);
        if (ret < 0)
                goto done;
 
-       ret = striped_read(inode, off, len, pages, num_pages, checkeof);
+       ret = striped_read(inode, off, len, pages, num_pages, checkeof,
+                          file->f_flags & O_DIRECT,
+                          (unsigned long)data & ~PAGE_MASK);
 
        if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
                ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 
 done:
        if (file->f_flags & O_DIRECT)
-               ceph_put_page_vector(pages, num_pages);
+               ceph_put_page_vector(pages, num_pages, true);
        else
                ceph_release_page_vector(pages, num_pages);
        dout("sync_read result %d\n", ret);
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        int flags;
        int do_sync = 0;
        int check_caps = 0;
+       int page_align, io_align;
+       unsigned long buf_align;
        int ret;
        struct timespec mtime = CURRENT_TIME;
 
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        else
                pos = *offset;
 
+       io_align = pos & ~PAGE_MASK;
+       buf_align = (unsigned long)data & ~PAGE_MASK;
+
        ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
        if (ret < 0)
                return ret;
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
         */
 more:
        len = left;
+       if (file->f_flags & O_DIRECT) {
+               /* write from beginning of first page, regardless of
+                  io alignment */
+               page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+               num_pages = calc_pages_for((unsigned long)data, len);
+       } else {
+               page_align = pos & ~PAGE_MASK;
+               num_pages = calc_pages_for(pos, len);
+       }
        req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                    ceph_vino(inode), pos, &len,
                                    CEPH_OSD_OP_WRITE, flags,
                                    ci->i_snap_realm->cached_context,
                                    do_sync,
                                    ci->i_truncate_seq, ci->i_truncate_size,
-                                   &mtime, false, 2);
+                                   &mtime, false, 2, page_align);
        if (!req)
                return -ENOMEM;
 
-       num_pages = calc_pages_for(pos, len);
-
        if (file->f_flags & O_DIRECT) {
-               pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+               pages = ceph_get_direct_page_vector(data, num_pages, false);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
@@ -549,7 +572,7 @@ more:
        }
 
        if (file->f_flags & O_DIRECT)
-               ceph_put_page_vector(pages, num_pages);
+               ceph_put_page_vector(pages, num_pages, false);
        else if (file->f_flags & O_SYNC)
                ceph_release_page_vector(pages, num_pages);