posix_acl: Inode acl caching fixes
author Andreas Gruenbacher <agruenba@redhat.com>
Thu, 24 Mar 2016 13:38:37 +0000 (14:38 +0100)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 31 Mar 2016 04:30:15 +0000 (00:30 -0400)
When get_acl() is called for an inode whose ACL is not cached yet, the
get_acl inode operation is called to fetch the ACL from the filesystem.
The inode operation is responsible for updating the cached acl with
set_cached_acl().  This is done without locking at the VFS level, so
another task can call set_cached_acl() or forget_cached_acl() before the
get_acl inode operation gets to calling set_cached_acl(), and then
get_acl's call to set_cached_acl() results in caching an outdated ACL.

Prevent this from happening by setting the cached ACL pointer to a
task-specific sentinel value before calling the get_acl inode operation.
Move the responsibility for updating the cached ACL from the get_acl
inode operations to get_acl().  There, only set the cached ACL if the
sentinel value hasn't changed.

The sentinel values are chosen to have odd values.  Likewise, the value
of ACL_NOT_CACHED is odd.  In contrast, ACL object pointers always have
an even value (ACLs are aligned in memory).  This makes it possible to
distinguish uncached ACL values from ACL objects.

In addition, switch from guarding inode->i_acl and inode->i_default_acl
updates by the inode->i_lock spinlock to using xchg() and cmpxchg().

Filesystems that do not want ACLs returned from their get_acl inode
operations to be cached must call forget_cached_acl() to prevent the VFS
from doing so.

(Patch written by Al Viro and Andreas Gruenbacher.)

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
17 files changed:
fs/9p/acl.c
fs/btrfs/acl.c
fs/ceph/acl.c
fs/ext2/acl.c
fs/ext4/acl.c
fs/f2fs/acl.c
fs/hfsplus/posix_acl.c
fs/inode.c
fs/jffs2/acl.c
fs/jfs/acl.c
fs/namei.c
fs/nfs/nfs3acl.c
fs/ocfs2/dlmglue.c
fs/posix_acl.c
fs/reiserfs/xattr_acl.c
fs/xfs/xfs_acl.c
include/linux/fs.h

index 9da967f..2d94e94 100644 (file)
@@ -93,7 +93,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
         * instantiating the inode (v9fs_inode_from_fid)
         */
        acl = get_cached_acl(inode, type);
-       BUG_ON(acl == ACL_NOT_CACHED);
+       BUG_ON(is_uncached_acl(acl));
        return acl;
 }
 
index 6d263bb..67a6077 100644 (file)
@@ -63,9 +63,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
        }
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index f197084..5457f21 100644 (file)
@@ -37,6 +37,8 @@ static inline void ceph_set_cached_acl(struct inode *inode,
        spin_lock(&ci->i_ceph_lock);
        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
                set_cached_acl(inode, type, acl);
+       else
+               forget_cached_acl(inode, type);
        spin_unlock(&ci->i_ceph_lock);
 }
 
index 27695e6..42f1d18 100644 (file)
@@ -172,9 +172,6 @@ ext2_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index 69b1e73..c6601a4 100644 (file)
@@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index c8f25f7..6f1fdda 100644 (file)
@@ -190,9 +190,6 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index afb33ed..ab7ea25 100644 (file)
@@ -48,9 +48,6 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
 
        hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index 69b8b52..4202aac 100644 (file)
@@ -238,9 +238,9 @@ void __destroy_inode(struct inode *inode)
        }
 
 #ifdef CONFIG_FS_POSIX_ACL
-       if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+       if (inode->i_acl && !is_uncached_acl(inode->i_acl))
                posix_acl_release(inode->i_acl);
-       if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+       if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
                posix_acl_release(inode->i_default_acl);
 #endif
        this_cpu_dec(nr_inodes);
index 2f7a3c0..bc2693d 100644 (file)
@@ -203,8 +203,6 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(rc);
        }
        kfree(value);
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
        return acl;
 }
 
index ab48828..21fa92b 100644 (file)
@@ -63,8 +63,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
                acl = posix_acl_from_xattr(&init_user_ns, value, size);
        }
        kfree(value);
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
        return acl;
 }
 
index 794f81d..3498d53 100644 (file)
@@ -265,7 +265,7 @@ static int check_acl(struct inode *inode, int mask)
                if (!acl)
                        return -EAGAIN;
                /* no ->get_acl() calls in RCU mode... */
-               if (acl == ACL_NOT_CACHED)
+               if (is_uncached_acl(acl))
                        return -ECHILD;
                return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
        }
index 17c0fa1..720d92f 100644 (file)
 
 #define NFSDBG_FACILITY        NFSDBG_PROC
 
+/*
+ * nfs3_prepare_get_acl, nfs3_complete_get_acl, nfs3_abort_get_acl: Helpers for
+ * caching get_acl results in a race-free way.  See fs/posix_acl.c:get_acl()
+ * for explanations.
+ */
+static void nfs3_prepare_get_acl(struct posix_acl **p)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) {
+               /* Not the first reader or sentinel already in place. */
+       }
+}
+
+static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       /* Only cache the ACL if our sentinel is still in place. */
+       posix_acl_dup(acl);
+       if (cmpxchg(p, sentinel, acl) != sentinel)
+               posix_acl_release(acl);
+}
+
+static void nfs3_abort_get_acl(struct posix_acl **p)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       /* Remove our sentinel upon failure. */
+       cmpxchg(p, sentinel, ACL_NOT_CACHED);
+}
+
 struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
 {
        struct nfs_server *server = NFS_SERVER(inode);
@@ -55,6 +87,11 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        if (res.fattr == NULL)
                return ERR_PTR(-ENOMEM);
 
+       if (args.mask & NFS_ACL)
+               nfs3_prepare_get_acl(&inode->i_acl);
+       if (args.mask & NFS_DFACL)
+               nfs3_prepare_get_acl(&inode->i_default_acl);
+
        status = rpc_call_sync(server->client_acl, &msg, 0);
        dprintk("NFS reply getacl: %d\n", status);
 
@@ -89,12 +126,12 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        }
 
        if (res.mask & NFS_ACL)
-               set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access);
+               nfs3_complete_get_acl(&inode->i_acl, res.acl_access);
        else
                forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
        if (res.mask & NFS_DFACL)
-               set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default);
+               nfs3_complete_get_acl(&inode->i_default_acl, res.acl_default);
        else
                forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
@@ -108,6 +145,8 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        }
 
 getout:
+       nfs3_abort_get_acl(&inode->i_acl);
+       nfs3_abort_get_acl(&inode->i_default_acl);
        posix_acl_release(res.acl_access);
        posix_acl_release(res.acl_default);
        nfs_free_fattr(res.fattr);
index 474e57f..1eaa910 100644 (file)
@@ -54,6 +54,7 @@
 #include "uptodate.h"
 #include "quota.h"
 #include "refcounttree.h"
+#include "acl.h"
 
 #include "buffer_head_io.h"
 
@@ -3623,6 +3624,8 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
                filemap_fdatawait(mapping);
        }
 
+       forget_all_cached_acls(inode);
+
 out:
        return UNBLOCK_CONTINUE;
 }
index 711dd51..bc6736d 100644 (file)
@@ -37,14 +37,18 @@ EXPORT_SYMBOL(acl_by_type);
 struct posix_acl *get_cached_acl(struct inode *inode, int type)
 {
        struct posix_acl **p = acl_by_type(inode, type);
-       struct posix_acl *acl = ACCESS_ONCE(*p);
-       if (acl) {
-               spin_lock(&inode->i_lock);
-               acl = *p;
-               if (acl != ACL_NOT_CACHED)
-                       acl = posix_acl_dup(acl);
-               spin_unlock(&inode->i_lock);
+       struct posix_acl *acl;
+
+       for (;;) {
+               rcu_read_lock();
+               acl = rcu_dereference(*p);
+               if (!acl || is_uncached_acl(acl) ||
+                   atomic_inc_not_zero(&acl->a_refcount))
+                       break;
+               rcu_read_unlock();
+               cpu_relax();
        }
+       rcu_read_unlock();
        return acl;
 }
 EXPORT_SYMBOL(get_cached_acl);
@@ -59,58 +63,72 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
        struct posix_acl **p = acl_by_type(inode, type);
        struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       rcu_assign_pointer(*p, posix_acl_dup(acl));
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
+
+       old = xchg(p, posix_acl_dup(acl));
+       if (!is_uncached_acl(old))
                posix_acl_release(old);
 }
 EXPORT_SYMBOL(set_cached_acl);
 
-void forget_cached_acl(struct inode *inode, int type)
+static void __forget_cached_acl(struct posix_acl **p)
 {
-       struct posix_acl **p = acl_by_type(inode, type);
        struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       *p = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
+
+       old = xchg(p, ACL_NOT_CACHED);
+       if (!is_uncached_acl(old))
                posix_acl_release(old);
 }
+
+void forget_cached_acl(struct inode *inode, int type)
+{
+       __forget_cached_acl(acl_by_type(inode, type));
+}
 EXPORT_SYMBOL(forget_cached_acl);
 
 void forget_all_cached_acls(struct inode *inode)
 {
-       struct posix_acl *old_access, *old_default;
-       spin_lock(&inode->i_lock);
-       old_access = inode->i_acl;
-       old_default = inode->i_default_acl;
-       inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old_access != ACL_NOT_CACHED)
-               posix_acl_release(old_access);
-       if (old_default != ACL_NOT_CACHED)
-               posix_acl_release(old_default);
+       __forget_cached_acl(&inode->i_acl);
+       __forget_cached_acl(&inode->i_default_acl);
 }
 EXPORT_SYMBOL(forget_all_cached_acls);
 
 struct posix_acl *get_acl(struct inode *inode, int type)
 {
+       void *sentinel;
+       struct posix_acl **p;
        struct posix_acl *acl;
 
+       /*
+        * The sentinel is used to detect when another operation like
+        * set_cached_acl() or forget_cached_acl() races with get_acl().
+        * It is guaranteed that is_uncached_acl(sentinel) is true.
+        */
+
        acl = get_cached_acl(inode, type);
-       if (acl != ACL_NOT_CACHED)
+       if (!is_uncached_acl(acl))
                return acl;
 
        if (!IS_POSIXACL(inode))
                return NULL;
 
+       sentinel = uncached_acl_sentinel(current);
+       p = acl_by_type(inode, type);
+
        /*
-        * A filesystem can force a ACL callback by just never filling the
-        * ACL cache. But normally you'd fill the cache either at inode
-        * instantiation time, or on the first ->get_acl call.
+        * If the ACL isn't being read yet, set our sentinel.  Otherwise, the
+        * current value of the ACL will not be ACL_NOT_CACHED and so our own
+        * sentinel will not be set; another task will update the cache.  We
+        * could wait for that other task to complete its job, but it's easier
+        * to just call ->get_acl to fetch the ACL ourself.  (This is going to
+        * be an unlikely race.)
+        */
+       if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED)
+               /* fall through */ ;
+
+       /*
+        * Normally, the ACL returned by ->get_acl will be cached.
+        * A filesystem can prevent that by calling
+        * forget_cached_acl(inode, type) in ->get_acl.
         *
         * If the filesystem doesn't have a get_acl() function at all, we'll
         * just create the negative cache entry.
@@ -119,7 +137,24 @@ struct posix_acl *get_acl(struct inode *inode, int type)
                set_cached_acl(inode, type, NULL);
                return NULL;
        }
-       return inode->i_op->get_acl(inode, type);
+       acl = inode->i_op->get_acl(inode, type);
+
+       if (IS_ERR(acl)) {
+               /*
+                * Remove our sentinel so that we don't block future attempts
+                * to cache the ACL.
+                */
+               cmpxchg(p, sentinel, ACL_NOT_CACHED);
+               return acl;
+       }
+
+       /*
+        * Cache the result, but only if our sentinel is still in place.
+        */
+       posix_acl_dup(acl);
+       if (unlikely(cmpxchg(p, sentinel, acl) != sentinel))
+               posix_acl_release(acl);
+       return acl;
 }
 EXPORT_SYMBOL(get_acl);
 
index ec74bbe..dbed42f 100644 (file)
@@ -197,10 +197,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 
        size = reiserfs_xattr_get(inode, name, NULL, 0);
        if (size < 0) {
-               if (size == -ENODATA || size == -ENOSYS) {
-                       set_cached_acl(inode, type, NULL);
+               if (size == -ENODATA || size == -ENOSYS)
                        return NULL;
-               }
                return ERR_PTR(size);
        }
 
@@ -220,8 +218,6 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
        } else {
                acl = reiserfs_posix_acl_from_disk(value, retval);
        }
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
 
        kfree(value);
        return acl;
index 2d5df1f..b6e527b 100644 (file)
@@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type)
        if (error) {
                /*
                 * If the attribute doesn't exist make sure we have a negative
-                * cache entry, for any other error assume it is transient and
-                * leave the cache entry as ACL_NOT_CACHED.
+                * cache entry, for any other error assume it is transient.
                 */
-               if (error == -ENOATTR)
-                       goto out_update_cache;
-               acl = ERR_PTR(error);
-               goto out;
+               if (error != -ENOATTR)
+                       acl = ERR_PTR(error);
+       } else  {
+               acl = xfs_acl_from_disk(xfs_acl, len,
+                                       XFS_ACL_MAX_ENTRIES(ip->i_mount));
        }
-
-       acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
-       if (IS_ERR(acl))
-               goto out;
-
-out_update_cache:
-       set_cached_acl(inode, type, acl);
-out:
        kmem_free(xfs_acl);
        return acl;
 }
index 14a9719..329ed37 100644 (file)
@@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping)
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
 
+static inline struct posix_acl *
+uncached_acl_sentinel(struct task_struct *task)
+{
+       return (void *)task + 1;
+}
+
+static inline bool
+is_uncached_acl(struct posix_acl *acl)
+{
+       return (long)acl & 1;
+}
+
 #define IOP_FASTPERM   0x0001
 #define IOP_LOOKUP     0x0002
 #define IOP_NOFOLLOW   0x0004