Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 25 Jun 2015 21:06:55 +0000 (14:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 25 Jun 2015 21:06:55 +0000 (14:06 -0700)
Pull ext4 updates from Ted Ts'o:
 "A very large number of cleanups and bug fixes --- in particular for
  the ext4 encryption patches, which is a new feature added in the last
  merge window.  Also fix a number of long-standing xfstest failures.
  (Quota writes failing due to ENOSPC, a race between truncate and
  writepage in data=journalled mode that was causing generic/068 to
  fail, and other corner cases.)

  Also add support for FALLOC_FL_INSERT_RANGE, and improve jbd2
  performance by eliminating locking when a buffer is modified more
  than once during a transaction (which is very common for allocation
  bitmaps, for example), in which case the state of the journalled
  buffer head doesn't need to change."

[ I renamed "ext4_follow_link()" to "ext4_encrypted_follow_link()" in
  the merge resolution, to make it clear that that function is _only_
  used for encrypted symlinks.  The function doesn't actually work for
  non-encrypted symlinks at all, and they use the generic helpers.
                                         - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (52 commits)
  ext4: set lazytime on remount if MS_LAZYTIME is set by mount
  ext4: only call ext4_truncate when size <= isize
  ext4: make online defrag error reporting consistent
  ext4: minor cleanup of ext4_da_reserve_space()
  ext4: don't retry file block mapping on bigalloc fs with non-extent file
  ext4: prevent ext4_quota_write() from failing due to ENOSPC
  ext4: call sync_blockdev() before invalidate_bdev() in put_super()
  jbd2: speedup jbd2_journal_dirty_metadata()
  jbd2: get rid of open coded allocation retry loop
  ext4: improve warning directory handling messages
  jbd2: fix ocfs2 corrupt when updating journal superblock fails
  ext4: mballoc: avoid 20-argument function call
  ext4: wait for existing dio workers in ext4_alloc_file_blocks()
  ext4: recalculate journal credits as inode depth changes
  jbd2: use GFP_NOFS in jbd2_cleanup_journal_tail()
  ext4: use swap() in mext_page_double_lock()
  ext4: use swap() in memswap()
  ext4: fix race between truncate and __ext4_journalled_writepage()
  ext4 crypto: fail the mount if blocksize != pagesize
  ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate
  ...

29 files changed:
fs/ext4/Kconfig
fs/ext4/balloc.c
fs/ext4/crypto.c
fs/ext4/crypto_fname.c
fs/ext4/crypto_key.c
fs/ext4/crypto_policy.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_crypto.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/readpage.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/jbd2/checkpoint.c
fs/jbd2/journal.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
include/linux/jbd2.h
include/trace/events/ext4.h

index 024f228..bf8bc8a 100644 (file)
@@ -72,6 +72,7 @@ config EXT4_ENCRYPTION
        select CRYPTO_ECB
        select CRYPTO_XTS
        select CRYPTO_CTS
+       select CRYPTO_CTR
        select CRYPTO_SHA256
        select KEYS
        select ENCRYPTED_KEYS
index 955bf49..cd6ea29 100644 (file)
@@ -369,7 +369,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
        struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (buffer_verified(bh))
+       if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                return;
 
        ext4_lock_group(sb, block_group);
@@ -446,7 +446,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
                unlock_buffer(bh);
                if (err)
                        ext4_error(sb, "Checksum bad for grp %u", block_group);
-               return bh;
+               goto verify;
        }
        ext4_unlock_group(sb, block_group);
        if (buffer_uptodate(bh)) {
index 8ff1527..4573155 100644 (file)
@@ -55,6 +55,9 @@ static mempool_t *ext4_bounce_page_pool;
 static LIST_HEAD(ext4_free_crypto_ctxs);
 static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
 
+static struct kmem_cache *ext4_crypto_ctx_cachep;
+struct kmem_cache *ext4_crypt_info_cachep;
+
 /**
  * ext4_release_crypto_ctx() - Releases an encryption context
  * @ctx: The encryption context to release.
@@ -68,18 +71,12 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
 {
        unsigned long flags;
 
-       if (ctx->bounce_page) {
-               if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
-                       __free_page(ctx->bounce_page);
-               else
-                       mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
-               ctx->bounce_page = NULL;
-       }
-       ctx->control_page = NULL;
+       if (ctx->flags & EXT4_WRITE_PATH_FL && ctx->w.bounce_page)
+               mempool_free(ctx->w.bounce_page, ext4_bounce_page_pool);
+       ctx->w.bounce_page = NULL;
+       ctx->w.control_page = NULL;
        if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
-               if (ctx->tfm)
-                       crypto_free_tfm(ctx->tfm);
-               kfree(ctx);
+               kmem_cache_free(ext4_crypto_ctx_cachep, ctx);
        } else {
                spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
                list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@@ -87,23 +84,6 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
        }
 }
 
-/**
- * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
- * @mask: The allocation mask.
- *
- * Return: An allocated and initialized encryption context on success. An error
- * value or NULL otherwise.
- */
-static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
-{
-       struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
-                                             mask);
-
-       if (!ctx)
-               return ERR_PTR(-ENOMEM);
-       return ctx;
-}
-
 /**
  * ext4_get_crypto_ctx() - Gets an encryption context
  * @inode:       The inode for which we are doing the crypto
@@ -118,10 +98,10 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
        struct ext4_crypto_ctx *ctx = NULL;
        int res = 0;
        unsigned long flags;
-       struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
 
-       if (!ext4_read_workqueue)
-               ext4_init_crypto();
+       if (ci == NULL)
+               return ERR_PTR(-ENOKEY);
 
        /*
         * We first try getting the ctx from a free list because in
@@ -140,50 +120,16 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
                list_del(&ctx->free_list);
        spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
        if (!ctx) {
-               ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
-               if (IS_ERR(ctx)) {
-                       res = PTR_ERR(ctx);
+               ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+               if (!ctx) {
+                       res = -ENOMEM;
                        goto out;
                }
                ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
        } else {
                ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
        }
-
-       /* Allocate a new Crypto API context if we don't already have
-        * one or if it isn't the right mode. */
-       BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
-       if (ctx->tfm && (ctx->mode != key->mode)) {
-               crypto_free_tfm(ctx->tfm);
-               ctx->tfm = NULL;
-               ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
-       }
-       if (!ctx->tfm) {
-               switch (key->mode) {
-               case EXT4_ENCRYPTION_MODE_AES_256_XTS:
-                       ctx->tfm = crypto_ablkcipher_tfm(
-                               crypto_alloc_ablkcipher("xts(aes)", 0, 0));
-                       break;
-               case EXT4_ENCRYPTION_MODE_AES_256_GCM:
-                       /* TODO(mhalcrow): AEAD w/ gcm(aes);
-                        * crypto_aead_setauthsize() */
-                       ctx->tfm = ERR_PTR(-ENOTSUPP);
-                       break;
-               default:
-                       BUG();
-               }
-               if (IS_ERR_OR_NULL(ctx->tfm)) {
-                       res = PTR_ERR(ctx->tfm);
-                       ctx->tfm = NULL;
-                       goto out;
-               }
-               ctx->mode = key->mode;
-       }
-       BUG_ON(key->size != ext4_encryption_key_size(key->mode));
-
-       /* There shouldn't be a bounce page attached to the crypto
-        * context at this point. */
-       BUG_ON(ctx->bounce_page);
+       ctx->flags &= ~EXT4_WRITE_PATH_FL;
 
 out:
        if (res) {
@@ -204,20 +150,8 @@ void ext4_exit_crypto(void)
 {
        struct ext4_crypto_ctx *pos, *n;
 
-       list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
-               if (pos->bounce_page) {
-                       if (pos->flags &
-                           EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
-                               __free_page(pos->bounce_page);
-                       } else {
-                               mempool_free(pos->bounce_page,
-                                            ext4_bounce_page_pool);
-                       }
-               }
-               if (pos->tfm)
-                       crypto_free_tfm(pos->tfm);
-               kfree(pos);
-       }
+       list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list)
+               kmem_cache_free(ext4_crypto_ctx_cachep, pos);
        INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
        if (ext4_bounce_page_pool)
                mempool_destroy(ext4_bounce_page_pool);
@@ -225,6 +159,12 @@ void ext4_exit_crypto(void)
        if (ext4_read_workqueue)
                destroy_workqueue(ext4_read_workqueue);
        ext4_read_workqueue = NULL;
+       if (ext4_crypto_ctx_cachep)
+               kmem_cache_destroy(ext4_crypto_ctx_cachep);
+       ext4_crypto_ctx_cachep = NULL;
+       if (ext4_crypt_info_cachep)
+               kmem_cache_destroy(ext4_crypt_info_cachep);
+       ext4_crypt_info_cachep = NULL;
 }
 
 /**
@@ -237,23 +177,31 @@ void ext4_exit_crypto(void)
  */
 int ext4_init_crypto(void)
 {
-       int i, res;
+       int i, res = -ENOMEM;
 
        mutex_lock(&crypto_init);
        if (ext4_read_workqueue)
                goto already_initialized;
        ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
-       if (!ext4_read_workqueue) {
-               res = -ENOMEM;
+       if (!ext4_read_workqueue)
+               goto fail;
+
+       ext4_crypto_ctx_cachep = KMEM_CACHE(ext4_crypto_ctx,
+                                           SLAB_RECLAIM_ACCOUNT);
+       if (!ext4_crypto_ctx_cachep)
+               goto fail;
+
+       ext4_crypt_info_cachep = KMEM_CACHE(ext4_crypt_info,
+                                           SLAB_RECLAIM_ACCOUNT);
+       if (!ext4_crypt_info_cachep)
                goto fail;
-       }
 
        for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
                struct ext4_crypto_ctx *ctx;
 
-               ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
-               if (IS_ERR(ctx)) {
-                       res = PTR_ERR(ctx);
+               ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+               if (!ctx) {
+                       res = -ENOMEM;
                        goto fail;
                }
                list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@@ -317,32 +265,11 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
        struct ablkcipher_request *req = NULL;
        DECLARE_EXT4_COMPLETION_RESULT(ecr);
        struct scatterlist dst, src;
-       struct ext4_inode_info *ei = EXT4_I(inode);
-       struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+       struct crypto_ablkcipher *tfm = ci->ci_ctfm;
        int res = 0;
 
-       BUG_ON(!ctx->tfm);
-       BUG_ON(ctx->mode != ei->i_encryption_key.mode);
-
-       if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
-               printk_ratelimited(KERN_ERR
-                                  "%s: unsupported crypto algorithm: %d\n",
-                                  __func__, ctx->mode);
-               return -ENOTSUPP;
-       }
-
-       crypto_ablkcipher_clear_flags(atfm, ~0);
-       crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
-
-       res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
-                                      ei->i_encryption_key.size);
-       if (res) {
-               printk_ratelimited(KERN_ERR
-                                  "%s: crypto_ablkcipher_setkey() failed\n",
-                                  __func__);
-               return res;
-       }
-       req = ablkcipher_request_alloc(atfm, GFP_NOFS);
+       req = ablkcipher_request_alloc(tfm, GFP_NOFS);
        if (!req) {
                printk_ratelimited(KERN_ERR
                                   "%s: crypto_request_alloc() failed\n",
@@ -384,6 +311,15 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
        return 0;
 }
 
+static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
+{
+       ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT);
+       if (ctx->w.bounce_page == NULL)
+               return ERR_PTR(-ENOMEM);
+       ctx->flags |= EXT4_WRITE_PATH_FL;
+       return ctx->w.bounce_page;
+}
+
 /**
  * ext4_encrypt() - Encrypts a page
  * @inode:          The inode for which the encryption should take place
@@ -413,27 +349,17 @@ struct page *ext4_encrypt(struct inode *inode,
                return (struct page *) ctx;
 
        /* The encryption operation will require a bounce page. */
-       ciphertext_page = alloc_page(GFP_NOFS);
-       if (!ciphertext_page) {
-               /* This is a potential bottleneck, but at least we'll have
-                * forward progress. */
-               ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-                                                GFP_NOFS);
-               if (WARN_ON_ONCE(!ciphertext_page)) {
-                       ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-                                                        GFP_NOFS | __GFP_WAIT);
-               }
-               ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-       } else {
-               ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-       }
-       ctx->bounce_page = ciphertext_page;
-       ctx->control_page = plaintext_page;
+       ciphertext_page = alloc_bounce_page(ctx);
+       if (IS_ERR(ciphertext_page))
+               goto errout;
+       ctx->w.control_page = plaintext_page;
        err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
                               plaintext_page, ciphertext_page);
        if (err) {
+               ciphertext_page = ERR_PTR(err);
+       errout:
                ext4_release_crypto_ctx(ctx);
-               return ERR_PTR(err);
+               return ciphertext_page;
        }
        SetPagePrivate(ciphertext_page);
        set_page_private(ciphertext_page, (unsigned long)ctx);
@@ -470,8 +396,8 @@ int ext4_decrypt_one(struct inode *inode, struct page *page)
 
        struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
 
-       if (!ctx)
-               return -ENOMEM;
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
        ret = ext4_decrypt(ctx, page);
        ext4_release_crypto_ctx(ctx);
        return ret;
@@ -493,21 +419,11 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       ciphertext_page = alloc_page(GFP_NOFS);
-       if (!ciphertext_page) {
-               /* This is a potential bottleneck, but at least we'll have
-                * forward progress. */
-               ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-                                                GFP_NOFS);
-               if (WARN_ON_ONCE(!ciphertext_page)) {
-                       ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-                                                        GFP_NOFS | __GFP_WAIT);
-               }
-               ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-       } else {
-               ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+       ciphertext_page = alloc_bounce_page(ctx);
+       if (IS_ERR(ciphertext_page)) {
+               err = PTR_ERR(ciphertext_page);
+               goto errout;
        }
-       ctx->bounce_page = ciphertext_page;
 
        while (len--) {
                err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
@@ -529,6 +445,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
                        goto errout;
                }
                err = submit_bio_wait(WRITE, bio);
+               bio_put(bio);
                if (err)
                        goto errout;
        }
index fded02f..7dc4eb5 100644 (file)
@@ -48,6 +48,12 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
        return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
 }
 
+static unsigned max_name_len(struct inode *inode)
+{
+       return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
+               EXT4_NAME_LEN;
+}
+
 /**
  * ext4_fname_encrypt() -
  *
@@ -55,43 +61,52 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
  * ciphertext. Errors are returned as negative numbers.  We trust the caller to
  * allocate sufficient memory to oname string.
  */
-static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
+static int ext4_fname_encrypt(struct inode *inode,
                              const struct qstr *iname,
                              struct ext4_str *oname)
 {
        u32 ciphertext_len;
        struct ablkcipher_request *req = NULL;
        DECLARE_EXT4_COMPLETION_RESULT(ecr);
-       struct crypto_ablkcipher *tfm = ctx->ctfm;
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+       struct crypto_ablkcipher *tfm = ci->ci_ctfm;
        int res = 0;
        char iv[EXT4_CRYPTO_BLOCK_SIZE];
-       struct scatterlist sg[1];
-       int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
-       char *workbuf;
+       struct scatterlist src_sg, dst_sg;
+       int padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
+       char *workbuf, buf[32], *alloc_buf = NULL;
+       unsigned lim = max_name_len(inode);
 
-       if (iname->len <= 0 || iname->len > ctx->lim)
+       if (iname->len <= 0 || iname->len > lim)
                return -EIO;
 
        ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
                EXT4_CRYPTO_BLOCK_SIZE : iname->len;
        ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
-       ciphertext_len = (ciphertext_len > ctx->lim)
-                       ? ctx->lim : ciphertext_len;
+       ciphertext_len = (ciphertext_len > lim)
+                       ? lim : ciphertext_len;
+
+       if (ciphertext_len <= sizeof(buf)) {
+               workbuf = buf;
+       } else {
+               alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
+               if (!alloc_buf)
+                       return -ENOMEM;
+               workbuf = alloc_buf;
+       }
 
        /* Allocate request */
        req = ablkcipher_request_alloc(tfm, GFP_NOFS);
        if (!req) {
                printk_ratelimited(
                    KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
+               kfree(alloc_buf);
                return -ENOMEM;
        }
        ablkcipher_request_set_callback(req,
                CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
                ext4_dir_crypt_complete, &ecr);
 
-       /* Map the workpage */
-       workbuf = kmap(ctx->workpage);
-
        /* Copy the input */
        memcpy(workbuf, iname->name, iname->len);
        if (iname->len < ciphertext_len)
@@ -101,21 +116,16 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
        memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
 
        /* Create encryption request */
-       sg_init_table(sg, 1);
-       sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
-       ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
+       sg_init_one(&src_sg, workbuf, ciphertext_len);
+       sg_init_one(&dst_sg, oname->name, ciphertext_len);
+       ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
        res = crypto_ablkcipher_encrypt(req);
        if (res == -EINPROGRESS || res == -EBUSY) {
                BUG_ON(req->base.data != &ecr);
                wait_for_completion(&ecr.completion);
                res = ecr.res;
        }
-       if (res >= 0) {
-               /* Copy the result to output */
-               memcpy(oname->name, workbuf, ciphertext_len);
-               res = ciphertext_len;
-       }
-       kunmap(ctx->workpage);
+       kfree(alloc_buf);
        ablkcipher_request_free(req);
        if (res < 0) {
                printk_ratelimited(
@@ -132,20 +142,21 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
  *     Errors are returned as negative numbers.
  *     We trust the caller to allocate sufficient memory to oname string.
  */
-static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
+static int ext4_fname_decrypt(struct inode *inode,
                              const struct ext4_str *iname,
                              struct ext4_str *oname)
 {
        struct ext4_str tmp_in[2], tmp_out[1];
        struct ablkcipher_request *req = NULL;
        DECLARE_EXT4_COMPLETION_RESULT(ecr);
-       struct scatterlist sg[1];
-       struct crypto_ablkcipher *tfm = ctx->ctfm;
+       struct scatterlist src_sg, dst_sg;
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+       struct crypto_ablkcipher *tfm = ci->ci_ctfm;
        int res = 0;
        char iv[EXT4_CRYPTO_BLOCK_SIZE];
-       char *workbuf;
+       unsigned lim = max_name_len(inode);
 
-       if (iname->len <= 0 || iname->len > ctx->lim)
+       if (iname->len <= 0 || iname->len > lim)
                return -EIO;
 
        tmp_in[0].name = iname->name;
@@ -163,31 +174,19 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
                CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
                ext4_dir_crypt_complete, &ecr);
 
-       /* Map the workpage */
-       workbuf = kmap(ctx->workpage);
-
-       /* Copy the input */
-       memcpy(workbuf, iname->name, iname->len);
-
        /* Initialize IV */
        memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
 
        /* Create encryption request */
-       sg_init_table(sg, 1);
-       sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
-       ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+       sg_init_one(&src_sg, iname->name, iname->len);
+       sg_init_one(&dst_sg, oname->name, oname->len);
+       ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
        res = crypto_ablkcipher_decrypt(req);
        if (res == -EINPROGRESS || res == -EBUSY) {
                BUG_ON(req->base.data != &ecr);
                wait_for_completion(&ecr.completion);
                res = ecr.res;
        }
-       if (res >= 0) {
-               /* Copy the result to output */
-               memcpy(oname->name, workbuf, iname->len);
-               res = iname->len;
-       }
-       kunmap(ctx->workpage);
        ablkcipher_request_free(req);
        if (res < 0) {
                printk_ratelimited(
@@ -253,207 +252,6 @@ static int digest_decode(const char *src, int len, char *dst)
        return cp - dst;
 }
 
-/**
- * ext4_free_fname_crypto_ctx() -
- *
- * Frees up a crypto context.
- */
-void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
-{
-       if (ctx == NULL || IS_ERR(ctx))
-               return;
-
-       if (ctx->ctfm && !IS_ERR(ctx->ctfm))
-               crypto_free_ablkcipher(ctx->ctfm);
-       if (ctx->htfm && !IS_ERR(ctx->htfm))
-               crypto_free_hash(ctx->htfm);
-       if (ctx->workpage && !IS_ERR(ctx->workpage))
-               __free_page(ctx->workpage);
-       kfree(ctx);
-}
-
-/**
- * ext4_put_fname_crypto_ctx() -
- *
- * Return: The crypto context onto free list. If the free list is above a
- * threshold, completely frees up the context, and returns the memory.
- *
- * TODO: Currently we directly free the crypto context. Eventually we should
- * add code it to return to free list. Such an approach will increase
- * efficiency of directory lookup.
- */
-void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
-{
-       if (*ctx == NULL || IS_ERR(*ctx))
-               return;
-       ext4_free_fname_crypto_ctx(*ctx);
-       *ctx = NULL;
-}
-
-/**
- * ext4_search_fname_crypto_ctx() -
- */
-static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
-               const struct ext4_encryption_key *key)
-{
-       return NULL;
-}
-
-/**
- * ext4_alloc_fname_crypto_ctx() -
- */
-struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
-       const struct ext4_encryption_key *key)
-{
-       struct ext4_fname_crypto_ctx *ctx;
-
-       ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
-       if (ctx == NULL)
-               return ERR_PTR(-ENOMEM);
-       if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
-               /* This will automatically set key mode to invalid
-                * As enum for ENCRYPTION_MODE_INVALID is zero */
-               memset(&ctx->key, 0, sizeof(ctx->key));
-       } else {
-               memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
-       }
-       ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
-               ? 0 : 1;
-       ctx->ctfm_key_is_ready = 0;
-       ctx->ctfm = NULL;
-       ctx->htfm = NULL;
-       ctx->workpage = NULL;
-       return ctx;
-}
-
-/**
- * ext4_get_fname_crypto_ctx() -
- *
- * Allocates a free crypto context and initializes it to hold
- * the crypto material for the inode.
- *
- * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
- */
-struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
-       struct inode *inode, u32 max_ciphertext_len)
-{
-       struct ext4_fname_crypto_ctx *ctx;
-       struct ext4_inode_info *ei = EXT4_I(inode);
-       int res;
-
-       /* Check if the crypto policy is set on the inode */
-       res = ext4_encrypted_inode(inode);
-       if (res == 0)
-               return NULL;
-
-       if (!ext4_has_encryption_key(inode))
-               ext4_generate_encryption_key(inode);
-
-       /* Get a crypto context based on the key.
-        * A new context is allocated if no context matches the requested key.
-        */
-       ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
-       if (ctx == NULL)
-               ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
-       if (IS_ERR(ctx))
-               return ctx;
-
-       ctx->flags = ei->i_crypt_policy_flags;
-       if (ctx->has_valid_key) {
-               if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
-                       printk_once(KERN_WARNING
-                                   "ext4: unsupported key mode %d\n",
-                                   ctx->key.mode);
-                       return ERR_PTR(-ENOKEY);
-               }
-
-               /* As a first cut, we will allocate new tfm in every call.
-                * later, we will keep the tfm around, in case the key gets
-                * re-used */
-               if (ctx->ctfm == NULL) {
-                       ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
-                                       0, 0);
-               }
-               if (IS_ERR(ctx->ctfm)) {
-                       res = PTR_ERR(ctx->ctfm);
-                       printk(
-                           KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
-                           __func__, res);
-                       ctx->ctfm = NULL;
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return ERR_PTR(res);
-               }
-               if (ctx->ctfm == NULL) {
-                       printk(
-                           KERN_DEBUG "%s: could not allocate crypto tfm\n",
-                           __func__);
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return ERR_PTR(-ENOMEM);
-               }
-               if (ctx->workpage == NULL)
-                       ctx->workpage = alloc_page(GFP_NOFS);
-               if (IS_ERR(ctx->workpage)) {
-                       res = PTR_ERR(ctx->workpage);
-                       printk(
-                           KERN_DEBUG "%s: error (%d) allocating work page\n",
-                           __func__, res);
-                       ctx->workpage = NULL;
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return ERR_PTR(res);
-               }
-               if (ctx->workpage == NULL) {
-                       printk(
-                           KERN_DEBUG "%s: could not allocate work page\n",
-                           __func__);
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return ERR_PTR(-ENOMEM);
-               }
-               ctx->lim = max_ciphertext_len;
-               crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
-               crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
-                       CRYPTO_TFM_REQ_WEAK_KEY);
-
-               /* If we are lucky, we will get a context that is already
-                * set up with the right key. Else, we will have to
-                * set the key */
-               if (!ctx->ctfm_key_is_ready) {
-                       /* Since our crypto objectives for filename encryption
-                        * are pretty weak,
-                        * we directly use the inode master key */
-                       res = crypto_ablkcipher_setkey(ctx->ctfm,
-                                       ctx->key.raw, ctx->key.size);
-                       if (res) {
-                               ext4_put_fname_crypto_ctx(&ctx);
-                               return ERR_PTR(-EIO);
-                       }
-                       ctx->ctfm_key_is_ready = 1;
-               } else {
-                       /* In the current implementation, key should never be
-                        * marked "ready" for a context that has just been
-                        * allocated. So we should never reach here */
-                        BUG();
-               }
-       }
-       if (ctx->htfm == NULL)
-               ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(ctx->htfm)) {
-               res = PTR_ERR(ctx->htfm);
-               printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
-                       __func__, res);
-               ctx->htfm = NULL;
-               ext4_put_fname_crypto_ctx(&ctx);
-               return ERR_PTR(res);
-       }
-       if (ctx->htfm == NULL) {
-               printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
-                               __func__);
-               ext4_put_fname_crypto_ctx(&ctx);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       return ctx;
-}
-
 /**
  * ext4_fname_crypto_round_up() -
  *
@@ -464,44 +262,29 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
        return ((size+blksize-1)/blksize)*blksize;
 }
 
-/**
- * ext4_fname_crypto_namelen_on_disk() -
- */
-int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
-                                     u32 namelen)
+unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen)
 {
-       u32 ciphertext_len;
-       int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
-
-       if (ctx == NULL)
-               return -EIO;
-       if (!(ctx->has_valid_key))
-               return -EACCES;
-       ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
-               EXT4_CRYPTO_BLOCK_SIZE : namelen;
-       ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
-       ciphertext_len = (ciphertext_len > ctx->lim)
-                       ? ctx->lim : ciphertext_len;
-       return (int) ciphertext_len;
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+       int padding = 32;
+
+       if (ci)
+               padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
+       if (ilen < EXT4_CRYPTO_BLOCK_SIZE)
+               ilen = EXT4_CRYPTO_BLOCK_SIZE;
+       return ext4_fname_crypto_round_up(ilen, padding);
 }
 
-/**
- * ext4_fname_crypto_alloc_obuff() -
+/*
+ * ext4_fname_crypto_alloc_buffer() -
  *
  * Allocates an output buffer that is sufficient for the crypto operation
  * specified by the context and the direction.
  */
-int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_crypto_alloc_buffer(struct inode *inode,
                                   u32 ilen, struct ext4_str *crypto_str)
 {
-       unsigned int olen;
-       int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
+       unsigned int olen = ext4_fname_encrypted_size(inode, ilen);
 
-       if (!ctx)
-               return -EIO;
-       if (padding < EXT4_CRYPTO_BLOCK_SIZE)
-               padding = EXT4_CRYPTO_BLOCK_SIZE;
-       olen = ext4_fname_crypto_round_up(ilen, padding);
        crypto_str->len = olen;
        if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
                olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
@@ -529,7 +312,7 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
 /**
  * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
  */
-int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int _ext4_fname_disk_to_usr(struct inode *inode,
                            struct dx_hash_info *hinfo,
                            const struct ext4_str *iname,
                            struct ext4_str *oname)
@@ -537,8 +320,6 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
        char buf[24];
        int ret;
 
-       if (ctx == NULL)
-               return -EIO;
        if (iname->len < 3) {
                /*Check for . and .. */
                if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
@@ -548,8 +329,8 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
                        return oname->len;
                }
        }
-       if (ctx->has_valid_key)
-               return ext4_fname_decrypt(ctx, iname, oname);
+       if (EXT4_I(inode)->i_crypt_info)
+               return ext4_fname_decrypt(inode, iname, oname);
 
        if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
                ret = digest_encode(iname->name, iname->len, oname->name);
@@ -568,7 +349,7 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
        return ret + 1;
 }
 
-int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_disk_to_usr(struct inode *inode,
                           struct dx_hash_info *hinfo,
                           const struct ext4_dir_entry_2 *de,
                           struct ext4_str *oname)
@@ -576,21 +357,20 @@ int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
        struct ext4_str iname = {.name = (unsigned char *) de->name,
                                 .len = de->name_len };
 
-       return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
+       return _ext4_fname_disk_to_usr(inode, hinfo, &iname, oname);
 }
 
 
 /**
  * ext4_fname_usr_to_disk() - converts a filename from user space to disk space
  */
-int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_usr_to_disk(struct inode *inode,
                           const struct qstr *iname,
                           struct ext4_str *oname)
 {
        int res;
+       struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
 
-       if (ctx == NULL)
-               return -EIO;
        if (iname->len < 3) {
                /*Check for . and .. */
                if (iname->name[0] == '.' &&
@@ -601,8 +381,8 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
                        return oname->len;
                }
        }
-       if (ctx->has_valid_key) {
-               res = ext4_fname_encrypt(ctx, iname, oname);
+       if (ci) {
+               res = ext4_fname_encrypt(inode, iname, oname);
                return res;
        }
        /* Without a proper key, a user is not allowed to modify the filenames
@@ -611,109 +391,79 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
        return -EACCES;
 }
 
-/*
- * Calculate the htree hash from a filename from user space
- */
-int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
-                           const struct qstr *iname,
-                           struct dx_hash_info *hinfo)
+int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
+                             int lookup, struct ext4_filename *fname)
 {
-       struct ext4_str tmp;
-       int ret = 0;
-       char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
+       struct ext4_crypt_info *ci;
+       int ret = 0, bigname = 0;
+
+       memset(fname, 0, sizeof(struct ext4_filename));
+       fname->usr_fname = iname;
 
-       if (!ctx ||
+       if (!ext4_encrypted_inode(dir) ||
            ((iname->name[0] == '.') &&
             ((iname->len == 1) ||
              ((iname->name[1] == '.') && (iname->len == 2))))) {
-               ext4fs_dirhash(iname->name, iname->len, hinfo);
+               fname->disk_name.name = (unsigned char *) iname->name;
+               fname->disk_name.len = iname->len;
                return 0;
        }
-
-       if (!ctx->has_valid_key && iname->name[0] == '_') {
-               if (iname->len != 33)
-                       return -ENOENT;
-               ret = digest_decode(iname->name+1, iname->len, buf);
-               if (ret != 24)
-                       return -ENOENT;
-               memcpy(&hinfo->hash, buf, 4);
-               memcpy(&hinfo->minor_hash, buf + 4, 4);
+       ret = ext4_get_encryption_info(dir);
+       if (ret)
+               return ret;
+       ci = EXT4_I(dir)->i_crypt_info;
+       if (ci) {
+               ret = ext4_fname_crypto_alloc_buffer(dir, iname->len,
+                                                    &fname->crypto_buf);
+               if (ret < 0)
+                       return ret;
+               ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf);
+               if (ret < 0)
+                       goto errout;
+               fname->disk_name.name = fname->crypto_buf.name;
+               fname->disk_name.len = fname->crypto_buf.len;
                return 0;
        }
+       if (!lookup)
+               return -EACCES;
 
-       if (!ctx->has_valid_key && iname->name[0] != '_') {
-               if (iname->len > 43)
-                       return -ENOENT;
-               ret = digest_decode(iname->name, iname->len, buf);
-               ext4fs_dirhash(buf, ret, hinfo);
-               return 0;
+       /* We don't have the key and we are doing a lookup; decode the
+        * user-supplied name
+        */
+       if (iname->name[0] == '_')
+               bigname = 1;
+       if ((bigname && (iname->len != 33)) ||
+           (!bigname && (iname->len > 43)))
+               return -ENOENT;
+
+       fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
+       if (fname->crypto_buf.name == NULL)
+               return -ENOMEM;
+       ret = digest_decode(iname->name + bigname, iname->len - bigname,
+                           fname->crypto_buf.name);
+       if (ret < 0) {
+               ret = -ENOENT;
+               goto errout;
        }
-
-       /* First encrypt the plaintext name */
-       ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
-       if (ret < 0)
-               return ret;
-
-       ret = ext4_fname_encrypt(ctx, iname, &tmp);
-       if (ret >= 0) {
-               ext4fs_dirhash(tmp.name, tmp.len, hinfo);
-               ret = 0;
+       fname->crypto_buf.len = ret;
+       if (bigname) {
+               memcpy(&fname->hinfo.hash, fname->crypto_buf.name, 4);
+               memcpy(&fname->hinfo.minor_hash, fname->crypto_buf.name + 4, 4);
+       } else {
+               fname->disk_name.name = fname->crypto_buf.name;
+               fname->disk_name.len = fname->crypto_buf.len;
        }
-
-       ext4_fname_crypto_free_buffer(&tmp);
+       return 0;
+errout:
+       kfree(fname->crypto_buf.name);
+       fname->crypto_buf.name = NULL;
        return ret;
 }
 
-int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
-                    int len, const char * const name,
-                    struct ext4_dir_entry_2 *de)
+void ext4_fname_free_filename(struct ext4_filename *fname)
 {
-       int ret = -ENOENT;
-       int bigname = (*name == '_');
-
-       if (ctx->has_valid_key) {
-               if (cstr->name == NULL) {
-                       struct qstr istr;
-
-                       ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
-                       if (ret < 0)
-                               goto errout;
-                       istr.name = name;
-                       istr.len = len;
-                       ret = ext4_fname_encrypt(ctx, &istr, cstr);
-                       if (ret < 0)
-                               goto errout;
-               }
-       } else {
-               if (cstr->name == NULL) {
-                       cstr->name = kmalloc(32, GFP_KERNEL);
-                       if (cstr->name == NULL)
-                               return -ENOMEM;
-                       if ((bigname && (len != 33)) ||
-                           (!bigname && (len > 43)))
-                               goto errout;
-                       ret = digest_decode(name+bigname, len-bigname,
-                                           cstr->name);
-                       if (ret < 0) {
-                               ret = -ENOENT;
-                               goto errout;
-                       }
-                       cstr->len = ret;
-               }
-               if (bigname) {
-                       if (de->name_len < 16)
-                               return 0;
-                       ret = memcmp(de->name + de->name_len - 16,
-                                    cstr->name + 8, 16);
-                       return (ret == 0) ? 1 : 0;
-               }
-       }
-       if (de->name_len != cstr->len)
-               return 0;
-       ret = memcmp(de->name, cstr->name, cstr->len);
-       return (ret == 0) ? 1 : 0;
-errout:
-       kfree(cstr->name);
-       cstr->name = NULL;
-       return ret;
+       kfree(fname->crypto_buf.name);
+       fname->crypto_buf.name = NULL;
+       fname->usr_fname = NULL;
+       fname->disk_name.name = NULL;
 }
index 52170d0..442d24e 100644 (file)
@@ -84,14 +84,38 @@ out:
        return res;
 }
 
-/**
- * ext4_generate_encryption_key() - generates an encryption key
- * @inode: The inode to generate the encryption key for.
- */
-int ext4_generate_encryption_key(struct inode *inode)
+void ext4_free_crypt_info(struct ext4_crypt_info *ci)
+{
+       if (!ci)
+               return;
+
+       if (ci->ci_keyring_key)
+               key_put(ci->ci_keyring_key);
+       crypto_free_ablkcipher(ci->ci_ctfm);
+       kmem_cache_free(ext4_crypt_info_cachep, ci);
+}
+
+void ext4_free_encryption_info(struct inode *inode,
+                              struct ext4_crypt_info *ci)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct ext4_crypt_info *prev;
+
+       if (ci == NULL)
+               ci = ACCESS_ONCE(ei->i_crypt_info);
+       if (ci == NULL)
+               return;
+       prev = cmpxchg(&ei->i_crypt_info, ci, NULL);
+       if (prev != ci)
+               return;
+
+       ext4_free_crypt_info(ci);
+}
+
+int _ext4_get_encryption_info(struct inode *inode)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
-       struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
+       struct ext4_crypt_info *crypt_info;
        char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
                                 (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
        struct key *keyring_key = NULL;
@@ -99,31 +123,76 @@ int ext4_generate_encryption_key(struct inode *inode)
        struct ext4_encryption_context ctx;
        struct user_key_payload *ukp;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
-                                EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-                                &ctx, sizeof(ctx));
+       struct crypto_ablkcipher *ctfm;
+       const char *cipher_str;
+       char raw_key[EXT4_MAX_KEY_SIZE];
+       char mode;
+       int res;
 
-       if (res != sizeof(ctx)) {
-               if (res > 0)
-                       res = -EINVAL;
-               goto out;
+       if (!ext4_read_workqueue) {
+               res = ext4_init_crypto();
+               if (res)
+                       return res;
+       }
+
+retry:
+       crypt_info = ACCESS_ONCE(ei->i_crypt_info);
+       if (crypt_info) {
+               if (!crypt_info->ci_keyring_key ||
+                   key_validate(crypt_info->ci_keyring_key) == 0)
+                       return 0;
+               ext4_free_encryption_info(inode, crypt_info);
+               goto retry;
        }
+
+       res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+                                EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+                                &ctx, sizeof(ctx));
+       if (res < 0) {
+               if (!DUMMY_ENCRYPTION_ENABLED(sbi))
+                       return res;
+               ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+               ctx.filenames_encryption_mode =
+                       EXT4_ENCRYPTION_MODE_AES_256_CTS;
+               ctx.flags = 0;
+       } else if (res != sizeof(ctx))
+               return -EINVAL;
        res = 0;
 
-       ei->i_crypt_policy_flags = ctx.flags;
+       crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL);
+       if (!crypt_info)
+               return -ENOMEM;
+
+       crypt_info->ci_flags = ctx.flags;
+       crypt_info->ci_data_mode = ctx.contents_encryption_mode;
+       crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
+       crypt_info->ci_ctfm = NULL;
+       crypt_info->ci_keyring_key = NULL;
+       memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
+              sizeof(crypt_info->ci_master_key));
        if (S_ISREG(inode->i_mode))
-               crypt_key->mode = ctx.contents_encryption_mode;
+               mode = crypt_info->ci_data_mode;
        else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               crypt_key->mode = ctx.filenames_encryption_mode;
-       else {
-               printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
+               mode = crypt_info->ci_filename_mode;
+       else
                BUG();
+       switch (mode) {
+       case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+               cipher_str = "xts(aes)";
+               break;
+       case EXT4_ENCRYPTION_MODE_AES_256_CTS:
+               cipher_str = "cts(cbc(aes))";
+               break;
+       default:
+               printk_once(KERN_WARNING
+                           "ext4: unsupported key mode %d (ino %u)\n",
+                           mode, (unsigned) inode->i_ino);
+               res = -ENOKEY;
+               goto out;
        }
-       crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
-       BUG_ON(!crypt_key->size);
        if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
-               memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
-               goto out;
+               memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+               goto got_key;
        }
        memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
               EXT4_KEY_DESC_PREFIX_SIZE);
@@ -138,6 +207,7 @@ int ext4_generate_encryption_key(struct inode *inode)
                keyring_key = NULL;
                goto out;
        }
+       crypt_info->ci_keyring_key = keyring_key;
        BUG_ON(keyring_key->type != &key_type_logon);
        ukp = ((struct user_key_payload *)keyring_key->payload.data);
        if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
@@ -148,19 +218,43 @@ int ext4_generate_encryption_key(struct inode *inode)
        BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
                     EXT4_KEY_DERIVATION_NONCE_SIZE);
        BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
-       res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
+       res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
+                                 raw_key);
+got_key:
+       ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+       if (!ctfm || IS_ERR(ctfm)) {
+               res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+               printk(KERN_DEBUG
+                      "%s: error %d (inode %u) allocating crypto tfm\n",
+                      __func__, res, (unsigned) inode->i_ino);
+               goto out;
+       }
+       crypt_info->ci_ctfm = ctfm;
+       crypto_ablkcipher_clear_flags(ctfm, ~0);
+       crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+                            CRYPTO_TFM_REQ_WEAK_KEY);
+       res = crypto_ablkcipher_setkey(ctfm, raw_key,
+                                      ext4_encryption_key_size(mode));
+       if (res)
+               goto out;
+       memzero_explicit(raw_key, sizeof(raw_key));
+       if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
+               ext4_free_crypt_info(crypt_info);
+               goto retry;
+       }
+       return 0;
+
 out:
-       if (keyring_key)
-               key_put(keyring_key);
-       if (res < 0)
-               crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
+       if (res == -ENOKEY)
+               res = 0;
+       ext4_free_crypt_info(crypt_info);
+       memzero_explicit(raw_key, sizeof(raw_key));
        return res;
 }
 
 int ext4_has_encryption_key(struct inode *inode)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
-       struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
 
-       return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
+       return (ei->i_crypt_info != NULL);
 }
index a6d6291..02c4e5d 100644 (file)
@@ -51,6 +51,10 @@ static int ext4_create_encryption_context_from_policy(
        struct ext4_encryption_context ctx;
        int res = 0;
 
+       res = ext4_convert_inline_data(inode);
+       if (res)
+               return res;
+
        ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
        memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
               EXT4_KEY_DESCRIPTOR_SIZE);
@@ -89,6 +93,8 @@ int ext4_process_policy(const struct ext4_encryption_policy *policy,
                return -EINVAL;
 
        if (!ext4_inode_has_encryption_context(inode)) {
+               if (!S_ISDIR(inode->i_mode))
+                       return -EINVAL;
                if (!ext4_empty_dir(inode))
                        return -ENOTEMPTY;
                return ext4_create_encryption_context_from_policy(inode,
@@ -126,7 +132,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
 int ext4_is_child_context_consistent_with_parent(struct inode *parent,
                                                 struct inode *child)
 {
-       struct ext4_encryption_context parent_ctx, child_ctx;
+       struct ext4_crypt_info *parent_ci, *child_ci;
        int res;
 
        if ((parent == NULL) || (child == NULL)) {
@@ -136,26 +142,28 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
        /* no restrictions if the parent directory is not encrypted */
        if (!ext4_encrypted_inode(parent))
                return 1;
-       res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
-                            EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-                            &parent_ctx, sizeof(parent_ctx));
-       if (res != sizeof(parent_ctx))
-               return 0;
        /* if the child directory is not encrypted, this is always a problem */
        if (!ext4_encrypted_inode(child))
                return 0;
-       res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
-                            EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-                            &child_ctx, sizeof(child_ctx));
-       if (res != sizeof(child_ctx))
+       res = ext4_get_encryption_info(parent);
+       if (res)
                return 0;
-       return (memcmp(parent_ctx.master_key_descriptor,
-                      child_ctx.master_key_descriptor,
+       res = ext4_get_encryption_info(child);
+       if (res)
+               return 0;
+       parent_ci = EXT4_I(parent)->i_crypt_info;
+       child_ci = EXT4_I(child)->i_crypt_info;
+       if (!parent_ci && !child_ci)
+               return 1;
+       if (!parent_ci || !child_ci)
+               return 0;
+
+       return (memcmp(parent_ci->ci_master_key,
+                      child_ci->ci_master_key,
                       EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
-               (parent_ctx.contents_encryption_mode ==
-                child_ctx.contents_encryption_mode) &&
-               (parent_ctx.filenames_encryption_mode ==
-                child_ctx.filenames_encryption_mode));
+               (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
+               (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
+               (parent_ci->ci_flags == child_ci->ci_flags));
 }
 
 /**
@@ -168,31 +176,40 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
 int ext4_inherit_context(struct inode *parent, struct inode *child)
 {
        struct ext4_encryption_context ctx;
-       int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
-                                EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-                                &ctx, sizeof(ctx));
+       struct ext4_crypt_info *ci;
+       int res;
+
+       res = ext4_get_encryption_info(parent);
+       if (res < 0)
+               return res;
+       ci = EXT4_I(parent)->i_crypt_info;
+       if (ci == NULL)
+               return -ENOKEY;
 
-       if (res != sizeof(ctx)) {
-               if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
-                       ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
-                       ctx.contents_encryption_mode =
-                               EXT4_ENCRYPTION_MODE_AES_256_XTS;
-                       ctx.filenames_encryption_mode =
-                               EXT4_ENCRYPTION_MODE_AES_256_CTS;
-                       ctx.flags = 0;
-                       memset(ctx.master_key_descriptor, 0x42,
-                              EXT4_KEY_DESCRIPTOR_SIZE);
-                       res = 0;
-               } else {
-                       goto out;
-               }
+       ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
+       if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
+               ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+               ctx.filenames_encryption_mode =
+                       EXT4_ENCRYPTION_MODE_AES_256_CTS;
+               ctx.flags = 0;
+               memset(ctx.master_key_descriptor, 0x42,
+                      EXT4_KEY_DESCRIPTOR_SIZE);
+               res = 0;
+       } else {
+               ctx.contents_encryption_mode = ci->ci_data_mode;
+               ctx.filenames_encryption_mode = ci->ci_filename_mode;
+               ctx.flags = ci->ci_flags;
+               memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+                      EXT4_KEY_DESCRIPTOR_SIZE);
        }
        get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
        res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
                             EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
                             sizeof(ctx), 0);
-out:
-       if (!res)
+       if (!res) {
                ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
+               ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA);
+               res = ext4_get_encryption_info(child);
+       }
        return res;
 }
index 5665d82..f9e1491 100644 (file)
@@ -110,7 +110,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bh = NULL;
        int dir_has_error = 0;
-       struct ext4_fname_crypto_ctx *enc_ctx = NULL;
        struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
 
        if (is_dx_dir(inode)) {
@@ -134,16 +133,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
                        return err;
        }
 
-       enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
-       if (IS_ERR(enc_ctx))
-               return PTR_ERR(enc_ctx);
-       if (enc_ctx) {
-               err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
+       if (ext4_encrypted_inode(inode)) {
+               err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN,
                                                     &fname_crypto_str);
-               if (err < 0) {
-                       ext4_put_fname_crypto_ctx(&enc_ctx);
+               if (err < 0)
                        return err;
-               }
        }
 
        offset = ctx->pos & (sb->s_blocksize - 1);
@@ -239,17 +233,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
                        offset += ext4_rec_len_from_disk(de->rec_len,
                                        sb->s_blocksize);
                        if (le32_to_cpu(de->inode)) {
-                               if (enc_ctx == NULL) {
-                                       /* Directory is not encrypted */
+                               if (!ext4_encrypted_inode(inode)) {
                                        if (!dir_emit(ctx, de->name,
                                            de->name_len,
                                            le32_to_cpu(de->inode),
                                            get_dtype(sb, de->file_type)))
                                                goto done;
                                } else {
+                                       int save_len = fname_crypto_str.len;
+
                                        /* Directory is encrypted */
-                                       err = ext4_fname_disk_to_usr(enc_ctx,
+                                       err = ext4_fname_disk_to_usr(inode,
                                                NULL, de, &fname_crypto_str);
+                                       fname_crypto_str.len = save_len;
                                        if (err < 0)
                                                goto errout;
                                        if (!dir_emit(ctx,
@@ -272,7 +268,6 @@ done:
        err = 0;
 errout:
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-       ext4_put_fname_crypto_ctx(&enc_ctx);
        ext4_fname_crypto_free_buffer(&fname_crypto_str);
 #endif
        brelse(bh);
@@ -598,6 +593,13 @@ finished:
        return 0;
 }
 
+static int ext4_dir_open(struct inode * inode, struct file * filp)
+{
+       if (ext4_encrypted_inode(inode))
+               return ext4_get_encryption_info(inode) ? -EACCES : 0;
+       return 0;
+}
+
 static int ext4_release_dir(struct inode *inode, struct file *filp)
 {
        if (filp->private_data)
@@ -640,5 +642,6 @@ const struct file_operations ext4_dir_operations = {
        .compat_ioctl   = ext4_compat_ioctl,
 #endif
        .fsync          = ext4_sync_file,
+       .open           = ext4_dir_open,
        .release        = ext4_release_dir,
 };
index 0a3b72d..f5e9f04 100644 (file)
 #define ext_debug(fmt, ...)    no_printk(fmt, ##__VA_ARGS__)
 #endif
 
-#define EXT4_ERROR_INODE(inode, fmt, a...) \
-       ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
-
-#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...)                        \
-       ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
-
-#define EXT4_ERROR_FILE(file, block, fmt, a...)                                \
-       ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
-
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
 
@@ -90,6 +81,11 @@ typedef __u32 ext4_lblk_t;
 /* data type for block group number */
 typedef unsigned int ext4_group_t;
 
+enum SHIFT_DIRECTION {
+       SHIFT_LEFT = 0,
+       SHIFT_RIGHT,
+};
+
 /*
  * Flags used in mballoc's allocation_context flags field.
  *
@@ -911,7 +907,6 @@ struct ext4_inode_info {
 
        /* on-disk additional length */
        __u16 i_extra_isize;
-       char i_crypt_policy_flags;
 
        /* Indicate the inline data space. */
        u16 i_inline_off;
@@ -955,7 +950,7 @@ struct ext4_inode_info {
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        /* Encryption params */
-       struct ext4_encryption_key i_encryption_key;
+       struct ext4_crypt_info *i_crypt_info;
 #endif
 };
 
@@ -1374,12 +1369,6 @@ struct ext4_sb_info {
        struct ratelimit_state s_err_ratelimit_state;
        struct ratelimit_state s_warning_ratelimit_state;
        struct ratelimit_state s_msg_ratelimit_state;
-
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       /* Encryption */
-       uint32_t s_file_encryption_mode;
-       uint32_t s_dir_encryption_mode;
-#endif
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1838,6 +1827,17 @@ struct dx_hash_info
  */
 #define HASH_NB_ALWAYS         1
 
+struct ext4_filename {
+       const struct qstr *usr_fname;
+       struct ext4_str disk_name;
+       struct dx_hash_info hinfo;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       struct ext4_str crypto_buf;
+#endif
+};
+
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p)  ((p)->disk_name.len)
 
 /*
  * Describe an inode's exact location on disk and in memory
@@ -2054,6 +2054,7 @@ int ext4_get_policy(struct inode *inode,
                    struct ext4_encryption_policy *policy);
 
 /* crypto.c */
+extern struct kmem_cache *ext4_crypt_info_cachep;
 bool ext4_valid_contents_enc_mode(uint32_t mode);
 uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
 extern struct workqueue_struct *ext4_read_workqueue;
@@ -2085,57 +2086,84 @@ static inline int ext4_sb_has_crypto(struct super_block *sb)
 /* crypto_fname.c */
 bool ext4_valid_filenames_enc_mode(uint32_t mode);
 u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
-int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
+unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen);
+int ext4_fname_crypto_alloc_buffer(struct inode *inode,
                                   u32 ilen, struct ext4_str *crypto_str);
-int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int _ext4_fname_disk_to_usr(struct inode *inode,
                            struct dx_hash_info *hinfo,
                            const struct ext4_str *iname,
                            struct ext4_str *oname);
-int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_disk_to_usr(struct inode *inode,
                           struct dx_hash_info *hinfo,
                           const struct ext4_dir_entry_2 *de,
                           struct ext4_str *oname);
-int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_usr_to_disk(struct inode *inode,
                           const struct qstr *iname,
                           struct ext4_str *oname);
-int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
-                          const struct qstr *iname,
-                          struct dx_hash_info *hinfo);
-int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
-                                     u32 namelen);
-int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
-                    int len, const char * const name,
-                    struct ext4_dir_entry_2 *de);
-
-
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
-struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
-                                                       u32 max_len);
 void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
+int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
+                             int lookup, struct ext4_filename *fname);
+void ext4_fname_free_filename(struct ext4_filename *fname);
 #else
 static inline
-void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
-static inline
-struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
-                                                       u32 max_len)
+int ext4_setup_fname_crypto(struct inode *inode)
 {
-       return NULL;
+       return 0;
 }
 static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
+/*
+ * Encryption compiled out: the on-disk name is the caller's name, so just
+ * alias it. Nothing is allocated; @dir and @lookup are unused and
+ * fname->hinfo is left untouched. Always returns 0.
+ */
+static inline int ext4_fname_setup_filename(struct inode *dir,
+                                    const struct qstr *iname,
+                                    int lookup, struct ext4_filename *fname)
+{
+       fname->usr_fname = iname;
+       fname->disk_name.name = (unsigned char *) iname->name;
+       fname->disk_name.len = iname->len;
+       return 0;
+}
+static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
 #endif
 
 
 /* crypto_key.c */
-int ext4_generate_encryption_key(struct inode *inode);
+void ext4_free_crypt_info(struct ext4_crypt_info *ci);
+void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
+int _ext4_get_encryption_info(struct inode *inode);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_has_encryption_key(struct inode *inode);
+
+/*
+ * Make sure @inode has usable crypt info. The slow path
+ * _ext4_get_encryption_info() is taken when nothing is cached yet, or when
+ * the cached keyring key is flagged invalidated, revoked or dead; otherwise
+ * the cached info is kept and 0 is returned.
+ */
+static inline int ext4_get_encryption_info(struct inode *inode)
+{
+       struct ext4_crypt_info *cached = EXT4_I(inode)->i_crypt_info;
+       struct key *kr_key;
+
+       /* Nothing cached yet: go and build it. */
+       if (!cached)
+               return _ext4_get_encryption_info(inode);
+
+       /* Cached info without a keyring key is accepted as is. */
+       kr_key = cached->ci_keyring_key;
+       if (!kr_key)
+               return 0;
+
+       /* A key that has gone away forces a refresh. */
+       if (kr_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                            (1 << KEY_FLAG_REVOKED) |
+                            (1 << KEY_FLAG_DEAD)))
+               return _ext4_get_encryption_info(inode);
+
+       return 0;
+}
+
+/* Return the crypt info currently cached on @inode; may be NULL. */
+static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
+{
+       return EXT4_I(inode)->i_crypt_info;
+}
+
 #else
 static inline int ext4_has_encryption_key(struct inode *inode)
 {
        return 0;
 }
+/*
+ * Encryption compiled out: there is nothing to set up (always 0) and no
+ * crypt info ever exists (always NULL).
+ */
+static inline int ext4_get_encryption_info(struct inode *inode)
+{
+       return 0;
+}
+static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
+{
+       return NULL;
+}
 #endif
 
 
@@ -2156,14 +2184,13 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
                             struct buffer_head *bh,
                             void *buf, int buf_size,
-                            const char *name, int namelen,
+                            struct ext4_filename *fname,
                             struct ext4_dir_entry_2 **dest_de);
 int ext4_insert_dentry(struct inode *dir,
-                       struct inode *inode,
-                       struct ext4_dir_entry_2 *de,
-                       int buf_size,
-                      const struct qstr *iname,
-                       const char *name, int namelen);
+                      struct inode *inode,
+                      struct ext4_dir_entry_2 *de,
+                      int buf_size,
+                      struct ext4_filename *fname);
 static inline void ext4_update_dx_flag(struct inode *inode)
 {
        if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
@@ -2317,13 +2344,14 @@ extern int ext4_orphan_add(handle_t *, struct inode *);
 extern int ext4_orphan_del(handle_t *, struct inode *);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
                                __u32 start_minor_hash, __u32 *next_hash);
-extern int search_dir(struct buffer_head *bh,
-                     char *search_buf,
-                     int buf_size,
-                     struct inode *dir,
-                     const struct qstr *d_name,
-                     unsigned int offset,
-                     struct ext4_dir_entry_2 **res_dir);
+extern int ext4_search_dir(struct buffer_head *bh,
+                          char *search_buf,
+                          int buf_size,
+                          struct inode *dir,
+                          struct ext4_filename *fname,
+                          const struct qstr *d_name,
+                          unsigned int offset,
+                          struct ext4_dir_entry_2 **res_dir);
 extern int ext4_generic_delete_entry(handle_t *handle,
                                     struct inode *dir,
                                     struct ext4_dir_entry_2 *de_del,
@@ -2368,6 +2396,9 @@ void __ext4_abort(struct super_block *, const char *, unsigned int,
 extern __printf(4, 5)
 void __ext4_warning(struct super_block *, const char *, unsigned int,
                    const char *, ...);
+extern __printf(4, 5)
+void __ext4_warning_inode(const struct inode *inode, const char *function,
+                         unsigned int line, const char *fmt, ...);
 extern __printf(3, 4)
 void __ext4_msg(struct super_block *, const char *, const char *, ...);
 extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
@@ -2378,6 +2409,15 @@ void __ext4_grp_locked_error(const char *, unsigned int,
                             unsigned long, ext4_fsblk_t,
                             const char *, ...);
 
+/*
+ * Convenience wrappers that fill in the calling function and line number.
+ * EXT4_ERROR_INODE() reports block 0; the other two take the block
+ * explicitly.
+ */
+#define EXT4_ERROR_INODE(inode, fmt, a...) \
+       ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
+
+#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...)                        \
+       ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
+
+#define EXT4_ERROR_FILE(file, block, fmt, a...)                                \
+       ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
+
 #ifdef CONFIG_PRINTK
 
 #define ext4_error_inode(inode, func, line, block, fmt, ...)           \
@@ -2390,6 +2430,8 @@ void __ext4_grp_locked_error(const char *, unsigned int,
        __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
 #define ext4_warning(sb, fmt, ...)                                     \
        __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+/* Warn about @inode, recording the calling function and line number. */
+#define ext4_warning_inode(inode, fmt, ...)                            \
+       __ext4_warning_inode(inode, __func__, __LINE__, fmt, ##__VA_ARGS__)
 #define ext4_msg(sb, level, fmt, ...)                          \
        __ext4_msg(sb, level, fmt, ##__VA_ARGS__)
 #define dump_mmp_msg(sb, mmp, msg)                                     \
@@ -2425,6 +2467,11 @@ do {                                                                     \
        no_printk(fmt, ##__VA_ARGS__);                                  \
        __ext4_warning(sb, "", 0, " ");                                 \
 } while (0)
+/*
+ * Variant that discards the caller's message: fmt and its arguments are only
+ * handed to no_printk(), while __ext4_warning_inode() gets an empty function
+ * name, line 0 and a blank format.
+ */
+#define ext4_warning_inode(inode, fmt, ...)                            \
+do {                                                                   \
+       no_printk(fmt, ##__VA_ARGS__);                                  \
+       __ext4_warning_inode(inode, "", 0, " ");                        \
+} while (0)
 #define ext4_msg(sb, level, fmt, ...)                                  \
 do {                                                                   \
        no_printk(fmt, ##__VA_ARGS__);                                  \
@@ -2768,7 +2815,9 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
 extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
                                         unsigned len, unsigned copied,
                                         struct page *page);
-extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
+extern int ext4_try_add_inline_entry(handle_t *handle,
+                                    struct ext4_filename *fname,
+                                    struct dentry *dentry,
                                     struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
                                      struct inode *parent,
@@ -2782,6 +2831,7 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
                                   __u32 start_hash, __u32 start_minor_hash,
                                   int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+                                       struct ext4_filename *fname,
                                        const struct qstr *d_name,
                                        struct ext4_dir_entry_2 **res_dir,
                                        int *has_inline_data);
@@ -2913,6 +2963,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
 extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
                                struct inode *inode2, ext4_lblk_t lblk1,
                             ext4_lblk_t lblk2,  ext4_lblk_t count,
index d75159c..ac7d4e8 100644 (file)
@@ -66,24 +66,39 @@ struct ext4_encryption_context {
 #define EXT4_KEY_DESC_PREFIX "ext4:"
 #define EXT4_KEY_DESC_PREFIX_SIZE 5
 
+/* This is passed in from userspace into the kernel keyring */
 struct ext4_encryption_key {
-       uint32_t mode;
-       char raw[EXT4_MAX_KEY_SIZE];
-       uint32_t size;
+        __u32 mode;
+        char raw[EXT4_MAX_KEY_SIZE];
+        __u32 size;
+} __attribute__((__packed__));
+
+struct ext4_crypt_info {
+       char            ci_data_mode;
+       char            ci_filename_mode;
+       char            ci_flags;
+       struct crypto_ablkcipher *ci_ctfm;
+       struct key      *ci_keyring_key;
+       char            ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
 };
 
 #define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL             0x00000001
-#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL     0x00000002
+#define EXT4_WRITE_PATH_FL                           0x00000002
 
 struct ext4_crypto_ctx {
-       struct crypto_tfm *tfm;         /* Crypto API context */
-       struct page *bounce_page;       /* Ciphertext page on write path */
-       struct page *control_page;      /* Original page on write path */
-       struct bio *bio;                /* The bio for this context */
-       struct work_struct work;        /* Work queue for read complete path */
-       struct list_head free_list;     /* Free list */
-       int flags;                      /* Flags */
-       int mode;                       /* Encryption mode for tfm */
+       union {
+               struct {
+                       struct page *bounce_page;       /* Ciphertext page */
+                       struct page *control_page;      /* Original page  */
+               } w;
+               struct {
+                       struct bio *bio;
+                       struct work_struct work;
+               } r;
+               struct list_head free_list;     /* Free list */
+       };
+       char flags;                      /* Flags */
+       char mode;                       /* Encryption mode for tfm */
 };
 
 struct ext4_completion_result {
@@ -121,18 +136,6 @@ struct ext4_str {
        u32 len;
 };
 
-struct ext4_fname_crypto_ctx {
-       u32 lim;
-       char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
-       struct crypto_ablkcipher *ctfm;
-       struct crypto_hash *htfm;
-       struct page *workpage;
-       struct ext4_encryption_key key;
-       unsigned flags : 8;
-       unsigned has_valid_key : 1;
-       unsigned ctfm_key_is_ready : 1;
-};
-
 /**
  * For encrypted symlinks, the ciphertext length is stored at the beginning
  * of the string in little-endian format.
index e003a1e..d86d262 100644 (file)
@@ -4456,6 +4456,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ar.flags |= EXT4_MB_HINT_NOPREALLOC;
        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+       if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+               ar.flags |= EXT4_MB_USE_RESERVED;
        newblock = ext4_mb_new_blocks(handle, &ar, &err);
        if (!newblock)
                goto out2;
@@ -4663,6 +4665,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
+       int depth = 0;
        struct ext4_map_blocks map;
        unsigned int credits;
        loff_t epos;
@@ -4677,13 +4680,32 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
+       /* Wait for all existing dio workers; newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, len);
+       /*
+        * We can only call ext_depth() on extent based inodes
+        */
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               depth = ext_depth(inode);
+       else
+               depth = -1;
 
 retry:
        while (ret >= 0 && len) {
+               /*
+                * Recalculate credits when extent tree depth changes.
+                */
+               if (depth >= 0 && depth != ext_depth(inode)) {
+                       credits = ext4_chunk_trans_blocks(inode, len);
+                       depth = ext_depth(inode);
+               }
+
                handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
                                            credits);
                if (IS_ERR(handle)) {
@@ -4725,6 +4747,8 @@ retry:
                goto retry;
        }
 
+       ext4_inode_resume_unlocked_dio(inode);
+
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4912,12 +4936,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
         * bug we should fix....
         */
        if (ext4_encrypted_inode(inode) &&
-           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
+           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
+                    FALLOC_FL_ZERO_RANGE)))
                return -EOPNOTSUPP;
 
        /* Return error if mode is not supported */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
+                    FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4930,6 +4956,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_COLLAPSE_RANGE)
                return ext4_collapse_range(inode, offset, len);
 
+       if (mode & FALLOC_FL_INSERT_RANGE)
+               return ext4_insert_range(inode, offset, len);
+
        if (mode & FALLOC_FL_ZERO_RANGE)
                return ext4_zero_range(file, offset, len, mode);
 
@@ -5224,13 +5253,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
 /*
  * ext4_ext_shift_path_extents:
  * Shift the extents of a path structure lying between path[depth].p_ext
- * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
- * from starting block for each extent.
+ * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
+ * if it is right shift or left shift operation.
  */
 static int
 ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                            struct inode *inode, handle_t *handle,
-                           ext4_lblk_t *start)
+                           enum SHIFT_DIRECTION SHIFT)
 {
        int depth, err = 0;
        struct ext4_extent *ex_start, *ex_last;
@@ -5252,19 +5281,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                update = 1;
 
-                       *start = le32_to_cpu(ex_last->ee_block) +
-                               ext4_ext_get_actual_len(ex_last);
-
                        while (ex_start <= ex_last) {
-                               le32_add_cpu(&ex_start->ee_block, -shift);
-                               /* Try to merge to the left. */
-                               if ((ex_start >
-                                    EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
-                                   ext4_ext_try_to_merge_right(inode,
-                                                       path, ex_start - 1))
+                               if (SHIFT == SHIFT_LEFT) {
+                                       le32_add_cpu(&ex_start->ee_block,
+                                               -shift);
+                                       /* Try to merge to the left. */
+                                       if ((ex_start >
+                                           EXT_FIRST_EXTENT(path[depth].p_hdr))
+                                           &&
+                                           ext4_ext_try_to_merge_right(inode,
+                                           path, ex_start - 1))
+                                               ex_last--;
+                                       else
+                                               ex_start++;
+                               } else {
+                                       le32_add_cpu(&ex_last->ee_block, shift);
+                                       ext4_ext_try_to_merge_right(inode, path,
+                                               ex_last);
                                        ex_last--;
-                               else
-                                       ex_start++;
+                               }
                        }
                        err = ext4_ext_dirty(handle, inode, path + depth);
                        if (err)
@@ -5279,7 +5314,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (err)
                        goto out;
 
-               le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+               if (SHIFT == SHIFT_LEFT)
+                       le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+               else
+                       le32_add_cpu(&path[depth].p_idx->ei_block, shift);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto out;
@@ -5297,19 +5335,20 @@ out:
 
 /*
  * ext4_ext_shift_extents:
- * All the extents which lies in the range from start to the last allocated
- * block for the file are shifted downwards by shift blocks.
+ * All the extents which lie in the range from @start to the last allocated
+ * block for the @inode are shifted either towards left or right (depending
+ * upon @SHIFT) by @shift blocks.
  * On success, 0 is returned, error otherwise.
  */
 static int
 ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
-                      ext4_lblk_t start, ext4_lblk_t shift)
+                      ext4_lblk_t start, ext4_lblk_t shift,
+                      enum SHIFT_DIRECTION SHIFT)
 {
        struct ext4_ext_path *path;
        int ret = 0, depth;
        struct ext4_extent *extent;
-       ext4_lblk_t stop_block;
-       ext4_lblk_t ex_start, ex_end;
+       ext4_lblk_t stop, *iterator, ex_start, ex_end;
 
        /* Let path point to the last extent */
        path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
@@ -5321,58 +5360,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
        if (!extent)
                goto out;
 
-       stop_block = le32_to_cpu(extent->ee_block) +
+       stop = le32_to_cpu(extent->ee_block) +
                        ext4_ext_get_actual_len(extent);
 
-       /* Nothing to shift, if hole is at the end of file */
-       if (start >= stop_block)
-               goto out;
+       /*
+        * In case of left shift, don't start shifting extents until we make
+        * sure the hole is big enough to accommodate the shift.
+        */
+       if (SHIFT == SHIFT_LEFT) {
+               path = ext4_find_extent(inode, start - 1, &path, 0);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
+               depth = path->p_depth;
+               extent =  path[depth].p_ext;
+               if (extent) {
+                       ex_start = le32_to_cpu(extent->ee_block);
+                       ex_end = le32_to_cpu(extent->ee_block) +
+                               ext4_ext_get_actual_len(extent);
+               } else {
+                       ex_start = 0;
+                       ex_end = 0;
+               }
 
-       /*
-        * Don't start shifting extents until we make sure the hole is big
-        * enough to accomodate the shift.
-        */
-       path = ext4_find_extent(inode, start - 1, &path, 0);
-       if (IS_ERR(path))
-               return PTR_ERR(path);
-       depth = path->p_depth;
-       extent =  path[depth].p_ext;
-       if (extent) {
-               ex_start = le32_to_cpu(extent->ee_block);
-               ex_end = le32_to_cpu(extent->ee_block) +
-                       ext4_ext_get_actual_len(extent);
-       } else {
-               ex_start = 0;
-               ex_end = 0;
+               if ((start == ex_start && shift > ex_start) ||
+                   (shift > start - ex_end)) {
+                       ext4_ext_drop_refs(path);
+                       kfree(path);
+                       return -EINVAL;
+               }
        }
 
-       if ((start == ex_start && shift > ex_start) ||
-           (shift > start - ex_end))
-               return -EINVAL;
+       /*
+        * In case of left shift, iterator points to start and it is increased
+        * till we reach stop. In case of right shift, iterator points to stop
+        * and it is decreased till we reach start.
+        */
+       if (SHIFT == SHIFT_LEFT)
+               iterator = &start;
+       else
+               iterator = &stop;
 
        /* Its safe to start updating extents */
-       while (start < stop_block) {
-               path = ext4_find_extent(inode, start, &path, 0);
+       while (start < stop) {
+               path = ext4_find_extent(inode, *iterator, &path, 0);
                if (IS_ERR(path))
                        return PTR_ERR(path);
                depth = path->p_depth;
                extent = path[depth].p_ext;
                if (!extent) {
                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-                                        (unsigned long) start);
+                                        (unsigned long) *iterator);
                        return -EIO;
                }
-               if (start > le32_to_cpu(extent->ee_block)) {
+               if (SHIFT == SHIFT_LEFT && *iterator >
+                   le32_to_cpu(extent->ee_block)) {
                        /* Hole, move to the next extent */
                        if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
                                path[depth].p_ext++;
                        } else {
-                               start = ext4_ext_next_allocated_block(path);
+                               *iterator = ext4_ext_next_allocated_block(path);
                                continue;
                        }
                }
+
+               if (SHIFT == SHIFT_LEFT) {
+                       extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+                       *iterator = le32_to_cpu(extent->ee_block) +
+                                       ext4_ext_get_actual_len(extent);
+               } else {
+                       extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
+                       *iterator =  le32_to_cpu(extent->ee_block) > 0 ?
+                               le32_to_cpu(extent->ee_block) - 1 : 0;
+                       /* Update path extent in case we need to stop */
+                       while (le32_to_cpu(extent->ee_block) < start)
+                               extent++;
+                       path[depth].p_ext = extent;
+               }
                ret = ext4_ext_shift_path_extents(path, shift, inode,
-                               handle, &start);
+                               handle, SHIFT);
                if (ret)
                        break;
        }
@@ -5485,7 +5550,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_discard_preallocations(inode);
 
        ret = ext4_ext_shift_extents(inode, handle, punch_stop,
-                                    punch_stop - punch_start);
+                                    punch_stop - punch_start, SHIFT_LEFT);
        if (ret) {
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
@@ -5510,6 +5575,174 @@ out_mutex:
        return ret;
 }
 
+/*
+ * ext4_insert_range:
+ * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
+ * The data blocks starting from @offset to the EOF are shifted by @len
+ * towards right to create a hole in the @inode. Inode size is increased
+ * by len bytes.
+ *
+ * @offset and @len must both be multiples of the cluster size, @offset must
+ * lie below i_size, and the grown file must not exceed s_maxbytes. Only
+ * regular, extent-mapped files are supported.
+ *
+ * Returns 0 on success, error otherwise: -EOPNOTSUPP for unsupported inodes,
+ * -EINVAL for a misaligned or out-of-range request, -EFBIG if the file would
+ * grow past s_maxbytes, or the error reported by a failing helper.
+ */
+int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+{
+       struct super_block *sb = inode->i_sb;
+       handle_t *handle;
+       struct ext4_ext_path *path;
+       struct ext4_extent *extent;
+       ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
+       unsigned int credits, ee_len;
+       int ret = 0, depth, split_flag = 0;
+       loff_t ioffset;
+
+       /*
+        * We need to test this early because xfstests assumes that an
+        * insert range of (0, 1) will return EOPNOTSUPP if the file
+        * system does not support insert range.
+        */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               return -EOPNOTSUPP;
+
+       /* Insert range works only on cluster size aligned offset and length. */
+       if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+                       len & (EXT4_CLUSTER_SIZE(sb) - 1))
+               return -EINVAL;
+
+       if (!S_ISREG(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       trace_ext4_insert_range(inode, offset, len);
+
+       offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+       len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       /* Call ext4_force_commit to flush all data in case of data=journal */
+       if (ext4_should_journal_data(inode)) {
+               ret = ext4_force_commit(inode->i_sb);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               return ret;
+
+       /* Take mutex lock */
+       mutex_lock(&inode->i_mutex);
+
+       /*
+        * Currently just for extent based files. Checked again here because
+        * the test at the top of the function ran without i_mutex held.
+        */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               ret = -EOPNOTSUPP;
+               goto out_mutex;
+       }
+
+       /* Check for wrap through zero */
+       if (inode->i_size + len > inode->i_sb->s_maxbytes) {
+               ret = -EFBIG;
+               goto out_mutex;
+       }
+
+       /* Offset should be less than i_size */
+       if (offset >= i_size_read(inode)) {
+               ret = -EINVAL;
+               goto out_mutex;
+       }
+
+       truncate_pagecache(inode, ioffset);
+
+       /* Wait for existing dio to complete */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       credits = ext4_writepage_trans_blocks(inode);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out_dio;
+       }
+
+       /* Expand file to avoid data loss if there is error while shifting */
+       inode->i_size += len;
+       EXT4_I(inode)->i_disksize += len;
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ret = ext4_mark_inode_dirty(handle, inode);
+       if (ret)
+               goto out_stop;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+       ext4_discard_preallocations(inode);
+
+       path = ext4_find_extent(inode, offset_lblk, NULL, 0);
+       if (IS_ERR(path)) {
+               /* Propagate the lookup failure instead of returning 0. */
+               ret = PTR_ERR(path);
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       depth = ext_depth(inode);
+       extent = path[depth].p_ext;
+       if (extent) {
+               ee_start_lblk = le32_to_cpu(extent->ee_block);
+               ee_len = ext4_ext_get_actual_len(extent);
+
+               /*
+                * If offset_lblk is not the starting block of extent, split
+                * the extent at @offset_lblk
+                */
+               if ((offset_lblk > ee_start_lblk) &&
+                               (offset_lblk < (ee_start_lblk + ee_len))) {
+                       if (ext4_ext_is_unwritten(extent))
+                               split_flag = EXT4_EXT_MARK_UNWRIT1 |
+                                       EXT4_EXT_MARK_UNWRIT2;
+                       ret = ext4_split_extent_at(handle, inode, &path,
+                                       offset_lblk, split_flag,
+                                       EXT4_EX_NOCACHE |
+                                       EXT4_GET_BLOCKS_PRE_IO |
+                                       EXT4_GET_BLOCKS_METADATA_NOFAIL);
+               }
+       }
+
+       /*
+        * Release the path whether or not the lookup landed on an extent;
+        * freeing it only in the branch above leaked it otherwise.
+        */
+       ext4_ext_drop_refs(path);
+       kfree(path);
+       if (ret < 0) {
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       ret = ext4_es_remove_extent(inode, offset_lblk,
+                       EXT_MAX_BLOCKS - offset_lblk);
+       if (ret) {
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       /*
+        * if offset_lblk lies in a hole which is at start of file, use
+        * ee_start_lblk to shift extents
+        */
+       ret = ext4_ext_shift_extents(inode, handle,
+               ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
+               len_lblk, SHIFT_RIGHT);
+
+       up_write(&EXT4_I(inode)->i_data_sem);
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+
+out_stop:
+       ext4_journal_stop(handle);
+out_dio:
+       ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
+
 /**
  * ext4_swap_extents - Swap extents between two inodes
  *
@@ -5542,7 +5775,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
        BUG_ON(!mutex_is_locked(&inode1->i_mutex));
-       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
 
        *erp = ext4_es_remove_extent(inode1, lblk1, count);
        if (unlikely(*erp))
index 0613c25..ac517f1 100644 (file)
@@ -223,9 +223,11 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
        struct inode *inode = file->f_mapping->host;
 
        if (ext4_encrypted_inode(inode)) {
-               int err = ext4_generate_encryption_key(inode);
+               int err = ext4_get_encryption_info(inode);
                if (err)
                        return 0;
+               if (ext4_encryption_info(inode) == NULL)
+                       return -ENOKEY;
        }
        file_accessed(file);
        if (IS_DAX(file_inode(file))) {
@@ -278,6 +280,13 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                        ext4_journal_stop(handle);
                }
        }
+       if (ext4_encrypted_inode(inode)) {
+               ret = ext4_get_encryption_info(inode);
+               if (ret)
+                       return -EACCES;
+               if (ext4_encryption_info(inode) == NULL)
+                       return -ENOKEY;
+       }
        /*
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present
@@ -287,13 +296,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                if (ret < 0)
                        return ret;
        }
-       ret = dquot_file_open(inode, filp);
-       if (!ret && ext4_encrypted_inode(inode)) {
-               ret = ext4_generate_encryption_key(inode);
-               if (ret)
-                       ret = -EACCES;
-       }
-       return ret;
+       return dquot_file_open(inode, filp);
 }
 
 /*
index 1eaa6cb..173c1ae 100644 (file)
@@ -726,11 +726,25 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
        ext4_group_t i;
        ext4_group_t flex_group;
        struct ext4_group_info *grp;
+       int encrypt = 0;
 
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
                return ERR_PTR(-EPERM);
 
+       if ((ext4_encrypted_inode(dir) ||
+            DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
+           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+               err = ext4_get_encryption_info(dir);
+               if (err)
+                       return ERR_PTR(err);
+               if (ext4_encryption_info(dir) == NULL)
+                       return ERR_PTR(-EPERM);
+               if (!handle)
+                       nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
+               encrypt = 1;
+       }
+
        sb = dir->i_sb;
        ngroups = ext4_get_groups_count(sb);
        trace_ext4_request_inode(dir, mode);
@@ -996,12 +1010,6 @@ got:
        ei->i_block_group = group;
        ei->i_last_alloc_group = ~0;
 
-       /* If the directory encrypted, then we should encrypt the inode. */
-       if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
-           (ext4_encrypted_inode(dir) ||
-            DUMMY_ENCRYPTION_ENABLED(sbi)))
-               ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
-
        ext4_set_inode_flags(inode);
        if (IS_DIRSYNC(inode))
                ext4_handle_sync(handle);
@@ -1034,28 +1042,9 @@ got:
        ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
        ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
-           (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
-               ei->i_inline_off = 0;
-               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                       EXT4_FEATURE_INCOMPAT_INLINE_DATA))
-                       ext4_set_inode_state(inode,
-                       EXT4_STATE_MAY_INLINE_DATA);
-       } else {
-               /* Inline data and encryption are incompatible
-                * We turn off inline data since encryption is enabled */
-               ei->i_inline_off = 1;
-               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                       EXT4_FEATURE_INCOMPAT_INLINE_DATA))
-                       ext4_clear_inode_state(inode,
-                       EXT4_STATE_MAY_INLINE_DATA);
-       }
-#else
        ei->i_inline_off = 0;
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
                ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
-#endif
        ret = inode;
        err = dquot_alloc_inode(inode);
        if (err)
@@ -1082,6 +1071,12 @@ got:
                ei->i_datasync_tid = handle->h_transaction->t_tid;
        }
 
+       if (encrypt) {
+               err = ext4_inherit_context(dir, inode);
+               if (err)
+                       goto fail_free_drop;
+       }
+
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
                ext4_std_error(sb, err);
index 9588240..4f6ac49 100644 (file)
@@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                                       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
                EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
                                 "non-extent mapped inodes with bigalloc");
-               return -ENOSPC;
+               return -EUCLEAN;
        }
 
        /* Set up for the direct block allocation */
@@ -576,6 +576,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                ar.flags = EXT4_MB_HINT_DATA;
        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+       if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+               ar.flags |= EXT4_MB_USE_RESERVED;
 
        ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
 
index 095c7a2..cd944a7 100644 (file)
@@ -995,20 +995,18 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  * and -EEXIST if directory entry already exists.
  */
 static int ext4_add_dirent_to_inline(handle_t *handle,
+                                    struct ext4_filename *fname,
                                     struct dentry *dentry,
                                     struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
        struct inode    *dir = d_inode(dentry->d_parent);
-       const char      *name = dentry->d_name.name;
-       int             namelen = dentry->d_name.len;
        int             err;
        struct ext4_dir_entry_2 *de;
 
-       err = ext4_find_dest_de(dir, inode, iloc->bh,
-                               inline_start, inline_size,
-                               name, namelen, &de);
+       err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
+                               inline_size, fname, &de);
        if (err)
                return err;
 
@@ -1016,8 +1014,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
        err = ext4_journal_get_write_access(handle, iloc->bh);
        if (err)
                return err;
-       ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
-                          name, namelen);
+       ext4_insert_dentry(dir, inode, de, inline_size, fname);
 
        ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
 
@@ -1248,8 +1245,8 @@ out:
  * If succeeds, return 0. If not, extended the inline dir and copied data to
  * the new created block.
  */
-int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
-                             struct inode *inode)
+int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
+                             struct dentry *dentry, struct inode *inode)
 {
        int ret, inline_size;
        void *inline_start;
@@ -1268,7 +1265,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
                                                 EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 
-       ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
+       ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
                                        inline_start, inline_size);
        if (ret != -ENOSPC)
                goto out;
@@ -1289,8 +1286,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
        if (inline_size) {
                inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
 
-               ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
-                                               inline_start, inline_size);
+               ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+                                               inode, &iloc, inline_start,
+                                               inline_size);
 
                if (ret != -ENOSPC)
                        goto out;
@@ -1611,6 +1609,7 @@ out:
 }
 
 struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+                                       struct ext4_filename *fname,
                                        const struct qstr *d_name,
                                        struct ext4_dir_entry_2 **res_dir,
                                        int *has_inline_data)
@@ -1632,8 +1631,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
        inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
                                                EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
-       ret = search_dir(iloc.bh, inline_start, inline_size,
-                        dir, d_name, 0, res_dir);
+       ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
+                             dir, fname, d_name, 0, res_dir);
        if (ret == 1)
                goto out_find;
        if (ret < 0)
@@ -1645,8 +1644,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
        inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
        inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
 
-       ret = search_dir(iloc.bh, inline_start, inline_size,
-                        dir, d_name, 0, res_dir);
+       ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
+                             dir, fname, d_name, 0, res_dir);
        if (ret == 1)
                goto out_find;
 
index 5168c9b..f8a8d4e 100644 (file)
@@ -731,18 +731,18 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
-                               ext4_lblk_t block, int create)
+                               ext4_lblk_t block, int map_flags)
 {
        struct ext4_map_blocks map;
        struct buffer_head *bh;
+       int create = map_flags & EXT4_GET_BLOCKS_CREATE;
        int err;
 
        J_ASSERT(handle != NULL || create == 0);
 
        map.m_lblk = block;
        map.m_len = 1;
-       err = ext4_map_blocks(handle, inode, &map,
-                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       err = ext4_map_blocks(handle, inode, &map, map_flags);
 
        if (err == 0)
                return create ? ERR_PTR(-ENOSPC) : NULL;
@@ -788,11 +788,11 @@ errout:
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t block, int create)
+                              ext4_lblk_t block, int map_flags)
 {
        struct buffer_head *bh;
 
-       bh = ext4_getblk(handle, inode, block, create);
+       bh = ext4_getblk(handle, inode, block, map_flags);
        if (IS_ERR(bh))
                return bh;
        if (!bh || buffer_uptodate(bh))
@@ -1261,13 +1261,12 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single cluster located at lblock
+ * Reserve space for a single cluster
  */
-static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
+static int ext4_da_reserve_space(struct inode *inode)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned int md_needed;
        int ret;
 
        /*
@@ -1279,25 +1278,14 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
        if (ret)
                return ret;
 
-       /*
-        * recalculate the amount of metadata blocks to reserve
-        * in order to allocate nrblocks
-        * worse case is one extent per block
-        */
        spin_lock(&ei->i_block_reservation_lock);
-       /*
-        * ext4_calc_metadata_amount() has side effects, which we have
-        * to be prepared undo if we fail to claim space.
-        */
-       md_needed = 0;
-       trace_ext4_da_reserve_space(inode, 0);
-
        if (ext4_claim_free_clusters(sbi, 1, 0)) {
                spin_unlock(&ei->i_block_reservation_lock);
                dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
                return -ENOSPC;
        }
        ei->i_reserved_data_blocks++;
+       trace_ext4_da_reserve_space(inode);
        spin_unlock(&ei->i_block_reservation_lock);
 
        return 0;       /* success */
@@ -1566,9 +1554,9 @@ add_delayed:
                 * then we don't need to reserve it again. However we still need
                 * to reserve metadata for every block we're going to write.
                 */
-               if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
+               if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
                    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
-                       ret = ext4_da_reserve_space(inode, iblock);
+                       ret = ext4_da_reserve_space(inode);
                        if (ret) {
                                /* not enough space to reserve */
                                retval = ret;
@@ -1701,19 +1689,32 @@ static int __ext4_journalled_writepage(struct page *page,
                ext4_walk_page_buffers(handle, page_bufs, 0, len,
                                       NULL, bget_one);
        }
-       /* As soon as we unlock the page, it can go away, but we have
-        * references to buffers so we are safe */
+       /*
+        * We need to release the page lock before we start the
+        * journal, so grab a reference so the page won't disappear
+        * out from under us.
+        */
+       get_page(page);
        unlock_page(page);
 
        handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
                                    ext4_writepage_trans_blocks(inode));
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out;
+               put_page(page);
+               goto out_no_pagelock;
        }
-
        BUG_ON(!ext4_handle_valid(handle));
 
+       lock_page(page);
+       put_page(page);
+       if (page->mapping != mapping) {
+               /* The page got truncated from under us */
+               ext4_journal_stop(handle);
+               ret = 0;
+               goto out;
+       }
+
        if (inline_data) {
                BUFFER_TRACE(inode_bh, "get write access");
                ret = ext4_journal_get_write_access(handle, inode_bh);
@@ -1739,6 +1740,8 @@ static int __ext4_journalled_writepage(struct page *page,
                                       NULL, bput_one);
        ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 out:
+       unlock_page(page);
+out_no_pagelock:
        brelse(inode_bh);
        return ret;
 }
@@ -4681,8 +4684,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                ext4_journal_stop(handle);
        }
 
-       if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
+       if (attr->ia_valid & ATTR_SIZE) {
                handle_t *handle;
+               loff_t oldsize = inode->i_size;
+               int shrink = (attr->ia_size <= inode->i_size);
 
                if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4690,24 +4695,26 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        if (attr->ia_size > sbi->s_bitmap_maxbytes)
                                return -EFBIG;
                }
+               if (!S_ISREG(inode->i_mode))
+                       return -EINVAL;
 
                if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
                        inode_inc_iversion(inode);
 
-               if (S_ISREG(inode->i_mode) &&
+               if (ext4_should_order_data(inode) &&
                    (attr->ia_size < inode->i_size)) {
-                       if (ext4_should_order_data(inode)) {
-                               error = ext4_begin_ordered_truncate(inode,
+                       error = ext4_begin_ordered_truncate(inode,
                                                            attr->ia_size);
-                               if (error)
-                                       goto err_out;
-                       }
+                       if (error)
+                               goto err_out;
+               }
+               if (attr->ia_size != inode->i_size) {
                        handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
                        if (IS_ERR(handle)) {
                                error = PTR_ERR(handle);
                                goto err_out;
                        }
-                       if (ext4_handle_valid(handle)) {
+                       if (ext4_handle_valid(handle) && shrink) {
                                error = ext4_orphan_add(handle, inode);
                                orphan = 1;
                        }
@@ -4726,15 +4733,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        up_write(&EXT4_I(inode)->i_data_sem);
                        ext4_journal_stop(handle);
                        if (error) {
-                               ext4_orphan_del(NULL, inode);
+                               if (orphan)
+                                       ext4_orphan_del(NULL, inode);
                                goto err_out;
                        }
-               } else {
-                       loff_t oldsize = inode->i_size;
-
-                       i_size_write(inode, attr->ia_size);
-                       pagecache_isize_extended(inode, oldsize, inode->i_size);
                }
+               if (!shrink)
+                       pagecache_isize_extended(inode, oldsize, inode->i_size);
 
                /*
                 * Blocks are going to be removed from the inode. Wait
@@ -4754,13 +4759,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 * in data=journal mode to make pages freeable.
                 */
                truncate_pagecache(inode, inode->i_size);
+               if (shrink)
+                       ext4_truncate(inode);
        }
-       /*
-        * We want to call ext4_truncate() even if attr->ia_size ==
-        * inode->i_size for cases like truncation of fallocated space
-        */
-       if (attr->ia_valid & ATTR_SIZE)
-               ext4_truncate(inode);
 
        if (!rc) {
                setattr_copy(inode, attr);
index 2cb9e17..cb84512 100644 (file)
 static void memswap(void *a, void *b, size_t len)
 {
        unsigned char *ap, *bp;
-       unsigned char tmp;
 
        ap = (unsigned char *)a;
        bp = (unsigned char *)b;
        while (len-- > 0) {
-               tmp = *ap;
-               *ap = *bp;
-               *bp = tmp;
+               swap(*ap, *bp);
                ap++;
                bp++;
        }
@@ -675,8 +672,8 @@ encryption_policy_out:
                        if (err)
                                return err;
                }
-               if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
-                                16))
+               if (copy_to_user((void __user *) arg,
+                                sbi->s_es->s_encrypt_pw_salt, 16))
                        return -EFAULT;
                return 0;
        }
@@ -690,7 +687,7 @@ encryption_policy_out:
                err = ext4_get_policy(inode, &policy);
                if (err)
                        return err;
-               if (copy_to_user((void *)arg, &policy, sizeof(policy)))
+               if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
                        return -EFAULT;
                return 0;
 #else
index 8d1e602..1c535fa 100644 (file)
@@ -882,10 +882,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
        /* wait for I/O completion */
        for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
-               if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
+               if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i]))
                        err = -EIO;
-                       goto out;
-               }
        }
 
        first_block = page->index * blocks_per_page;
@@ -898,6 +896,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        /* skip initialized uptodate buddy */
                        continue;
 
+               if (!buffer_verified(bh[group - first_group]))
+                       /* Skip faulty bitmaps */
+                       continue;
+               err = 0;
+
                /*
                 * data carry information regarding this
                 * particular group in the format specified
@@ -2008,7 +2011,12 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        }
 }
 
-/* This is now called BEFORE we load the buddy bitmap. */
+/*
+ * This is now called BEFORE we load the buddy bitmap.
+ * Returns either 1 or 0 indicating that the group is either suitable
+ * for the allocation or not. In addition it can also return negative
+ * error code when something goes wrong.
+ */
 static int ext4_mb_good_group(struct ext4_allocation_context *ac,
                                ext4_group_t group, int cr)
 {
@@ -2031,7 +2039,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
                int ret = ext4_mb_init_group(ac->ac_sb, group);
                if (ret)
-                       return 0;
+                       return ret;
        }
 
        fragments = grp->bb_fragments;
@@ -2078,7 +2086,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
        ext4_group_t ngroups, group, i;
        int cr;
-       int err = 0;
+       int err = 0, first_err = 0;
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        struct ext4_buddy e4b;
@@ -2145,6 +2153,7 @@ repeat:
                group = ac->ac_g_ex.fe_group;
 
                for (i = 0; i < ngroups; group++, i++) {
+                       int ret = 0;
                        cond_resched();
                        /*
                         * Artificially restricted ngroups for non-extent
@@ -2154,8 +2163,12 @@ repeat:
                                group = 0;
 
                        /* This now checks without needing the buddy page */
-                       if (!ext4_mb_good_group(ac, group, cr))
+                       ret = ext4_mb_good_group(ac, group, cr);
+                       if (ret <= 0) {
+                               if (!first_err)
+                                       first_err = ret;
                                continue;
+                       }
 
                        err = ext4_mb_load_buddy(sb, group, &e4b);
                        if (err)
@@ -2167,9 +2180,12 @@ repeat:
                         * We need to check again after locking the
                         * block group
                         */
-                       if (!ext4_mb_good_group(ac, group, cr)) {
+                       ret = ext4_mb_good_group(ac, group, cr);
+                       if (ret <= 0) {
                                ext4_unlock_group(sb, group);
                                ext4_mb_unload_buddy(&e4b);
+                               if (!first_err)
+                                       first_err = ret;
                                continue;
                        }
 
@@ -2216,6 +2232,8 @@ repeat:
                }
        }
 out:
+       if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
+               err = first_err;
        return err;
 }
 
@@ -2257,12 +2275,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 
        group--;
        if (group == 0)
-               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
-                               "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
-                                 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
-                          "group", "free", "frags", "first",
-                          "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
-                          "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
+               seq_puts(seq, "#group: free  frags first ["
+                             " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
+                             " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
 
        i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
                sizeof(struct ext4_group_info);
index 370420b..fb6f117 100644 (file)
@@ -166,12 +166,9 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
         */
        wait_on_page_writeback(page[0]);
        wait_on_page_writeback(page[1]);
-       if (inode1 > inode2) {
-               struct page *tmp;
-               tmp = page[0];
-               page[0] = page[1];
-               page[1] = tmp;
-       }
+       if (inode1 > inode2)
+               swap(page[0], page[1]);
+
        return 0;
 }
 
@@ -574,12 +571,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
                        orig_inode->i_ino, donor_inode->i_ino);
                return -EINVAL;
        }
-       /* TODO: This is non obvious task to swap blocks for inodes with full
-          jornaling enabled */
+
+       /* TODO: it's not obvious how to swap blocks for inodes with full
+          journaling enabled */
        if (ext4_should_journal_data(orig_inode) ||
            ext4_should_journal_data(donor_inode)) {
-               return -EINVAL;
+               ext4_msg(orig_inode->i_sb, KERN_ERR,
+                        "Online defrag not supported with data journaling");
+               return -EOPNOTSUPP;
        }
+
        /* Protect orig and donor inodes against a truncate */
        lock_two_nondirectories(orig_inode, donor_inode);
 
index 5fdb9f6..011dcfb 100644 (file)
@@ -61,7 +61,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
 
        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
-       bh = ext4_bread(handle, inode, *block, 1);
+       bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
        if (IS_ERR(bh))
                return bh;
        inode->i_size += inode->i_sb->s_blocksize;
@@ -84,12 +84,13 @@ typedef enum {
 } dirblock_type_t;
 
 #define ext4_read_dirblock(inode, block, type) \
-       __ext4_read_dirblock((inode), (block), (type), __LINE__)
+       __ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
 
 static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
-                                             ext4_lblk_t block,
-                                             dirblock_type_t type,
-                                             unsigned int line)
+                                               ext4_lblk_t block,
+                                               dirblock_type_t type,
+                                               const char *func,
+                                               unsigned int line)
 {
        struct buffer_head *bh;
        struct ext4_dir_entry *dirent;
@@ -97,15 +98,17 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 
        bh = ext4_bread(NULL, inode, block, 0);
        if (IS_ERR(bh)) {
-               __ext4_warning(inode->i_sb, __func__, line,
-                              "error %ld reading directory block "
-                              "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
-                              (unsigned long) block);
+               __ext4_warning(inode->i_sb, func, line,
+                              "inode #%lu: lblock %lu: comm %s: "
+                              "error %ld reading directory block",
+                              inode->i_ino, (unsigned long)block,
+                              current->comm, PTR_ERR(bh));
 
                return bh;
        }
        if (!bh) {
-               ext4_error_inode(inode, __func__, line, block, "Directory hole found");
+               ext4_error_inode(inode, func, line, block,
+                                "Directory hole found");
                return ERR_PTR(-EIO);
        }
        dirent = (struct ext4_dir_entry *) bh->b_data;
@@ -119,7 +122,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
                        is_dx_block = 1;
        }
        if (!is_dx_block && type == INDEX) {
-               ext4_error_inode(inode, __func__, line, block,
+               ext4_error_inode(inode, func, line, block,
                       "directory leaf block found instead of index block");
                return ERR_PTR(-EIO);
        }
@@ -136,8 +139,8 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
                if (ext4_dx_csum_verify(inode, dirent))
                        set_buffer_verified(bh);
                else {
-                       ext4_error_inode(inode, __func__, line, block,
-                               "Directory index failed checksum");
+                       ext4_error_inode(inode, func, line, block,
+                                        "Directory index failed checksum");
                        brelse(bh);
                        return ERR_PTR(-EIO);
                }
@@ -146,8 +149,8 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
                if (ext4_dirent_csum_verify(inode, dirent))
                        set_buffer_verified(bh);
                else {
-                       ext4_error_inode(inode, __func__, line, block,
-                               "Directory block failed checksum");
+                       ext4_error_inode(inode, func, line, block,
+                                        "Directory block failed checksum");
                        brelse(bh);
                        return ERR_PTR(-EIO);
                }
@@ -248,7 +251,7 @@ static void dx_set_count(struct dx_entry *entries, unsigned value);
 static void dx_set_limit(struct dx_entry *entries, unsigned value);
 static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
 static unsigned dx_node_limit(struct inode *dir);
-static struct dx_frame *dx_probe(const struct qstr *d_name,
+static struct dx_frame *dx_probe(struct ext4_filename *fname,
                                 struct inode *dir,
                                 struct dx_hash_info *hinfo,
                                 struct dx_frame *frame);
@@ -267,10 +270,10 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                                 struct dx_frame *frames,
                                 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
-               const struct qstr *d_name,
+               struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
-static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
-                            struct inode *inode);
+static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
+                            struct dentry *dentry, struct inode *inode);
 
 /* checksumming functions */
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -327,10 +330,14 @@ static __le32 ext4_dirent_csum(struct inode *inode,
        return cpu_to_le32(csum);
 }
 
-static void warn_no_space_for_csum(struct inode *inode)
+#define warn_no_space_for_csum(inode)                                  \
+       __warn_no_space_for_csum((inode), __func__, __LINE__)
+
+static void __warn_no_space_for_csum(struct inode *inode, const char *func,
+                                    unsigned int line)
 {
-       ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
-                    "checksum.  Please run e2fsck -D.", inode->i_ino);
+       __ext4_warning_inode(inode, func, line,
+               "No space for directory leaf checksum. Please run e2fsck -D.");
 }
 
 int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
@@ -607,17 +614,15 @@ static struct stats dx_show_leaf(struct inode *dir,
                                char *name;
                                struct ext4_str fname_crypto_str
                                        = {.name = NULL, .len = 0};
-                               struct ext4_fname_crypto_ctx *ctx = NULL;
-                               int res;
+                               int res = 0;
 
                                name  = de->name;
                                len = de->name_len;
-                               ctx = ext4_get_fname_crypto_ctx(dir,
-                                                               EXT4_NAME_LEN);
-                               if (IS_ERR(ctx)) {
-                                       printk(KERN_WARNING "Error acquiring"
-                                       " crypto ctxt--skipping crypto\n");
-                                       ctx = NULL;
+                               if (ext4_encrypted_inode(inode))
+                                       res = ext4_get_encryption_info(dir);
+                               if (res) {
+                                       printk(KERN_WARNING "Error setting up"
+                                              " fname crypto: %d\n", res);
                                }
                                if (ctx == NULL) {
                                        /* Directory is not encrypted */
@@ -637,7 +642,6 @@ static struct stats dx_show_leaf(struct inode *dir,
                                                        "allocating crypto "
                                                        "buffer--skipping "
                                                        "crypto\n");
-                                               ext4_put_fname_crypto_ctx(&ctx);
                                                ctx = NULL;
                                        }
                                        res = ext4_fname_disk_to_usr(ctx, NULL, de,
@@ -658,7 +662,6 @@ static struct stats dx_show_leaf(struct inode *dir,
                                        printk("%*.s:(E)%x.%u ", len, name,
                                               h.hash, (unsigned) ((char *) de
                                                                   - base));
-                                       ext4_put_fname_crypto_ctx(&ctx);
                                        ext4_fname_crypto_free_buffer(
                                                &fname_crypto_str);
                                }
@@ -724,7 +727,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
  * back to userspace.
  */
 static struct dx_frame *
-dx_probe(const struct qstr *d_name, struct inode *dir,
+dx_probe(struct ext4_filename *fname, struct inode *dir,
         struct dx_hash_info *hinfo, struct dx_frame *frame_in)
 {
        unsigned count, indirect;
@@ -742,56 +745,41 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
        if (root->info.hash_version != DX_HASH_TEA &&
            root->info.hash_version != DX_HASH_HALF_MD4 &&
            root->info.hash_version != DX_HASH_LEGACY) {
-               ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
-                            root->info.hash_version);
+               ext4_warning_inode(dir, "Unrecognised inode hash code %u",
+                                  root->info.hash_version);
                goto fail;
        }
+       if (fname)
+               hinfo = &fname->hinfo;
        hinfo->hash_version = root->info.hash_version;
        if (hinfo->hash_version <= DX_HASH_TEA)
                hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       if (d_name) {
-               struct ext4_fname_crypto_ctx *ctx = NULL;
-               int res;
-
-               /* Check if the directory is encrypted */
-               ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-               if (IS_ERR(ctx)) {
-                       ret_err = ERR_PTR(PTR_ERR(ctx));
-                       goto fail;
-               }
-               res = ext4_fname_usr_to_hash(ctx, d_name, hinfo);
-               if (res < 0) {
-                       ret_err = ERR_PTR(res);
-                       goto fail;
-               }
-               ext4_put_fname_crypto_ctx(&ctx);
-       }
-#else
-       if (d_name)
-               ext4fs_dirhash(d_name->name, d_name->len, hinfo);
-#endif
+       if (fname && fname_name(fname))
+               ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
        hash = hinfo->hash;
 
        if (root->info.unused_flags & 1) {
-               ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
-                            root->info.unused_flags);
+               ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
+                                  root->info.unused_flags);
                goto fail;
        }
 
-       if ((indirect = root->info.indirect_levels) > 1) {
-               ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
-                            root->info.indirect_levels);
+       indirect = root->info.indirect_levels;
+       if (indirect > 1) {
+               ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
+                                  root->info.indirect_levels);
                goto fail;
        }
 
-       entries = (struct dx_entry *) (((char *)&root->info) +
-                                      root->info.info_length);
+       entries = (struct dx_entry *)(((char *)&root->info) +
+                                     root->info.info_length);
 
        if (dx_get_limit(entries) != dx_root_limit(dir,
                                                   root->info.info_length)) {
-               ext4_warning(dir->i_sb, "dx entry: limit != root limit");
+               ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
+                                  dx_get_limit(entries),
+                                  dx_root_limit(dir, root->info.info_length));
                goto fail;
        }
 
@@ -799,15 +787,16 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
        while (1) {
                count = dx_get_count(entries);
                if (!count || count > dx_get_limit(entries)) {
-                       ext4_warning(dir->i_sb,
-                                    "dx entry: no count or count > limit");
+                       ext4_warning_inode(dir,
+                                          "dx entry: count %u beyond limit %u",
+                                          count, dx_get_limit(entries));
                        goto fail;
                }
 
                p = entries + 1;
                q = entries + count - 1;
                while (p <= q) {
-                       m = p + (q - p)/2;
+                       m = p + (q - p) / 2;
                        dxtrace(printk("."));
                        if (dx_get_hash(m) > hash)
                                q = m - 1;
@@ -831,7 +820,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                }
 
                at = p - 1;
-               dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
+               dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at),
+                              dx_get_block(at)));
                frame->entries = entries;
                frame->at = at;
                if (!indirect--)
@@ -845,9 +835,10 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                }
                entries = ((struct dx_node *) frame->bh->b_data)->entries;
 
-               if (dx_get_limit(entries) != dx_node_limit (dir)) {
-                       ext4_warning(dir->i_sb,
-                                    "dx entry: limit != node limit");
+               if (dx_get_limit(entries) != dx_node_limit(dir)) {
+                       ext4_warning_inode(dir,
+                               "dx entry: limit %u != node limit %u",
+                               dx_get_limit(entries), dx_node_limit(dir));
                        goto fail;
                }
        }
@@ -858,18 +849,17 @@ fail:
        }
 
        if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
-               ext4_warning(dir->i_sb,
-                            "Corrupt dir inode %lu, running e2fsck is "
-                            "recommended.", dir->i_ino);
+               ext4_warning_inode(dir,
+                       "Corrupt directory, running e2fsck is recommended");
        return ret_err;
 }
 
-static void dx_release (struct dx_frame *frames)
+static void dx_release(struct dx_frame *frames)
 {
        if (frames[0].bh == NULL)
                return;
 
-       if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
+       if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
                brelse(frames[1].bh);
        brelse(frames[0].bh);
 }
@@ -962,7 +952,6 @@ static int htree_dirblock_to_tree(struct file *dir_file,
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de, *top;
        int err = 0, count = 0;
-       struct ext4_fname_crypto_ctx *ctx = NULL;
        struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str;
 
        dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
@@ -977,17 +966,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                                           EXT4_DIR_REC_LEN(0));
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        /* Check if the directory is encrypted */
-       ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-       if (IS_ERR(ctx)) {
-               err = PTR_ERR(ctx);
-               brelse(bh);
-               return err;
-       }
-       if (ctx != NULL) {
-               err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
+       if (ext4_encrypted_inode(dir)) {
+               err = ext4_get_encryption_info(dir);
+               if (err < 0) {
+                       brelse(bh);
+                       return err;
+               }
+               err = ext4_fname_crypto_alloc_buffer(dir, EXT4_NAME_LEN,
                                                     &fname_crypto_str);
                if (err < 0) {
-                       ext4_put_fname_crypto_ctx(&ctx);
                        brelse(bh);
                        return err;
                }
@@ -1008,16 +995,17 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                        continue;
                if (de->inode == 0)
                        continue;
-               if (ctx == NULL) {
-                       /* Directory is not encrypted */
+               if (!ext4_encrypted_inode(dir)) {
                        tmp_str.name = de->name;
                        tmp_str.len = de->name_len;
                        err = ext4_htree_store_dirent(dir_file,
                                   hinfo->hash, hinfo->minor_hash, de,
                                   &tmp_str);
                } else {
+                       int save_len = fname_crypto_str.len;
+
                        /* Directory is encrypted */
-                       err = ext4_fname_disk_to_usr(ctx, hinfo, de,
+                       err = ext4_fname_disk_to_usr(dir, hinfo, de,
                                                     &fname_crypto_str);
                        if (err < 0) {
                                count = err;
@@ -1026,6 +1014,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                        err = ext4_htree_store_dirent(dir_file,
                                   hinfo->hash, hinfo->minor_hash, de,
                                        &fname_crypto_str);
+                       fname_crypto_str.len = save_len;
                }
                if (err != 0) {
                        count = err;
@@ -1036,7 +1025,6 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 errout:
        brelse(bh);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-       ext4_put_fname_crypto_ctx(&ctx);
        ext4_fname_crypto_free_buffer(&fname_crypto_str);
 #endif
        return count;
@@ -1155,12 +1143,13 @@ errout:
 
 static inline int search_dirblock(struct buffer_head *bh,
                                  struct inode *dir,
+                                 struct ext4_filename *fname,
                                  const struct qstr *d_name,
                                  unsigned int offset,
                                  struct ext4_dir_entry_2 **res_dir)
 {
-       return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
-                         d_name, offset, res_dir);
+       return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
+                              fname, d_name, offset, res_dir);
 }
 
 /*
@@ -1242,54 +1231,54 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
  * `len <= EXT4_NAME_LEN' is guaranteed by caller.
  * `de != NULL' is guaranteed by caller.
  */
-static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
-                            struct ext4_str *fname_crypto_str,
-                            int len, const char * const name,
+static inline int ext4_match(struct ext4_filename *fname,
                             struct ext4_dir_entry_2 *de)
 {
-       int res;
+       const void *name = fname_name(fname);
+       u32 len = fname_len(fname);
 
        if (!de->inode)
                return 0;
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-       if (ctx)
-               return ext4_fname_match(ctx, fname_crypto_str, len, name, de);
+       if (unlikely(!name)) {
+               if (fname->usr_fname->name[0] == '_') {
+                       int ret;
+                       if (de->name_len < 16)
+                               return 0;
+                       ret = memcmp(de->name + de->name_len - 16,
+                                    fname->crypto_buf.name + 8, 16);
+                       return (ret == 0) ? 1 : 0;
+               }
+               name = fname->crypto_buf.name;
+               len = fname->crypto_buf.len;
+       }
 #endif
-       if (len != de->name_len)
+       if (de->name_len != len)
                return 0;
-       res = memcmp(name, de->name, len);
-       return (res == 0) ? 1 : 0;
+       return (memcmp(de->name, name, len) == 0) ? 1 : 0;
 }
 
 /*
  * Returns 0 if not found, -1 on failure, and 1 on success
  */
-int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
-              struct inode *dir, const struct qstr *d_name,
-              unsigned int offset, struct ext4_dir_entry_2 **res_dir)
+int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
+                   struct inode *dir, struct ext4_filename *fname,
+                   const struct qstr *d_name,
+                   unsigned int offset, struct ext4_dir_entry_2 **res_dir)
 {
        struct ext4_dir_entry_2 * de;
        char * dlimit;
        int de_len;
-       const char *name = d_name->name;
-       int namelen = d_name->len;
-       struct ext4_fname_crypto_ctx *ctx = NULL;
-       struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
        int res;
 
-       ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-       if (IS_ERR(ctx))
-               return -1;
-
        de = (struct ext4_dir_entry_2 *)search_buf;
        dlimit = search_buf + buf_size;
        while ((char *) de < dlimit) {
                /* this code is executed quadratically often */
                /* do minimal checking `by hand' */
                if ((char *) de + de->name_len <= dlimit) {
-                       res = ext4_match(ctx, &fname_crypto_str, namelen,
-                                        name, de);
+                       res = ext4_match(fname, de);
                        if (res < 0) {
                                res = -1;
                                goto return_result;
@@ -1322,8 +1311,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 
        res = 0;
 return_result:
-       ext4_put_fname_crypto_ctx(&ctx);
-       ext4_fname_crypto_free_buffer(&fname_crypto_str);
        return res;
 }
 
@@ -1370,7 +1357,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                                   buffer */
        int num = 0;
        ext4_lblk_t  nblocks;
-       int i, namelen;
+       int i, namelen, retval;
+       struct ext4_filename fname;
 
        *res_dir = NULL;
        sb = dir->i_sb;
@@ -1378,14 +1366,18 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
        if (namelen > EXT4_NAME_LEN)
                return NULL;
 
+       retval = ext4_fname_setup_filename(dir, d_name, 1, &fname);
+       if (retval)
+               return ERR_PTR(retval);
+
        if (ext4_has_inline_data(dir)) {
                int has_inline_data = 1;
-               ret = ext4_find_inline_entry(dir, d_name, res_dir,
+               ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
                                             &has_inline_data);
                if (has_inline_data) {
                        if (inlined)
                                *inlined = 1;
-                       return ret;
+                       goto cleanup_and_exit;
                }
        }
 
@@ -1400,14 +1392,14 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                goto restart;
        }
        if (is_dx(dir)) {
-               bh = ext4_dx_find_entry(dir, d_name, res_dir);
+               ret = ext4_dx_find_entry(dir, &fname, res_dir);
                /*
                 * On success, or if the error was file not found,
                 * return.  Otherwise, fall back to doing a search the
                 * old fashioned way.
                 */
-               if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
-                       return bh;
+               if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
+                       goto cleanup_and_exit;
                dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
                               "falling back\n"));
        }
@@ -1438,8 +1430,10 @@ restart:
                                num++;
                                bh = ext4_getblk(NULL, dir, b++, 0);
                                if (unlikely(IS_ERR(bh))) {
-                                       if (ra_max == 0)
-                                               return bh;
+                                       if (ra_max == 0) {
+                                               ret = bh;
+                                               goto cleanup_and_exit;
+                                       }
                                        break;
                                }
                                bh_use[ra_max] = bh;
@@ -1469,7 +1463,7 @@ restart:
                        goto next;
                }
                set_buffer_verified(bh);
-               i = search_dirblock(bh, dir, d_name,
+               i = search_dirblock(bh, dir, &fname, d_name,
                            block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
                if (i == 1) {
                        EXT4_I(dir)->i_dir_start_lookup = block;
@@ -1500,15 +1494,17 @@ cleanup_and_exit:
        /* Clean up the read-ahead blocks */
        for (; ra_ptr < ra_max; ra_ptr++)
                brelse(bh_use[ra_ptr]);
+       ext4_fname_free_filename(&fname);
        return ret;
 }
 
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
-                      struct ext4_dir_entry_2 **res_dir)
+static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
+                       struct ext4_filename *fname,
+                       struct ext4_dir_entry_2 **res_dir)
 {
        struct super_block * sb = dir->i_sb;
-       struct dx_hash_info     hinfo;
        struct dx_frame frames[2], *frame;
+       const struct qstr *d_name = fname->usr_fname;
        struct buffer_head *bh;
        ext4_lblk_t block;
        int retval;
@@ -1516,7 +1512,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        *res_dir = NULL;
 #endif
-       frame = dx_probe(d_name, dir, &hinfo, frames);
+       frame = dx_probe(fname, dir, NULL, frames);
        if (IS_ERR(frame))
                return (struct buffer_head *) frame;
        do {
@@ -1525,7 +1521,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                if (IS_ERR(bh))
                        goto errout;
 
-               retval = search_dirblock(bh, dir, d_name,
+               retval = search_dirblock(bh, dir, fname, d_name,
                                         block << EXT4_BLOCK_SIZE_BITS(sb),
                                         res_dir);
                if (retval == 1)
@@ -1537,12 +1533,12 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                }
 
                /* Check to see if we should continue to search */
-               retval = ext4_htree_next_block(dir, hinfo.hash, frame,
+               retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
                                               frames, NULL);
                if (retval < 0) {
-                       ext4_warning(sb,
-                            "error %d reading index page in directory #%lu",
-                            retval, dir->i_ino);
+                       ext4_warning_inode(dir,
+                               "error %d reading directory index block",
+                               retval);
                        bh = ERR_PTR(retval);
                        goto errout;
                }
@@ -1796,32 +1792,16 @@ journal_error:
 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
                      struct buffer_head *bh,
                      void *buf, int buf_size,
-                     const char *name, int namelen,
+                     struct ext4_filename *fname,
                      struct ext4_dir_entry_2 **dest_de)
 {
        struct ext4_dir_entry_2 *de;
-       unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
+       unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
        int nlen, rlen;
        unsigned int offset = 0;
        char *top;
-       struct ext4_fname_crypto_ctx *ctx = NULL;
-       struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
        int res;
 
-       ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-       if (IS_ERR(ctx))
-               return -1;
-
-       if (ctx != NULL) {
-               /* Calculate record length needed to store the entry */
-               res = ext4_fname_crypto_namelen_on_disk(ctx, namelen);
-               if (res < 0) {
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return res;
-               }
-               reclen = EXT4_DIR_REC_LEN(res);
-       }
-
        de = (struct ext4_dir_entry_2 *)buf;
        top = buf + buf_size - reclen;
        while ((char *) de <= top) {
@@ -1831,7 +1811,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
                        goto return_result;
                }
                /* Provide crypto context and crypto buffer to ext4 match */
-               res = ext4_match(ctx, &fname_crypto_str, namelen, name, de);
+               res = ext4_match(fname, de);
                if (res < 0)
                        goto return_result;
                if (res > 0) {
@@ -1853,8 +1833,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
                res = 0;
        }
 return_result:
-       ext4_put_fname_crypto_ctx(&ctx);
-       ext4_fname_crypto_free_buffer(&fname_crypto_str);
        return res;
 }
 
@@ -1862,39 +1840,10 @@ int ext4_insert_dentry(struct inode *dir,
                       struct inode *inode,
                       struct ext4_dir_entry_2 *de,
                       int buf_size,
-                      const struct qstr *iname,
-                      const char *name, int namelen)
+                      struct ext4_filename *fname)
 {
 
        int nlen, rlen;
-       struct ext4_fname_crypto_ctx *ctx = NULL;
-       struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
-       struct ext4_str tmp_str;
-       int res;
-
-       ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-       if (IS_ERR(ctx))
-               return -EIO;
-       /* By default, the input name would be written to the disk */
-       tmp_str.name = (unsigned char *)name;
-       tmp_str.len = namelen;
-       if (ctx != NULL) {
-               /* Directory is encrypted */
-               res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
-                                                    &fname_crypto_str);
-               if (res < 0) {
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       return -ENOMEM;
-               }
-               res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str);
-               if (res < 0) {
-                       ext4_put_fname_crypto_ctx(&ctx);
-                       ext4_fname_crypto_free_buffer(&fname_crypto_str);
-                       return res;
-               }
-               tmp_str.name = fname_crypto_str.name;
-               tmp_str.len = fname_crypto_str.len;
-       }
 
        nlen = EXT4_DIR_REC_LEN(de->name_len);
        rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
@@ -1908,11 +1857,8 @@ int ext4_insert_dentry(struct inode *dir,
        de->file_type = EXT4_FT_UNKNOWN;
        de->inode = cpu_to_le32(inode->i_ino);
        ext4_set_de_type(inode->i_sb, de, inode->i_mode);
-       de->name_len = tmp_str.len;
-
-       memcpy(de->name, tmp_str.name, tmp_str.len);
-       ext4_put_fname_crypto_ctx(&ctx);
-       ext4_fname_crypto_free_buffer(&fname_crypto_str);
+       de->name_len = fname_len(fname);
+       memcpy(de->name, fname_name(fname), fname_len(fname));
        return 0;
 }
 
@@ -1924,13 +1870,11 @@ int ext4_insert_dentry(struct inode *dir,
  * space.  It will return -ENOSPC if no space is available, and -EIO
  * and -EEXIST if directory entry already exists.
  */
-static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
+                            struct inode *dir,
                             struct inode *inode, struct ext4_dir_entry_2 *de,
                             struct buffer_head *bh)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
-       const char      *name = dentry->d_name.name;
-       int             namelen = dentry->d_name.len;
        unsigned int    blocksize = dir->i_sb->s_blocksize;
        int             csum_size = 0;
        int             err;
@@ -1939,9 +1883,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        if (!de) {
-               err = ext4_find_dest_de(dir, inode,
-                                       bh, bh->b_data, blocksize - csum_size,
-                                       name, namelen, &de);
+               err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
+                                       blocksize - csum_size, fname, &de);
                if (err)
                        return err;
        }
@@ -1954,8 +1897,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 
        /* By now the buffer is marked for journaling. Due to crypto operations,
         * the following function call may fail */
-       err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name,
-                                name, namelen);
+       err = ext4_insert_dentry(dir, inode, de, blocksize, fname);
        if (err < 0)
                return err;
 
@@ -1985,17 +1927,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
  * This converts a one block unindexed directory to a 3 block indexed
  * directory, and adds the dentry to the indexed directory.
  */
-static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
+static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+                           struct dentry *dentry,
                            struct inode *inode, struct buffer_head *bh)
 {
        struct inode    *dir = d_inode(dentry->d_parent);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       struct ext4_fname_crypto_ctx *ctx = NULL;
-       int res;
-#else
-       const char      *name = dentry->d_name.name;
-       int             namelen = dentry->d_name.len;
-#endif
        struct buffer_head *bh2;
        struct dx_root  *root;
        struct dx_frame frames[2], *frame;
@@ -2006,17 +1942,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        unsigned        len;
        int             retval;
        unsigned        blocksize;
-       struct dx_hash_info hinfo;
        ext4_lblk_t  block;
        struct fake_dirent *fde;
        int csum_size = 0;
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-       if (IS_ERR(ctx))
-               return PTR_ERR(ctx);
-#endif
-
        if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
@@ -2078,22 +2007,12 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
 
        /* Initialize as for dx_probe */
-       hinfo.hash_version = root->info.hash_version;
-       if (hinfo.hash_version <= DX_HASH_TEA)
-               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
-       hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo);
-       if (res < 0) {
-               ext4_put_fname_crypto_ctx(&ctx);
-               ext4_mark_inode_dirty(handle, dir);
-               brelse(bh);
-               return res;
-       }
-       ext4_put_fname_crypto_ctx(&ctx);
-#else
-       ext4fs_dirhash(name, namelen, &hinfo);
-#endif
+       fname->hinfo.hash_version = root->info.hash_version;
+       if (fname->hinfo.hash_version <= DX_HASH_TEA)
+               fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
+       fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+       ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo);
+
        memset(frames, 0, sizeof(frames));
        frame = frames;
        frame->entries = entries;
@@ -2108,14 +2027,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        if (retval)
                goto out_frames;        
 
-       de = do_split(handle,dir, &bh, frame, &hinfo);
+       de = do_split(handle,dir, &bh, frame, &fname->hinfo);
        if (IS_ERR(de)) {
                retval = PTR_ERR(de);
                goto out_frames;
        }
        dx_release(frames);
 
-       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
        brelse(bh);
        return retval;
 out_frames:
@@ -2147,6 +2066,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        struct ext4_dir_entry_2 *de;
        struct ext4_dir_entry_tail *t;
        struct super_block *sb;
+       struct ext4_filename fname;
        int     retval;
        int     dx_fallback=0;
        unsigned blocksize;
@@ -2161,10 +2081,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        if (!dentry->d_name.len)
                return -EINVAL;
 
+       retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
+       if (retval)
+               return retval;
+
        if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname,
+                                                  dentry, inode);
                if (retval < 0)
-                       return retval;
+                       goto out;
                if (retval == 1) {
                        retval = 0;
                        goto out;
@@ -2172,7 +2097,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        }
 
        if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        goto out;
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2182,24 +2107,31 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        blocks = dir->i_size >> sb->s_blocksize_bits;
        for (block = 0; block < blocks; block++) {
                bh = ext4_read_dirblock(dir, block, DIRENT);
-               if (IS_ERR(bh))
-                       return PTR_ERR(bh);
-
-               retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+               if (IS_ERR(bh)) {
+                       retval = PTR_ERR(bh);
+                       bh = NULL;
+                       goto out;
+               }
+               retval = add_dirent_to_buf(handle, &fname, dir, inode,
+                                          NULL, bh);
                if (retval != -ENOSPC)
                        goto out;
 
                if (blocks == 1 && !dx_fallback &&
                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
-                       retval = make_indexed_dir(handle, dentry, inode, bh);
+                       retval = make_indexed_dir(handle, &fname, dentry,
+                                                 inode, bh);
                        bh = NULL; /* make_indexed_dir releases bh */
                        goto out;
                }
                brelse(bh);
        }
        bh = ext4_append(handle, dir, &block);
-       if (IS_ERR(bh))
-               return PTR_ERR(bh);
+       if (IS_ERR(bh)) {
+               retval = PTR_ERR(bh);
+               bh = NULL;
+               goto out;
+       }
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de->inode = 0;
        de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
@@ -2209,8 +2141,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                initialize_dirent_tail(t, blocksize);
        }
 
-       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
 out:
+       ext4_fname_free_filename(&fname);
        brelse(bh);
        if (retval == 0)
                ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -2220,19 +2153,18 @@ out:
 /*
  * Returns 0 for success, or a negative error value
  */
-static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
-                            struct inode *inode)
+static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
+                            struct dentry *dentry, struct inode *inode)
 {
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries, *at;
-       struct dx_hash_info hinfo;
        struct buffer_head *bh;
        struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
 
-       frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
+       frame = dx_probe(fname, dir, NULL, frames);
        if (IS_ERR(frame))
                return PTR_ERR(frame);
        entries = frame->entries;
@@ -2249,7 +2181,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
        if (err)
                goto journal_error;
 
-       err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+       err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
        if (err != -ENOSPC)
                goto cleanup;
 
@@ -2267,7 +2199,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 
                if (levels && (dx_get_count(frames->entries) ==
                               dx_get_limit(frames->entries))) {
-                       ext4_warning(sb, "Directory index full!");
+                       ext4_warning_inode(dir, "Directory index full!");
                        err = -ENOSPC;
                        goto cleanup;
                }
@@ -2345,12 +2277,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        goto cleanup;
                }
        }
-       de = do_split(handle, dir, &bh, frame, &hinfo);
+       de = do_split(handle, dir, &bh, frame, &fname->hinfo);
        if (IS_ERR(de)) {
                err = PTR_ERR(de);
                goto cleanup;
        }
-       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
        goto cleanup;
 
 journal_error:
@@ -2517,20 +2449,7 @@ retry:
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
-               err = 0;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-               if (!err && (ext4_encrypted_inode(dir) ||
-                            DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) {
-                       err = ext4_inherit_context(dir, inode);
-                       if (err) {
-                               clear_nlink(inode);
-                               unlock_new_inode(inode);
-                               iput(inode);
-                       }
-               }
-#endif
-               if (!err)
-                       err = ext4_add_nondir(handle, dentry, inode);
+               err = ext4_add_nondir(handle, dentry, inode);
                if (!err && IS_DIRSYNC(dir))
                        ext4_handle_sync(handle);
        }
@@ -2711,14 +2630,6 @@ retry:
        err = ext4_init_new_dir(handle, dir, inode);
        if (err)
                goto out_clear_inode;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       if (ext4_encrypted_inode(dir) ||
-           DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) {
-               err = ext4_inherit_context(dir, inode);
-               if (err)
-                       goto out_clear_inode;
-       }
-#endif
        err = ext4_mark_inode_dirty(handle, inode);
        if (!err)
                err = ext4_add_entry(handle, dentry, inode);
@@ -2779,12 +2690,9 @@ int ext4_empty_dir(struct inode *inode)
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de1 = ext4_next_entry(de, sb->s_blocksize);
        if (le32_to_cpu(de->inode) != inode->i_ino ||
-                       !le32_to_cpu(de1->inode) ||
-                       strcmp(".", de->name) ||
-                       strcmp("..", de1->name)) {
-               ext4_warning(inode->i_sb,
-                            "bad directory (dir #%lu) - no `.' or `..'",
-                            inode->i_ino);
+                       le32_to_cpu(de1->inode) == 0 ||
+                       strcmp(".", de->name) || strcmp("..", de1->name)) {
+               ext4_warning_inode(inode, "directory missing '.' and/or '..'");
                brelse(bh);
                return 1;
        }
@@ -3037,8 +2945,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
        if (retval)
                goto end_rmdir;
        if (!EXT4_DIR_LINK_EMPTY(inode))
-               ext4_warning(inode->i_sb,
-                            "empty directory has too many links (%d)",
+               ext4_warning_inode(inode,
+                            "empty directory '%.*s' has too many links (%u)",
+                            dentry->d_name.len, dentry->d_name.name,
                             inode->i_nlink);
        inode->i_version++;
        clear_nlink(inode);
@@ -3098,10 +3007,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       if (!inode->i_nlink) {
-               ext4_warning(inode->i_sb,
-                            "Deleting nonexistent file (%lu), %d",
-                            inode->i_ino, inode->i_nlink);
+       if (inode->i_nlink == 0) {
+               ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
+                                  dentry->d_name.len, dentry->d_name.name);
                set_nlink(inode, 1);
        }
        retval = ext4_delete_entry(handle, dir, de, bh);
@@ -3140,10 +3048,23 @@ static int ext4_symlink(struct inode *dir,
 
        encryption_required = (ext4_encrypted_inode(dir) ||
                               DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
-       if (encryption_required)
-               disk_link.len = encrypted_symlink_data_len(len) + 1;
-       if (disk_link.len > dir->i_sb->s_blocksize)
-               return -ENAMETOOLONG;
+       if (encryption_required) {
+               err = ext4_get_encryption_info(dir);
+               if (err)
+                       return err;
+               if (ext4_encryption_info(dir) == NULL)
+                       return -EPERM;
+               disk_link.len = (ext4_fname_encrypted_size(dir, len) +
+                                sizeof(struct ext4_encrypted_symlink_data));
+               sd = kzalloc(disk_link.len, GFP_KERNEL);
+               if (!sd)
+                       return -ENOMEM;
+       }
+
+       if (disk_link.len > dir->i_sb->s_blocksize) {
+               err = -ENAMETOOLONG;
+               goto err_free_sd;
+       }
 
        dquot_initialize(dir);
 
@@ -3174,34 +3095,19 @@ static int ext4_symlink(struct inode *dir,
        if (IS_ERR(inode)) {
                if (handle)
                        ext4_journal_stop(handle);
-               return PTR_ERR(inode);
+               err = PTR_ERR(inode);
+               goto err_free_sd;
        }
 
        if (encryption_required) {
-               struct ext4_fname_crypto_ctx *ctx = NULL;
                struct qstr istr;
                struct ext4_str ostr;
 
-               sd = kzalloc(disk_link.len, GFP_NOFS);
-               if (!sd) {
-                       err = -ENOMEM;
-                       goto err_drop_inode;
-               }
-               err = ext4_inherit_context(dir, inode);
-               if (err)
-                       goto err_drop_inode;
-               ctx = ext4_get_fname_crypto_ctx(inode,
-                                               inode->i_sb->s_blocksize);
-               if (IS_ERR_OR_NULL(ctx)) {
-                       /* We just set the policy, so ctx should not be NULL */
-                       err = (ctx == NULL) ? -EIO : PTR_ERR(ctx);
-                       goto err_drop_inode;
-               }
                istr.name = (const unsigned char *) symname;
                istr.len = len;
                ostr.name = sd->encrypted_path;
-               err = ext4_fname_usr_to_disk(ctx, &istr, &ostr);
-               ext4_put_fname_crypto_ctx(&ctx);
+               ostr.len = disk_link.len;
+               err = ext4_fname_usr_to_disk(inode, &istr, &ostr);
                if (err < 0)
                        goto err_drop_inode;
                sd->len = cpu_to_le16(ostr.len);
@@ -3271,10 +3177,11 @@ static int ext4_symlink(struct inode *dir,
 err_drop_inode:
        if (handle)
                ext4_journal_stop(handle);
-       kfree(sd);
        clear_nlink(inode);
        unlock_new_inode(inode);
        iput(inode);
+err_free_sd:
+       kfree(sd);
        return err;
 }
 
@@ -3490,9 +3397,9 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
        }
 
        if (retval) {
-               ext4_warning(ent->dir->i_sb,
-                               "Deleting old file (%lu), %d, error=%d",
-                               ent->dir->i_ino, ent->dir->i_nlink, retval);
+               ext4_warning_inode(ent->dir,
+                                  "Deleting old file: nlink %d, error=%d",
+                                  ent->dir->i_nlink, retval);
        }
 }
 
@@ -3762,6 +3669,15 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        u8 new_file_type;
        int retval;
 
+       if ((ext4_encrypted_inode(old_dir) ||
+            ext4_encrypted_inode(new_dir)) &&
+           (old_dir != new_dir) &&
+           (!ext4_is_child_context_consistent_with_parent(new_dir,
+                                                          old.inode) ||
+            !ext4_is_child_context_consistent_with_parent(old_dir,
+                                                          new.inode)))
+               return -EPERM;
+
        dquot_initialize(old.dir);
        dquot_initialize(new.dir);
 
index 5765f88..79636e2 100644 (file)
@@ -84,7 +84,7 @@ static void ext4_finish_bio(struct bio *bio)
                        /* The bounce data pages are unmapped. */
                        data_page = page;
                        ctx = (struct ext4_crypto_ctx *)page_private(data_page);
-                       page = ctx->control_page;
+                       page = ctx->w.control_page;
                }
 #endif
 
index 171b9ac..ec3ef93 100644 (file)
@@ -54,8 +54,8 @@ static void completion_pages(struct work_struct *work)
 {
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        struct ext4_crypto_ctx *ctx =
-               container_of(work, struct ext4_crypto_ctx, work);
-       struct bio      *bio    = ctx->bio;
+               container_of(work, struct ext4_crypto_ctx, r.work);
+       struct bio      *bio    = ctx->r.bio;
        struct bio_vec  *bv;
        int             i;
 
@@ -109,9 +109,9 @@ static void mpage_end_io(struct bio *bio, int err)
                if (err) {
                        ext4_release_crypto_ctx(ctx);
                } else {
-                       INIT_WORK(&ctx->work, completion_pages);
-                       ctx->bio = bio;
-                       queue_work(ext4_read_workqueue, &ctx->work);
+                       INIT_WORK(&ctx->r.work, completion_pages);
+                       ctx->r.bio = bio;
+                       queue_work(ext4_read_workqueue, &ctx->r.work);
                        return;
                }
        }
index ca9d4a2..90ec13f 100644 (file)
@@ -591,14 +591,17 @@ void __ext4_msg(struct super_block *sb,
        va_end(args);
 }
 
+#define ext4_warning_ratelimit(sb)                                     \
+               ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
+                            "EXT4-fs warning")
+
 void __ext4_warning(struct super_block *sb, const char *function,
                    unsigned int line, const char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
 
-       if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
-                         "EXT4-fs warning"))
+       if (!ext4_warning_ratelimit(sb))
                return;
 
        va_start(args, fmt);
@@ -609,6 +612,24 @@ void __ext4_warning(struct super_block *sb, const char *function,
        va_end(args);
 }
 
+void __ext4_warning_inode(const struct inode *inode, const char *function,
+                         unsigned int line, const char *fmt, ...)
+{
+       struct va_format vaf;
+       va_list args;
+
+       if (!ext4_warning_ratelimit(inode->i_sb))
+               return;
+
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
+              "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
+              function, line, inode->i_ino, current->comm, &vaf);
+       va_end(args);
+}
+
 void __ext4_grp_locked_error(const char *function, unsigned int line,
                             struct super_block *sb, ext4_group_t grp,
                             unsigned long ino, ext4_fsblk_t block,
@@ -807,6 +828,7 @@ static void ext4_put_super(struct super_block *sb)
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));
 
+       sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
@@ -879,9 +901,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        atomic_set(&ei->i_unwritten, 0);
        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-       ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
+       ei->i_crypt_info = NULL;
 #endif
-
        return &ei->vfs_inode;
 }
 
@@ -958,6 +979,10 @@ void ext4_clear_inode(struct inode *inode)
                jbd2_free_inode(EXT4_I(inode)->jinode);
                EXT4_I(inode)->jinode = NULL;
        }
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       if (EXT4_I(inode)->i_crypt_info)
+               ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
+#endif
 }
 
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -3449,11 +3474,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_bdev->bd_part)
                sbi->s_sectors_written_start =
                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       /* Modes of operations for file and directory encryption. */
-       sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
-       sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
-#endif
 
        /* Cleanup superblock name */
        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@@ -4067,7 +4087,15 @@ no_journal:
                }
        }
 
-       if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
+       if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
+            EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
+           (blocksize != PAGE_CACHE_SIZE)) {
+               ext4_msg(sb, KERN_ERR,
+                        "Unsupported blocksize for fs encryption");
+               goto failed_mount_wq;
+       }
+
+       if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
            !(sb->s_flags & MS_RDONLY) &&
            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
@@ -4943,6 +4971,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
        }
 
+       if (*flags & MS_LAZYTIME)
+               sb->s_flags |= MS_LAZYTIME;
+
        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
                        err = -EROFS;
@@ -5410,6 +5441,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
        struct inode *inode = sb_dqopt(sb)->files[type];
        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
        int err, offset = off & (sb->s_blocksize - 1);
+       int retries = 0;
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
 
@@ -5430,7 +5462,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
                return -EIO;
        }
 
-       bh = ext4_bread(handle, inode, blk, 1);
+       do {
+               bh = ext4_bread(handle, inode, blk,
+                               EXT4_GET_BLOCKS_CREATE |
+                               EXT4_GET_BLOCKS_METADATA_NOFAIL);
+       } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
+                ext4_should_retry_alloc(inode->i_sb, &retries));
        if (IS_ERR(bh))
                return PTR_ERR(bh);
        if (!bh)
@@ -5647,6 +5684,7 @@ out7:
 
 static void __exit ext4_exit_fs(void)
 {
+       ext4_exit_crypto();
        ext4_destroy_lazyinit_thread();
        unregister_as_ext2();
        unregister_as_ext3();
index ba5bd18..c677f2c 100644 (file)
 #include "xattr.h"
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
+static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
 {
        struct page *cpage = NULL;
        char *caddr, *paddr = NULL;
        struct ext4_str cstr, pstr;
        struct inode *inode = d_inode(dentry);
-       struct ext4_fname_crypto_ctx *ctx = NULL;
        struct ext4_encrypted_symlink_data *sd;
        loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
        int res;
        u32 plen, max_size = inode->i_sb->s_blocksize;
 
-       ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
-       if (IS_ERR(ctx))
-               return ERR_CAST(ctx);
+       res = ext4_get_encryption_info(inode);
+       if (res)
+               return ERR_PTR(res);
 
        if (ext4_inode_is_fast_symlink(inode)) {
                caddr = (char *) EXT4_I(inode)->i_data;
                max_size = sizeof(EXT4_I(inode)->i_data);
        } else {
                cpage = read_mapping_page(inode->i_mapping, 0, NULL);
-               if (IS_ERR(cpage)) {
-                       ext4_put_fname_crypto_ctx(&ctx);
+               if (IS_ERR(cpage))
                        return ERR_CAST(cpage);
-               }
                caddr = kmap(cpage);
                caddr[size] = 0;
        }
@@ -71,20 +68,19 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
                goto errout;
        }
        pstr.name = paddr;
-       res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
+       pstr.len = plen;
+       res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
        if (res < 0)
                goto errout;
        /* Null-terminate the name */
        if (res <= plen)
                paddr[res] = '\0';
-       ext4_put_fname_crypto_ctx(&ctx);
        if (cpage) {
                kunmap(cpage);
                page_cache_release(cpage);
        }
        return *cookie = paddr;
 errout:
-       ext4_put_fname_crypto_ctx(&ctx);
        if (cpage) {
                kunmap(cpage);
                page_cache_release(cpage);
@@ -95,7 +91,7 @@ errout:
 
 const struct inode_operations ext4_encrypted_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = ext4_follow_link,
+       .follow_link    = ext4_encrypted_follow_link,
        .put_link       = kfree_put_link,
        .setattr        = ext4_setattr,
        .setxattr       = generic_setxattr,
index 988b32e..4227dc4 100644 (file)
@@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
        unsigned long   blocknr;
 
        if (is_journal_aborted(journal))
-               return 1;
+               return -EIO;
 
        if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
                return 1;
@@ -405,10 +405,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
         * jbd2_cleanup_journal_tail() doesn't get called all that often.
         */
        if (journal->j_flags & JBD2_BARRIER)
-               blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+               blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
 
-       __jbd2_update_log_tail(journal, first_tid, blocknr);
-       return 0;
+       return __jbd2_update_log_tail(journal, first_tid, blocknr);
 }
 
 
index b96bd80..179d7d8 100644 (file)
@@ -371,16 +371,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
         */
        J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
-retry_alloc:
-       new_bh = alloc_buffer_head(GFP_NOFS);
-       if (!new_bh) {
-               /*
-                * Failure is not an option, but __GFP_NOFAIL is going
-                * away; so we retry ourselves here.
-                */
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-               goto retry_alloc;
-       }
+       new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
 
        /* keep subsequent assertions sane */
        atomic_set(&new_bh->b_count, 1);
@@ -885,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
  *
  * Requires j_checkpoint_mutex
  */
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 {
        unsigned long freed;
+       int ret;
 
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 
@@ -897,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
         * space and if we lose sb update during power failure we'd replay
         * old transaction with possibly newly overwritten data.
         */
-       jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+       ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+       if (ret)
+               goto out;
+
        write_lock(&journal->j_state_lock);
        freed = block - journal->j_tail;
        if (block < journal->j_tail)
@@ -913,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
        journal->j_tail_sequence = tid;
        journal->j_tail = block;
        write_unlock(&journal->j_state_lock);
+
+out:
+       return ret;
 }
 
 /*
@@ -1331,7 +1329,7 @@ static int journal_reset(journal_t *journal)
        return jbd2_journal_start_thread(journal);
 }
 
-static void jbd2_write_superblock(journal_t *journal, int write_op)
+static int jbd2_write_superblock(journal_t *journal, int write_op)
 {
        struct buffer_head *bh = journal->j_sb_buffer;
        journal_superblock_t *sb = journal->j_superblock;
@@ -1370,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
                printk(KERN_ERR "JBD2: Error %d detected when updating "
                       "journal superblock for %s.\n", ret,
                       journal->j_devname);
+               jbd2_journal_abort(journal, ret);
        }
+
+       return ret;
 }
 
 /**
@@ -1383,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
  * Update a journal's superblock information about log tail and write it to
  * disk, waiting for the IO to complete.
  */
-void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
                                     unsigned long tail_block, int write_op)
 {
        journal_superblock_t *sb = journal->j_superblock;
+       int ret;
 
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
        jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@@ -1395,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
        sb->s_sequence = cpu_to_be32(tail_tid);
        sb->s_start    = cpu_to_be32(tail_block);
 
-       jbd2_write_superblock(journal, write_op);
+       ret = jbd2_write_superblock(journal, write_op);
+       if (ret)
+               goto out;
 
        /* Log is no longer empty */
        write_lock(&journal->j_state_lock);
        WARN_ON(!sb->s_sequence);
        journal->j_flags &= ~JBD2_FLUSHED;
        write_unlock(&journal->j_state_lock);
+
+out:
+       return ret;
 }
 
 /**
@@ -1950,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
                return -EIO;
 
        mutex_lock(&journal->j_checkpoint_mutex);
-       jbd2_cleanup_journal_tail(journal);
+       if (!err) {
+               err = jbd2_cleanup_journal_tail(journal);
+               if (err < 0) {
+                       mutex_unlock(&journal->j_checkpoint_mutex);
+                       goto out;
+               }
+               err = 0;
+       }
 
        /* Finally, mark the journal as really needing no recovery.
         * This sets s_start==0 in the underlying superblock, which is
@@ -1966,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
        J_ASSERT(journal->j_head == journal->j_tail);
        J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
        write_unlock(&journal->j_state_lock);
-       return 0;
+out:
+       return err;
 }
 
 /**
@@ -2330,7 +2345,7 @@ static int jbd2_journal_init_journal_head_cache(void)
        jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
                                sizeof(struct journal_head),
                                0,              /* offset */
-                               SLAB_TEMPORARY, /* flags */
+                               SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
                                NULL);          /* ctor */
        retval = 0;
        if (!jbd2_journal_head_cache) {
@@ -2362,10 +2377,8 @@ static struct journal_head *journal_alloc_journal_head(void)
        if (!ret) {
                jbd_debug(1, "out of memory for journal_head\n");
                pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
-               while (!ret) {
-                       yield();
-                       ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
-               }
+               ret = kmem_cache_zalloc(jbd2_journal_head_cache,
+                               GFP_NOFS | __GFP_NOFAIL);
        }
        return ret;
 }
index 14214da..0abf2e7 100644 (file)
@@ -141,11 +141,13 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
 {
        struct list_head *hash_list;
        struct jbd2_revoke_record_s *record;
+       gfp_t gfp_mask = GFP_NOFS;
 
-repeat:
-       record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
+       if (journal_oom_retry)
+               gfp_mask |= __GFP_NOFAIL;
+       record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
        if (!record)
-               goto oom;
+               return -ENOMEM;
 
        record->sequence = seq;
        record->blocknr = blocknr;
@@ -154,13 +156,6 @@ repeat:
        list_add(&record->hash, hash_list);
        spin_unlock(&journal->j_revoke_lock);
        return 0;
-
-oom:
-       if (!journal_oom_retry)
-               return -ENOMEM;
-       jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
-       yield();
-       goto repeat;
 }
 
 /* Find a revoke record in the journal's hash table. */
index ff2f2e6..cbe8b3a 100644 (file)
@@ -278,22 +278,16 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
 alloc_transaction:
        if (!journal->j_running_transaction) {
+               /*
+                * If __GFP_FS is not present, then we may be being called from
+                * inside the fs writeback layer, so we MUST NOT fail.
+                */
+               if ((gfp_mask & __GFP_FS) == 0)
+                       gfp_mask |= __GFP_NOFAIL;
                new_transaction = kmem_cache_zalloc(transaction_cache,
                                                    gfp_mask);
-               if (!new_transaction) {
-                       /*
-                        * If __GFP_FS is not present, then we may be
-                        * being called from inside the fs writeback
-                        * layer, so we MUST NOT fail.  Since
-                        * __GFP_NOFAIL is going away, we will arrange
-                        * to retry the allocation ourselves.
-                        */
-                       if ((gfp_mask & __GFP_FS) == 0) {
-                               congestion_wait(BLK_RW_ASYNC, HZ/50);
-                               goto alloc_transaction;
-                       }
+               if (!new_transaction)
                        return -ENOMEM;
-               }
        }
 
        jbd_debug(3, "New handle %p going live.\n", handle);
@@ -761,6 +755,30 @@ static void warn_dirty_buffer(struct buffer_head *bh)
               bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
+/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
+static void jbd2_freeze_jh_data(struct journal_head *jh)
+{
+       struct page *page;
+       int offset;
+       char *source;
+       struct buffer_head *bh = jh2bh(jh);
+
+       J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
+       page = bh->b_page;
+       offset = offset_in_page(bh->b_data);
+       source = kmap_atomic(page);
+       /* Fire data frozen trigger just before we copy the data */
+       jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
+       memcpy(jh->b_frozen_data, source + offset, bh->b_size);
+       kunmap_atomic(source);
+
+       /*
+        * Now that the frozen data is saved off, we need to store any matching
+        * triggers.
+        */
+       jh->b_frozen_triggers = jh->b_triggers;
+}
+
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do.  If it is already part of a prior
@@ -780,7 +798,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
        journal_t *journal;
        int error;
        char *frozen_buffer = NULL;
-       int need_copy = 0;
        unsigned long start_lock, time_lock;
 
        if (is_handle_aborted(handle))
@@ -866,6 +883,26 @@ repeat:
         */
        jh->b_modified = 0;
 
+       /*
+        * If the buffer is not journaled right now, we need to make sure it
+        * doesn't get written to disk before the caller actually commits the
+        * new data
+        */
+       if (!jh->b_transaction) {
+               JBUFFER_TRACE(jh, "no transaction");
+               J_ASSERT_JH(jh, !jh->b_next_transaction);
+               JBUFFER_TRACE(jh, "file as BJ_Reserved");
+               /*
+                * Make sure all stores to jh (b_modified, b_frozen_data) are
+                * visible before attaching it to the running transaction.
+                * Paired with barrier in jbd2_write_access_granted()
+                */
+               smp_wmb();
+               spin_lock(&journal->j_list_lock);
+               __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+               spin_unlock(&journal->j_list_lock);
+               goto done;
+       }
        /*
         * If there is already a copy-out version of this buffer, then we don't
         * need to make another one
@@ -873,113 +910,70 @@ repeat:
        if (jh->b_frozen_data) {
                JBUFFER_TRACE(jh, "has frozen data");
                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-               jh->b_next_transaction = transaction;
-               goto done;
+               goto attach_next;
        }
 
-       /* Is there data here we need to preserve? */
+       JBUFFER_TRACE(jh, "owned by older transaction");
+       J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
 
-       if (jh->b_transaction && jh->b_transaction != transaction) {
-               JBUFFER_TRACE(jh, "owned by older transaction");
-               J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-               J_ASSERT_JH(jh, jh->b_transaction ==
-                                       journal->j_committing_transaction);
+       /*
+        * There is one case we have to be very careful about.  If the
+        * committing transaction is currently writing this buffer out to disk
+        * and has NOT made a copy-out, then we cannot modify the buffer
+        * contents at all right now.  The essence of copy-out is that it is
+        * the extra copy, not the primary copy, which gets journaled.  If the
+        * primary copy is already going to disk then we cannot do copy-out
+        * here.
+        */
+       if (buffer_shadow(bh)) {
+               JBUFFER_TRACE(jh, "on shadow: sleep");
+               jbd_unlock_bh_state(bh);
+               wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
+               goto repeat;
+       }
 
-               /* There is one case we have to be very careful about.
-                * If the committing transaction is currently writing
-                * this buffer out to disk and has NOT made a copy-out,
-                * then we cannot modify the buffer contents at all
-                * right now.  The essence of copy-out is that it is the
-                * extra copy, not the primary copy, which gets
-                * journaled.  If the primary copy is already going to
-                * disk then we cannot do copy-out here. */
-
-               if (buffer_shadow(bh)) {
-                       JBUFFER_TRACE(jh, "on shadow: sleep");
+       /*
+        * Only do the copy if the currently-owning transaction still needs it.
+        * If buffer isn't on BJ_Metadata list, the committing transaction is
+        * past that stage (here we use the fact that BH_Shadow is set under
+        * bh_state lock together with refiling to BJ_Shadow list and at this
+        * point we know the buffer doesn't have BH_Shadow set).
+        *
+        * Subtle point, though: if this is a get_undo_access, then we will be
+        * relying on the frozen_data to contain the new value of the
+        * committed_data record after the transaction, so we HAVE to force the
+        * frozen_data copy in that case.
+        */
+       if (jh->b_jlist == BJ_Metadata || force_copy) {
+               JBUFFER_TRACE(jh, "generate frozen data");
+               if (!frozen_buffer) {
+                       JBUFFER_TRACE(jh, "allocate memory for buffer");
                        jbd_unlock_bh_state(bh);
-                       wait_on_bit_io(&bh->b_state, BH_Shadow,
-                                      TASK_UNINTERRUPTIBLE);
-                       goto repeat;
-               }
-
-               /*
-                * Only do the copy if the currently-owning transaction still
-                * needs it. If buffer isn't on BJ_Metadata list, the
-                * committing transaction is past that stage (here we use the
-                * fact that BH_Shadow is set under bh_state lock together with
-                * refiling to BJ_Shadow list and at this point we know the
-                * buffer doesn't have BH_Shadow set).
-                *
-                * Subtle point, though: if this is a get_undo_access,
-                * then we will be relying on the frozen_data to contain
-                * the new value of the committed_data record after the
-                * transaction, so we HAVE to force the frozen_data copy
-                * in that case.
-                */
-               if (jh->b_jlist == BJ_Metadata || force_copy) {
-                       JBUFFER_TRACE(jh, "generate frozen data");
+                       frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                        if (!frozen_buffer) {
-                               JBUFFER_TRACE(jh, "allocate memory for buffer");
-                               jbd_unlock_bh_state(bh);
-                               frozen_buffer =
-                                       jbd2_alloc(jh2bh(jh)->b_size,
-                                                        GFP_NOFS);
-                               if (!frozen_buffer) {
-                                       printk(KERN_ERR
-                                              "%s: OOM for frozen_buffer\n",
-                                              __func__);
-                                       JBUFFER_TRACE(jh, "oom!");
-                                       error = -ENOMEM;
-                                       jbd_lock_bh_state(bh);
-                                       goto done;
-                               }
-                               goto repeat;
+                               printk(KERN_ERR "%s: OOM for frozen_buffer\n",
+                                      __func__);
+                               JBUFFER_TRACE(jh, "oom!");
+                               error = -ENOMEM;
+                               goto out;
                        }
-                       jh->b_frozen_data = frozen_buffer;
-                       frozen_buffer = NULL;
-                       need_copy = 1;
+                       goto repeat;
                }
-               jh->b_next_transaction = transaction;
+               jh->b_frozen_data = frozen_buffer;
+               frozen_buffer = NULL;
+               jbd2_freeze_jh_data(jh);
        }
-
-
+attach_next:
        /*
-        * Finally, if the buffer is not journaled right now, we need to make
-        * sure it doesn't get written to disk before the caller actually
-        * commits the new data
+        * Make sure all stores to jh (b_modified, b_frozen_data) are visible
+        * before attaching it to the running transaction. Paired with barrier
+        * in jbd2_write_access_granted()
         */
-       if (!jh->b_transaction) {
-               JBUFFER_TRACE(jh, "no transaction");
-               J_ASSERT_JH(jh, !jh->b_next_transaction);
-               JBUFFER_TRACE(jh, "file as BJ_Reserved");
-               spin_lock(&journal->j_list_lock);
-               __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
-               spin_unlock(&journal->j_list_lock);
-       }
+       smp_wmb();
+       jh->b_next_transaction = transaction;
 
 done:
-       if (need_copy) {
-               struct page *page;
-               int offset;
-               char *source;
-
-               J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
-                           "Possible IO failure.\n");
-               page = jh2bh(jh)->b_page;
-               offset = offset_in_page(jh2bh(jh)->b_data);
-               source = kmap_atomic(page);
-               /* Fire data frozen trigger just before we copy the data */
-               jbd2_buffer_frozen_trigger(jh, source + offset,
-                                          jh->b_triggers);
-               memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-               kunmap_atomic(source);
-
-               /*
-                * Now that the frozen data is saved off, we need to store
-                * any matching triggers.
-                */
-               jh->b_frozen_triggers = jh->b_triggers;
-       }
        jbd_unlock_bh_state(bh);
 
        /*
@@ -996,6 +990,55 @@ out:
        return error;
 }
 
+/* Fast check whether buffer is already attached to the required transaction */
+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
+{
+       struct journal_head *jh;
+       bool ret = false;
+
+       /* Dirty buffers require special handling... */
+       if (buffer_dirty(bh))
+               return false;
+
+       /*
+        * RCU protects us from dereferencing freed pages. So the checks we do
+        * are guaranteed not to oops. However the jh slab object can get freed
+        * & reallocated while we work with it. So we have to be careful. When
+        * we see jh attached to the running transaction, we know it must stay
+        * so until the transaction is committed. Thus jh won't be freed and
+        * will be attached to the same bh while we run.  However it can
+        * happen jh gets freed, reallocated, and attached to the transaction
+        * just after we get pointer to it from bh. So we have to be careful
+        * and recheck jh still belongs to our bh before we return success.
+        */
+       rcu_read_lock();
+       if (!buffer_jbd(bh))
+               goto out;
+       /* This should be bh2jh() but that doesn't work with inline functions */
+       jh = READ_ONCE(bh->b_private);
+       if (!jh)
+               goto out;
+       if (jh->b_transaction != handle->h_transaction &&
+           jh->b_next_transaction != handle->h_transaction)
+               goto out;
+       /*
+        * There are two reasons for the barrier here:
+        * 1) Make sure to fetch b_bh after we did previous checks so that we
+        * detect when jh went through free, realloc, attach to transaction
+        * while we were checking. Paired with implicit barrier in that path.
+        * 2) So that access to bh done after jbd2_write_access_granted()
+        * doesn't get reordered and see inconsistent state of concurrent
+        * do_get_write_access().
+        */
+       smp_mb();
+       if (unlikely(jh->b_bh != bh))
+               goto out;
+       ret = true;
+out:
+       rcu_read_unlock();
+       return ret;
+}
+
 /**
  * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
@@ -1009,9 +1052,13 @@ out:
 
 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
-       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+       struct journal_head *jh;
        int rc;
 
+       if (jbd2_write_access_granted(handle, bh))
+               return 0;
+
+       jh = jbd2_journal_add_journal_head(bh);
        /* We do not want to get caught playing with fields which the
         * log thread also manipulates.  Make sure that the buffer
         * completes any outstanding IO before proceeding. */
@@ -1141,11 +1188,14 @@ out:
 int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
        int err;
-       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+       struct journal_head *jh;
        char *committed_data = NULL;
 
        JBUFFER_TRACE(jh, "entry");
+       if (jbd2_write_access_granted(handle, bh))
+               return 0;
 
+       jh = jbd2_journal_add_journal_head(bh);
        /*
         * Do this first --- it can drop the journal lock, so we want to
         * make sure that obtaining the committed_data is done
@@ -1230,8 +1280,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
        triggers->t_abort(triggers, jh2bh(jh));
 }
 
-
-
 /**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1264,12 +1312,36 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
        if (is_handle_aborted(handle))
                return -EROFS;
-       journal = transaction->t_journal;
-       jh = jbd2_journal_grab_journal_head(bh);
-       if (!jh) {
+       if (!buffer_jbd(bh)) {
                ret = -EUCLEAN;
                goto out;
        }
+       /*
+        * We don't grab jh reference here since the buffer must be part
+        * of the running transaction.
+        */
+       jh = bh2jh(bh);
+       J_ASSERT_JH(jh, jh->b_transaction == transaction ||
+                       jh->b_next_transaction == transaction);
+       if (jh->b_modified == 1) {
+               /*
+                * If it's in our transaction it must be in BJ_Metadata list.
+                * The assertion is unreliable since we may see jh in
+                * inconsistent state unless we grab bh_state lock. But this
+                * is crucial to catch bugs so let's do a reliable check until
+                * the lockless handling is fully proven.
+                */
+               if (jh->b_transaction == transaction &&
+                   jh->b_jlist != BJ_Metadata) {
+                       jbd_lock_bh_state(bh);
+                       J_ASSERT_JH(jh, jh->b_transaction != transaction ||
+                                       jh->b_jlist == BJ_Metadata);
+                       jbd_unlock_bh_state(bh);
+               }
+               goto out;
+       }
+
+       journal = transaction->t_journal;
        jbd_debug(5, "journal_head %p\n", jh);
        JBUFFER_TRACE(jh, "entry");
 
@@ -1360,7 +1432,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
        jbd_unlock_bh_state(bh);
-       jbd2_journal_put_journal_head(jh);
 out:
        JBUFFER_TRACE(jh, "exit");
        return ret;
index 20e7f78..edb640a 100644 (file)
@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
 int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
                              unsigned long *block);
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 
 /* Commit management */
@@ -1157,7 +1157,7 @@ extern int           jbd2_journal_recover    (journal_t *journal);
 extern int        jbd2_journal_wipe       (journal_t *, int);
 extern int        jbd2_journal_skip_recovery   (journal_t *);
 extern void       jbd2_journal_update_sb_errno(journal_t *);
-extern void       jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
+extern int        jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
                                unsigned long, int);
 extern void       __jbd2_journal_abort_hard    (journal_t *);
 extern void       jbd2_journal_abort      (journal_t *, int);
index 08ec3dd..594b4b2 100644 (file)
@@ -1185,15 +1185,14 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 );
 
 TRACE_EVENT(ext4_da_reserve_space,
-       TP_PROTO(struct inode *inode, int md_needed),
+       TP_PROTO(struct inode *inode),
 
-       TP_ARGS(inode, md_needed),
+       TP_ARGS(inode),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u64,  i_blocks                )
-               __field(        int,    md_needed               )
                __field(        int,    reserved_data_blocks    )
                __field(        int,    reserved_meta_blocks    )
                __field(        __u16,  mode                    )
@@ -1203,18 +1202,17 @@ TRACE_EVENT(ext4_da_reserve_space,
                __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->i_blocks = inode->i_blocks;
-               __entry->md_needed = md_needed;
                __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
                __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
                __entry->mode   = inode->i_mode;
        ),
 
-       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
+       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
                  "reserved_data_blocks %d reserved_meta_blocks %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->mode, __entry->i_blocks,
-                 __entry->md_needed, __entry->reserved_data_blocks,
+                 __entry->reserved_data_blocks,
                  __entry->reserved_meta_blocks)
 );
 
@@ -2478,6 +2476,31 @@ TRACE_EVENT(ext4_collapse_range,
                  __entry->offset, __entry->len)
 );
 
+TRACE_EVENT(ext4_insert_range,
+       TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
+
+       TP_ARGS(inode, offset, len),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(ino_t,  ino)
+               __field(loff_t, offset)
+               __field(loff_t, len)
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->offset = offset;
+               __entry->len    = len;
+       ),
+
+       TP_printk("dev %d,%d ino %lu offset %lld len %lld",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->offset, __entry->len)
+);
+
 TRACE_EVENT(ext4_es_shrink,
        TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
                 int nr_skipped, int retried),