Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 21:04:16 +0000 (14:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 21:04:16 +0000 (14:04 -0700)
Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.9:

  API:
   - The crypto engine code now supports hashes.

  Algorithms:
   - Allow keys >= 2048 bits in FIPS mode for RSA.

  Drivers:
   - Memory overwrite fix for vmx ghash.
   - Add support for building ARM sha1-neon in Thumb2 mode.
   - Reenable ARM ghash-ce code by adding import/export.
   - Reenable img-hash by adding import/export.
   - Add support for multiple cores in omap-aes.
   - Add little-endian support for sha1-powerpc.
   - Add Cavium HWRNG driver for ThunderX SoC"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (137 commits)
  crypto: caam - treat SGT address pointer as u64
  crypto: ccp - Make syslog errors human-readable
  crypto: ccp - clean up data structure
  crypto: vmx - Ensure ghash-generic is enabled
  crypto: testmgr - add guard to dst buffer for ahash_export
  crypto: caam - Unmap region obtained by of_iomap
  crypto: sha1-powerpc - little-endian support
  crypto: gcm - Fix IV buffer size in crypto_gcm_setkey
  crypto: vmx - Fix memory corruption caused by p8_ghash
  crypto: ghash-generic - move common definitions to a new header file
  crypto: caam - fix sg dump
  hwrng: omap - Only fail if pm_runtime_get_sync returns < 0
  crypto: omap-sham - shrink the internal buffer size
  crypto: omap-sham - add support for export/import
  crypto: omap-sham - convert driver logic to use sgs for data xmit
  crypto: omap-sham - change the DMA threshold value to a define
  crypto: omap-sham - add support functions for sg based data handling
  crypto: omap-sham - rename sgl to sgl_tmp for deprecation
  crypto: omap-sham - align algorithms on word offset
  crypto: omap-sham - add context export/import stubs
  ...

77 files changed:
Documentation/DocBook/crypto-API.tmpl
arch/arm/crypto/ghash-ce-glue.c
arch/arm/crypto/sha1-armv7-neon.S
arch/powerpc/crypto/sha1-powerpc-asm.S
crypto/algif_hash.c
crypto/crct10dif_generic.c
crypto/crypto_engine.c
crypto/drbg.c
crypto/gcm.c
crypto/ghash-generic.c
crypto/mcryptd.c
crypto/rsa_helper.c
crypto/testmgr.c
crypto/testmgr.h
crypto/xor.c
crypto/xts.c
drivers/char/hw_random/Kconfig
drivers/char/hw_random/Makefile
drivers/char/hw_random/amd-rng.c
drivers/char/hw_random/bcm2835-rng.c
drivers/char/hw_random/cavium-rng-vf.c [new file with mode: 0644]
drivers/char/hw_random/cavium-rng.c [new file with mode: 0644]
drivers/char/hw_random/core.c
drivers/char/hw_random/geode-rng.c
drivers/char/hw_random/meson-rng.c
drivers/char/hw_random/omap-rng.c
drivers/char/hw_random/omap3-rom-rng.c
drivers/char/hw_random/pasemi-rng.c
drivers/char/hw_random/pic32-rng.c
drivers/char/hw_random/st-rng.c
drivers/char/hw_random/tx4939-rng.c
drivers/crypto/Kconfig
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc.h
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/intern.h
drivers/crypto/caam/jr.c
drivers/crypto/caam/regs.h
drivers/crypto/caam/sg_sw_sec4.h
drivers/crypto/ccp/Makefile
drivers/crypto/ccp/ccp-crypto-sha.c
drivers/crypto/ccp/ccp-dev-v3.c
drivers/crypto/ccp/ccp-dev-v5.c [new file with mode: 0644]
drivers/crypto/ccp/ccp-dev.c
drivers/crypto/ccp/ccp-dev.h
drivers/crypto/ccp/ccp-dmaengine.c
drivers/crypto/ccp/ccp-ops.c
drivers/crypto/ccp/ccp-pci.c
drivers/crypto/hifn_795x.c
drivers/crypto/img-hash.c
drivers/crypto/ixp4xx_crypto.c
drivers/crypto/marvell/cesa.c
drivers/crypto/marvell/hash.c
drivers/crypto/marvell/tdma.c
drivers/crypto/mv_cesa.c
drivers/crypto/mxc-scc.c
drivers/crypto/omap-aes.c
drivers/crypto/omap-des.c
drivers/crypto/omap-sham.c
drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
drivers/crypto/qat/qat_common/adf_admin.c
drivers/crypto/qat/qat_common/qat_uclo.c
drivers/crypto/rockchip/rk3288_crypto.c
drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
drivers/crypto/sunxi-ss/sun4i-ss-core.c
drivers/crypto/sunxi-ss/sun4i-ss-hash.c
drivers/crypto/sunxi-ss/sun4i-ss.h
drivers/crypto/vmx/Kconfig
drivers/crypto/vmx/ghash.c
drivers/pci/quirks.c
include/crypto/algapi.h
include/crypto/engine.h [new file with mode: 0644]
include/crypto/ghash.h [new file with mode: 0644]
include/linux/ccp.h
include/linux/hw_random.h

index fb2a152..088b79c 100644 (file)
@@ -797,7 +797,8 @@ kernel crypto API            |       Caller
      include/linux/crypto.h and their definition can be seen below.
      The former function registers a single transformation, while
      the latter works on an array of transformation descriptions.
-     The latter is useful when registering transformations in bulk.
+     The latter is useful when registering transformations in bulk,
+     for example when a driver implements multiple transformations.
     </para>
 
     <programlisting>
@@ -822,18 +823,31 @@ kernel crypto API            |       Caller
     </para>
 
     <para>
-     The bulk registration / unregistration functions require
-     that struct crypto_alg is an array of count size. These
-     functions simply loop over that array and register /
-     unregister each individual algorithm. If an error occurs,
-     the loop is terminated at the offending algorithm definition.
-     That means, the algorithms prior to the offending algorithm
-     are successfully registered. Note, the caller has no way of
-     knowing which cipher implementations have successfully
-     registered. If this is important to know, the caller should
-     loop through the different implementations using the single
-     instance *_alg functions for each individual implementation.
+     The bulk registration/unregistration functions
+     register/unregister each transformation in the given array of
+     length count.  They handle errors as follows:
     </para>
+    <itemizedlist>
+     <listitem>
+      <para>
+       crypto_register_algs() succeeds if and only if it
+       successfully registers all the given transformations. If an
+       error occurs partway through, then it rolls back successful
+       registrations before returning the error code. Note that if
+       a driver needs to handle registration errors for individual
+       transformations, then it will need to use the non-bulk
+       function crypto_register_alg() instead.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       crypto_unregister_algs() tries to unregister all the given
+       transformations, continuing on error. It logs errors and
+       always returns zero.
+      </para>
+     </listitem>
+    </itemizedlist>
+
    </sect1>
 
    <sect1><title>Single-Block Symmetric Ciphers [CIPHER]</title>
index 1568cb5..7546b3c 100644 (file)
@@ -138,7 +138,7 @@ static struct shash_alg ghash_alg = {
        .setkey                 = ghash_setkey,
        .descsize               = sizeof(struct ghash_desc_ctx),
        .base                   = {
-               .cra_name       = "ghash",
+               .cra_name       = "__ghash",
                .cra_driver_name = "__driver-ghash-ce",
                .cra_priority   = 0,
                .cra_flags      = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
@@ -220,6 +220,27 @@ static int ghash_async_digest(struct ahash_request *req)
        }
 }
 
+static int ghash_async_import(struct ahash_request *req, const void *in)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+       desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
+       desc->flags = req->base.flags;
+
+       return crypto_shash_import(desc, in);
+}
+
+static int ghash_async_export(struct ahash_request *req, void *out)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+       return crypto_shash_export(desc, out);
+}
+
 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
                              unsigned int keylen)
 {
@@ -268,7 +289,10 @@ static struct ahash_alg ghash_async_alg = {
        .final                  = ghash_async_final,
        .setkey                 = ghash_async_setkey,
        .digest                 = ghash_async_digest,
+       .import                 = ghash_async_import,
+       .export                 = ghash_async_export,
        .halg.digestsize        = GHASH_DIGEST_SIZE,
+       .halg.statesize         = sizeof(struct ghash_desc_ctx),
        .halg.base              = {
                .cra_name       = "ghash",
                .cra_driver_name = "ghash-ce",
index dcd01f3..2468fad 100644 (file)
@@ -12,7 +12,6 @@
 #include <asm/assembler.h>
 
 .syntax unified
-.code   32
 .fpu neon
 
 .text
index 125e165..82ddc9b 100644 (file)
@@ -7,6 +7,15 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 
+#ifdef __BIG_ENDIAN__
+#define LWZ(rt, d, ra) \
+       lwz     rt,d(ra)
+#else
+#define LWZ(rt, d, ra) \
+       li      rt,d;   \
+       lwbrx   rt,rt,ra
+#endif
+
 /*
  * We roll the registers for T, A, B, C, D, E around on each
  * iteration; T on iteration t is A on iteration t+1, and so on.
@@ -23,7 +32,7 @@
 #define W(t)   (((t)%16)+16)
 
 #define LOADW(t)                               \
-       lwz     W(t),(t)*4(r4)
+       LWZ(W(t),(t)*4,r4)
 
 #define STEPD0_LOAD(t)                         \
        andc    r0,RD(t),RB(t);         \
@@ -33,7 +42,7 @@
        add     r0,RE(t),r15;                   \
        add     RT(t),RT(t),r6;         \
        add     r14,r0,W(t);                    \
-       lwz     W((t)+4),((t)+4)*4(r4); \
+       LWZ(W((t)+4),((t)+4)*4,r4);     \
        rotlwi  RB(t),RB(t),30;                 \
        add     RT(t),RT(t),r14
 
index 68a5cea..2d8466f 100644 (file)
@@ -39,6 +39,37 @@ struct algif_hash_tfm {
        bool has_key;
 };
 
+static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx)
+{
+       unsigned ds;
+
+       if (ctx->result)
+               return 0;
+
+       ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+       ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
+       if (!ctx->result)
+               return -ENOMEM;
+
+       memset(ctx->result, 0, ds);
+
+       return 0;
+}
+
+static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
+{
+       unsigned ds;
+
+       if (!ctx->result)
+               return;
+
+       ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+       sock_kzfree_s(sk, ctx->result, ds);
+       ctx->result = NULL;
+}
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
@@ -54,6 +85,9 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        if (!ctx->more) {
+               if ((msg->msg_flags & MSG_MORE))
+                       hash_free_result(sk, ctx);
+
                err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
                                                &ctx->completion);
                if (err)
@@ -90,6 +124,10 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        ctx->more = msg->msg_flags & MSG_MORE;
        if (!ctx->more) {
+               err = hash_alloc_result(sk, ctx);
+               if (err)
+                       goto unlock;
+
                ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
                err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
                                                 &ctx->completion);
@@ -116,6 +154,13 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        sg_init_table(ctx->sgl.sg, 1);
        sg_set_page(ctx->sgl.sg, page, size, offset);
 
+       if (!(flags & MSG_MORE)) {
+               err = hash_alloc_result(sk, ctx);
+               if (err)
+                       goto unlock;
+       } else if (!ctx->more)
+               hash_free_result(sk, ctx);
+
        ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
 
        if (!(flags & MSG_MORE)) {
@@ -153,6 +198,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
        struct alg_sock *ask = alg_sk(sk);
        struct hash_ctx *ctx = ask->private;
        unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+       bool result;
        int err;
 
        if (len > ds)
@@ -161,17 +207,29 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                msg->msg_flags |= MSG_TRUNC;
 
        lock_sock(sk);
+       result = ctx->result;
+       err = hash_alloc_result(sk, ctx);
+       if (err)
+               goto unlock;
+
+       ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
+
        if (ctx->more) {
                ctx->more = 0;
-               ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
                err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
                                                 &ctx->completion);
                if (err)
                        goto unlock;
+       } else if (!result) {
+               err = af_alg_wait_for_completion(
+                               crypto_ahash_digest(&ctx->req),
+                               &ctx->completion);
        }
 
        err = memcpy_to_msg(msg, ctx->result, len);
 
+       hash_free_result(sk, ctx);
+
 unlock:
        release_sock(sk);
 
@@ -394,8 +452,7 @@ static void hash_sock_destruct(struct sock *sk)
        struct alg_sock *ask = alg_sk(sk);
        struct hash_ctx *ctx = ask->private;
 
-       sock_kzfree_s(sk, ctx->result,
-                     crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)));
+       hash_free_result(sk, ctx);
        sock_kfree_s(sk, ctx, ctx->len);
        af_alg_release_parent(sk);
 }
@@ -407,20 +464,12 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
        struct algif_hash_tfm *tfm = private;
        struct crypto_ahash *hash = tfm->hash;
        unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
-       unsigned ds = crypto_ahash_digestsize(hash);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
 
-       ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
-       if (!ctx->result) {
-               sock_kfree_s(sk, ctx, len);
-               return -ENOMEM;
-       }
-
-       memset(ctx->result, 0, ds);
-
+       ctx->result = NULL;
        ctx->len = len;
        ctx->more = 0;
        af_alg_init_completion(&ctx->completion);
index c122961..8e94e29 100644 (file)
@@ -107,10 +107,7 @@ static struct shash_alg alg = {
 
 static int __init crct10dif_mod_init(void)
 {
-       int ret;
-
-       ret = crypto_register_shash(&alg);
-       return ret;
+       return crypto_register_shash(&alg);
 }
 
 static void __exit crct10dif_mod_fini(void)
index a55c82d..bfb92ac 100644 (file)
 
 #include <linux/err.h>
 #include <linux/delay.h>
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
-void crypto_finalize_request(struct crypto_engine *engine,
-                            struct ablkcipher_request *req, int err);
-
 /**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
@@ -34,10 +33,11 @@ static void crypto_pump_requests(struct crypto_engine *engine,
                                 bool in_kthread)
 {
        struct crypto_async_request *async_req, *backlog;
-       struct ablkcipher_request *req;
+       struct ahash_request *hreq;
+       struct ablkcipher_request *breq;
        unsigned long flags;
        bool was_busy = false;
-       int ret;
+       int ret, rtype;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
 
@@ -82,9 +82,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
        if (!async_req)
                goto out;
 
-       req = ablkcipher_request_cast(async_req);
-
-       engine->cur_req = req;
+       engine->cur_req = async_req;
        if (backlog)
                backlog->complete(backlog, -EINPROGRESS);
 
@@ -95,6 +93,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
        spin_unlock_irqrestore(&engine->queue_lock, flags);
 
+       rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
        /* Until here we get the request need to be encrypted successfully */
        if (!was_busy && engine->prepare_crypt_hardware) {
                ret = engine->prepare_crypt_hardware(engine);
@@ -104,24 +103,55 @@ static void crypto_pump_requests(struct crypto_engine *engine,
                }
        }
 
-       if (engine->prepare_request) {
-               ret = engine->prepare_request(engine, engine->cur_req);
+       switch (rtype) {
+       case CRYPTO_ALG_TYPE_AHASH:
+               hreq = ahash_request_cast(engine->cur_req);
+               if (engine->prepare_hash_request) {
+                       ret = engine->prepare_hash_request(engine, hreq);
+                       if (ret) {
+                               pr_err("failed to prepare request: %d\n", ret);
+                               goto req_err;
+                       }
+                       engine->cur_req_prepared = true;
+               }
+               ret = engine->hash_one_request(engine, hreq);
                if (ret) {
-                       pr_err("failed to prepare request: %d\n", ret);
+                       pr_err("failed to hash one request from queue\n");
                        goto req_err;
                }
-               engine->cur_req_prepared = true;
-       }
-
-       ret = engine->crypt_one_request(engine, engine->cur_req);
-       if (ret) {
-               pr_err("failed to crypt one request from queue\n");
-               goto req_err;
+               return;
+       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               breq = ablkcipher_request_cast(engine->cur_req);
+               if (engine->prepare_cipher_request) {
+                       ret = engine->prepare_cipher_request(engine, breq);
+                       if (ret) {
+                               pr_err("failed to prepare request: %d\n", ret);
+                               goto req_err;
+                       }
+                       engine->cur_req_prepared = true;
+               }
+               ret = engine->cipher_one_request(engine, breq);
+               if (ret) {
+                       pr_err("failed to cipher one request from queue\n");
+                       goto req_err;
+               }
+               return;
+       default:
+               pr_err("failed to prepare request of unknown type\n");
+               return;
        }
-       return;
 
 req_err:
-       crypto_finalize_request(engine, engine->cur_req, ret);
+       switch (rtype) {
+       case CRYPTO_ALG_TYPE_AHASH:
+               hreq = ahash_request_cast(engine->cur_req);
+               crypto_finalize_hash_request(engine, hreq, ret);
+               break;
+       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               breq = ablkcipher_request_cast(engine->cur_req);
+               crypto_finalize_cipher_request(engine, breq, ret);
+               break;
+       }
        return;
 
 out:
@@ -137,12 +167,14 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 
 /**
- * crypto_transfer_request - transfer the new request into the engine queue
+ * crypto_transfer_cipher_request - transfer the new request into the
+ * enginequeue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_request(struct crypto_engine *engine,
-                           struct ablkcipher_request *req, bool need_pump)
+int crypto_transfer_cipher_request(struct crypto_engine *engine,
+                                  struct ablkcipher_request *req,
+                                  bool need_pump)
 {
        unsigned long flags;
        int ret;
@@ -162,46 +194,125 @@ int crypto_transfer_request(struct crypto_engine *engine,
        spin_unlock_irqrestore(&engine->queue_lock, flags);
        return ret;
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
+
+/**
+ * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
+                                            struct ablkcipher_request *req)
+{
+       return crypto_transfer_cipher_request(engine, req, true);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
+
+/**
+ * crypto_transfer_hash_request - transfer the new request into the
+ * enginequeue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_hash_request(struct crypto_engine *engine,
+                                struct ahash_request *req, bool need_pump)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&engine->queue_lock, flags);
+
+       if (!engine->running) {
+               spin_unlock_irqrestore(&engine->queue_lock, flags);
+               return -ESHUTDOWN;
+       }
+
+       ret = ahash_enqueue_request(&engine->queue, req);
+
+       if (!engine->busy && need_pump)
+               queue_kthread_work(&engine->kworker, &engine->pump_requests);
+
+       spin_unlock_irqrestore(&engine->queue_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
 
 /**
- * crypto_transfer_request_to_engine - transfer one request to list into the
- * engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one request to list
+ * into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_request_to_engine(struct crypto_engine *engine,
-                                     struct ablkcipher_request *req)
+int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
+                                          struct ahash_request *req)
 {
-       return crypto_transfer_request(engine, req, true);
+       return crypto_transfer_hash_request(engine, req, true);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request_to_engine);
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 /**
- * crypto_finalize_request - finalize one request if the request is done
+ * crypto_finalize_cipher_request - finalize one request if the request is done
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @err: error number
  */
-void crypto_finalize_request(struct crypto_engine *engine,
-                            struct ablkcipher_request *req, int err)
+void crypto_finalize_cipher_request(struct crypto_engine *engine,
+                                   struct ablkcipher_request *req, int err)
 {
        unsigned long flags;
        bool finalize_cur_req = false;
        int ret;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
-       if (engine->cur_req == req)
+       if (engine->cur_req == &req->base)
                finalize_cur_req = true;
        spin_unlock_irqrestore(&engine->queue_lock, flags);
 
        if (finalize_cur_req) {
-               if (engine->cur_req_prepared && engine->unprepare_request) {
-                       ret = engine->unprepare_request(engine, req);
+               if (engine->cur_req_prepared &&
+                   engine->unprepare_cipher_request) {
+                       ret = engine->unprepare_cipher_request(engine, req);
                        if (ret)
                                pr_err("failed to unprepare request\n");
                }
+               spin_lock_irqsave(&engine->queue_lock, flags);
+               engine->cur_req = NULL;
+               engine->cur_req_prepared = false;
+               spin_unlock_irqrestore(&engine->queue_lock, flags);
+       }
+
+       req->base.complete(&req->base, err);
 
+       queue_kthread_work(&engine->kworker, &engine->pump_requests);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+
+/**
+ * crypto_finalize_hash_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_hash_request(struct crypto_engine *engine,
+                                 struct ahash_request *req, int err)
+{
+       unsigned long flags;
+       bool finalize_cur_req = false;
+       int ret;
+
+       spin_lock_irqsave(&engine->queue_lock, flags);
+       if (engine->cur_req == &req->base)
+               finalize_cur_req = true;
+       spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+       if (finalize_cur_req) {
+               if (engine->cur_req_prepared &&
+                   engine->unprepare_hash_request) {
+                       ret = engine->unprepare_hash_request(engine, req);
+                       if (ret)
+                               pr_err("failed to unprepare request\n");
+               }
                spin_lock_irqsave(&engine->queue_lock, flags);
                engine->cur_req = NULL;
                engine->cur_req_prepared = false;
@@ -212,7 +323,7 @@ void crypto_finalize_request(struct crypto_engine *engine,
 
        queue_kthread_work(&engine->kworker, &engine->pump_requests);
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
 /**
  * crypto_engine_start - start the hardware engine
@@ -249,7 +360,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_start);
 int crypto_engine_stop(struct crypto_engine *engine)
 {
        unsigned long flags;
-       unsigned limit = 500;
+       unsigned int limit = 500;
        int ret = 0;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
index f752da3..fb33f7d 100644 (file)
@@ -1178,12 +1178,16 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
                goto err;
 
        drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-       if (!drbg->Vbuf)
+       if (!drbg->Vbuf) {
+               ret = -ENOMEM;
                goto fini;
+       }
        drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1);
        drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-       if (!drbg->Cbuf)
+       if (!drbg->Cbuf) {
+               ret = -ENOMEM;
                goto fini;
+       }
        drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1);
        /* scratchpad is only generated for CTR and Hash */
        if (drbg->core->flags & DRBG_HMAC)
@@ -1199,8 +1203,10 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 
        if (0 < sb_size) {
                drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL);
-               if (!drbg->scratchpadbuf)
+               if (!drbg->scratchpadbuf) {
+                       ret = -ENOMEM;
                        goto fini;
+               }
                drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1);
        }
 
@@ -1917,6 +1923,8 @@ static inline int __init drbg_healthcheck_sanity(void)
                return -ENOMEM;
 
        mutex_init(&drbg->drbg_mutex);
+       drbg->core = &drbg_cores[coreref];
+       drbg->reseed_threshold = drbg_max_requests(drbg);
 
        /*
         * if the following tests fail, it is likely that there is a buffer
@@ -1926,12 +1934,6 @@ static inline int __init drbg_healthcheck_sanity(void)
         * grave bug.
         */
 
-       /* get a valid instance of DRBG for following tests */
-       ret = drbg_instantiate(drbg, NULL, coreref, pr);
-       if (ret) {
-               rc = ret;
-               goto outbuf;
-       }
        max_addtllen = drbg_max_addtl(drbg);
        max_request_bytes = drbg_max_request_bytes(drbg);
        drbg_string_fill(&addtl, buf, max_addtllen + 1);
@@ -1941,10 +1943,9 @@ static inline int __init drbg_healthcheck_sanity(void)
        /* overflow max_bits */
        len = drbg_generate(drbg, buf, (max_request_bytes + 1), NULL);
        BUG_ON(0 < len);
-       drbg_uninstantiate(drbg);
 
        /* overflow max addtllen with personalization string */
-       ret = drbg_instantiate(drbg, &addtl, coreref, pr);
+       ret = drbg_seed(drbg, &addtl, false);
        BUG_ON(0 == ret);
        /* all tests passed */
        rc = 0;
@@ -1952,9 +1953,7 @@ static inline int __init drbg_healthcheck_sanity(void)
        pr_devel("DRBG: Sanity tests for failure code paths successfully "
                 "completed\n");
 
-       drbg_uninstantiate(drbg);
-outbuf:
-       kzfree(drbg);
+       kfree(drbg);
        return rc;
 }
 
@@ -2006,7 +2005,7 @@ static int __init drbg_init(void)
 {
        unsigned int i = 0; /* pointer to drbg_algs */
        unsigned int j = 0; /* pointer to drbg_cores */
-       int ret = -EFAULT;
+       int ret;
 
        ret = drbg_healthcheck_sanity();
        if (ret)
@@ -2016,7 +2015,7 @@ static int __init drbg_init(void)
                pr_info("DRBG: Cannot register all DRBG types"
                        "(slots needed: %zu, slots available: %zu)\n",
                        ARRAY_SIZE(drbg_cores) * 2, ARRAY_SIZE(drbg_algs));
-               return ret;
+               return -EFAULT;
        }
 
        /*
index 70a892e..f624ac9 100644 (file)
@@ -117,7 +117,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
        struct crypto_skcipher *ctr = ctx->ctr;
        struct {
                be128 hash;
-               u8 iv[8];
+               u8 iv[16];
 
                struct crypto_gcm_setkey_result result;
 
index bac7099..12ad3e3 100644 (file)
 
 #include <crypto/algapi.h>
 #include <crypto/gf128mul.h>
+#include <crypto/ghash.h>
 #include <crypto/internal/hash.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#define GHASH_BLOCK_SIZE       16
-#define GHASH_DIGEST_SIZE      16
-
-struct ghash_ctx {
-       struct gf128mul_4k *gf128;
-};
-
-struct ghash_desc_ctx {
-       u8 buffer[GHASH_BLOCK_SIZE];
-       u32 bytes;
-};
-
 static int ghash_init(struct shash_desc *desc)
 {
        struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
index 86fb59b..94ee44a 100644 (file)
@@ -612,12 +612,7 @@ EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
 int ahash_mcryptd_digest(struct ahash_request *desc)
 {
-       int err;
-
-       err = crypto_ahash_init(desc) ?:
-             ahash_mcryptd_finup(desc);
-
-       return err;
+       return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
 }
 
 int ahash_mcryptd_update(struct ahash_request *desc)
index 4df6451..0b66dc8 100644 (file)
@@ -35,8 +35,8 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
                        n_sz--;
                }
 
-               /* In FIPS mode only allow key size 2K & 3K */
-               if (n_sz != 256 && n_sz != 384) {
+               /* In FIPS mode only allow key size 2K and higher */
+               if (n_sz < 256) {
                        pr_err("RSA: key size not allowed in FIPS mode\n");
                        return -EINVAL;
                }
index 5c9d5a5..62dffa0 100644 (file)
@@ -209,16 +209,19 @@ static int ahash_partial_update(struct ahash_request **preq,
        char *state;
        struct ahash_request *req;
        int statesize, ret = -EINVAL;
+       const char guard[] = { 0x00, 0xba, 0xad, 0x00 };
 
        req = *preq;
        statesize = crypto_ahash_statesize(
                        crypto_ahash_reqtfm(req));
-       state = kmalloc(statesize, GFP_KERNEL);
+       state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
        if (!state) {
                pr_err("alt: hash: Failed to alloc state for %s\n", algo);
                goto out_nostate;
        }
+       memcpy(state + statesize, guard, sizeof(guard));
        ret = crypto_ahash_export(req, state);
+       WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
        if (ret) {
                pr_err("alt: hash: Failed to export() for %s\n", algo);
                goto out;
@@ -665,7 +668,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
                memcpy(key, template[i].key, template[i].klen);
 
                ret = crypto_aead_setkey(tfm, key, template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n",
                               d, j, algo, crypto_aead_get_flags(tfm));
                        goto out;
@@ -770,7 +773,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
                memcpy(key, template[i].key, template[i].klen);
 
                ret = crypto_aead_setkey(tfm, key, template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n",
                               d, j, algo, crypto_aead_get_flags(tfm));
                        goto out;
@@ -1008,6 +1011,9 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
                if (template[i].np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                j++;
 
                ret = -EINVAL;
@@ -1023,7 +1029,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 
                ret = crypto_cipher_setkey(tfm, template[i].key,
                                           template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        printk(KERN_ERR "alg: cipher: setkey failed "
                               "on test %d for %s: flags=%x\n", j,
                               algo, crypto_cipher_get_flags(tfm));
@@ -1112,6 +1118,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
                if (template[i].np && !template[i].also_non_np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                if (template[i].iv)
                        memcpy(iv, template[i].iv, ivsize);
                else
@@ -1133,7 +1142,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
                ret = crypto_skcipher_setkey(tfm, template[i].key,
                                             template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n",
                               d, j, algo, crypto_skcipher_get_flags(tfm));
                        goto out;
@@ -1198,6 +1207,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
                if (!template[i].np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                if (template[i].iv)
                        memcpy(iv, template[i].iv, ivsize);
                else
@@ -1211,7 +1223,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
                ret = crypto_skcipher_setkey(tfm, template[i].key,
                                             template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n",
                               d, j, algo, crypto_skcipher_get_flags(tfm));
                        goto out;
index acb6bbf..e64a4ef 100644 (file)
@@ -59,6 +59,7 @@ struct hash_testvec {
  * @tap:       How to distribute data in @np SGs
  * @also_non_np:       if set to 1, the test will be also done without
  *                     splitting data in @np SGs
+ * @fips_skip: Skip the test vector in FIPS mode
  */
 
 struct cipher_testvec {
@@ -75,6 +76,7 @@ struct cipher_testvec {
        unsigned char klen;
        unsigned short ilen;
        unsigned short rlen;
+       bool fips_skip;
 };
 
 struct aead_testvec {
@@ -18224,6 +18226,7 @@ static struct cipher_testvec aes_xts_enc_tv_template[] = {
                          "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .klen   = 32,
+               .fips_skip = 1,
                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .input  = "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -18566,6 +18569,7 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = {
                          "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .klen   = 32,
+               .fips_skip = 1,
                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .input = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
index 35d6b3a..263af9f 100644 (file)
 #include <linux/preempt.h>
 #include <asm/xor.h>
 
+#ifndef XOR_SELECT_TEMPLATE
+#define XOR_SELECT_TEMPLATE(x) (x)
+#endif
+
 /* The xor routines to use.  */
 static struct xor_block_template *active_template;
 
@@ -109,6 +113,15 @@ calibrate_xor_blocks(void)
        void *b1, *b2;
        struct xor_block_template *f, *fastest;
 
+       fastest = XOR_SELECT_TEMPLATE(NULL);
+
+       if (fastest) {
+               printk(KERN_INFO "xor: automatically using best "
+                                "checksumming function   %-10s\n",
+                      fastest->name);
+               goto out;
+       }
+
        /*
         * Note: Since the memory is not actually used for _anything_ but to
         * test the XOR speed, we don't really want kmemcheck to warn about
@@ -126,36 +139,22 @@ calibrate_xor_blocks(void)
         * all the possible functions, just test the best one
         */
 
-       fastest = NULL;
-
-#ifdef XOR_SELECT_TEMPLATE
-               fastest = XOR_SELECT_TEMPLATE(fastest);
-#endif
-
 #define xor_speed(templ)       do_xor_speed((templ), b1, b2)
 
-       if (fastest) {
-               printk(KERN_INFO "xor: automatically using best "
-                                "checksumming function:\n");
-               xor_speed(fastest);
-               goto out;
-       } else {
-               printk(KERN_INFO "xor: measuring software checksum speed\n");
-               XOR_TRY_TEMPLATES;
-               fastest = template_list;
-               for (f = fastest; f; f = f->next)
-                       if (f->speed > fastest->speed)
-                               fastest = f;
-       }
+       printk(KERN_INFO "xor: measuring software checksum speed\n");
+       XOR_TRY_TEMPLATES;
+       fastest = template_list;
+       for (f = fastest; f; f = f->next)
+               if (f->speed > fastest->speed)
+                       fastest = f;
 
        printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
               fastest->name, fastest->speed / 1000, fastest->speed % 1000);
 
 #undef xor_speed
 
- out:
        free_pages((unsigned long)b1, 2);
-
+out:
        active_template = fastest;
        return 0;
 }
index 26ba583..305343f 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org>
  *
- * Based om ecb.c
+ * Based on ecb.c
  * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
index 8c0770b..200dab5 100644 (file)
@@ -410,6 +410,19 @@ config HW_RANDOM_MESON
 
          If unsure, say Y.
 
+config HW_RANDOM_CAVIUM
+       tristate "Cavium ThunderX Random Number Generator support"
+       depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
+       default HW_RANDOM
+       ---help---
+         This driver provides kernel-side support for the Random Number
+         Generator hardware found on Cavium SoCs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called cavium_rng.
+
+         If unsure, say Y.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
index 04bb0b0..5f52b1e 100644 (file)
@@ -35,3 +35,4 @@ obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
 obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
 obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
 obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
+obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
index 48f6a83..4a99ac7 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
 
+#define DRV_NAME "AMD768-HWRNG"
 
-#define PFX    KBUILD_MODNAME ": "
-
+#define RNGDATA                0x00
+#define RNGDONE                0x04
+#define PMBASE_OFFSET  0xF0
+#define PMBASE_SIZE    8
 
 /*
  * Data for PCI driver interface
@@ -50,72 +52,84 @@ static const struct pci_device_id pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, pci_tbl);
 
-static struct pci_dev *amd_pdev;
-
+struct amd768_priv {
+       void __iomem *iobase;   /* mapping of the RNGDATA/RNGDONE I/O ports */
+       struct pci_dev *pcidev; /* device whose config space toggles the RNG */
+};
 
-static int amd_rng_data_present(struct hwrng *rng, int wait)
+static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 {
-       u32 pmbase = (u32)rng->priv;
-       int data, i;
-
-       for (i = 0; i < 20; i++) {
-               data = !!(inl(pmbase + 0xF4) & 1);
-               if (data || !wait)
-                       break;
-               udelay(10);
+       u32 *data = buf;
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
+       size_t read = 0;
+       /* We will wait at maximum one time per read */
+       int timeout = max / 4 + 1;
+
+       /*
+        * RNG data is available when RNGDONE is set to 1
+        * New random numbers are generated approximately 128 microseconds
+        * after RNGDATA is read
+        */
+       while (read < max) {
+               if (ioread32(priv->iobase + RNGDONE) == 0) {
+                       if (wait) {
+                               /* Delay given by datasheet */
+                               usleep_range(128, 196);
+                               if (timeout-- == 0)
+                                       return read;
+                       } else {
+                               return 0;
+                       }
+               } else {
+                       *data = ioread32(priv->iobase + RNGDATA);
+                       data++;
+                       read += 4;
+               }
        }
-       return data;
-}
 
-static int amd_rng_data_read(struct hwrng *rng, u32 *data)
-{
-       u32 pmbase = (u32)rng->priv;
-
-       *data = inl(pmbase + 0xF0);
-
-       return 4;
+       return read;
 }
 
 static int amd_rng_init(struct hwrng *rng)
 {
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
        u8 rnen;
 
-       pci_read_config_byte(amd_pdev, 0x40, &rnen);
-       rnen |= (1 << 7);       /* RNG on */
-       pci_write_config_byte(amd_pdev, 0x40, rnen);
+       pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+       rnen |= BIT(7); /* RNG on */
+       pci_write_config_byte(priv->pcidev, 0x40, rnen);
 
-       pci_read_config_byte(amd_pdev, 0x41, &rnen);
-       rnen |= (1 << 7);       /* PMIO enable */
-       pci_write_config_byte(amd_pdev, 0x41, rnen);
+       pci_read_config_byte(priv->pcidev, 0x41, &rnen);
+       rnen |= BIT(7); /* PMIO enable */
+       pci_write_config_byte(priv->pcidev, 0x41, rnen);
 
        return 0;
 }
 
 static void amd_rng_cleanup(struct hwrng *rng)
 {
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
        u8 rnen;
 
-       pci_read_config_byte(amd_pdev, 0x40, &rnen);
-       rnen &= ~(1 << 7);      /* RNG off */
-       pci_write_config_byte(amd_pdev, 0x40, rnen);
+       pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+       rnen &= ~BIT(7);        /* RNG off */
+       pci_write_config_byte(priv->pcidev, 0x40, rnen);
 }
 
-
 static struct hwrng amd_rng = {
        .name           = "amd",
        .init           = amd_rng_init,
        .cleanup        = amd_rng_cleanup,
-       .data_present   = amd_rng_data_present,
-       .data_read      = amd_rng_data_read,
+       .read           = amd_rng_read,
 };
 
-
 static int __init mod_init(void)
 {
        int err = -ENODEV;
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;
        u32 pmbase;
+       struct amd768_priv *priv;
 
        for_each_pci_dev(pdev) {
                ent = pci_match_id(pci_tbl, pdev);
@@ -123,42 +137,44 @@ static int __init mod_init(void)
                        goto found;
        }
        /* Device not found. */
-       goto out;
+       return -ENODEV;
 
 found:
        err = pci_read_config_dword(pdev, 0x58, &pmbase);
        if (err)
-               goto out;
-       err = -EIO;
+               return err;
+
        pmbase &= 0x0000FF00;
        if (pmbase == 0)
-               goto out;
-       if (!request_region(pmbase + 0xF0, 8, "AMD HWRNG")) {
-               dev_err(&pdev->dev, "AMD HWRNG region 0x%x already in use!\n",
+               return -EIO;
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET,
+                               PMBASE_SIZE, DRV_NAME)) {
+               dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
                        pmbase + 0xF0);
-               err = -EBUSY;
-               goto out;
+               return -EBUSY;
        }
-       amd_rng.priv = (unsigned long)pmbase;
-       amd_pdev = pdev;
-
-       pr_info("AMD768 RNG detected\n");
-       err = hwrng_register(&amd_rng);
-       if (err) {
-               pr_err(PFX "RNG registering failed (%d)\n",
-                      err);
-               release_region(pmbase + 0xF0, 8);
-               goto out;
+
+       priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET,
+                       PMBASE_SIZE);
+       if (!priv->iobase) {
+               pr_err(DRV_NAME "Cannot map ioport\n");
+               return -ENOMEM;
        }
-out:
-       return err;
+
+       amd_rng.priv = (unsigned long)priv;
+       priv->pcidev = pdev;
+
+       pr_info(DRV_NAME " detected\n");
+       return devm_hwrng_register(&pdev->dev, &amd_rng);
 }
 
 static void __exit mod_exit(void)
 {
-       u32 pmbase = (unsigned long)amd_rng.priv;
-       release_region(pmbase + 0xF0, 8);
-       hwrng_unregister(&amd_rng);
 }
 
 module_init(mod_init);
index af21492..574211a 100644 (file)
@@ -92,9 +92,10 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
        bcm2835_rng_ops.priv = (unsigned long)rng_base;
 
        rng_id = of_match_node(bcm2835_rng_of_match, np);
-       if (!rng_id)
+       if (!rng_id) {
+               iounmap(rng_base);
                return -EINVAL;
-
+       }
        /* Check for rng init function, execute it */
        rng_setup = rng_id->data;
        if (rng_setup)
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
new file mode 100644 (file)
index 0000000..066ae0e
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Hardware Random Number Generator support for Cavium, Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+struct cavium_rng {
+       struct hwrng ops;       /* hwrng instance registered by the VF probe */
+       void __iomem *result;   /* mapping read by cavium_rng_read() */
+};
+
+/*
+ * hwrng read callback: fill @dat with @max bytes taken from the RNG result
+ * mapping, eight bytes per access while at least eight remain and one byte
+ * per access for the tail. @wait is not consulted. Returns @max.
+ */
+static int cavium_rng_read(struct hwrng *rng, void *dat, size_t max, bool wait)
+{
+       struct cavium_rng *cr = container_of(rng, struct cavium_rng, ops);
+       unsigned int remaining;
+
+       /* Bulk part: one 64-bit access per eight output bytes */
+       for (remaining = max; remaining >= 8; remaining -= 8) {
+               *((u64 *)dat) = readq(cr->result);
+               dat += 8;
+       }
+
+       /* Tail: fewer than eight bytes left, fetch them one at a time */
+       for (; remaining > 0; remaining--) {
+               *((u8 *)dat) = readb(cr->result);
+               dat++;
+       }
+
+       return max;
+}
+
+/*
+ * VF probe: allocate the per-device state, map PCI resource 0 as the RNG
+ * result mapping and register the device with the hwrng core.
+ * Returns 0 on success or a negative errno.
+ */
+static int cavium_rng_probe_vf(struct pci_dev *pdev,
+                        const struct pci_device_id *id)
+{
+       struct cavium_rng *cr;
+       int err;
+
+       cr = devm_kzalloc(&pdev->dev, sizeof(*cr), GFP_KERNEL);
+       if (!cr)
+               return -ENOMEM;
+
+       /* Map the RNG result */
+       cr->result = pcim_iomap(pdev, 0, 0);
+       if (!cr->result) {
+               dev_err(&pdev->dev, "Error iomap failed retrieving result.\n");
+               return -ENOMEM;
+       }
+
+       cr->ops.name    = "cavium rng";
+       cr->ops.read    = cavium_rng_read;
+       cr->ops.quality = 1000;
+
+       /* Stored so that the remove callback can find the hwrng again */
+       pci_set_drvdata(pdev, cr);
+
+       err = hwrng_register(&cr->ops);
+       if (err)
+               dev_err(&pdev->dev, "Error registering device as HWRNG.\n");
+
+       return err;
+}
+
+/*
+ * VF remove: unregister the hwrng that cavium_rng_probe_vf() registered.
+ * The function is referenced only through cavium_rng_vf_driver in this
+ * file, so it is given internal linkage.
+ */
+static void cavium_rng_remove_vf(struct pci_dev *pdev)
+{
+       struct cavium_rng *rng;
+
+       rng = pci_get_drvdata(pdev);
+       hwrng_unregister(&rng->ops);
+}
+
+static const struct pci_device_id cavium_rng_vf_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa033), 0, 0, 0},
+       {0,},   /* terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, cavium_rng_vf_id_table);
+
+static struct pci_driver cavium_rng_vf_driver = {
+       .name           = "cavium_rng_vf",
+       .id_table       = cavium_rng_vf_id_table,
+       .probe          = cavium_rng_probe_vf,  /* registers the hwrng */
+       .remove         = cavium_rng_remove_vf, /* unregisters the hwrng */
+};
+module_pci_driver(cavium_rng_vf_driver);
+
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/cavium-rng.c b/drivers/char/hw_random/cavium-rng.c
new file mode 100644 (file)
index 0000000..a944e0a
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Hardware Random Number Generator support for Cavium Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#define THUNDERX_RNM_ENT_EN     0x1
+#define THUNDERX_RNM_RNG_EN     0x2
+
+struct cavium_rng_pf {
+       void __iomem *control_status;   /* RNG enable/disable register */
+};
+
+/*
+ * PF probe: map PCI resource 0 as the control/status mapping, switch on the
+ * RNG together with its entropy source, then enable one SR-IOV virtual
+ * function. The RNG is switched off again if enabling the VF fails.
+ * Returns 0 on success or a negative errno.
+ */
+static int cavium_rng_probe(struct pci_dev *pdev,
+                       const struct pci_device_id *id)
+{
+       struct cavium_rng_pf *pf;
+       int ret;
+
+       pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);
+       if (!pf)
+               return -ENOMEM;
+
+       /* Map the RNG control */
+       pf->control_status = pcim_iomap(pdev, 0, 0);
+       if (!pf->control_status) {
+               dev_err(&pdev->dev,
+                       "Error iomap failed retrieving control_status.\n");
+               return -ENOMEM;
+       }
+
+       /* Enable the RNG hardware and entropy source */
+       writeq(THUNDERX_RNM_RNG_EN | THUNDERX_RNM_ENT_EN,
+               pf->control_status);
+
+       pci_set_drvdata(pdev, pf);
+
+       /* Enable the Cavium RNG as a VF */
+       ret = pci_enable_sriov(pdev, 1);
+       if (!ret)
+               return 0;
+
+       /* Disable the RNG hardware and entropy source */
+       writeq(0, pf->control_status);
+       dev_err(&pdev->dev,
+               "Error initializing RNG virtual function,(%i).\n",
+               ret);
+
+       return ret;
+}
+
+/*
+ * PF remove: disable the SR-IOV virtual function, then switch off the RNG
+ * hardware and its entropy source. The function is referenced only through
+ * cavium_rng_pf_driver in this file, so it is given internal linkage.
+ */
+static void cavium_rng_remove(struct pci_dev *pdev)
+{
+       struct cavium_rng_pf *rng;
+
+       rng = pci_get_drvdata(pdev);
+
+       /* Remove the VF */
+       pci_disable_sriov(pdev);
+
+       /* Disable the RNG hardware and entropy source */
+       writeq(0, rng->control_status);
+}
+
+static const struct pci_device_id cavium_rng_pf_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa018), 0, 0, 0}, /* Thunder RNM */
+       {0,},   /* terminating entry */
+};
+
+MODULE_DEVICE_TABLE(pci, cavium_rng_pf_id_table);
+
+static struct pci_driver cavium_rng_pf_driver = {
+       .name           = "cavium_rng_pf",
+       .id_table       = cavium_rng_pf_id_table,
+       .probe          = cavium_rng_probe,     /* enables the RNG and the VF */
+       .remove         = cavium_rng_remove,    /* disables the VF and the RNG */
+};
+
+module_pci_driver(cavium_rng_pf_driver);
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");
index 9203f2d..4827945 100644 (file)
@@ -449,22 +449,6 @@ int hwrng_register(struct hwrng *rng)
                goto out;
 
        mutex_lock(&rng_mutex);
-
-       /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
-       err = -ENOMEM;
-       if (!rng_buffer) {
-               rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
-               if (!rng_buffer)
-                       goto out_unlock;
-       }
-       if (!rng_fillbuf) {
-               rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
-               if (!rng_fillbuf) {
-                       kfree(rng_buffer);
-                       goto out_unlock;
-               }
-       }
-
        /* Must not register two RNGs with the same name. */
        err = -EEXIST;
        list_for_each_entry(tmp, &rng_list, list) {
@@ -573,7 +557,26 @@ EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
 
 static int __init hwrng_modinit(void)
 {
-       return register_miscdev();
+       int ret = -ENOMEM;
+
+       /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
+       rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
+       if (!rng_buffer)
+               return -ENOMEM;
+
+       rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
+       if (!rng_fillbuf) {
+               kfree(rng_buffer);
+               return -ENOMEM;
+       }
+
+       ret = register_miscdev();
+       if (ret) {
+               kfree(rng_fillbuf);
+               kfree(rng_buffer);
+       }
+
+       return ret;
 }
 
 static void __exit hwrng_modexit(void)
index 0d0579f..e7a2459 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-
-
-#define PFX    KBUILD_MODNAME ": "
 
 #define GEODE_RNG_DATA_REG   0x50
 #define GEODE_RNG_STATUS_REG 0x54
@@ -85,7 +82,6 @@ static struct hwrng geode_rng = {
 
 static int __init mod_init(void)
 {
-       int err = -ENODEV;
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;
        void __iomem *mem;
@@ -93,43 +89,27 @@ static int __init mod_init(void)
 
        for_each_pci_dev(pdev) {
                ent = pci_match_id(pci_tbl, pdev);
-               if (ent)
-                       goto found;
-       }
-       /* Device not found. */
-       goto out;
-
-found:
-       rng_base = pci_resource_start(pdev, 0);
-       if (rng_base == 0)
-               goto out;
-       err = -ENOMEM;
-       mem = ioremap(rng_base, 0x58);
-       if (!mem)
-               goto out;
-       geode_rng.priv = (unsigned long)mem;
-
-       pr_info("AMD Geode RNG detected\n");
-       err = hwrng_register(&geode_rng);
-       if (err) {
-               pr_err(PFX "RNG registering failed (%d)\n",
-                      err);
-               goto err_unmap;
+               if (ent) {
+                       rng_base = pci_resource_start(pdev, 0);
+                       if (rng_base == 0)
+                               return -ENODEV;
+
+                       mem = devm_ioremap(&pdev->dev, rng_base, 0x58);
+                       if (!mem)
+                               return -ENOMEM;
+                       geode_rng.priv = (unsigned long)mem;
+
+                       pr_info("AMD Geode RNG detected\n");
+                       return devm_hwrng_register(&pdev->dev, &geode_rng);
+               }
        }
-out:
-       return err;
 
-err_unmap:
-       iounmap(mem);
-       goto out;
+       /* Device not found. */
+       return -ENODEV;
 }
 
 static void __exit mod_exit(void)
 {
-       void __iomem *mem = (void __iomem *)geode_rng.priv;
-
-       hwrng_unregister(&geode_rng);
-       iounmap(mem);
 }
 
 module_init(mod_init);
index 0cfd81b..58bef39 100644 (file)
@@ -76,9 +76,6 @@ static int meson_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
        struct meson_rng_data *data =
                        container_of(rng, struct meson_rng_data, rng);
 
-       if (max < sizeof(u32))
-               return 0;
-
        *(u32 *)buf = readl_relaxed(data->base + RNG_DATA);
 
        return sizeof(u32);
index 01d4be2..f5c26a5 100644 (file)
@@ -385,7 +385,7 @@ static int omap_rng_probe(struct platform_device *pdev)
 
        pm_runtime_enable(&pdev->dev);
        ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret) {
+       if (ret < 0) {
                dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
                pm_runtime_put_noidle(&pdev->dev);
                goto err_ioremap;
@@ -443,7 +443,7 @@ static int __maybe_unused omap_rng_resume(struct device *dev)
        int ret;
 
        ret = pm_runtime_get_sync(dev);
-       if (ret) {
+       if (ret < 0) {
                dev_err(dev, "Failed to runtime_get device: %d\n", ret);
                pm_runtime_put_noidle(dev);
                return ret;
index 8da14f1..37a58d7 100644 (file)
@@ -71,12 +71,7 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)
        return 0;
 }
 
-static int omap3_rom_rng_data_present(struct hwrng *rng, int wait)
-{
-       return 1;
-}
-
-static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
+static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w)
 {
        int r;
 
@@ -88,8 +83,7 @@ static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
 
 static struct hwrng omap3_rom_rng_ops = {
        .name           = "omap3-rom",
-       .data_present   = omap3_rom_rng_data_present,
-       .data_read      = omap3_rom_rng_data_read,
+       .read           = omap3_rom_rng_read,
 };
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)
index c19e23d..545df48 100644 (file)
@@ -95,42 +95,20 @@ static struct hwrng pasemi_rng = {
        .data_read      = pasemi_rng_data_read,
 };
 
-static int rng_probe(struct platform_device *ofdev)
+static int rng_probe(struct platform_device *pdev)
 {
        void __iomem *rng_regs;
-       struct device_node *rng_np = ofdev->dev.of_node;
-       struct resource res;
-       int err = 0;
+       struct resource *res;
 
-       err = of_address_to_resource(rng_np, 0, &res);
-       if (err)
-               return -ENODEV;
-
-       rng_regs = ioremap(res.start, 0x100);
-
-       if (!rng_regs)
-               return -ENOMEM;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       rng_regs = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(rng_regs))
+               return PTR_ERR(rng_regs);
 
        pasemi_rng.priv = (unsigned long)rng_regs;
 
        pr_info("Registering PA Semi RNG\n");
-
-       err = hwrng_register(&pasemi_rng);
-
-       if (err)
-               iounmap(rng_regs);
-
-       return err;
-}
-
-static int rng_remove(struct platform_device *dev)
-{
-       void __iomem *rng_regs = (void __iomem *)pasemi_rng.priv;
-
-       hwrng_unregister(&pasemi_rng);
-       iounmap(rng_regs);
-
-       return 0;
+       return devm_hwrng_register(&pdev->dev, &pasemi_rng);
 }
 
 static const struct of_device_id rng_match[] = {
@@ -146,7 +124,6 @@ static struct platform_driver rng_driver = {
                .of_match_table = rng_match,
        },
        .probe          = rng_probe,
-       .remove         = rng_remove,
 };
 
 module_platform_driver(rng_driver);
index 108897b..11dc9b7 100644 (file)
@@ -143,7 +143,6 @@ static struct platform_driver pic32_rng_driver = {
        .remove         = pic32_rng_remove,
        .driver         = {
                .name   = "pic32-rng",
-               .owner  = THIS_MODULE,
                .of_match_table = of_match_ptr(pic32_rng_of_match),
        },
 };
index 1d35363..938ec10 100644 (file)
@@ -54,9 +54,6 @@ static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        u32 status;
        int i;
 
-       if (max < sizeof(u16))
-               return -EINVAL;
-
        /* Wait until FIFO is full - max 4uS*/
        for (i = 0; i < ST_RNG_FILL_FIFO_TIMEOUT; i++) {
                status = readl_relaxed(ddata->base + ST_RNG_STATUS_REG);
@@ -111,6 +108,7 @@ static int st_rng_probe(struct platform_device *pdev)
        ret = hwrng_register(&ddata->ops);
        if (ret) {
                dev_err(&pdev->dev, "Failed to register HW RNG\n");
+               clk_disable_unprepare(clk);
                return ret;
        }
 
index a7b6949..1093583 100644 (file)
@@ -144,22 +144,13 @@ static int __init tx4939_rng_probe(struct platform_device *dev)
        }
 
        platform_set_drvdata(dev, rngdev);
-       return hwrng_register(&rngdev->rng);
-}
-
-static int __exit tx4939_rng_remove(struct platform_device *dev)
-{
-       struct tx4939_rng *rngdev = platform_get_drvdata(dev);
-
-       hwrng_unregister(&rngdev->rng);
-       return 0;
+       return devm_hwrng_register(&dev->dev, &rngdev->rng);
 }
 
 static struct platform_driver tx4939_rng_driver = {
        .driver         = {
                .name   = "tx4939-rng",
        },
-       .remove = tx4939_rng_remove,
 };
 
 module_platform_driver_probe(tx4939_rng_driver, tx4939_rng_probe);
index 9b035b7..4d2b81f 100644 (file)
@@ -318,6 +318,9 @@ config CRYPTO_DEV_OMAP_AES
        select CRYPTO_AES
        select CRYPTO_BLKCIPHER
        select CRYPTO_ENGINE
+       select CRYPTO_CBC
+       select CRYPTO_ECB
+       select CRYPTO_CTR
        help
          OMAP processors have AES module accelerator. Select this if you
          want to use the OMAP module for AES algorithms.
index b304421..156aad1 100644 (file)
 #else
 #define debug(format, arg...)
 #endif
+
+#ifdef DEBUG
+#include <linux/highmem.h>
+
+/*
+ * Debug helper: hex-dump up to @tlen bytes of the data described by the
+ * scatterlist @sg, issuing one print_hex_dump() call per entry.
+ *
+ * @level, @prefix_str, @prefix_type, @rowsize, @groupsize and @ascii are
+ * handed to print_hex_dump() unchanged. @may_sleep is not used by this
+ * implementation.
+ */
+static void dbg_dump_sg(const char *level, const char *prefix_str,
+                       int prefix_type, int rowsize, int groupsize,
+                       struct scatterlist *sg, size_t tlen, bool ascii,
+                       bool may_sleep)
+{
+       struct scatterlist *it;
+       void *it_page;
+       size_t len;
+       void *buf;
+
+       /*
+        * Step from the current entry: advancing from @sg would revisit
+        * the second entry forever instead of walking the list.
+        */
+       for (it = sg; it != NULL && tlen > 0 ; it = sg_next(it)) {
+               /*
+                * make sure the scatterlist's page
+                * has a valid virtual memory mapping
+                */
+               it_page = kmap_atomic(sg_page(it));
+               if (unlikely(!it_page)) {
+                       printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
+                       return;
+               }
+
+               buf = it_page + it->offset;
+               /* tlen is size_t, it->length is unsigned int */
+               len = min_t(size_t, tlen, it->length);
+               print_hex_dump(level, prefix_str, prefix_type, rowsize,
+                              groupsize, buf, len, ascii);
+               tlen -= len;
+
+               kunmap_atomic(it_page);
+       }
+}
+#endif
+
 static struct list_head alg_list;
 
 struct caam_alg_entry {
@@ -227,8 +263,9 @@ static void append_key_aead(u32 *desc, struct caam_ctx *ctx,
        if (is_rfc3686) {
                nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len +
                               enckeylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc,
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -500,11 +537,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Class 1 operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -578,11 +614,10 @@ skip_enc:
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Choose operation */
        if (ctr_mode)
@@ -683,11 +718,10 @@ copy_iv:
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Class 1 operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -1478,7 +1512,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
        int ret = 0;
        u32 *key_jump_cmd;
        u32 *desc;
-       u32 *nonce;
+       u8 *nonce;
        u32 geniv;
        u32 ctx1_iv_off = 0;
        const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
@@ -1531,9 +1565,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1549,11 +1584,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Load operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -1590,9 +1624,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1608,11 +1643,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Choose operation */
        if (ctr_mode)
@@ -1653,9 +1687,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load Nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1685,11 +1720,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, (u32)1, LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        if (ctx1_iv_off)
                append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
@@ -1995,9 +2029,9 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
        print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       edesc->src_nents > 1 ? 100 : ivsize, 1);
-       print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
@@ -2027,9 +2061,9 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
        print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
@@ -2184,12 +2218,15 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->src_nents ? 100 : req->nbytes, 1);
+       printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes);
+       dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
        len = desc_len(sh_desc);
@@ -2241,12 +2278,14 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->src_nents ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
        len = desc_len(sh_desc);
@@ -2516,18 +2555,20 @@ static int aead_decrypt(struct aead_request *req)
        u32 *desc;
        int ret = 0;
 
+#ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
+       dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   req->assoclen + req->cryptlen, 1, may_sleep);
+#endif
+
        /* allocate extended descriptor */
        edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
                                 &all_contig, false);
        if (IS_ERR(edesc))
                return PTR_ERR(edesc);
 
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      req->assoclen + req->cryptlen, 1);
-#endif
-
        /* Create and submit job descriptor*/
        init_authenc_job(req, edesc, all_contig, false);
 #ifdef DEBUG
index 36365b3..660dc20 100644 (file)
@@ -99,17 +99,17 @@ static struct list_head hash_list;
 
 /* ahash per-session context */
 struct caam_hash_ctx {
-       struct device *jrdev;
-       u32 sh_desc_update[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN];
-       dma_addr_t sh_desc_update_dma;
+       u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       dma_addr_t sh_desc_update_dma ____cacheline_aligned;
        dma_addr_t sh_desc_update_first_dma;
        dma_addr_t sh_desc_fin_dma;
        dma_addr_t sh_desc_digest_dma;
        dma_addr_t sh_desc_finup_dma;
+       struct device *jrdev;
        u32 alg_type;
        u32 alg_op;
        u8 key[CAAM_MAX_HASH_KEY_SIZE];
@@ -187,15 +187,6 @@ static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev,
        return buf_dma;
 }
 
-/* Map req->src and put it in link table */
-static inline void src_map_to_sec4_sg(struct device *jrdev,
-                                     struct scatterlist *src, int src_nents,
-                                     struct sec4_sg_entry *sec4_sg)
-{
-       dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE);
-       sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
-}
-
 /*
  * Only put buffer in link table if it contains data, which is possible,
  * since a buffer has previously been used, and needs to be unmapped,
@@ -449,7 +440,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
        u32 *desc;
        struct split_key_result result;
        dma_addr_t src_dma, dst_dma;
-       int ret = 0;
+       int ret;
 
        desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
        if (!desc) {
@@ -526,7 +517,7 @@ static int ahash_setkey(struct crypto_ahash *ahash,
        struct device *jrdev = ctx->jrdev;
        int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
        int digestsize = crypto_ahash_digestsize(ahash);
-       int ret = 0;
+       int ret;
        u8 *hashed_key = NULL;
 
 #ifdef DEBUG
@@ -534,14 +525,15 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 #endif
 
        if (keylen > blocksize) {
-               hashed_key = kmalloc(sizeof(u8) * digestsize, GFP_KERNEL |
-                                    GFP_DMA);
+               hashed_key = kmalloc_array(digestsize,
+                                          sizeof(*hashed_key),
+                                          GFP_KERNEL | GFP_DMA);
                if (!hashed_key)
                        return -ENOMEM;
                ret = hash_digest_key(ctx, key, &keylen, hashed_key,
                                      digestsize);
                if (ret)
-                       goto badkey;
+                       goto bad_free_key;
                key = hashed_key;
        }
 
@@ -559,14 +551,14 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 
        ret = gen_split_hash_key(ctx, key, keylen);
        if (ret)
-               goto badkey;
+               goto bad_free_key;
 
        ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
                ret = -ENOMEM;
-               goto map_err;
+               goto error_free_key;
        }
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
@@ -579,11 +571,10 @@ static int ahash_setkey(struct crypto_ahash *ahash,
                dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
                                 DMA_TO_DEVICE);
        }
-
-map_err:
+ error_free_key:
        kfree(hashed_key);
        return ret;
-badkey:
+ bad_free_key:
        kfree(hashed_key);
        crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
        return -EINVAL;
@@ -595,16 +586,16 @@ badkey:
  * @sec4_sg_dma: physical mapped address of h/w link table
  * @src_nents: number of segments in input scatterlist
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
- * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ * @sec4_sg: h/w link table
  */
 struct ahash_edesc {
        dma_addr_t dst_dma;
        dma_addr_t sec4_sg_dma;
        int src_nents;
        int sec4_sg_bytes;
-       struct sec4_sg_entry *sec4_sg;
-       u32 hw_desc[0];
+       u32 hw_desc[DESC_JOB_IO_LEN / sizeof(u32)] ____cacheline_aligned;
+       struct sec4_sg_entry sec4_sg[0];
 };
 
 static inline void ahash_unmap(struct device *dev,
@@ -774,6 +765,65 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
        req->base.complete(&req->base, err);
 }
 
+/*
+ * Allocate a zeroed extended descriptor: the hardware job descriptor (set up
+ * here from sh_desc) plus sg_num S/G table entries. Returns NULL on failure.
+ */
+static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx,
+                                            int sg_num, u32 *sh_desc,
+                                            dma_addr_t sh_desc_dma,
+                                            gfp_t flags)
+{
+       struct ahash_edesc *edesc;
+       unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
+
+       edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags);
+       if (!edesc) {
+               dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
+               return NULL;
+       }
+
+       init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc),
+                            HDR_SHARE_DEFER | HDR_REVERSE);
+
+       return edesc;
+}
+
+static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
+                              struct ahash_edesc *edesc,
+                              struct ahash_request *req, int nents,
+                              unsigned int first_sg,
+                              unsigned int first_bytes, size_t to_hash)
+{
+       dma_addr_t src_dma;
+       u32 options;
+
+       if (nents > 1 || first_sg) {
+               struct sec4_sg_entry *sg = edesc->sec4_sg;
+               unsigned int sgsize = sizeof(*sg) * (first_sg + nents);
+
+               sg_to_sec4_sg_last(req->src, nents, sg + first_sg, 0);
+
+               src_dma = dma_map_single(ctx->jrdev, sg, sgsize, DMA_TO_DEVICE);
+               if (dma_mapping_error(ctx->jrdev, src_dma)) {
+                       dev_err(ctx->jrdev, "unable to map S/G table\n");
+                       return -ENOMEM;
+               }
+
+               edesc->sec4_sg_bytes = sgsize;
+               edesc->sec4_sg_dma = src_dma;
+               options = LDST_SGF;     /* src_dma is the mapped S/G table */
+       } else {
+               src_dma = sg_dma_address(req->src);
+               options = 0;            /* no table: req->src DMA address */
+       }
+
+       append_seq_in_ptr(edesc->hw_desc, src_dma, first_bytes + to_hash,
+                         options);
+
+       return 0;
+}
+
 /* submit update job descriptor */
 static int ahash_update_ctx(struct ahash_request *req)
 {
@@ -789,12 +839,10 @@ static int ahash_update_ctx(struct ahash_request *req)
        int *next_buflen = state->current_buf ? &state->buflen_0 :
                           &state->buflen_1, last_buflen;
        int in_len = *buflen + req->nbytes, to_hash;
-       u32 *sh_desc = ctx->sh_desc_update, *desc;
-       dma_addr_t ptr = ctx->sh_desc_update_dma;
-       int src_nents, sec4_sg_bytes, sec4_sg_src_index;
+       u32 *desc;
+       int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index;
        struct ahash_edesc *edesc;
        int ret = 0;
-       int sh_len;
 
        last_buflen = *next_buflen;
        *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
@@ -807,40 +855,51 @@ static int ahash_update_ctx(struct ahash_request *req)
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to DMA map source\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
+
                sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
-               sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+               sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
                                 sizeof(struct sec4_sg_entry);
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                         ctx->sh_desc_update,
+                                         ctx->sh_desc_update_dma, flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
                edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
 
                ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                         edesc->sec4_sg, DMA_BIDIRECTIONAL);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
                state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
                                                        edesc->sec4_sg + 1,
                                                        buf, state->buf_dma,
                                                        *buflen, last_buflen);
 
-               if (src_nents) {
-                       src_map_to_sec4_sg(jrdev, req->src, src_nents,
-                                          edesc->sec4_sg + sec4_sg_src_index);
+               if (mapped_nents) {
+                       sg_to_sec4_sg_last(req->src, mapped_nents,
+                                          edesc->sec4_sg + sec4_sg_src_index,
+                                          0);
                        if (*next_buflen)
                                scatterwalk_map_and_copy(next_buf, req->src,
                                                         to_hash - *buflen,
@@ -852,17 +911,15 @@ static int ahash_update_ctx(struct ahash_request *req)
 
                state->current_buf = !state->current_buf;
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
 
                edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
                                                     sec4_sg_bytes,
                                                     DMA_TO_DEVICE);
                if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                        dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto unmap_ctx;
                }
 
                append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
@@ -877,13 +934,10 @@ static int ahash_update_ctx(struct ahash_request *req)
 #endif
 
                ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                          DMA_BIDIRECTIONAL);
-                       kfree(edesc);
-               }
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
        } else if (*next_buflen) {
                scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
                                         req->nbytes, 0);
@@ -898,6 +952,10 @@ static int ahash_update_ctx(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+       kfree(edesc);
        return ret;
 }
 
@@ -913,38 +971,31 @@ static int ahash_final_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_fin, *desc;
-       dma_addr_t ptr = ctx->sh_desc_fin_dma;
+       u32 *desc;
        int sec4_sg_bytes, sec4_sg_src_index;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        sec4_sg_src_index = 1 + (buflen ? 1 : 0);
        sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
-       if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index,
+                                 ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
+                                 flags);
+       if (!edesc)
                return -ENOMEM;
-       }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
        edesc->src_nents = 0;
 
        ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                 edesc->sec4_sg, DMA_TO_DEVICE);
        if (ret)
-               return ret;
+               goto unmap_ctx;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
                                                buf, state->buf_dma, buflen,
@@ -956,7 +1007,8 @@ static int ahash_final_ctx(struct ahash_request *req)
                                            sec4_sg_bytes, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
        append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
@@ -966,7 +1018,8 @@ static int ahash_final_ctx(struct ahash_request *req)
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
 #ifdef DEBUG
@@ -975,13 +1028,13 @@ static int ahash_final_ctx(struct ahash_request *req)
 #endif
 
        ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-       if (!ret) {
-               ret = -EINPROGRESS;
-       } else {
-               ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-               kfree(edesc);
-       }
+       if (ret)
+               goto unmap_ctx;
 
+       return -EINPROGRESS;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -997,68 +1050,66 @@ static int ahash_finup_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_finup, *desc;
-       dma_addr_t ptr = ctx->sh_desc_finup_dma;
-       int sec4_sg_bytes, sec4_sg_src_index;
-       int src_nents;
+       u32 *desc;
+       int sec4_sg_src_index;
+       int src_nents, mapped_nents;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to DMA map source\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
+
        sec4_sg_src_index = 1 + (buflen ? 1 : 0);
-       sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
-                        sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                 ctx->sh_desc_finup, ctx->sh_desc_finup_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->src_nents = src_nents;
-       edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
 
        ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                 edesc->sec4_sg, DMA_TO_DEVICE);
        if (ret)
-               return ret;
+               goto unmap_ctx;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
                                                buf, state->buf_dma, buflen,
                                                last_buflen);
 
-       src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
-                          sec4_sg_src_index);
-
-       edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-               dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
-       }
-
-       append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
-                              buflen + req->nbytes, LDST_SGF);
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents,
+                                 sec4_sg_src_index, ctx->ctx_len + buflen,
+                                 req->nbytes);
+       if (ret)
+               goto unmap_ctx;
 
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
 #ifdef DEBUG
@@ -1067,13 +1118,13 @@ static int ahash_finup_ctx(struct ahash_request *req)
 #endif
 
        ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-       if (!ret) {
-               ret = -EINPROGRESS;
-       } else {
-               ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-               kfree(edesc);
-       }
+       if (ret)
+               goto unmap_ctx;
 
+       return -EINPROGRESS;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1084,60 +1135,56 @@ static int ahash_digest(struct ahash_request *req)
        struct device *jrdev = ctx->jrdev;
        gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
                       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
+       u32 *desc;
        int digestsize = crypto_ahash_digestsize(ahash);
-       int src_nents, sec4_sg_bytes;
-       dma_addr_t src_dma;
+       int src_nents, mapped_nents;
        struct ahash_edesc *edesc;
-       int ret = 0;
-       u32 options;
-       int sh_len;
+       int ret;
 
-       src_nents = sg_count(req->src, req->nbytes);
+       src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
-       dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-       sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to map source for DMA\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + sec4_sg_bytes + DESC_JOB_IO_LEN,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0,
+                                 ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                         DESC_JOB_IO_LEN;
-       edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->src_nents = src_nents;
 
-       sh_len = desc_len(sh_desc);
-       desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+       edesc->src_nents = src_nents;
 
-       if (src_nents) {
-               sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
-               edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-               if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-                       dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
-               }
-               src_dma = edesc->sec4_sg_dma;
-               options = LDST_SGF;
-       } else {
-               src_dma = sg_dma_address(req->src);
-               options = 0;
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+                                 req->nbytes);
+       if (ret) {
+               ahash_unmap(jrdev, edesc, req, digestsize);
+               kfree(edesc);
+               return ret;
        }
-       append_seq_in_ptr(desc, src_dma, req->nbytes, options);
+
+       desc = edesc->hw_desc;
 
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
+               ahash_unmap(jrdev, edesc, req, digestsize);
+               kfree(edesc);
                return -ENOMEM;
        }
 
@@ -1168,29 +1215,23 @@ static int ahash_final_no_ctx(struct ahash_request *req)
                       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
        u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
+       u32 *desc;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN, GFP_DMA | flags);
-       if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+       edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest,
+                                 ctx->sh_desc_digest_dma, flags);
+       if (!edesc)
                return -ENOMEM;
-       }
 
-       edesc->sec4_sg_bytes = 0;
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, state->buf_dma)) {
                dev_err(jrdev, "unable to map src\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
        append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
@@ -1199,7 +1240,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               goto unmap;
        }
        edesc->src_nents = 0;
 
@@ -1217,6 +1258,11 @@ static int ahash_final_no_ctx(struct ahash_request *req)
        }
 
        return ret;
+ unmap:
+       ahash_unmap(jrdev, edesc, req, digestsize);
+       kfree(edesc);
+       return -ENOMEM;
+
 }
 
 /* submit ahash update if it the first job descriptor after update */
@@ -1234,48 +1280,58 @@ static int ahash_update_no_ctx(struct ahash_request *req)
        int *next_buflen = state->current_buf ? &state->buflen_0 :
                           &state->buflen_1;
        int in_len = *buflen + req->nbytes, to_hash;
-       int sec4_sg_bytes, src_nents;
+       int sec4_sg_bytes, src_nents, mapped_nents;
        struct ahash_edesc *edesc;
-       u32 *desc, *sh_desc = ctx->sh_desc_update_first;
-       dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+       u32 *desc;
        int ret = 0;
-       int sh_len;
 
        *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
        to_hash = in_len - *next_buflen;
 
        if (to_hash) {
                src_nents = sg_nents_for_len(req->src,
-                                            req->nbytes - (*next_buflen));
+                                            req->nbytes - *next_buflen);
                if (src_nents < 0) {
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
-               sec4_sg_bytes = (1 + src_nents) *
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to DMA map source\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
+
+               sec4_sg_bytes = (1 + mapped_nents) *
                                sizeof(struct sec4_sg_entry);
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, 1 + mapped_nents,
+                                         ctx->sh_desc_update_first,
+                                         ctx->sh_desc_update_first_dma,
+                                         flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
                edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
                edesc->dst_dma = 0;
 
                state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
                                                    buf, *buflen);
-               src_map_to_sec4_sg(jrdev, req->src, src_nents,
-                                  edesc->sec4_sg + 1);
+               sg_to_sec4_sg_last(req->src, mapped_nents,
+                                  edesc->sec4_sg + 1, 0);
+
                if (*next_buflen) {
                        scatterwalk_map_and_copy(next_buf, req->src,
                                                 to_hash - *buflen,
@@ -1284,24 +1340,22 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 
                state->current_buf = !state->current_buf;
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
 
                edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
                                                    sec4_sg_bytes,
                                                    DMA_TO_DEVICE);
                if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                        dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto unmap_ctx;
                }
 
                append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF);
 
                ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
 #ifdef DEBUG
                print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1310,16 +1364,13 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 #endif
 
                ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-                       state->update = ahash_update_ctx;
-                       state->finup = ahash_finup_ctx;
-                       state->final = ahash_final_ctx;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                       DMA_TO_DEVICE);
-                       kfree(edesc);
-               }
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
+               state->update = ahash_update_ctx;
+               state->finup = ahash_finup_ctx;
+               state->final = ahash_final_ctx;
        } else if (*next_buflen) {
                scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
                                         req->nbytes, 0);
@@ -1334,6 +1385,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1350,61 +1405,63 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
-       int sec4_sg_bytes, sec4_sg_src_index, src_nents;
+       u32 *desc;
+       int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int sh_len;
-       int ret = 0;
+       int ret;
 
        src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to DMA map source\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
+
        sec4_sg_src_index = 2;
-       sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+       sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
                         sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                 ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->src_nents = src_nents;
        edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf,
                                                state->buf_dma, buflen,
                                                last_buflen);
 
-       src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1);
-
-       edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 1, buflen,
+                                 req->nbytes);
+       if (ret) {
                dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
-       append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen +
-                              req->nbytes, LDST_SGF);
-
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
 #ifdef DEBUG
@@ -1421,6 +1478,11 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        }
 
        return ret;
+ unmap:
+       ahash_unmap(jrdev, edesc, req, digestsize);
+       kfree(edesc);
+       return -ENOMEM;
+
 }
 
 /* submit first update job descriptor after init */
@@ -1436,78 +1498,65 @@ static int ahash_update_first(struct ahash_request *req)
        int *next_buflen = state->current_buf ?
                &state->buflen_1 : &state->buflen_0;
        int to_hash;
-       u32 *sh_desc = ctx->sh_desc_update_first, *desc;
-       dma_addr_t ptr = ctx->sh_desc_update_first_dma;
-       int sec4_sg_bytes, src_nents;
-       dma_addr_t src_dma;
-       u32 options;
+       u32 *desc;
+       int src_nents, mapped_nents;
        struct ahash_edesc *edesc;
        int ret = 0;
-       int sh_len;
 
        *next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) -
                                      1);
        to_hash = req->nbytes - *next_buflen;
 
        if (to_hash) {
-               src_nents = sg_count(req->src, req->nbytes - (*next_buflen));
+               src_nents = sg_nents_for_len(req->src,
+                                            req->nbytes - *next_buflen);
                if (src_nents < 0) {
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
-               dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-               sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to map source for DMA\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ?
+                                         mapped_nents : 0,
+                                         ctx->sh_desc_update_first,
+                                         ctx->sh_desc_update_first_dma,
+                                         flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
-               edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
                edesc->dst_dma = 0;
 
-               if (src_nents) {
-                       sg_to_sec4_sg_last(req->src, src_nents,
-                                          edesc->sec4_sg, 0);
-                       edesc->sec4_sg_dma = dma_map_single(jrdev,
-                                                           edesc->sec4_sg,
-                                                           sec4_sg_bytes,
-                                                           DMA_TO_DEVICE);
-                       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-                               dev_err(jrdev, "unable to map S/G table\n");
-                               return -ENOMEM;
-                       }
-                       src_dma = edesc->sec4_sg_dma;
-                       options = LDST_SGF;
-               } else {
-                       src_dma = sg_dma_address(req->src);
-                       options = 0;
-               }
+               ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+                                         to_hash);
+               if (ret)
+                       goto unmap_ctx;
 
                if (*next_buflen)
                        scatterwalk_map_and_copy(next_buf, req->src, to_hash,
                                                 *next_buflen, 0);
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
-
-               append_seq_in_ptr(desc, src_dma, to_hash, options);
 
                ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
 #ifdef DEBUG
                print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1515,18 +1564,14 @@ static int ahash_update_first(struct ahash_request *req)
                               desc_bytes(desc), 1);
 #endif
 
-               ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst,
-                                     req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-                       state->update = ahash_update_ctx;
-                       state->finup = ahash_finup_ctx;
-                       state->final = ahash_final_ctx;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                       DMA_TO_DEVICE);
-                       kfree(edesc);
-               }
+               ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
+               state->update = ahash_update_ctx;
+               state->finup = ahash_finup_ctx;
+               state->final = ahash_final_ctx;
        } else if (*next_buflen) {
                state->update = ahash_update_no_ctx;
                state->finup = ahash_finup_no_ctx;
@@ -1540,6 +1585,10 @@ static int ahash_update_first(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1799,7 +1848,6 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
                                         HASH_MSG_LEN + SHA256_DIGEST_SIZE,
                                         HASH_MSG_LEN + 64,
                                         HASH_MSG_LEN + SHA512_DIGEST_SIZE };
-       int ret = 0;
 
        /*
         * Get a Job ring from Job Ring driver to ensure in-order
@@ -1819,10 +1867,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct caam_hash_state));
-
-       ret = ahash_set_sh_desc(ahash);
-
-       return ret;
+       return ahash_set_sh_desc(ahash);
 }
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)
index 0ec112e..72ff196 100644 (file)
@@ -14,6 +14,7 @@
 #include "jr.h"
 #include "desc_constr.h"
 #include "error.h"
+#include "ctrl.h"
 
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
@@ -826,6 +827,8 @@ static int caam_probe(struct platform_device *pdev)
 
 caam_remove:
        caam_remove(pdev);
+       return ret;
+
 iounmap_ctrl:
        iounmap(ctrl);
 disable_caam_emi_slow:
index 26427c1..513b664 100644 (file)
 #define SEC4_SG_OFFSET_MASK    0x00001fff
 
 struct sec4_sg_entry {
-#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
-       defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
-       u32 rsvd1;
-       dma_addr_t ptr;
-#else
        u64 ptr;
-#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
        u32 len;
        u32 bpid_offset;
 };
index d3869b9..a8cd8a7 100644 (file)
@@ -324,6 +324,23 @@ static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
 }
 APPEND_CMD_RAW_IMM(load, LOAD, u32);
 
+/*
+ * ee - endianness
+ * size - size of immediate type in bytes
+ */
+#define APPEND_CMD_RAW_IMM2(cmd, op, ee, size) \
+static inline void append_##cmd##_imm_##ee##size(u32 *desc, \
+                                                  u##size immediate, \
+                                                  u32 options) \
+{ \
+       __##ee##size data = cpu_to_##ee##size(immediate); \
+       PRINT_POS; \
+       append_cmd(desc, CMD_##op | IMMEDIATE | options | sizeof(data)); \
+       append_data(desc, &data, sizeof(data)); \
+}
+
+APPEND_CMD_RAW_IMM2(load, LOAD, be, 32);
+
 /*
  * Append math command. Only the last part of destination and source need to
  * be specified
index e2bcacc..5d4c050 100644 (file)
@@ -41,7 +41,6 @@ struct caam_drv_private_jr {
        struct device           *dev;
        int ridx;
        struct caam_job_ring __iomem *rregs;    /* JobR's register space */
-       struct tasklet_struct irqtask;
        int irq;                        /* One per queue */
 
        /* Number of scatterlist crypt transforms active on the JobR */
index a81f551..757c27f 100644 (file)
@@ -73,8 +73,6 @@ static int caam_jr_shutdown(struct device *dev)
 
        ret = caam_reset_hw_jr(dev);
 
-       tasklet_kill(&jrp->irqtask);
-
        /* Release interrupt */
        free_irq(jrp->irq, dev);
 
@@ -130,7 +128,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 
        /*
         * Check the output ring for ready responses, kick
-        * tasklet if jobs done.
+        * the threaded irq if jobs done.
         */
        irqstate = rd_reg32(&jrp->rregs->jrintstatus);
        if (!irqstate)
@@ -152,18 +150,13 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
        /* Have valid interrupt at this point, just ACK and trigger */
        wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
-       preempt_disable();
-       tasklet_schedule(&jrp->irqtask);
-       preempt_enable();
-
-       return IRQ_HANDLED;
+       return IRQ_WAKE_THREAD;
 }
 
-/* Deferred service handler, run as interrupt-fired tasklet */
-static void caam_jr_dequeue(unsigned long devarg)
+static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
 {
        int hw_idx, sw_idx, i, head, tail;
-       struct device *dev = (struct device *)devarg;
+       struct device *dev = st_dev;
        struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
        void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
        u32 *userdesc, userstatus;
@@ -237,6 +230,8 @@ static void caam_jr_dequeue(unsigned long devarg)
 
        /* reenable / unmask IRQs */
        clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
+
+       return IRQ_HANDLED;
 }
 
 /**
@@ -394,11 +389,10 @@ static int caam_jr_init(struct device *dev)
 
        jrp = dev_get_drvdata(dev);
 
-       tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
-
        /* Connect job ring interrupt handler. */
-       error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
-                           dev_name(dev), dev);
+       error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
+                                    caam_jr_threadirq, IRQF_SHARED,
+                                    dev_name(dev), dev);
        if (error) {
                dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
                        jrp->ridx, jrp->irq);
@@ -460,7 +454,6 @@ out_free_inpring:
 out_free_irq:
        free_irq(jrp->irq, dev);
 out_kill_deq:
-       tasklet_kill(&jrp->irqtask);
        return error;
 }
 
@@ -513,6 +506,7 @@ static int caam_jr_probe(struct platform_device *pdev)
        error = caam_jr_init(jrdev); /* now turn on hardware */
        if (error) {
                irq_dispose_mapping(jrpriv->irq);
+               iounmap(ctrl);
                return error;
        }
 
index b3c5016..84d2f83 100644 (file)
@@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
 #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT  */
 
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
+#define cpu_to_caam_dma64(value) \
+               (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+                (u64)cpu_to_caam32(upper_32_bits(value)))
+#else
+#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
+#endif
+
 /*
  * jr_outentry
  * Represents each entry in a JobR output ring
index 19dc64f..41cd5a3 100644 (file)
@@ -15,7 +15,7 @@ struct sec4_sg_entry;
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
                                      dma_addr_t dma, u32 len, u16 offset)
 {
-       sec4_sg_ptr->ptr = cpu_to_caam_dma(dma);
+       sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
        sec4_sg_ptr->len = cpu_to_caam32(len);
        sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
 #ifdef DEBUG
index ee4d274..346ceb8 100644 (file)
@@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
 ccp-objs := ccp-dev.o \
            ccp-ops.o \
            ccp-dev-v3.o \
+           ccp-dev-v5.o \
            ccp-platform.o \
            ccp-dmaengine.o
 ccp-$(CONFIG_PCI) += ccp-pci.o
index 8f36af6..84a652b 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -134,7 +135,22 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
        rctx->cmd.engine = CCP_ENGINE_SHA;
        rctx->cmd.u.sha.type = rctx->type;
        rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
-       rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
+
+       switch (rctx->type) {
+       case CCP_SHA_TYPE_1:
+               rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE;
+               break;
+       case CCP_SHA_TYPE_224:
+               rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE;
+               break;
+       case CCP_SHA_TYPE_256:
+               rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE;
+               break;
+       default:
+               /* Should never get here */
+               break;
+       }
+
        rctx->cmd.u.sha.src = sg;
        rctx->cmd.u.sha.src_len = rctx->hash_cnt;
        rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
index d7a7103..8d2dbac 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 
 #include "ccp-dev.h"
 
+static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+       int start;
+       struct ccp_device *ccp = cmd_q->ccp;
+
+       for (;;) {
+               mutex_lock(&ccp->sb_mutex);
+
+               start = (u32)bitmap_find_next_zero_area(ccp->sb,
+                                                       ccp->sb_count,
+                                                       ccp->sb_start,
+                                                       count, 0);
+               if (start <= ccp->sb_count) {
+                       bitmap_set(ccp->sb, start, count);
+
+                       mutex_unlock(&ccp->sb_mutex);
+                       break;
+               }
+
+               ccp->sb_avail = 0;
+
+               mutex_unlock(&ccp->sb_mutex);
+
+               /* Wait for KSB entries to become available */
+               if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+                       return 0;
+       }
+
+       return KSB_START + start;
+}
+
+static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
+                        unsigned int count)
+{
+       struct ccp_device *ccp = cmd_q->ccp;
+
+       if (!start)
+               return;
+
+       mutex_lock(&ccp->sb_mutex);
+
+       bitmap_clear(ccp->sb, start - KSB_START, count);
+
+       ccp->sb_avail = 1;
+
+       mutex_unlock(&ccp->sb_mutex);
+
+       wake_up_interruptible_all(&ccp->sb_queue);
+}
+
+static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+       return CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+}
+
 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
 {
        struct ccp_cmd_queue *cmd_q = op->cmd_q;
@@ -68,6 +124,9 @@ static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
                        /* On error delete all related jobs from the queue */
                        cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;
+                       if (cmd_q->cmd_error)
+                               ccp_log_error(cmd_q->ccp,
+                                             cmd_q->cmd_error);
 
                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
 
@@ -99,10 +158,10 @@ static int ccp_perform_aes(struct ccp_op *op)
                | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
                | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
                | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -129,10 +188,10 @@ static int ccp_perform_xts_aes(struct ccp_op *op)
        cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
                | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
                | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -158,7 +217,7 @@ static int ccp_perform_sha(struct ccp_op *op)
                | REQ1_INIT;
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
 
@@ -181,11 +240,11 @@ static int ccp_perform_rsa(struct ccp_op *op)
        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
                | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT)
                | REQ1_EOM;
        cr[1] = op->u.rsa.input_len - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -215,10 +274,10 @@ static int ccp_perform_passthru(struct ccp_op *op)
                        | ccp_addr_hi(&op->src.u.dma);
 
                if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
-                       cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
+                       cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
        } else {
-               cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
-               cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
+               cr[2] = op->src.u.sb * CCP_SB_BYTES;
+               cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
        }
 
        if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
@@ -226,8 +285,8 @@ static int ccp_perform_passthru(struct ccp_op *op)
                cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->dst.u.dma);
        } else {
-               cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
-               cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
+               cr[4] = op->dst.u.sb * CCP_SB_BYTES;
+               cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
        }
 
        if (op->eom)
@@ -256,35 +315,6 @@ static int ccp_perform_ecc(struct ccp_op *op)
        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 }
 
-static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
-{
-       struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
-       u32 trng_value;
-       int len = min_t(int, sizeof(trng_value), max);
-
-       /*
-        * Locking is provided by the caller so we can update device
-        * hwrng-related fields safely
-        */
-       trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
-       if (!trng_value) {
-               /* Zero is returned if not data is available or if a
-                * bad-entropy error is present. Assume an error if
-                * we exceed TRNG_RETRIES reads of zero.
-                */
-               if (ccp->hwrng_retries++ > TRNG_RETRIES)
-                       return -EIO;
-
-               return 0;
-       }
-
-       /* Reset the counter and save the rng value */
-       ccp->hwrng_retries = 0;
-       memcpy(data, &trng_value, len);
-
-       return len;
-}
-
 static int ccp_init(struct ccp_device *ccp)
 {
        struct device *dev = ccp->dev;
@@ -321,9 +351,9 @@ static int ccp_init(struct ccp_device *ccp)
                cmd_q->dma_pool = dma_pool;
 
                /* Reserve 2 KSB regions for the queue */
-               cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
-               cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
-               ccp->ksb_count -= 2;
+               cmd_q->sb_key = KSB_START + ccp->sb_start++;
+               cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
+               ccp->sb_count -= 2;
 
                /* Preset some register values and masks that are queue
                 * number dependent
@@ -335,7 +365,7 @@ static int ccp_init(struct ccp_device *ccp)
                cmd_q->int_ok = 1 << (i * 2);
                cmd_q->int_err = 1 << ((i * 2) + 1);
 
-               cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+               cmd_q->free_slots = ccp_get_free_slots(cmd_q);
 
                init_waitqueue_head(&cmd_q->int_queue);
 
@@ -375,9 +405,10 @@ static int ccp_init(struct ccp_device *ccp)
        }
 
        /* Initialize the queues used to wait for KSB space and suspend */
-       init_waitqueue_head(&ccp->ksb_queue);
+       init_waitqueue_head(&ccp->sb_queue);
        init_waitqueue_head(&ccp->suspend_queue);
 
+       dev_dbg(dev, "Starting threads...\n");
        /* Create a kthread for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                struct task_struct *kthread;
@@ -397,29 +428,26 @@ static int ccp_init(struct ccp_device *ccp)
                wake_up_process(kthread);
        }
 
-       /* Register the RNG */
-       ccp->hwrng.name = ccp->rngname;
-       ccp->hwrng.read = ccp_trng_read;
-       ret = hwrng_register(&ccp->hwrng);
-       if (ret) {
-               dev_err(dev, "error registering hwrng (%d)\n", ret);
+       dev_dbg(dev, "Enabling interrupts...\n");
+       /* Enable interrupts */
+       iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+       dev_dbg(dev, "Registering device...\n");
+       ccp_add_device(ccp);
+
+       ret = ccp_register_rng(ccp);
+       if (ret)
                goto e_kthread;
-       }
 
        /* Register the DMA engine support */
        ret = ccp_dmaengine_register(ccp);
        if (ret)
                goto e_hwrng;
 
-       ccp_add_device(ccp);
-
-       /* Enable interrupts */
-       iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
-
        return 0;
 
 e_hwrng:
-       hwrng_unregister(&ccp->hwrng);
+       ccp_unregister_rng(ccp);
 
 e_kthread:
        for (i = 0; i < ccp->cmd_q_count; i++)
@@ -441,19 +469,14 @@ static void ccp_destroy(struct ccp_device *ccp)
        struct ccp_cmd *cmd;
        unsigned int qim, i;
 
-       /* Remove this device from the list of available units first */
-       ccp_del_device(ccp);
-
        /* Unregister the DMA engine */
        ccp_dmaengine_unregister(ccp);
 
        /* Unregister the RNG */
-       hwrng_unregister(&ccp->hwrng);
+       ccp_unregister_rng(ccp);
 
-       /* Stop the queue kthreads */
-       for (i = 0; i < ccp->cmd_q_count; i++)
-               if (ccp->cmd_q[i].kthread)
-                       kthread_stop(ccp->cmd_q[i].kthread);
+       /* Remove this device from the list of available units */
+       ccp_del_device(ccp);
 
        /* Build queue interrupt mask (two interrupt masks per queue) */
        qim = 0;
@@ -472,6 +495,11 @@ static void ccp_destroy(struct ccp_device *ccp)
        }
        iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
 
+       /* Stop the queue kthreads */
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
        ccp->free_irq(ccp);
 
        for (i = 0; i < ccp->cmd_q_count; i++)
@@ -527,18 +555,24 @@ static irqreturn_t ccp_irq_handler(int irq, void *data)
 }
 
 static const struct ccp_actions ccp3_actions = {
-       .perform_aes = ccp_perform_aes,
-       .perform_xts_aes = ccp_perform_xts_aes,
-       .perform_sha = ccp_perform_sha,
-       .perform_rsa = ccp_perform_rsa,
-       .perform_passthru = ccp_perform_passthru,
-       .perform_ecc = ccp_perform_ecc,
+       .aes = ccp_perform_aes,
+       .xts_aes = ccp_perform_xts_aes,
+       .sha = ccp_perform_sha,
+       .rsa = ccp_perform_rsa,
+       .passthru = ccp_perform_passthru,
+       .ecc = ccp_perform_ecc,
+       .sballoc = ccp_alloc_ksb,
+       .sbfree = ccp_free_ksb,
        .init = ccp_init,
        .destroy = ccp_destroy,
+       .get_free_slots = ccp_get_free_slots,
        .irqhandler = ccp_irq_handler,
 };
 
-struct ccp_vdata ccpv3 = {
+const struct ccp_vdata ccpv3 = {
        .version = CCP_VERSION(3, 0),
+       .setup = NULL,
        .perform = &ccp3_actions,
+       .bar = 2,
+       .offset = 0x20000,
 };
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
new file mode 100644 (file)
index 0000000..faf3cb3
--- /dev/null
@@ -0,0 +1,1017 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/*
+ * Reserve @count contiguous LSB slots for @cmd_q.
+ *
+ * The queue's private LSB (cmd_q->lsb >= 0) is tried first, without taking
+ * any lock. If it has no room, the device-wide shared map is searched under
+ * sb_mutex, sleeping until enough shared slots are released.
+ *
+ * Returns the index of the first reserved slot, counted from the start of
+ * LSB 0, so that ccp_lsb_free() can recover the LSB number as
+ * index / LSB_SIZE. Both paths return a slot index, never a byte offset;
+ * callers scale by LSB_ITEM_SIZE themselves. Returns 0 when
+ * wait_event_interruptible() returns non-zero while waiting for space.
+ */
+static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+       struct ccp_device *ccp;
+       int start;
+
+       /* First look at the map for the queue */
+       if (cmd_q->lsb >= 0) {
+               start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
+                                                       LSB_SIZE,
+                                                       0, count, 0);
+               if (start < LSB_SIZE) {
+                       bitmap_set(cmd_q->lsbmap, start, count);
+                       return start + cmd_q->lsb * LSB_SIZE;
+               }
+       }
+
+       /* No joy; try to get an entry from the shared blocks */
+       ccp = cmd_q->ccp;
+       for (;;) {
+               mutex_lock(&ccp->sb_mutex);
+
+               start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
+                                                       MAX_LSB_CNT * LSB_SIZE,
+                                                       0,
+                                                       count, 0);
+               if (start <= MAX_LSB_CNT * LSB_SIZE) {
+                       bitmap_set(ccp->lsbmap, start, count);
+
+                       mutex_unlock(&ccp->sb_mutex);
+                       /* Slot index, matching the bit ccp_lsb_free() clears */
+                       return start;
+               }
+
+               ccp->sb_avail = 0;
+
+               mutex_unlock(&ccp->sb_mutex);
+
+               /* Wait for shared LSB slots to become available */
+               if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+                       return 0;
+       }
+}
+
+/*
+ * Release @count LSB slots beginning at slot index @start. Slots that fall
+ * in the queue's private LSB are cleared in the queue's own map; anything
+ * else is cleared in the shared map and waiters are woken. A @start of 0 is
+ * ignored.
+ */
+static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
+                        unsigned int count)
+{
+       struct ccp_device *ccp;
+       int lsbno;
+
+       if (start == 0)
+               return;
+
+       lsbno = start / LSB_SIZE;
+       if (cmd_q->lsb == lsbno) {
+               /* An entry from the private LSB */
+               bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
+               return;
+       }
+
+       /* From the shared LSBs */
+       ccp = cmd_q->ccp;
+
+       mutex_lock(&ccp->sb_mutex);
+       bitmap_clear(ccp->lsbmap, start, count);
+       ccp->sb_avail = 1;
+       mutex_unlock(&ccp->sb_mutex);
+
+       wake_up_interruptible_all(&ccp->sb_queue);
+}
+
+/* CCP version 5: Union to define the function field (cmd_reg1/dword0)
+ *
+ * Each member struct is one engine's view of the same 16-bit value. The
+ * ccp5_perform_*() routines zero @raw, fill in the bit-fields for their
+ * engine and then store @raw into the descriptor's function field.
+ */
+union ccp_function {
+       /* AES engine */
+       struct {
+               u16 size:7;
+               u16 encrypt:1;
+               u16 mode:5;
+               u16 type:2;
+       } aes;
+       /* XTS-AES engine: same layout as aes with the mode bits reserved */
+       struct {
+               u16 size:7;
+               u16 encrypt:1;
+               u16 rsvd:5;
+               u16 type:2;
+       } aes_xts;
+       /* SHA engine */
+       struct {
+               u16 rsvd1:10;
+               u16 type:4;
+               u16 rsvd2:1;
+       } sha;
+       /* RSA engine */
+       struct {
+               u16 mode:3;
+               u16 size:12;
+       } rsa;
+       /* Pass-through engine (see the CCP_PT_* accessors) */
+       struct {
+               u16 byteswap:2;
+               u16 bitwise:3;
+               u16 reflect:2;
+               u16 rsvd:8;
+       } pt;
+       /* zlib: only reserved bits are declared */
+       struct  {
+               u16 rsvd:13;
+       } zlib;
+       /* ECC engine */
+       struct {
+               u16 size:10;
+               u16 type:2;
+               u16 mode:3;
+       } ecc;
+       /* The whole field as a single 16-bit value */
+       u16 raw;
+};
+
+/* Accessors for the bit-fields of a union ccp_function pointed to by p.
+ * Each expands to a member expression, so it can be read or assigned.
+ *
+ * NOTE(review): CCP_ECC_AFFINE() names ecc.one, but the ecc struct in
+ * union ccp_function declares only size, type and mode; the macro would
+ * fail to compile if it were ever used. Confirm the intended field.
+ */
+#define        CCP_AES_SIZE(p)         ((p)->aes.size)
+#define        CCP_AES_ENCRYPT(p)      ((p)->aes.encrypt)
+#define        CCP_AES_MODE(p)         ((p)->aes.mode)
+#define        CCP_AES_TYPE(p)         ((p)->aes.type)
+#define        CCP_XTS_SIZE(p)         ((p)->aes_xts.size)
+#define        CCP_XTS_ENCRYPT(p)      ((p)->aes_xts.encrypt)
+#define        CCP_SHA_TYPE(p)         ((p)->sha.type)
+#define        CCP_RSA_SIZE(p)         ((p)->rsa.size)
+#define        CCP_PT_BYTESWAP(p)      ((p)->pt.byteswap)
+#define        CCP_PT_BITWISE(p)       ((p)->pt.bitwise)
+#define        CCP_ECC_MODE(p)         ((p)->ecc.mode)
+#define        CCP_ECC_AFFINE(p)       ((p)->ecc.one)
+
+/* Accessors for the members of a struct ccp5_desc pointed to by p, grouped
+ * by descriptor word. Each expands to a member expression, so it is used
+ * both to read a field and as the target of an assignment.
+ */
+
+/* Word 0 */
+#define CCP5_CMD_DW0(p)                ((p)->dw0)
+#define CCP5_CMD_SOC(p)                (CCP5_CMD_DW0(p).soc)
+#define CCP5_CMD_IOC(p)                (CCP5_CMD_DW0(p).ioc)
+#define CCP5_CMD_INIT(p)       (CCP5_CMD_DW0(p).init)
+#define CCP5_CMD_EOM(p)                (CCP5_CMD_DW0(p).eom)
+#define CCP5_CMD_FUNCTION(p)   (CCP5_CMD_DW0(p).function)
+#define CCP5_CMD_ENGINE(p)     (CCP5_CMD_DW0(p).engine)
+#define CCP5_CMD_PROT(p)       (CCP5_CMD_DW0(p).prot)
+
+/* Word 1 */
+#define CCP5_CMD_DW1(p)                ((p)->length)
+#define CCP5_CMD_LEN(p)                (CCP5_CMD_DW1(p))
+
+/* Word 2 */
+#define CCP5_CMD_DW2(p)                ((p)->src_lo)
+#define CCP5_CMD_SRC_LO(p)     (CCP5_CMD_DW2(p))
+
+/* Word 3 */
+#define CCP5_CMD_DW3(p)                ((p)->dw3)
+#define CCP5_CMD_SRC_MEM(p)    ((p)->dw3.src_mem)
+#define CCP5_CMD_SRC_HI(p)     ((p)->dw3.src_hi)
+#define CCP5_CMD_LSB_ID(p)     ((p)->dw3.lsb_cxt_id)
+#define CCP5_CMD_FIX_SRC(p)    ((p)->dw3.fixed)
+
+/* Words 4/5: either a destination address or, for SHA, a length */
+#define CCP5_CMD_DW4(p)                ((p)->dw4)
+#define CCP5_CMD_DST_LO(p)     (CCP5_CMD_DW4(p).dst_lo)
+#define CCP5_CMD_DW5(p)                ((p)->dw5.fields.dst_hi)
+#define CCP5_CMD_DST_HI(p)     (CCP5_CMD_DW5(p))
+#define CCP5_CMD_DST_MEM(p)    ((p)->dw5.fields.dst_mem)
+#define CCP5_CMD_FIX_DST(p)    ((p)->dw5.fields.fixed)
+#define CCP5_CMD_SHA_LO(p)     ((p)->dw4.sha_len_lo)
+#define CCP5_CMD_SHA_HI(p)     ((p)->dw5.sha_len_hi)
+
+/* Word 6/7 */
+#define CCP5_CMD_DW6(p)                ((p)->key_lo)
+#define CCP5_CMD_KEY_LO(p)     (CCP5_CMD_DW6(p))
+#define CCP5_CMD_DW7(p)                ((p)->dw7)
+#define CCP5_CMD_KEY_HI(p)     ((p)->dw7.key_hi)
+#define CCP5_CMD_KEY_MEM(p)    ((p)->dw7.key_mem)
+
+/* Return bits 31:0 of @addr */
+static inline u32 low_address(unsigned long addr)
+{
+       u64 wide = addr;
+
+       return wide & 0x0ffffffff;
+}
+
+/* Return bits 47:32 of @addr; only 16 bits above the low word are kept */
+static inline u32 high_address(unsigned long addr)
+{
+       u64 wide = addr;
+
+       return (wide >> 32) & 0x00000ffff;
+}
+
+/*
+ * Work out how many descriptors can still be queued on @cmd_q: the distance
+ * from the driver's next write index (cmd_q->qidx) round to the head index
+ * read back from the head register, less one slot.
+ */
+static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+       u32 ring_base = low_address(cmd_q->qdma_tail);
+       u32 hw_head = ioread32(cmd_q->reg_head_lo);
+       unsigned int head_slot;
+       unsigned int avail;
+
+       /* Turn the head register value into a descriptor index */
+       head_slot = (hw_head - ring_base) / sizeof(struct ccp5_desc);
+
+       /* Always one unused spot */
+       avail = head_slot + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
+
+       return avail % COMMANDS_PER_QUEUE;
+}
+
+/*
+ * Copy @desc into the next slot of @cmd_q's descriptor ring, publish the
+ * new tail to the hardware and start the queue. When the descriptor asks
+ * for an interrupt on completion, sleep until the queue signals int_rcvd.
+ *
+ * Returns 0 on success, the non-zero value from wait_event_interruptible()
+ * if the wait did not complete normally, or -EIO if the wait completed but
+ * the queue recorded a cmd_error.
+ */
+static int ccp5_do_cmd(struct ccp5_desc *desc,
+                      struct ccp_cmd_queue *cmd_q)
+{
+       __le32 *mP;
+       u32 *dP;
+       u32 tail;
+       int     i;
+       int ret = 0;
+
+       /* A descriptor with SOC set is submitted with IOC set instead */
+       if (CCP5_CMD_SOC(desc)) {
+               CCP5_CMD_IOC(desc) = 1;
+               CCP5_CMD_SOC(desc) = 0;
+       }
+       mutex_lock(&cmd_q->q_mutex);
+
+       /* mP is the ring slot (little-endian), dP the CPU-order source */
+       mP = (__le32 *) &cmd_q->qbase[cmd_q->qidx];
+       dP = (u32 *) desc;
+       for (i = 0; i < 8; i++)
+               mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
+
+       cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
+
+       /* The data used by this command must be flushed to memory */
+       wmb();
+
+       /* Write the new tail address back to the queue register */
+       tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+       iowrite32(tail, cmd_q->reg_tail_lo);
+
+       /* Turn the queue back on using our cached control register */
+       iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
+       mutex_unlock(&cmd_q->q_mutex);
+
+       if (CCP5_CMD_IOC(desc)) {
+               /* Wait for the job to complete */
+               ret = wait_event_interruptible(cmd_q->int_queue,
+                                              cmd_q->int_rcvd);
+               if (ret || cmd_q->cmd_error) {
+                       if (cmd_q->cmd_error)
+                               ccp_log_error(cmd_q->ccp,
+                                             cmd_q->cmd_error);
+                       /* A version 5 device doesn't use Job IDs... */
+                       if (!ret)
+                               ret = -EIO;
+               }
+               cmd_q->int_rcvd = 0;
+       }
+
+       /* Propagate the failure computed above instead of a constant 0 */
+       return ret;
+}
+
+/*
+ * Build a version 5 descriptor for the AES engine from @op and submit it on
+ * op->cmd_q. Source and destination are system-memory DMA buffers; the key
+ * is read from the storage block and op->sb_ctx is passed as the LSB id.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_aes(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       /* sb_key is a slot number; scale it by the per-slot size */
+       u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = op->init;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_AES_ENCRYPT(&function) = op->u.aes.action;
+       CCP_AES_MODE(&function) = op->u.aes.mode;
+       CCP_AES_TYPE(&function) = op->u.aes.type;
+       /* CFB is the only mode that sets the size field (all seven bits) */
+       if (op->u.aes.mode == CCP_AES_MODE_CFB)
+               CCP_AES_SIZE(&function) = 0x7f;
+
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+       CCP5_CMD_KEY_HI(&desc) = 0;
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Build a version 5 descriptor for the XTS-AES engine from @op and submit
+ * it on op->cmd_q. Source and destination are system-memory DMA buffers;
+ * the key is read from the storage block and op->sb_ctx is the LSB id.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_xts_aes(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       /* sb_key is a slot number; scale it by the per-slot size */
+       u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = op->init;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       /* The function field carries the direction and the XTS unit size */
+       function.raw = 0;
+       CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
+       CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+       CCP5_CMD_KEY_HI(&desc) =  0;
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Build a version 5 descriptor for the SHA engine from @op and submit it on
+ * op->cmd_q. The source is a system-memory DMA buffer and op->sb_ctx is
+ * passed as the LSB id; no destination address is set, because descriptor
+ * words 4/5 hold the message length instead.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_sha(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       /* INIT is always set here, regardless of op->init */
+       CCP5_CMD_INIT(&desc) = 1;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_SHA_TYPE(&function) = op->u.sha.type;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       /* The total message bit count is only supplied on the final block */
+       if (op->eom) {
+               CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
+               CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
+       } else {
+               CCP5_CMD_SHA_LO(&desc) = 0;
+               CCP5_CMD_SHA_HI(&desc) = 0;
+       }
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Build a version 5 descriptor for the RSA engine from @op and submit it on
+ * op->cmd_q. Source, destination and key (exponent) are all system-memory
+ * DMA buffers; INIT is always 0 and EOM is always 1.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_rsa(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = 1;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       /* The length comes from the RSA parameters, not the DMA buffer */
+       CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
+
+       /* Source is from external memory */
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       /* Destination is in external memory */
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       /* Key (Exponent) is in external memory */
+       CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+       CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Build a version 5 descriptor for the pass-through engine from @op and
+ * submit it on op->cmd_q. Either end of the transfer may be a system-memory
+ * DMA buffer or a storage block entry, selected by op->src.type and
+ * op->dst.type. SOC and INIT are always 0.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_passthru(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       struct ccp_dma_info *saddr = &op->src.u.dma;
+       struct ccp_dma_info *daddr = &op->dst.u.dma;
+
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
+
+       CCP5_CMD_SOC(&desc) = 0;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
+       CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       /* Use the source DMA length when the source is in system memory,
+        * otherwise the destination DMA length
+        */
+       if (op->src.type == CCP_MEMTYPE_SYSTEM)
+               CCP5_CMD_LEN(&desc) = saddr->length;
+       else
+               CCP5_CMD_LEN(&desc) = daddr->length;
+
+       if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+               CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+               CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+               CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+               /* A bitwise operation takes its mask from the sb_key slot */
+               if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+                       CCP5_CMD_LSB_ID(&desc) = op->sb_key;
+       } else {
+               /* Source is a storage block entry: slot number times size */
+               u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
+
+               CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
+               CCP5_CMD_SRC_HI(&desc) = 0;
+               CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
+       }
+
+       if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+               CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+               CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+               CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+       } else {
+               /* Destination is a storage block entry */
+               u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
+
+               CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
+               CCP5_CMD_DST_HI(&desc) = 0;
+               CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
+       }
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Build a version 5 descriptor for the ECC engine from @op and submit it on
+ * op->cmd_q. Source and destination are system-memory DMA buffers; SOC and
+ * INIT are always 0 and EOM is always 1.
+ *
+ * Returns the result of ccp5_do_cmd().
+ */
+static int ccp5_perform_ecc(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
+
+       CCP5_CMD_SOC(&desc) = 0;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = 1;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       /* Only the mode bit-field is set; it carries the ECC function */
+       function.raw = 0;
+       function.ecc.mode = op->u.ecc.function;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+/*
+ * Fill in cmd_q->lsbmask from @status: for each LSB region from 1 upwards,
+ * the region is marked usable when this queue's bit is set in the region's
+ * LSB_REGION_WIDTH-wide slice of @status.
+ *
+ * Returns 0 if the queue can reach at least one region, -EINVAL otherwise.
+ */
+static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
+{
+       int q_mask = 1 << cmd_q->id;
+       int n_regions;
+       int region;
+
+       /* Build a bit mask to know which LSBs this queue has access to.
+        * Don't bother with segment 0 as it has special privileges.
+        */
+       for (region = 1; region < MAX_LSB_CNT; region++) {
+               if (status & q_mask)
+                       bitmap_set(cmd_q->lsbmask, region, 1);
+               status >>= LSB_REGION_WIDTH;
+       }
+
+       n_regions = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+       dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
+                cmd_q->id, n_regions);
+
+       if (!n_regions)
+               return -EINVAL;
+
+       return 0;
+}
+
+
+/*
+ * Give a private LSB to every queue that can reach exactly @lsb_cnt LSBs.
+ *
+ * @lsb_pub is the mask of LSBs not yet privately assigned; each LSB handed
+ * out is cleared from it. @n_lsbs is the number of LSBs still unassigned on
+ * entry.
+ *
+ * Returns the updated count of unassigned LSBs, or -EINVAL when a matching
+ * queue has no reachable LSB left in @lsb_pub.
+ */
+static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
+                                       int lsb_cnt, int n_lsbs,
+                                       unsigned long *lsb_pub)
+{
+       DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+       int bitno;
+       int i;
+
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+               /* Only queues with exactly lsb_cnt candidates are handled */
+               if (bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT) != lsb_cnt)
+                       continue;
+
+               /* Work on a scratch copy so candidates can be crossed off */
+               bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
+
+               for (bitno = find_first_bit(qlsb, MAX_LSB_CNT);
+                    bitno < MAX_LSB_CNT;
+                    bitno = find_first_bit(qlsb, MAX_LSB_CNT)) {
+                       if (!test_bit(bitno, lsb_pub)) {
+                               /* Already taken; drop it and keep looking */
+                               bitmap_clear(qlsb, bitno, 1);
+                               continue;
+                       }
+
+                       /* We found an available LSB
+                        * that this queue can access
+                        */
+                       cmd_q->lsb = bitno;
+                       bitmap_clear(lsb_pub, bitno, 1);
+                       dev_info(ccp->dev,
+                                "Queue %d gets LSB %d\n",
+                                i, bitno);
+                       break;
+               }
+
+               /* Candidates exhausted without a match */
+               if (bitno >= MAX_LSB_CNT)
+                       return -EINVAL;
+
+               n_lsbs--;
+       }
+
+       return n_lsbs;
+}
+
+/* Distribute LSB regions among the command queues.
+ *
+ * Queues are served from the most- to the least-constrained (fewest
+ * reachable LSBs first). If N queues can only use M LSBs with N > M the
+ * assignment fails; otherwise every queue gets a dedicated LSB and whatever
+ * is left over becomes a shared resource. When there are fewer LSBs than
+ * queues, nothing is assigned privately and all LSB regions are shared.
+ *
+ * Returns 0 on success or -EINVAL if a queue could not be given an LSB.
+ */
+static int ccp_assign_lsbs(struct ccp_device *ccp)
+{
+       DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
+       DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+       int n_lsbs;
+       int lsb_cnt;
+       int bitno;
+       int rc;
+       int i;
+
+       /* Create an aggregate bitmap to get a total count of available LSBs */
+       bitmap_zero(lsb_pub, MAX_LSB_CNT);
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               bitmap_or(lsb_pub,
+                         lsb_pub, ccp->cmd_q[i].lsbmask,
+                         MAX_LSB_CNT);
+
+       n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
+
+       if (n_lsbs >= ccp->cmd_q_count) {
+               /* There are enough LSBs for one per queue. Walk the possible
+                * candidate counts upwards so the queues with the fewest
+                * choices pick first; each private assignment removes the
+                * LSB from the public mask.
+                */
+               lsb_cnt = 1;
+               while (n_lsbs && (lsb_cnt <= MAX_LSB_CNT)) {
+                       rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
+                                                         lsb_pub);
+                       if (rc < 0)
+                               return -EINVAL;
+                       n_lsbs = rc;
+                       lsb_cnt++;
+               }
+       }
+
+       /* Whatever remains set in lsb_pub is shared. Every LSB whose bit is
+        * clear cannot be shared, so all of its slots are marked "in use" in
+        * the device-wide map.
+        */
+       bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
+
+       for (bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+            bitno < MAX_LSB_CNT;
+            bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT)) {
+               bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
+               bitmap_set(qlsb, bitno, 1);
+       }
+
+       return 0;
+}
+
+/* Bring up a version 5 CCP.
+ *
+ * For every queue flagged in Q_MASK_REG a DMA pool and a coherent
+ * descriptor ring are allocated and the per-queue register addresses are
+ * precomputed. The queues are then quiesced, the IRQ is requested, the LSB
+ * masks are loaded and LSBs are assigned, one kthread per queue is started,
+ * interrupts are enabled and finally the device, its RNG and its DMA engine
+ * are registered.
+ *
+ * Returns 0 on success or a negative errno; on failure the resources
+ * released are those handled by the e_* labels at the end of the function.
+ */
+static int ccp5_init(struct ccp_device *ccp)
+{
+       struct device *dev = ccp->dev;
+       struct ccp_cmd_queue *cmd_q;
+       struct dma_pool *dma_pool;
+       char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+       unsigned int qmr, qim, i;
+       u64 status;
+       u32 status_lo, status_hi;
+       int ret;
+
+       /* Find available queues */
+       qim = 0;
+       qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+       for (i = 0; i < MAX_HW_QUEUES; i++) {
+
+               if (!(qmr & (1 << i)))
+                       continue;
+
+               /* Allocate a dma pool for this queue */
+               snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+                        ccp->name, i);
+               dma_pool = dma_pool_create(dma_pool_name, dev,
+                                          CCP_DMAPOOL_MAX_SIZE,
+                                          CCP_DMAPOOL_ALIGN, 0);
+               if (!dma_pool) {
+                       dev_err(dev, "unable to allocate dma pool\n");
+                       ret = -ENOMEM;
+                       /* Bail out before this queue is counted; without
+                        * the jump a queue with a NULL pool would be set
+                        * up and the error code could be lost.
+                        */
+                       goto e_pool;
+               }
+
+               cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+               ccp->cmd_q_count++;
+
+               cmd_q->ccp = ccp;
+               cmd_q->id = i;
+               cmd_q->dma_pool = dma_pool;
+               mutex_init(&cmd_q->q_mutex);
+
+               /* Page alignment satisfies our needs for N <= 128 */
+               BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
+               cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+               cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
+                                                  &cmd_q->qbase_dma,
+                                                  GFP_KERNEL);
+               if (!cmd_q->qbase) {
+                       dev_err(dev, "unable to allocate command queue\n");
+                       ret = -ENOMEM;
+                       goto e_pool;
+               }
+
+               cmd_q->qidx = 0;
+               /* Preset some register values and masks that are queue
+                * number dependent
+                */
+               cmd_q->reg_control = ccp->io_regs +
+                                    CMD5_Q_STATUS_INCR * (i + 1);
+               cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
+               cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
+               cmd_q->reg_int_enable = cmd_q->reg_control +
+                                       CMD5_Q_INT_ENABLE_BASE;
+               cmd_q->reg_interrupt_status = cmd_q->reg_control +
+                                             CMD5_Q_INTERRUPT_STATUS_BASE;
+               cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
+               cmd_q->reg_int_status = cmd_q->reg_control +
+                                       CMD5_Q_INT_STATUS_BASE;
+               cmd_q->reg_dma_status = cmd_q->reg_control +
+                                       CMD5_Q_DMA_STATUS_BASE;
+               cmd_q->reg_dma_read_status = cmd_q->reg_control +
+                                            CMD5_Q_DMA_READ_STATUS_BASE;
+               cmd_q->reg_dma_write_status = cmd_q->reg_control +
+                                             CMD5_Q_DMA_WRITE_STATUS_BASE;
+
+               init_waitqueue_head(&cmd_q->int_queue);
+
+               dev_dbg(dev, "queue #%u available\n", i);
+       }
+       if (ccp->cmd_q_count == 0) {
+               dev_notice(dev, "no command queues available\n");
+               ret = -EIO;
+               goto e_pool;
+       }
+       dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+       /* Turn off the queues and disable interrupts until ready */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+
+               cmd_q->qcontrol = 0; /* Start with nothing */
+               iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+               /* Disable the interrupts */
+               iowrite32(0x00, cmd_q->reg_int_enable);
+               ioread32(cmd_q->reg_int_status);
+               ioread32(cmd_q->reg_status);
+
+               /* Clear the interrupts */
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+       }
+
+       dev_dbg(dev, "Requesting an IRQ...\n");
+       /* Request an irq */
+       ret = ccp->get_irq(ccp);
+       if (ret) {
+               dev_err(dev, "unable to allocate an IRQ\n");
+               goto e_pool;
+       }
+
+       /* Initialize the queue used to suspend */
+       init_waitqueue_head(&ccp->suspend_queue);
+
+       dev_dbg(dev, "Loading LSB map...\n");
+       /* Copy the private LSB mask to the public registers */
+       status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+       status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+       iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
+       iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
+       /* NOTE(review): the shift of 30 presumably reflects that only the
+        * low 30 bits of the LO register carry mask bits - confirm against
+        * the hardware documentation.
+        */
+       status = ((u64)status_hi<<30) | (u64)status_lo;
+
+       dev_dbg(dev, "Configuring virtual queues...\n");
+       /* Configure size of each virtual queue accessible to host */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               u32 dma_addr_lo;
+               u32 dma_addr_hi;
+
+               cmd_q = &ccp->cmd_q[i];
+
+               cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
+               cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
+
+               cmd_q->qdma_tail = cmd_q->qbase_dma;
+               dma_addr_lo = low_address(cmd_q->qdma_tail);
+               iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
+               iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
+
+               dma_addr_hi = high_address(cmd_q->qdma_tail);
+               cmd_q->qcontrol |= (dma_addr_hi << 16);
+               iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+               /* Find the LSB regions accessible to the queue */
+               ccp_find_lsb_regions(cmd_q, status);
+               cmd_q->lsb = -1; /* Unassigned value */
+       }
+
+       dev_dbg(dev, "Assigning LSBs...\n");
+       ret = ccp_assign_lsbs(ccp);
+       if (ret) {
+               dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
+               goto e_irq;
+       }
+
+       /* Optimization: pre-allocate LSB slots for each queue */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+               ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+       }
+
+       dev_dbg(dev, "Starting threads...\n");
+       /* Create a kthread for each queue */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               struct task_struct *kthread;
+
+               cmd_q = &ccp->cmd_q[i];
+
+               kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+                                        "%s-q%u", ccp->name, cmd_q->id);
+               if (IS_ERR(kthread)) {
+                       dev_err(dev, "error creating queue thread (%ld)\n",
+                               PTR_ERR(kthread));
+                       ret = PTR_ERR(kthread);
+                       goto e_kthread;
+               }
+
+               cmd_q->kthread = kthread;
+               wake_up_process(kthread);
+       }
+
+       dev_dbg(dev, "Enabling interrupts...\n");
+       /* Enable interrupts */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
+       }
+
+       dev_dbg(dev, "Registering device...\n");
+       /* Put this on the unit list to make it available */
+       ccp_add_device(ccp);
+
+       ret = ccp_register_rng(ccp);
+       if (ret)
+               goto e_kthread;
+
+       /* Register the DMA engine support */
+       ret = ccp_dmaengine_register(ccp);
+       if (ret)
+               goto e_hwrng;
+
+       return 0;
+
+e_hwrng:
+       ccp_unregister_rng(ccp);
+
+e_kthread:
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
+e_irq:
+       ccp->free_irq(ccp);
+
+e_pool:
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+       return ret;
+}
+
+static void ccp5_destroy(struct ccp_device *ccp)
+{
+       struct device *dev = ccp->dev;
+       struct ccp_cmd_queue *cmd_q;
+       struct ccp_cmd *cmd;
+       unsigned int i;
+
+       /* Tear down a v5 CCP: unregister its services, quiesce the queues,
+        * stop the queue threads, release the IRQ and the descriptor
+        * rings, then fail every command that is still queued.
+        *
+        * Unregister the DMA engine
+        */
+       ccp_dmaengine_unregister(ccp);
+
+       /* Unregister the RNG */
+       ccp_unregister_rng(ccp);
+
+       /* Remove this device from the list of available units first */
+       ccp_del_device(ccp);
+
+       /* Disable and clear interrupts */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+
+               /* Turn off the run bit */
+               iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
+
+               /* Clear (acknowledge) the interrupts: the same write to the
+                * interrupt status register that ccp5_init() and the IRQ
+                * handler use for that purpose
+                */
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+
+               /* Disable the interrupts, then read both status registers */
+               iowrite32(0x00, cmd_q->reg_int_enable);
+               ioread32(cmd_q->reg_int_status);
+               ioread32(cmd_q->reg_status);
+       }
+
+       /* Stop the queue kthreads; afterwards the IRQ is released and each
+        * queue's coherent descriptor ring (qbase) is freed
+        */
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
+       ccp->free_irq(ccp);
+
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+               dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+                                 cmd_q->qbase_dma);
+       }
+
+       /* Flush the cmd and backlog queue: each remaining entry is unlinked
+        * and completed with -ENODEV
+        */
+       while (!list_empty(&ccp->cmd)) {
+               /* Invoke the callback directly with an error code */
+               cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+               list_del(&cmd->entry);
+               cmd->callback(cmd->data, -ENODEV);
+       }
+       while (!list_empty(&ccp->backlog)) {
+               /* Invoke the callback directly with an error code */
+               cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+               list_del(&cmd->entry);
+               cmd->callback(cmd->data, -ENODEV);
+       }
+}
+
+/* Interrupt handler for a v5 CCP.
+ *
+ * Every queue is polled; for each one whose interrupt status register is
+ * non-zero the status registers are latched into the queue structure, the
+ * interrupt is acknowledged and the waiter on int_queue is woken.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ccp5_irq_handler(int irq, void *data)
+{
+       struct device *dev = data;
+       struct ccp_device *ccp = dev_get_drvdata(dev);
+       struct ccp_cmd_queue *queue;
+       unsigned int q;
+       u32 pending;
+
+       for (q = 0; q < ccp->cmd_q_count; q++) {
+               queue = &ccp->cmd_q[q];
+
+               pending = ioread32(queue->reg_interrupt_status);
+               if (!pending)
+                       continue;
+
+               queue->int_status = pending;
+               queue->q_status = ioread32(queue->reg_status);
+               queue->q_int_status = ioread32(queue->reg_int_status);
+
+               /* Keep only the first error value that is seen */
+               if (!queue->cmd_error && (pending & INT_ERROR))
+                       queue->cmd_error = CMD_Q_ERROR(queue->q_status);
+
+               queue->int_rcvd = 1;
+
+               /* Ack the interrupt, then wake the queue's kthread */
+               iowrite32(ALL_INTERRUPTS, queue->reg_interrupt_status);
+               wake_up_interruptible(&queue->int_queue);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* Setup hook: write the fixed request-ID configuration value */
+static void ccp5_config(struct ccp_device *ccp)
+{
+       void __iomem *reqid_cfg = ccp->io_regs + CMD5_REQID_CONFIG_OFFSET;
+
+       /* Public side */
+       iowrite32(0x00001249, reqid_cfg);
+}
+
+static void ccp5other_config(struct ccp_device *ccp)
+{
+       int i;
+       u32 rnd;
+
+       /* We own all of the queues on the NTB CCP, so the device-wide
+        * registers are programmed here with fixed values before the
+        * common ccp5_config() setup is applied at the end.
+        * NOTE(review): the loop below copies 12 words read from
+        * TRNG_OUT_REG into the AES mask register - presumably to seed
+        * the mask with random data; confirm against the hardware
+        * documentation, as the register values are not explained here.
+        */
+
+       iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
+       iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
+       for (i = 0; i < 12; i++) {
+               rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
+               iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
+       }
+
+       iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
+       iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
+       iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
+
+       iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+       iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+
+       iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
+
+       ccp5_config(ccp);
+}
+
+/* Version 5 adds some functions, but is essentially the same as v3.
+ * This table supplies the v5 implementation of each ccp_actions hook.
+ */
+static const struct ccp_actions ccp5_actions = {
+       .aes = ccp5_perform_aes,
+       .xts_aes = ccp5_perform_xts_aes,
+       .sha = ccp5_perform_sha,
+       .rsa = ccp5_perform_rsa,
+       .passthru = ccp5_perform_passthru,
+       .ecc = ccp5_perform_ecc,
+       .sballoc = ccp_lsb_alloc,
+       .sbfree = ccp_lsb_free,
+       .init = ccp5_init,
+       .destroy = ccp5_destroy,
+       .get_free_slots = ccp5_get_free_slots,
+       .irqhandler = ccp5_irq_handler,
+};
+
+/* Version data for a v5.0 CCP whose setup hook is ccp5_config(), which
+ * only writes the request-ID configuration register.
+ */
+const struct ccp_vdata ccpv5a = {
+       .bar = 2,
+       .offset = 0x0,
+       .version = CCP_VERSION(5, 0),
+       .perform = &ccp5_actions,
+       .setup = ccp5_config,
+};
+
+/* Version data for a v5.0 CCP whose setup hook is ccp5other_config(),
+ * which programs the device-wide registers before calling ccp5_config().
+ */
+const struct ccp_vdata ccpv5b = {
+       .bar = 2,
+       .offset = 0x0,
+       .version = CCP_VERSION(5, 0),
+       .perform = &ccp5_actions,
+       .setup = ccp5other_config,
+};
index 87b9f2b..cafa633 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -39,6 +40,59 @@ struct ccp_tasklet_data {
        struct ccp_cmd *cmd;
 };
 
+/* Human-readable error strings */
+char *ccp_error_codes[] = {
+       "",
+       "ERR 01: ILLEGAL_ENGINE",
+       "ERR 02: ILLEGAL_KEY_ID",
+       "ERR 03: ILLEGAL_FUNCTION_TYPE",
+       "ERR 04: ILLEGAL_FUNCTION_MODE",
+       "ERR 05: ILLEGAL_FUNCTION_ENCRYPT",
+       "ERR 06: ILLEGAL_FUNCTION_SIZE",
+       "ERR 07: Zlib_MISSING_INIT_EOM",
+       "ERR 08: ILLEGAL_FUNCTION_RSVD",
+       "ERR 09: ILLEGAL_BUFFER_LENGTH",
+       "ERR 10: VLSB_FAULT",
+       "ERR 11: ILLEGAL_MEM_ADDR",
+       "ERR 12: ILLEGAL_MEM_SEL",
+       "ERR 13: ILLEGAL_CONTEXT_ID",
+       "ERR 14: ILLEGAL_KEY_ADDR",
+       "ERR 15: 0xF Reserved",
+       "ERR 16: Zlib_ILLEGAL_MULTI_QUEUE",
+       "ERR 17: Zlib_ILLEGAL_JOBID_CHANGE",
+       "ERR 18: CMD_TIMEOUT",
+       "ERR 19: IDMA0_AXI_SLVERR",
+       "ERR 20: IDMA0_AXI_DECERR",
+       "ERR 21: 0x15 Reserved",
+       "ERR 22: IDMA1_AXI_SLAVE_FAULT",
+       "ERR 23: IDMA1_AIXI_DECERR",
+       "ERR 24: 0x18 Reserved",
+       "ERR 25: ZLIBVHB_AXI_SLVERR",
+       "ERR 26: ZLIBVHB_AXI_DECERR",
+       "ERR 27: 0x1B Reserved",
+       "ERR 27: ZLIB_UNEXPECTED_EOM",
+       "ERR 27: ZLIB_EXTRA_DATA",
+       "ERR 30: ZLIB_BTYPE",
+       "ERR 31: ZLIB_UNDEFINED_SYMBOL",
+       "ERR 32: ZLIB_UNDEFINED_DISTANCE_S",
+       "ERR 33: ZLIB_CODE_LENGTH_SYMBOL",
+       "ERR 34: ZLIB _VHB_ILLEGAL_FETCH",
+       "ERR 35: ZLIB_UNCOMPRESSED_LEN",
+       "ERR 36: ZLIB_LIMIT_REACHED",
+       "ERR 37: ZLIB_CHECKSUM_MISMATCH0",
+       "ERR 38: ODMA0_AXI_SLVERR",
+       "ERR 39: ODMA0_AXI_DECERR",
+       "ERR 40: 0x28 Reserved",
+       "ERR 41: ODMA1_AXI_SLVERR",
+       "ERR 42: ODMA1_AXI_DECERR",
+       "ERR 43: LSB_PARITY_ERR",
+};
+
+void ccp_log_error(struct ccp_device *d, int e)
+{
+       dev_err(d->dev, "CCP error: %s (0x%x)\n", ccp_error_codes[e], e);
+}
+
 /* List of CCPs, CCP count, read-write access lock, and access functions
  *
  * Lock structure: get ccp_unit_lock for reading whenever we need to
@@ -58,7 +112,7 @@ static struct ccp_device *ccp_rr;
 
 /* Ever-increasing value to produce unique unit numbers */
 static atomic_t ccp_unit_ordinal;
-unsigned int ccp_increment_unit_ordinal(void)
+static unsigned int ccp_increment_unit_ordinal(void)
 {
        return atomic_inc_return(&ccp_unit_ordinal);
 }
@@ -118,6 +172,29 @@ void ccp_del_device(struct ccp_device *ccp)
        write_unlock_irqrestore(&ccp_unit_lock, flags);
 }
 
+
+
+/* Register the CCP's random number generator with the hwrng core.
+ *
+ * The hwrng is named after ccp->rngname and reads through ccp_trng_read().
+ * Returns the result of hwrng_register(); a failure is logged.
+ */
+int ccp_register_rng(struct ccp_device *ccp)
+{
+       struct hwrng *rng = &ccp->hwrng;
+       int err;
+
+       dev_dbg(ccp->dev, "Registering RNG...\n");
+
+       rng->name = ccp->rngname;
+       rng->read = ccp_trng_read;
+
+       err = hwrng_register(rng);
+       if (err)
+               dev_err(ccp->dev, "error registering hwrng (%d)\n", err);
+
+       return err;
+}
+
+/* Unregister the CCP's hwrng; a no-op while hwrng.name is still unset */
+void ccp_unregister_rng(struct ccp_device *ccp)
+{
+       if (!ccp->hwrng.name)
+               return;
+
+       hwrng_unregister(&ccp->hwrng);
+}
+
 static struct ccp_device *ccp_get_device(void)
 {
        unsigned long flags;
@@ -397,9 +474,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 
        spin_lock_init(&ccp->cmd_lock);
        mutex_init(&ccp->req_mutex);
-       mutex_init(&ccp->ksb_mutex);
-       ccp->ksb_count = KSB_COUNT;
-       ccp->ksb_start = 0;
+       mutex_init(&ccp->sb_mutex);
+       ccp->sb_count = KSB_COUNT;
+       ccp->sb_start = 0;
 
        ccp->ord = ccp_increment_unit_ordinal();
        snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
@@ -408,6 +485,34 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
        return ccp;
 }
 
+/* hwrng read callback: deliver at most one 32-bit word from the TRNG.
+ *
+ * Returns the number of bytes copied into @data, 0 when the register read
+ * back as zero, or -EIO once the count of consecutive zero reads has
+ * exceeded TRNG_RETRIES.
+ */
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+       struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+       u32 word;
+       int nbytes = min_t(int, sizeof(word), max);
+
+       /* The caller provides the locking, so the hwrng-related device
+        * fields can be updated safely here
+        */
+       word = ioread32(ccp->io_regs + TRNG_OUT_REG);
+       if (word) {
+               /* Good sample: reset the retry counter and hand it over */
+               ccp->hwrng_retries = 0;
+               memcpy(data, &word, nbytes);
+
+               return nbytes;
+       }
+
+       /* Zero is returned if no data is available or if a bad-entropy
+        * error is present. Assume an error if we exceed TRNG_RETRIES
+        * reads of zero.
+        */
+       if (ccp->hwrng_retries++ > TRNG_RETRIES)
+               return -EIO;
+
+       return 0;
+}
+
 #ifdef CONFIG_PM
 bool ccp_queues_suspended(struct ccp_device *ccp)
 {
index bd41ffc..da5f4a6 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #define CMD_Q_ERROR(__qs)              ((__qs) & 0x0000003f)
 #define CMD_Q_DEPTH(__qs)              (((__qs) >> 12) & 0x0000000f)
 
-/****** REQ0 Related Values ******/
+/* ------------------------ CCP Version 5 Specifics ------------------------ */
+#define CMD5_QUEUE_MASK_OFFSET         0x00
+#define        CMD5_QUEUE_PRIO_OFFSET          0x04
+#define CMD5_REQID_CONFIG_OFFSET       0x08
+#define        CMD5_CMD_TIMEOUT_OFFSET         0x10
+#define LSB_PUBLIC_MASK_LO_OFFSET      0x18
+#define LSB_PUBLIC_MASK_HI_OFFSET      0x1C
+#define LSB_PRIVATE_MASK_LO_OFFSET     0x20
+#define LSB_PRIVATE_MASK_HI_OFFSET     0x24
+
+#define CMD5_Q_CONTROL_BASE            0x0000
+#define CMD5_Q_TAIL_LO_BASE            0x0004
+#define CMD5_Q_HEAD_LO_BASE            0x0008
+#define CMD5_Q_INT_ENABLE_BASE         0x000C
+#define CMD5_Q_INTERRUPT_STATUS_BASE   0x0010
+
+#define CMD5_Q_STATUS_BASE             0x0100
+#define CMD5_Q_INT_STATUS_BASE         0x0104
+#define CMD5_Q_DMA_STATUS_BASE         0x0108
+#define CMD5_Q_DMA_READ_STATUS_BASE    0x010C
+#define CMD5_Q_DMA_WRITE_STATUS_BASE   0x0110
+#define CMD5_Q_ABORT_BASE              0x0114
+#define CMD5_Q_AX_CACHE_BASE           0x0118
+
+#define        CMD5_CONFIG_0_OFFSET            0x6000
+#define        CMD5_TRNG_CTL_OFFSET            0x6008
+#define        CMD5_AES_MASK_OFFSET            0x6010
+#define        CMD5_CLK_GATE_CTL_OFFSET        0x603C
+
+/* Address offset between two virtual queue registers */
+#define CMD5_Q_STATUS_INCR             0x1000
+
+/* Bit masks */
+#define CMD5_Q_RUN                     0x1
+#define CMD5_Q_HALT                    0x2
+#define CMD5_Q_MEM_LOCATION            0x4
+#define CMD5_Q_SIZE                    0x1F
+#define CMD5_Q_SHIFT                   3
+#define COMMANDS_PER_QUEUE             16
+#define QUEUE_SIZE_VAL                 ((ffs(COMMANDS_PER_QUEUE) - 2) & \
+                                         CMD5_Q_SIZE)
+/* Low-order bit mask of width QUEUE_SIZE_VAL + 6. The shift needs its own
+ * parentheses: '-' binds tighter than '<<', so without them the macro
+ * evaluates to a single bit instead of a mask.
+ */
+#define Q_PTR_MASK                     ((2 << (QUEUE_SIZE_VAL + 5)) - 1)
+#define Q_DESC_SIZE                    sizeof(struct ccp5_desc)
+#define Q_SIZE(n)                      (COMMANDS_PER_QUEUE*(n))
+
+#define INT_COMPLETION                 0x1
+#define INT_ERROR                      0x2
+#define INT_QUEUE_STOPPED              0x4
+#define ALL_INTERRUPTS                 (INT_COMPLETION| \
+                                        INT_ERROR| \
+                                        INT_QUEUE_STOPPED)
+
+#define LSB_REGION_WIDTH               5
+#define MAX_LSB_CNT                    8
+
+#define LSB_SIZE                       16
+#define LSB_ITEM_SIZE                  32
+#define PLSB_MAP_SIZE                  (LSB_SIZE)
+#define SLSB_MAP_SIZE                  (MAX_LSB_CNT * LSB_SIZE)
+
+/* Convert an LSB address into an entry index; the argument is
+ * parenthesized so that expression arguments divide as a whole.
+ */
+#define LSB_ENTRY_NUMBER(LSB_ADDR)     ((LSB_ADDR) / LSB_ITEM_SIZE)
+
+/* ------------------------ CCP Version 3 Specifics ------------------------ */
 #define REQ0_WAIT_FOR_WRITE            0x00000004
 #define REQ0_INT_ON_COMPLETE           0x00000002
 #define REQ0_STOP_ON_COMPLETE          0x00000001
 #define KSB_START                      77
 #define KSB_END                                127
 #define KSB_COUNT                      (KSB_END - KSB_START + 1)
-#define CCP_KSB_BITS                   256
-#define CCP_KSB_BYTES                  32
+#define CCP_SB_BITS                    256
 
 #define CCP_JOBID_MASK                 0x0000003f
 
+/* ------------------------ General CCP Defines ------------------------ */
+
 #define CCP_DMAPOOL_MAX_SIZE           64
 #define CCP_DMAPOOL_ALIGN              BIT(5)
 
 #define CCP_REVERSE_BUF_SIZE           64
 
-#define CCP_AES_KEY_KSB_COUNT          1
-#define CCP_AES_CTX_KSB_COUNT          1
+#define CCP_AES_KEY_SB_COUNT           1
+#define CCP_AES_CTX_SB_COUNT           1
 
-#define CCP_XTS_AES_KEY_KSB_COUNT      1
-#define CCP_XTS_AES_CTX_KSB_COUNT      1
+#define CCP_XTS_AES_KEY_SB_COUNT       1
+#define CCP_XTS_AES_CTX_SB_COUNT       1
 
-#define CCP_SHA_KSB_COUNT              1
+#define CCP_SHA_SB_COUNT               1
 
 #define CCP_RSA_MAX_WIDTH              4096
 
 #define CCP_PASSTHRU_BLOCKSIZE         256
 #define CCP_PASSTHRU_MASKSIZE          32
-#define CCP_PASSTHRU_KSB_COUNT         1
+#define CCP_PASSTHRU_SB_COUNT          1
 
 #define CCP_ECC_MODULUS_BYTES          48      /* 384-bits */
 #define CCP_ECC_MAX_OPERANDS           6
 #define CCP_ECC_RESULT_OFFSET          60
 #define CCP_ECC_RESULT_SUCCESS         0x0001
 
-struct ccp_op;
-
-/* Structure for computation functions that are device-specific */
-struct ccp_actions {
-       int (*perform_aes)(struct ccp_op *);
-       int (*perform_xts_aes)(struct ccp_op *);
-       int (*perform_sha)(struct ccp_op *);
-       int (*perform_rsa)(struct ccp_op *);
-       int (*perform_passthru)(struct ccp_op *);
-       int (*perform_ecc)(struct ccp_op *);
-       int (*init)(struct ccp_device *);
-       void (*destroy)(struct ccp_device *);
-       irqreturn_t (*irqhandler)(int, void *);
-};
-
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-       unsigned int version;
-       const struct ccp_actions *perform;
-};
-
-extern struct ccp_vdata ccpv3;
+#define CCP_SB_BYTES                   32
 
+struct ccp_op;
 struct ccp_device;
 struct ccp_cmd;
+struct ccp_fns;
 
 struct ccp_dma_cmd {
        struct list_head entry;
@@ -212,9 +257,29 @@ struct ccp_cmd_queue {
        /* Queue dma pool */
        struct dma_pool *dma_pool;
 
-       /* Queue reserved KSB regions */
-       u32 ksb_key;
-       u32 ksb_ctx;
+       /* Queue base address (not necessarily aligned) */
+       struct ccp5_desc *qbase;
+
+       /* Aligned queue start address (per requirement) */
+       struct mutex q_mutex ____cacheline_aligned;
+       unsigned int qidx;
+
+       /* Version 5 has different requirements for queue memory */
+       unsigned int qsize;
+       dma_addr_t qbase_dma;
+       dma_addr_t qdma_tail;
+
+       /* Per-queue reserved storage block(s) */
+       u32 sb_key;
+       u32 sb_ctx;
+
+       /* Bitmap of LSBs that can be accessed by this queue */
+       DECLARE_BITMAP(lsbmask, MAX_LSB_CNT);
+       /* Private LSB that is assigned to this queue, or -1 if none.
+        * Bitmap for my private LSB, unused otherwise
+        */
+       unsigned int lsb;
+       DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
 
        /* Queue processing thread */
        struct task_struct *kthread;
@@ -229,8 +294,17 @@ struct ccp_cmd_queue {
        u32 int_err;
 
        /* Register addresses for queue */
+       void __iomem *reg_control;
+       void __iomem *reg_tail_lo;
+       void __iomem *reg_head_lo;
+       void __iomem *reg_int_enable;
+       void __iomem *reg_interrupt_status;
        void __iomem *reg_status;
        void __iomem *reg_int_status;
+       void __iomem *reg_dma_status;
+       void __iomem *reg_dma_read_status;
+       void __iomem *reg_dma_write_status;
+       u32 qcontrol; /* Cached control register */
 
        /* Status values from job */
        u32 int_status;
@@ -253,16 +327,14 @@ struct ccp_device {
 
        struct device *dev;
 
-       /*
-        * Bus specific device information
+       /* Bus specific device information
         */
        void *dev_specific;
        int (*get_irq)(struct ccp_device *ccp);
        void (*free_irq)(struct ccp_device *ccp);
        unsigned int irq;
 
-       /*
-        * I/O area used for device communication. The register mapping
+       /* I/O area used for device communication. The register mapping
         * starts at an offset into the mapped bar.
         *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
         *   need to be protected while a command queue thread is accessing
@@ -272,8 +344,7 @@ struct ccp_device {
        void __iomem *io_map;
        void __iomem *io_regs;
 
-       /*
-        * Master lists that all cmds are queued on. Because there can be
+       /* Master lists that all cmds are queued on. Because there can be
         * more than one CCP command queue that can process a cmd a separate
         * backlog list is needed so that the backlog completion call
         * completes before the cmd is available for execution.
@@ -283,47 +354,54 @@ struct ccp_device {
        struct list_head cmd;
        struct list_head backlog;
 
-       /*
-        * The command queues. These represent the queues available on the
+       /* The command queues. These represent the queues available on the
         * CCP that are available for processing cmds
         */
        struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
        unsigned int cmd_q_count;
 
-       /*
-        * Support for the CCP True RNG
+       /* Support for the CCP True RNG
         */
        struct hwrng hwrng;
        unsigned int hwrng_retries;
 
-       /*
-        * Support for the CCP DMA capabilities
+       /* Support for the CCP DMA capabilities
         */
        struct dma_device dma_dev;
        struct ccp_dma_chan *ccp_dma_chan;
        struct kmem_cache *dma_cmd_cache;
        struct kmem_cache *dma_desc_cache;
 
-       /*
-        * A counter used to generate job-ids for cmds submitted to the CCP
+       /* A counter used to generate job-ids for cmds submitted to the CCP
         */
        atomic_t current_id ____cacheline_aligned;
 
-       /*
-        * The CCP uses key storage blocks (KSB) to maintain context for certain
-        * operations. To prevent multiple cmds from using the same KSB range
-        * a command queue reserves a KSB range for the duration of the cmd.
-        * Each queue, will however, reserve 2 KSB blocks for operations that
-        * only require single KSB entries (eg. AES context/iv and key) in order
-        * to avoid allocation contention.  This will reserve at most 10 KSB
-        * entries, leaving 40 KSB entries available for dynamic allocation.
+       /* The v3 CCP uses key storage blocks (SB) to maintain context for
+        * certain operations. To prevent multiple cmds from using the same
+        * SB range a command queue reserves an SB range for the duration of
+        * the cmd. Each queue, will however, reserve 2 SB blocks for
+        * operations that only require single SB entries (eg. AES context/iv
+        * and key) in order to avoid allocation contention.  This will reserve
+        * at most 10 SB entries, leaving 40 SB entries available for dynamic
+        * allocation.
+        *
+        * The v5 CCP Local Storage Block (LSB) is broken up into 8
+        * memory ranges, each of which can be enabled for access by one
+        * or more queues. Device initialization takes this into account,
+        * and attempts to assign one region for exclusive use by each
+        * available queue; the rest are then aggregated as "public" use.
+        * If there are fewer regions than queues, all regions are shared
+        * amongst all queues.
         */
-       struct mutex ksb_mutex ____cacheline_aligned;
-       DECLARE_BITMAP(ksb, KSB_COUNT);
-       wait_queue_head_t ksb_queue;
-       unsigned int ksb_avail;
-       unsigned int ksb_count;
-       u32 ksb_start;
+       struct mutex sb_mutex ____cacheline_aligned;
+       DECLARE_BITMAP(sb, KSB_COUNT);
+       wait_queue_head_t sb_queue;
+       unsigned int sb_avail;
+       unsigned int sb_count;
+       u32 sb_start;
+
+       /* Bitmap of shared LSBs, if any */
+       DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE);
 
        /* Suspend support */
        unsigned int suspending;
@@ -335,10 +413,11 @@ struct ccp_device {
 
 enum ccp_memtype {
        CCP_MEMTYPE_SYSTEM = 0,
-       CCP_MEMTYPE_KSB,
+       CCP_MEMTYPE_SB,
        CCP_MEMTYPE_LOCAL,
        CCP_MEMTYPE__LAST,
 };
+/* Alias for the storage-block memory type (enumerator CCP_MEMTYPE_SB) */
+#define        CCP_MEMTYPE_LSB CCP_MEMTYPE_SB
 
 struct ccp_dma_info {
        dma_addr_t address;
@@ -379,7 +458,7 @@ struct ccp_mem {
        enum ccp_memtype type;
        union {
                struct ccp_dma_info dma;
-               u32 ksb;
+               u32 sb;
        } u;
 };
 
@@ -419,13 +498,14 @@ struct ccp_op {
        u32 jobid;
        u32 ioc;
        u32 soc;
-       u32 ksb_key;
-       u32 ksb_ctx;
+       u32 sb_key;
+       u32 sb_ctx;
        u32 init;
        u32 eom;
 
        struct ccp_mem src;
        struct ccp_mem dst;
+       struct ccp_mem exp;
 
        union {
                struct ccp_aes_op aes;
@@ -435,6 +515,7 @@ struct ccp_op {
                struct ccp_passthru_op passthru;
                struct ccp_ecc_op ecc;
        } u;
+       struct ccp_mem key;
 };
 
 static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
@@ -447,6 +528,70 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
        return upper_32_bits(info->address + info->offset) & 0x0000ffff;
 }
 
+/**
+ * descriptor for version 5 CCP commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: low 32 bits of key pointer
+ * word 7: upper 16 bits of key pointer; key memory type
+ */
+struct dword0 {
+       __le32 soc:1;
+       __le32 ioc:1;
+       __le32 rsvd1:1;
+       __le32 init:1;
+       __le32 eom:1;           /* AES/SHA only */
+       __le32 function:15;
+       __le32 engine:4;
+       __le32 prot:1;
+       __le32 rsvd2:7;
+};
+
+struct dword3 {
+       __le32 src_hi:16;
+       __le32 src_mem:2;
+       __le32 lsb_cxt_id:8;
+       __le32 rsvd1:5;
+       __le32 fixed:1;
+};
+
+union dword4 {
+       __le32 dst_lo;          /* NON-SHA      */
+       __le32 sha_len_lo;      /* SHA          */
+};
+
+union dword5 {
+       struct {
+               __le32 dst_hi:16;
+               __le32 dst_mem:2;
+               __le32 rsvd1:13;
+               __le32 fixed:1;
+       } fields;
+       __le32 sha_len_hi;
+};
+
+struct dword7 {
+       __le32 key_hi:16;
+       __le32 key_mem:2;
+       __le32 rsvd1:14;
+};
+
+struct ccp5_desc {
+       struct dword0 dw0;
+       __le32 length;
+       __le32 src_lo;
+       struct dword3 dw3;
+       union dword4 dw4;
+       union dword5 dw5;
+       __le32 key_lo;
+       struct dword7 dw7;
+};
+
 int ccp_pci_init(void);
 void ccp_pci_exit(void);
 
@@ -456,13 +601,48 @@ void ccp_platform_exit(void);
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
+extern void ccp_log_error(struct ccp_device *, int);
+
 struct ccp_device *ccp_alloc_struct(struct device *dev);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
 
 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
 
+int ccp_register_rng(struct ccp_device *ccp);
+void ccp_unregister_rng(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 
+/* Structure for computation functions that are device-specific */
+struct ccp_actions {
+       int (*aes)(struct ccp_op *);
+       int (*xts_aes)(struct ccp_op *);
+       int (*sha)(struct ccp_op *);
+       int (*rsa)(struct ccp_op *);
+       int (*passthru)(struct ccp_op *);
+       int (*ecc)(struct ccp_op *);
+       u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int);
+       void (*sbfree)(struct ccp_cmd_queue *, unsigned int,
+                              unsigned int);
+       unsigned int (*get_free_slots)(struct ccp_cmd_queue *);
+       int (*init)(struct ccp_device *);
+       void (*destroy)(struct ccp_device *);
+       irqreturn_t (*irqhandler)(int, void *);
+};
+
+/* Structure to hold CCP version-specific values */
+struct ccp_vdata {
+       const unsigned int version;
+       void (*setup)(struct ccp_device *);
+       const struct ccp_actions *perform;
+       const unsigned int bar;
+       const unsigned int offset;
+};
+
+extern const struct ccp_vdata ccpv3;
+extern const struct ccp_vdata ccpv5a;
+extern const struct ccp_vdata ccpv5b;
+
 #endif
index 94f77b0..6553912 100644 (file)
@@ -299,12 +299,10 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan,
 {
        struct ccp_dma_desc *desc;
 
-       desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
+       desc = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
        if (!desc)
                return NULL;
 
-       memset(desc, 0, sizeof(*desc));
-
        dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan);
        desc->tx_desc.flags = flags;
        desc->tx_desc.tx_submit = ccp_tx_submit;
@@ -650,8 +648,11 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
        dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
                                             "%s-dmaengine-desc-cache",
                                             ccp->name);
-       if (!dma_cmd_cache_name)
-               return -ENOMEM;
+       if (!dma_desc_cache_name) {
+               ret = -ENOMEM;
+               goto err_cache;
+       }
+
        ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
                                                sizeof(struct ccp_dma_desc),
                                                sizeof(void *),
index ffa2891..50fae44 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #include "ccp-dev.h"
 
 /* SHA initial context values */
-static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
        cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
-       cpu_to_be32(SHA1_H4), 0, 0, 0,
+       cpu_to_be32(SHA1_H4),
 };
 
-static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
        cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
        cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
        cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
 };
 
-static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
        cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
        cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
        cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
 };
 
-static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
-{
-       int start;
-
-       for (;;) {
-               mutex_lock(&ccp->ksb_mutex);
-
-               start = (u32)bitmap_find_next_zero_area(ccp->ksb,
-                                                       ccp->ksb_count,
-                                                       ccp->ksb_start,
-                                                       count, 0);
-               if (start <= ccp->ksb_count) {
-                       bitmap_set(ccp->ksb, start, count);
-
-                       mutex_unlock(&ccp->ksb_mutex);
-                       break;
-               }
-
-               ccp->ksb_avail = 0;
-
-               mutex_unlock(&ccp->ksb_mutex);
-
-               /* Wait for KSB entries to become available */
-               if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
-                       return 0;
-       }
-
-       return KSB_START + start;
-}
-
-static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
-                        unsigned int count)
-{
-       if (!start)
-               return;
-
-       mutex_lock(&ccp->ksb_mutex);
-
-       bitmap_clear(ccp->ksb, start - KSB_START, count);
-
-       ccp->ksb_avail = 1;
-
-       mutex_unlock(&ccp->ksb_mutex);
-
-       wake_up_interruptible_all(&ccp->ksb_queue);
-}
+/* Job id for a new op: generated on v3 devices, 0 otherwise (evaluates ccp twice) */
+#define        CCP_NEW_JOBID(ccp)      (((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
+                                       ccp_gen_jobid(ccp) : 0)
 
 static u32 ccp_gen_jobid(struct ccp_device *ccp)
 {
@@ -231,7 +188,7 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
                                   unsigned int len, unsigned int se_len,
                                   bool sign_extend)
 {
-       unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+       unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];
 
        if (WARN_ON(se_len > sizeof(buffer)))
@@ -241,21 +198,21 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
        dm_offset = 0;
        nbytes = len;
        while (nbytes) {
-               ksb_len = min_t(unsigned int, nbytes, se_len);
-               sg_offset -= ksb_len;
+               sb_len = min_t(unsigned int, nbytes, se_len);
+               sg_offset -= sb_len;
 
-               scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
-               for (i = 0; i < ksb_len; i++)
-                       wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
+               scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0);
+               for (i = 0; i < sb_len; i++)
+                       wa->address[dm_offset + i] = buffer[sb_len - i - 1];
 
-               dm_offset += ksb_len;
-               nbytes -= ksb_len;
+               dm_offset += sb_len;
+               nbytes -= sb_len;
 
-               if ((ksb_len != se_len) && sign_extend) {
+               if ((sb_len != se_len) && sign_extend) {
                        /* Must sign-extend to nearest sign-extend length */
                        if (wa->address[dm_offset - 1] & 0x80)
                                memset(wa->address + dm_offset, 0xff,
-                                      se_len - ksb_len);
+                                      se_len - sb_len);
                }
        }
 
@@ -266,22 +223,22 @@ static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
                                    struct scatterlist *sg,
                                    unsigned int len)
 {
-       unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+       unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];
 
        sg_offset = 0;
        dm_offset = len;
        nbytes = len;
        while (nbytes) {
-               ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
-               dm_offset -= ksb_len;
+               sb_len = min_t(unsigned int, nbytes, sizeof(buffer));
+               dm_offset -= sb_len;
 
-               for (i = 0; i < ksb_len; i++)
-                       buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
-               scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
+               for (i = 0; i < sb_len; i++)
+                       buffer[sb_len - i - 1] = wa->address[dm_offset + i];
+               scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1);
 
-               sg_offset += ksb_len;
-               nbytes -= ksb_len;
+               sg_offset += sb_len;
+               nbytes -= sb_len;
        }
 }
 
@@ -449,9 +406,9 @@ static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
        }
 }
 
-static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
-                               struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                               u32 byte_swap, bool from)
+static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
+                              struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                              u32 byte_swap, bool from)
 {
        struct ccp_op op;
 
@@ -463,8 +420,8 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 
        if (from) {
                op.soc = 1;
-               op.src.type = CCP_MEMTYPE_KSB;
-               op.src.u.ksb = ksb;
+               op.src.type = CCP_MEMTYPE_SB;
+               op.src.u.sb = sb;
                op.dst.type = CCP_MEMTYPE_SYSTEM;
                op.dst.u.dma.address = wa->dma.address;
                op.dst.u.dma.length = wa->length;
@@ -472,27 +429,27 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
                op.src.type = CCP_MEMTYPE_SYSTEM;
                op.src.u.dma.address = wa->dma.address;
                op.src.u.dma.length = wa->length;
-               op.dst.type = CCP_MEMTYPE_KSB;
-               op.dst.u.ksb = ksb;
+               op.dst.type = CCP_MEMTYPE_SB;
+               op.dst.u.sb = sb;
        }
 
        op.u.passthru.byte_swap = byte_swap;
 
-       return cmd_q->ccp->vdata->perform->perform_passthru(&op);
+       return cmd_q->ccp->vdata->perform->passthru(&op);
 }
 
-static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
-                          struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                          u32 byte_swap)
+static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
+                         struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                         u32 byte_swap)
 {
-       return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
+       return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
 }
 
-static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
-                            struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                            u32 byte_swap)
+static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
+                           struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                           u32 byte_swap)
 {
-       return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
+       return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
 }
 
 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
@@ -527,54 +484,54 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - aes->key_len;
+       dm_offset = CCP_SB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
@@ -592,9 +549,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
                        op.eom = 1;
 
                        /* Push the K1/K2 key to the CCP now */
-                       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
-                                               op.ksb_ctx,
-                                               CCP_PASSTHRU_BYTESWAP_256BIT);
+                       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
+                                              op.sb_ctx,
+                                              CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
@@ -602,15 +559,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 
                        ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
                                        aes->cmac_key_len);
-                       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                             CCP_PASSTHRU_BYTESWAP_256BIT);
+                       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                            CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
                        }
                }
 
-               ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_src;
@@ -622,15 +579,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_src;
        }
 
        /* ...but we only need AES_BLOCK_SIZE bytes */
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 
 e_src:
@@ -680,56 +637,56 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - aes->key_len;
+       dm_offset = CCP_SB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
        if (aes->mode != CCP_AES_MODE_ECB) {
-               /* Load the AES context - conver to LE */
-               dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+               /* Load the AES context - convert to LE */
+               dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
                ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-               ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                     CCP_PASSTHRU_BYTESWAP_256BIT);
+               ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                    CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_ctx;
@@ -772,7 +729,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                                op.soc = 1;
                }
 
-               ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -785,15 +742,15 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                /* Retrieve the AES context - convert from LE to BE using
                 * 32-byte (256-bit) byteswapping
                 */
-               ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                       CCP_PASSTHRU_BYTESWAP_256BIT);
+               ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                      CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }
 
                /* ...but we only need AES_BLOCK_SIZE bytes */
-               dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+               dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
                ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
        }
 
@@ -857,53 +814,53 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
        if (!xts->key || !xts->iv || !xts->src || !xts->dst)
                return -EINVAL;
 
-       BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = 1;
        op.u.xts.action = xts->action;
        op.u.xts.unit_size = xts->unit_size;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
+       dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
        ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
        ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * for XTS is already in little endian format so no byte swapping
         * is needed.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
        ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_NOOP);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
@@ -937,7 +894,7 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
                if (!src.sg_wa.bytes_left)
                        op.eom = 1;
 
-               ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -949,15 +906,15 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }
 
        /* ...but we only need AES_BLOCK_SIZE bytes */
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
 
 e_dst:
@@ -982,163 +939,227 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        struct ccp_dm_workarea ctx;
        struct ccp_data src;
        struct ccp_op op;
+       unsigned int ioffset, ooffset;
+       unsigned int digest_size;
+       int sb_count;
+       const void *init;
+       u64 block_size;
+       int ctx_size;
        int ret;
 
-       if (sha->ctx_len != CCP_SHA_CTXSIZE)
+       switch (sha->type) {
+       case CCP_SHA_TYPE_1:
+               if (sha->ctx_len < SHA1_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA1_BLOCK_SIZE;
+               break;
+       case CCP_SHA_TYPE_224:
+               if (sha->ctx_len < SHA224_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA224_BLOCK_SIZE;
+               break;
+       case CCP_SHA_TYPE_256:
+               if (sha->ctx_len < SHA256_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA256_BLOCK_SIZE;
+               break;
+       default:
                return -EINVAL;
+       }
 
        if (!sha->ctx)
                return -EINVAL;
 
-       if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
+       if (!sha->final && (sha->src_len & (block_size - 1)))
                return -EINVAL;
 
-       if (!sha->src_len) {
-               const u8 *sha_zero;
+       /* The version 3 device can't handle zero-length input */
+       if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
 
-               /* Not final, just return */
-               if (!sha->final)
-                       return 0;
+               if (!sha->src_len) {
+                       unsigned int digest_len;
+                       const u8 *sha_zero;
 
-               /* CCP can't do a zero length sha operation so the caller
-                * must buffer the data.
-                */
-               if (sha->msg_bits)
-                       return -EINVAL;
+                       /* Not final, just return */
+                       if (!sha->final)
+                               return 0;
 
-               /* The CCP cannot perform zero-length sha operations so the
-                * caller is required to buffer data for the final operation.
-                * However, a sha operation for a message with a total length
-                * of zero is valid so known values are required to supply
-                * the result.
-                */
-               switch (sha->type) {
-               case CCP_SHA_TYPE_1:
-                       sha_zero = sha1_zero_message_hash;
-                       break;
-               case CCP_SHA_TYPE_224:
-                       sha_zero = sha224_zero_message_hash;
-                       break;
-               case CCP_SHA_TYPE_256:
-                       sha_zero = sha256_zero_message_hash;
-                       break;
-               default:
-                       return -EINVAL;
-               }
+                       /* CCP can't do a zero length sha operation so the
+                        * caller must buffer the data.
+                        */
+                       if (sha->msg_bits)
+                               return -EINVAL;
+
+                       /* The CCP cannot perform zero-length sha operations
+                        * so the caller is required to buffer data for the
+                        * final operation. However, a sha operation for a
+                        * message with a total length of zero is valid so
+                        * known values are required to supply the result.
+                        */
+                       switch (sha->type) {
+                       case CCP_SHA_TYPE_1:
+                               sha_zero = sha1_zero_message_hash;
+                               digest_len = SHA1_DIGEST_SIZE;
+                               break;
+                       case CCP_SHA_TYPE_224:
+                               sha_zero = sha224_zero_message_hash;
+                               digest_len = SHA224_DIGEST_SIZE;
+                               break;
+                       case CCP_SHA_TYPE_256:
+                               sha_zero = sha256_zero_message_hash;
+                               digest_len = SHA256_DIGEST_SIZE;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
 
-               scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
-                                        sha->ctx_len, 1);
+                       scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
+                                                digest_len, 1);
 
-               return 0;
+                       return 0;
+               }
        }
 
-       if (!sha->src)
-               return -EINVAL;
+       /* Set variables used throughout */
+       switch (sha->type) {
+       case CCP_SHA_TYPE_1:
+               digest_size = SHA1_DIGEST_SIZE;
+               init = (void *) ccp_sha1_init;
+               ctx_size = SHA1_DIGEST_SIZE;
+               sb_count = 1;
+               if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+                       ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
+               else
+                       ooffset = ioffset = 0;
+               break;
+       case CCP_SHA_TYPE_224:
+               digest_size = SHA224_DIGEST_SIZE;
+               init = (void *) ccp_sha224_init;
+               ctx_size = SHA256_DIGEST_SIZE;
+               sb_count = 1;
+               ioffset = 0;
+               if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+                       ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
+               else
+                       ooffset = 0;
+               break;
+       case CCP_SHA_TYPE_256:
+               digest_size = SHA256_DIGEST_SIZE;
+               init = (void *) ccp_sha256_init;
+               ctx_size = SHA256_DIGEST_SIZE;
+               sb_count = 1;
+               ooffset = ioffset = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               goto e_data;
+       }
 
-       BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
+       /* For zero-length plaintext the src pointer is ignored;
+        * otherwise both parts must be valid
+        */
+       if (sha->src_len && !sha->src)
+               return -EINVAL;
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
        op.u.sha.type = sha->type;
        op.u.sha.msg_bits = sha->msg_bits;
 
-       /* The SHA context fits in a single (32-byte) KSB entry and
-        * must be in little endian format. Use the 256-bit byte swap
-        * passthru option to convert from big endian to little endian.
-        */
-       ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
+       ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                return ret;
-
        if (sha->first) {
-               const __be32 *init;
-
                switch (sha->type) {
                case CCP_SHA_TYPE_1:
-                       init = ccp_sha1_init;
-                       break;
                case CCP_SHA_TYPE_224:
-                       init = ccp_sha224_init;
-                       break;
                case CCP_SHA_TYPE_256:
-                       init = ccp_sha256_init;
+                       memcpy(ctx.address + ioffset, init, ctx_size);
                        break;
                default:
                        ret = -EINVAL;
                        goto e_ctx;
                }
-               memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
        } else {
-               ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+               /* Restore the context */
+               ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
+                               sb_count * CCP_SB_BYTES);
        }
 
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
        }
 
-       /* Send data to the CCP SHA engine */
-       ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
-                           CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
-       if (ret)
-               goto e_ctx;
+       if (sha->src) {
+               /* Send data to the CCP SHA engine; block_size is set above */
+               ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
+                                   block_size, DMA_TO_DEVICE);
+               if (ret)
+                       goto e_ctx;
 
-       while (src.sg_wa.bytes_left) {
-               ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
-               if (sha->final && !src.sg_wa.bytes_left)
-                       op.eom = 1;
+               while (src.sg_wa.bytes_left) {
+                       ccp_prepare_data(&src, NULL, &op, block_size, false);
+                       if (sha->final && !src.sg_wa.bytes_left)
+                               op.eom = 1;
+
+                       ret = cmd_q->ccp->vdata->perform->sha(&op);
+                       if (ret) {
+                               cmd->engine_error = cmd_q->cmd_error;
+                               goto e_data;
+                       }
 
-               ret = cmd_q->ccp->vdata->perform->perform_sha(&op);
+                       ccp_process_data(&src, NULL, &op);
+               }
+       } else {
+               op.eom = 1;
+               ret = cmd_q->ccp->vdata->perform->sha(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_data;
                }
-
-               ccp_process_data(&src, NULL, &op);
        }
 
        /* Retrieve the SHA context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping to BE
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_data;
        }
 
-       ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
-
-       if (sha->final && sha->opad) {
-               /* HMAC operation, recursively perform final SHA */
-               struct ccp_cmd hmac_cmd;
-               struct scatterlist sg;
-               u64 block_size, digest_size;
-               u8 *hmac_buf;
-
+       if (sha->final) {
+               /* Finishing up, so get the digest */
                switch (sha->type) {
                case CCP_SHA_TYPE_1:
-                       block_size = SHA1_BLOCK_SIZE;
-                       digest_size = SHA1_DIGEST_SIZE;
-                       break;
                case CCP_SHA_TYPE_224:
-                       block_size = SHA224_BLOCK_SIZE;
-                       digest_size = SHA224_DIGEST_SIZE;
-                       break;
                case CCP_SHA_TYPE_256:
-                       block_size = SHA256_BLOCK_SIZE;
-                       digest_size = SHA256_DIGEST_SIZE;
+                       ccp_get_dm_area(&ctx, ooffset,
+                                       sha->ctx, 0,
+                                       digest_size);
                        break;
                default:
                        ret = -EINVAL;
-                       goto e_data;
+                       goto e_ctx;
                }
+       } else {
+               /* Stash the context */
+               ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
+                               sb_count * CCP_SB_BYTES);
+       }
+
+       if (sha->final && sha->opad) {
+               /* HMAC operation, recursively perform final SHA */
+               struct ccp_cmd hmac_cmd;
+               struct scatterlist sg;
+               u8 *hmac_buf;
 
                if (sha->opad_len != block_size) {
                        ret = -EINVAL;
@@ -1153,7 +1174,18 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                sg_init_one(&sg, hmac_buf, block_size + digest_size);
 
                scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
-               memcpy(hmac_buf + block_size, ctx.address, digest_size);
+               switch (sha->type) {
+               case CCP_SHA_TYPE_1:
+               case CCP_SHA_TYPE_224:
+               case CCP_SHA_TYPE_256:
+                       memcpy(hmac_buf + block_size,
+                              ctx.address + ooffset,
+                              digest_size);
+                       break;
+               default:
+                       ret = -EINVAL;
+                       goto e_ctx;
+               }
 
                memset(&hmac_cmd, 0, sizeof(hmac_cmd));
                hmac_cmd.engine = CCP_ENGINE_SHA;
@@ -1176,7 +1208,8 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        }
 
 e_data:
-       ccp_free_data(&src, cmd_q);
+       if (sha->src)
+               ccp_free_data(&src, cmd_q);
 
 e_ctx:
        ccp_dm_free(&ctx);
@@ -1190,7 +1223,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        struct ccp_dm_workarea exp, src;
        struct ccp_data dst;
        struct ccp_op op;
-       unsigned int ksb_count, i_len, o_len;
+       unsigned int sb_count, i_len, o_len;
        int ret;
 
        if (rsa->key_size > CCP_RSA_MAX_WIDTH)
@@ -1208,16 +1241,17 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        o_len = ((rsa->key_size + 255) / 256) * 32;
        i_len = o_len * 2;
 
-       ksb_count = o_len / CCP_KSB_BYTES;
+       sb_count = o_len / CCP_SB_BYTES;
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
-       if (!op.ksb_key)
+       op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
+
+       if (!op.sb_key)
                return -EIO;
 
-       /* The RSA exponent may span multiple (32-byte) KSB entries and must
+       /* The RSA exponent may span multiple (32-byte) SB entries and must
         * be in little endian format. Reverse copy each 32-byte chunk
         * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
         * and each byte within that chunk and do not perform any byte swap
@@ -1225,14 +1259,14 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
         */
        ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
        if (ret)
-               goto e_ksb;
+               goto e_sb;
 
        ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_exp;
-       ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_NOOP);
+       ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_exp;
@@ -1247,12 +1281,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                goto e_exp;
 
        ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_src;
        src.address += o_len;   /* Adjust the address for the copy operation */
        ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_src;
        src.address -= o_len;   /* Reset the address to original value */
@@ -1274,7 +1308,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        op.u.rsa.mod_size = rsa->key_size;
        op.u.rsa.input_len = i_len;
 
-       ret = cmd_q->ccp->vdata->perform->perform_rsa(&op);
+       ret = cmd_q->ccp->vdata->perform->rsa(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
@@ -1291,8 +1325,8 @@ e_src:
 e_exp:
        ccp_dm_free(&exp);
 
-e_ksb:
-       ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
+e_sb:
+       cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
        return ret;
 }
@@ -1306,7 +1340,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
        struct ccp_op op;
        bool in_place = false;
        unsigned int i;
-       int ret;
+       int ret = 0;
 
        if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
                return -EINVAL;
@@ -1321,26 +1355,26 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                /* Load the mask */
-               op.ksb_key = cmd_q->ksb_key;
+               op.sb_key = cmd_q->sb_key;
 
                ret = ccp_init_dm_workarea(&mask, cmd_q,
-                                          CCP_PASSTHRU_KSB_COUNT *
-                                          CCP_KSB_BYTES,
+                                          CCP_PASSTHRU_SB_COUNT *
+                                          CCP_SB_BYTES,
                                           DMA_TO_DEVICE);
                if (ret)
                        return ret;
 
                ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
-               ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
-                                     CCP_PASSTHRU_BYTESWAP_NOOP);
+               ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
+                                    CCP_PASSTHRU_BYTESWAP_NOOP);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_mask;
@@ -1399,7 +1433,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
                op.dst.u.dma.offset = dst.sg_wa.sg_used;
                op.dst.u.dma.length = op.src.u.dma.length;
 
-               ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+               ret = cmd_q->ccp->vdata->perform->passthru(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -1448,7 +1482,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
@@ -1456,13 +1490,13 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                /* Load the mask */
-               op.ksb_key = cmd_q->ksb_key;
+               op.sb_key = cmd_q->sb_key;
 
                mask.length = pt->mask_len;
                mask.dma.address = pt->mask;
                mask.dma.length = pt->mask_len;
 
-               ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
+               ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
                                     CCP_PASSTHRU_BYTESWAP_NOOP);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
@@ -1484,7 +1518,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
        op.dst.u.dma.offset = 0;
        op.dst.u.dma.length = pt->src_len;
 
-       ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+       ret = cmd_q->ccp->vdata->perform->passthru(&op);
        if (ret)
                cmd->engine_error = cmd_q->cmd_error;
 
@@ -1514,7 +1548,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format.  Since the input
@@ -1575,7 +1609,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        op.u.ecc.function = cmd->u.ecc.function;
 
-       ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
+       ret = cmd_q->ccp->vdata->perform->ecc(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
@@ -1639,7 +1673,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format.  Since the input
@@ -1677,7 +1711,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;
 
-       /* Set the first point Z coordianate to 1 */
+       /* Set the first point Z coordinate to 1 */
        *src.address = 0x01;
        src.address += CCP_ECC_OPERAND_SIZE;
 
@@ -1696,7 +1730,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                        goto e_src;
                src.address += CCP_ECC_OPERAND_SIZE;
 
-               /* Set the second point Z coordianate to 1 */
+               /* Set the second point Z coordinate to 1 */
                *src.address = 0x01;
                src.address += CCP_ECC_OPERAND_SIZE;
        } else {
@@ -1739,7 +1773,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        op.u.ecc.function = cmd->u.ecc.function;
 
-       ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
+       ret = cmd_q->ccp->vdata->perform->ecc(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
@@ -1810,7 +1844,7 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        cmd->engine_error = 0;
        cmd_q->cmd_error = 0;
        cmd_q->int_rcvd = 0;
-       cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+       cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
 
        switch (cmd->engine) {
        case CCP_ENGINE_AES:
index 0bf262e..28a9996 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -25,9 +26,6 @@
 
 #include "ccp-dev.h"
 
-#define IO_BAR                         2
-#define IO_OFFSET                      0x20000
-
 #define MSIX_VECTORS                   2
 
 struct ccp_msix {
@@ -143,10 +141,11 @@ static void ccp_free_irqs(struct ccp_device *ccp)
                        free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
                                 dev);
                pci_disable_msix(pdev);
-       } else {
+       } else if (ccp->irq) {
                free_irq(ccp->irq, dev);
                pci_disable_msi(pdev);
        }
+       ccp->irq = 0;
 }
 
 static int ccp_find_mmio_area(struct ccp_device *ccp)
@@ -156,10 +155,11 @@ static int ccp_find_mmio_area(struct ccp_device *ccp)
        resource_size_t io_len;
        unsigned long io_flags;
 
-       io_flags = pci_resource_flags(pdev, IO_BAR);
-       io_len = pci_resource_len(pdev, IO_BAR);
-       if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
-               return IO_BAR;
+       io_flags = pci_resource_flags(pdev, ccp->vdata->bar);
+       io_len = pci_resource_len(pdev, ccp->vdata->bar);
+       if ((io_flags & IORESOURCE_MEM) &&
+           (io_len >= (ccp->vdata->offset + 0x800)))
+               return ccp->vdata->bar;
 
        return -EIO;
 }
@@ -216,7 +216,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                dev_err(dev, "pci_iomap failed\n");
                goto e_device;
        }
-       ccp->io_regs = ccp->io_map + IO_OFFSET;
+       ccp->io_regs = ccp->io_map + ccp->vdata->offset;
 
        ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
        if (ret) {
@@ -230,6 +230,9 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        dev_set_drvdata(dev, ccp);
 
+       if (ccp->vdata->setup)
+               ccp->vdata->setup(ccp);
+
        ret = ccp->vdata->perform->init(ccp);
        if (ret)
                goto e_iomap;
@@ -322,6 +325,8 @@ static int ccp_pci_resume(struct pci_dev *pdev)
 
 static const struct pci_device_id ccp_pci_table[] = {
        { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
+       { PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a },
+       { PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b },
        /* Last entry must be zero */
        { 0, }
 };
index eee2c7e..e09d405 100644 (file)
@@ -636,20 +636,12 @@ struct hifn_request_context {
 
 static inline u32 hifn_read_0(struct hifn_device *dev, u32 reg)
 {
-       u32 ret;
-
-       ret = readl(dev->bar[0] + reg);
-
-       return ret;
+       return readl(dev->bar[0] + reg);
 }
 
 static inline u32 hifn_read_1(struct hifn_device *dev, u32 reg)
 {
-       u32 ret;
-
-       ret = readl(dev->bar[1] + reg);
-
-       return ret;
+       return readl(dev->bar[1] + reg);
 }
 
 static inline void hifn_write_0(struct hifn_device *dev, u32 reg, u32 val)
index 68e8aa9..a2e77b8 100644 (file)
@@ -71,6 +71,7 @@
 #define DRIVER_FLAGS_MD5               BIT(21)
 
 #define IMG_HASH_QUEUE_LENGTH          20
+#define IMG_HASH_DMA_BURST             4
 #define IMG_HASH_DMA_THRESHOLD         64
 
 #ifdef __LITTLE_ENDIAN
@@ -102,8 +103,10 @@ struct img_hash_request_ctx {
        unsigned long           op;
 
        size_t                  bufcnt;
-       u8 buffer[0] __aligned(sizeof(u32));
        struct ahash_request    fallback_req;
+
+       /* Zero length buffer must remain last member of struct */
+       u8 buffer[0] __aligned(sizeof(u32));
 };
 
 struct img_hash_ctx {
@@ -340,7 +343,7 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
        dma_conf.direction = DMA_MEM_TO_DEV;
        dma_conf.dst_addr = hdev->bus_addr;
        dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       dma_conf.dst_maxburst = 16;
+       dma_conf.dst_maxburst = IMG_HASH_DMA_BURST;
        dma_conf.device_fc = false;
 
        err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
@@ -361,7 +364,7 @@ static void img_hash_dma_task(unsigned long d)
        size_t nbytes, bleft, wsend, len, tbc;
        struct scatterlist tsg;
 
-       if (!ctx->sg)
+       if (!hdev->req || !ctx->sg)
                return;
 
        addr = sg_virt(ctx->sg);
@@ -587,6 +590,32 @@ static int img_hash_finup(struct ahash_request *req)
        return crypto_ahash_finup(&rctx->fallback_req);
 }
 
+static int img_hash_import(struct ahash_request *req, const void *in)
+{
+       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+       rctx->fallback_req.base.flags = req->base.flags
+               & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_import(&rctx->fallback_req, in);
+}
+
+static int img_hash_export(struct ahash_request *req, void *out)
+{
+       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+       rctx->fallback_req.base.flags = req->base.flags
+               & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_export(&rctx->fallback_req, out);
+}
+
 static int img_hash_digest(struct ahash_request *req)
 {
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -643,10 +672,9 @@ static int img_hash_digest(struct ahash_request *req)
        return err;
 }
 
-static int img_hash_cra_init(struct crypto_tfm *tfm)
+static int img_hash_cra_init(struct crypto_tfm *tfm, const char *alg_name)
 {
        struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-       const char *alg_name = crypto_tfm_alg_name(tfm);
        int err = -ENOMEM;
 
        ctx->fallback = crypto_alloc_ahash(alg_name, 0,
@@ -658,6 +686,7 @@ static int img_hash_cra_init(struct crypto_tfm *tfm)
        }
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct img_hash_request_ctx) +
+                                crypto_ahash_reqsize(ctx->fallback) +
                                 IMG_HASH_DMA_THRESHOLD);
 
        return 0;
@@ -666,6 +695,26 @@ err:
        return err;
 }
 
+static int img_hash_cra_md5_init(struct crypto_tfm *tfm)
+{
+       return img_hash_cra_init(tfm, "md5-generic");
+}
+
+static int img_hash_cra_sha1_init(struct crypto_tfm *tfm)
+{
+       return img_hash_cra_init(tfm, "sha1-generic");
+}
+
+static int img_hash_cra_sha224_init(struct crypto_tfm *tfm)
+{
+       return img_hash_cra_init(tfm, "sha224-generic");
+}
+
+static int img_hash_cra_sha256_init(struct crypto_tfm *tfm)
+{
+       return img_hash_cra_init(tfm, "sha256-generic");
+}
+
 static void img_hash_cra_exit(struct crypto_tfm *tfm)
 {
        struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
@@ -711,9 +760,12 @@ static struct ahash_alg img_algs[] = {
                .update = img_hash_update,
                .final = img_hash_final,
                .finup = img_hash_finup,
+               .export = img_hash_export,
+               .import = img_hash_import,
                .digest = img_hash_digest,
                .halg = {
                        .digestsize = MD5_DIGEST_SIZE,
+                       .statesize = sizeof(struct md5_state),
                        .base = {
                                .cra_name = "md5",
                                .cra_driver_name = "img-md5",
@@ -723,7 +775,7 @@ static struct ahash_alg img_algs[] = {
                                CRYPTO_ALG_NEED_FALLBACK,
                                .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct img_hash_ctx),
-                               .cra_init = img_hash_cra_init,
+                               .cra_init = img_hash_cra_md5_init,
                                .cra_exit = img_hash_cra_exit,
                                .cra_module = THIS_MODULE,
                        }
@@ -734,9 +786,12 @@ static struct ahash_alg img_algs[] = {
                .update = img_hash_update,
                .final = img_hash_final,
                .finup = img_hash_finup,
+               .export = img_hash_export,
+               .import = img_hash_import,
                .digest = img_hash_digest,
                .halg = {
                        .digestsize = SHA1_DIGEST_SIZE,
+                       .statesize = sizeof(struct sha1_state),
                        .base = {
                                .cra_name = "sha1",
                                .cra_driver_name = "img-sha1",
@@ -746,7 +801,7 @@ static struct ahash_alg img_algs[] = {
                                CRYPTO_ALG_NEED_FALLBACK,
                                .cra_blocksize = SHA1_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct img_hash_ctx),
-                               .cra_init = img_hash_cra_init,
+                               .cra_init = img_hash_cra_sha1_init,
                                .cra_exit = img_hash_cra_exit,
                                .cra_module = THIS_MODULE,
                        }
@@ -757,9 +812,12 @@ static struct ahash_alg img_algs[] = {
                .update = img_hash_update,
                .final = img_hash_final,
                .finup = img_hash_finup,
+               .export = img_hash_export,
+               .import = img_hash_import,
                .digest = img_hash_digest,
                .halg = {
                        .digestsize = SHA224_DIGEST_SIZE,
+                       .statesize = sizeof(struct sha256_state),
                        .base = {
                                .cra_name = "sha224",
                                .cra_driver_name = "img-sha224",
@@ -769,7 +827,7 @@ static struct ahash_alg img_algs[] = {
                                CRYPTO_ALG_NEED_FALLBACK,
                                .cra_blocksize = SHA224_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct img_hash_ctx),
-                               .cra_init = img_hash_cra_init,
+                               .cra_init = img_hash_cra_sha224_init,
                                .cra_exit = img_hash_cra_exit,
                                .cra_module = THIS_MODULE,
                        }
@@ -780,9 +838,12 @@ static struct ahash_alg img_algs[] = {
                .update = img_hash_update,
                .final = img_hash_final,
                .finup = img_hash_finup,
+               .export = img_hash_export,
+               .import = img_hash_import,
                .digest = img_hash_digest,
                .halg = {
                        .digestsize = SHA256_DIGEST_SIZE,
+                       .statesize = sizeof(struct sha256_state),
                        .base = {
                                .cra_name = "sha256",
                                .cra_driver_name = "img-sha256",
@@ -792,7 +853,7 @@ static struct ahash_alg img_algs[] = {
                                CRYPTO_ALG_NEED_FALLBACK,
                                .cra_blocksize = SHA256_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct img_hash_ctx),
-                               .cra_init = img_hash_cra_init,
+                               .cra_init = img_hash_cra_sha256_init,
                                .cra_exit = img_hash_cra_exit,
                                .cra_module = THIS_MODULE,
                        }
@@ -971,7 +1032,7 @@ static int img_hash_probe(struct platform_device *pdev)
        err = img_register_algs(hdev);
        if (err)
                goto err_algs;
-       dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
+       dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
 
        return 0;
 
@@ -1013,11 +1074,52 @@ static int img_hash_remove(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+/* System sleep: gate both clocks; img_hash_resume() turns them back on. */
+static int img_hash_suspend(struct device *dev)
+{
+       struct img_hash_dev *hdev = dev_get_drvdata(dev);
+
+       clk_disable_unprepare(hdev->hash_clk);
+       clk_disable_unprepare(hdev->sys_clk);
+
+       return 0;
+}
+
+/*
+ * System resume: re-enable the clocks gated by img_hash_suspend().
+ * Returns 0 on success or the clk_prepare_enable() error code.  If the
+ * second clock cannot be enabled, the first one is disabled again so that
+ * a failed resume does not leave a dangling enable behind.
+ */
+static int img_hash_resume(struct device *dev)
+{
+       struct img_hash_dev *hdev = dev_get_drvdata(dev);
+       int ret;
+
+       ret = clk_prepare_enable(hdev->hash_clk);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(hdev->sys_clk);
+       if (ret) {
+               clk_disable_unprepare(hdev->hash_clk);
+               return ret;
+       }
+
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops img_hash_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(img_hash_suspend, img_hash_resume)
+};
+
 static struct platform_driver img_hash_driver = {
        .probe          = img_hash_probe,
        .remove         = img_hash_remove,
        .driver         = {
                .name   = "img-hash-accelerator",
+               .pm     = &img_hash_pm_ops,
                .of_match_table = of_match_ptr(img_hash_match),
        }
 };
index 2296934..7868765 100644 (file)
@@ -447,9 +447,8 @@ static int init_ixp_crypto(struct device *dev)
 
        if (!npe_running(npe_c)) {
                ret = npe_load_firmware(npe_c, npe_name(npe_c), dev);
-               if (ret) {
-                       return ret;
-               }
+               if (ret)
+                       goto npe_release;
                if (npe_recv_message(npe_c, msg, "STATUS_MSG"))
                        goto npe_error;
        } else {
@@ -473,7 +472,8 @@ static int init_ixp_crypto(struct device *dev)
        default:
                printk(KERN_ERR "Firmware of %s lacks crypto support\n",
                        npe_name(npe_c));
-               return -ENODEV;
+               ret = -ENODEV;
+               goto npe_release;
        }
        /* buffer_pool will also be used to sometimes store the hmac,
         * so assure it is large enough
@@ -512,6 +512,7 @@ npe_error:
 err:
        dma_pool_destroy(ctx_pool);
        dma_pool_destroy(buffer_pool);
+npe_release:
        npe_release(npe_c);
        return ret;
 }
index d64af86..37dadb2 100644 (file)
@@ -166,6 +166,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv)
                        if (!req)
                                break;
 
+                       ctx = crypto_tfm_ctx(req->tfm);
                        mv_cesa_complete_req(ctx, req, 0);
                }
        }
index 82e0f4e..9f28468 100644 (file)
@@ -374,7 +374,7 @@ static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = {
        .complete = mv_cesa_ahash_complete,
 };
 
-static int mv_cesa_ahash_init(struct ahash_request *req,
+static void mv_cesa_ahash_init(struct ahash_request *req,
                              struct mv_cesa_op_ctx *tmpl, bool algo_le)
 {
        struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
@@ -390,8 +390,6 @@ static int mv_cesa_ahash_init(struct ahash_request *req,
        creq->op_tmpl = *tmpl;
        creq->len = 0;
        creq->algo_le = algo_le;
-
-       return 0;
 }
 
 static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
@@ -405,15 +403,16 @@ static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
        return 0;
 }
 
-static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
+static bool mv_cesa_ahash_cache_req(struct ahash_request *req)
 {
        struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+       bool cached = false;
 
-       if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) {
-               *cached = true;
+       if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) {
+               cached = true;
 
                if (!req->nbytes)
-                       return 0;
+                       return cached;
 
                sg_pcopy_to_buffer(req->src, creq->src_nents,
                                   creq->cache + creq->cache_ptr,
@@ -422,7 +421,7 @@ static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
                creq->cache_ptr += req->nbytes;
        }
 
-       return 0;
+       return cached;
 }
 
 static struct mv_cesa_op_ctx *
@@ -455,7 +454,6 @@ mv_cesa_dma_add_frag(struct mv_cesa_tdma_chain *chain,
 
 static int
 mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
-                           struct mv_cesa_ahash_dma_iter *dma_iter,
                            struct mv_cesa_ahash_req *creq,
                            gfp_t flags)
 {
@@ -586,7 +584,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
         * Add the cache (left-over data from a previous block) first.
         * This will never overflow the SRAM size.
         */
-       ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags);
+       ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, creq, flags);
        if (ret)
                goto err_free_tdma;
 
@@ -668,7 +666,6 @@ err:
 static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
 {
        struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-       int ret;
 
        creq->src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (creq->src_nents < 0) {
@@ -676,17 +673,15 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
                return creq->src_nents;
        }
 
-       ret = mv_cesa_ahash_cache_req(req, cached);
-       if (ret)
-               return ret;
+       *cached = mv_cesa_ahash_cache_req(req);
 
        if (*cached)
                return 0;
 
        if (cesa_dev->caps->has_tdma)
-               ret = mv_cesa_ahash_dma_req_init(req);
-
-       return ret;
+               return mv_cesa_ahash_dma_req_init(req);
+       else
+               return 0;
 }
 
 static int mv_cesa_ahash_queue_req(struct ahash_request *req)
@@ -805,13 +800,14 @@ static int mv_cesa_md5_init(struct ahash_request *req)
        struct mv_cesa_op_ctx tmpl = { };
 
        mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+
+       mv_cesa_ahash_init(req, &tmpl, true);
+
        creq->state[0] = MD5_H0;
        creq->state[1] = MD5_H1;
        creq->state[2] = MD5_H2;
        creq->state[3] = MD5_H3;
 
-       mv_cesa_ahash_init(req, &tmpl, true);
-
        return 0;
 }
 
@@ -873,14 +869,15 @@ static int mv_cesa_sha1_init(struct ahash_request *req)
        struct mv_cesa_op_ctx tmpl = { };
 
        mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
+
+       mv_cesa_ahash_init(req, &tmpl, false);
+
        creq->state[0] = SHA1_H0;
        creq->state[1] = SHA1_H1;
        creq->state[2] = SHA1_H2;
        creq->state[3] = SHA1_H3;
        creq->state[4] = SHA1_H4;
 
-       mv_cesa_ahash_init(req, &tmpl, false);
-
        return 0;
 }
 
@@ -942,6 +939,9 @@ static int mv_cesa_sha256_init(struct ahash_request *req)
        struct mv_cesa_op_ctx tmpl = { };
 
        mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
+
+       mv_cesa_ahash_init(req, &tmpl, false);
+
        creq->state[0] = SHA256_H0;
        creq->state[1] = SHA256_H1;
        creq->state[2] = SHA256_H2;
@@ -951,8 +951,6 @@ static int mv_cesa_sha256_init(struct ahash_request *req)
        creq->state[6] = SHA256_H6;
        creq->state[7] = SHA256_H7;
 
-       mv_cesa_ahash_init(req, &tmpl, false);
-
        return 0;
 }
 
index 86a065b..9fd7a5f 100644 (file)
@@ -261,6 +261,7 @@ struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
        tdma->op = op;
        tdma->byte_cnt = cpu_to_le32(size | BIT(31));
        tdma->src = cpu_to_le32(dma_handle);
+       tdma->dst = CESA_SA_CFG_SRAM_OFFSET;
        tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP;
 
        return op;
index e6b658f..104e9ce 100644 (file)
@@ -1091,11 +1091,8 @@ static int mv_probe(struct platform_device *pdev)
 
        cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
 
-       if (pdev->dev.of_node)
-               irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-       else
-               irq = platform_get_irq(pdev, 0);
-       if (irq < 0 || irq == NO_IRQ) {
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
                ret = irq;
                goto err;
        }
index ff383ef..ee4be1b 100644 (file)
@@ -668,7 +668,9 @@ static int mxc_scc_probe(struct platform_device *pdev)
                return PTR_ERR(scc->clk);
        }
 
-       clk_prepare_enable(scc->clk);
+       ret = clk_prepare_enable(scc->clk);
+       if (ret)
+               return ret;
 
        /* clear error status register */
        writel(0x0, scc->base + SCC_SCM_ERROR_STATUS);
index 4ab53a6..fe32dd9 100644 (file)
@@ -35,7 +35,8 @@
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/engine.h>
+#include <crypto/internal/skcipher.h>
 
 #define DST_MAXBURST                   4
 #define DMA_MIN                                (DST_MAXBURST * sizeof(u32))
@@ -85,6 +86,8 @@
 #define AES_REG_IRQ_DATA_OUT           BIT(2)
 #define DEFAULT_TIMEOUT                (5*HZ)
 
+#define DEFAULT_AUTOSUSPEND_DELAY      1000
+
 #define FLAGS_MODE_MASK                0x000f
 #define FLAGS_ENCRYPT          BIT(0)
 #define FLAGS_CBC              BIT(1)
@@ -103,6 +106,7 @@ struct omap_aes_ctx {
        int             keylen;
        u32             key[AES_KEYSIZE_256 / sizeof(u32)];
        unsigned long   flags;
+       struct crypto_skcipher  *fallback;
 };
 
 struct omap_aes_reqctx {
@@ -238,11 +242,19 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
 
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 {
+       int err;
+
        if (!(dd->flags & FLAGS_INIT)) {
                dd->flags |= FLAGS_INIT;
                dd->err = 0;
        }
 
+       err = pm_runtime_get_sync(dd->dev);
+       if (err < 0) {
+               dev_err(dd->dev, "failed to get sync: %d\n", err);
+               return err;
+       }
+
        return 0;
 }
 
@@ -319,20 +331,12 @@ static void omap_aes_dma_stop(struct omap_aes_dev *dd)
 
 static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
 {
-       struct omap_aes_dev *dd = NULL, *tmp;
+       struct omap_aes_dev *dd;
 
        spin_lock_bh(&list_lock);
-       if (!ctx->dd) {
-               list_for_each_entry(tmp, &dev_list, list) {
-                       /* FIXME: take fist available aes core */
-                       dd = tmp;
-                       break;
-               }
-               ctx->dd = dd;
-       } else {
-               /* already found before */
-               dd = ctx->dd;
-       }
+       dd = list_first_entry(&dev_list, struct omap_aes_dev, list);
+       list_move_tail(&dd->list, &dev_list);
+       ctx->dd = dd;
        spin_unlock_bh(&list_lock);
 
        return dd;
@@ -519,7 +523,10 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
        pr_debug("err: %d\n", err);
 
-       crypto_finalize_request(dd->engine, req, err);
+       crypto_finalize_cipher_request(dd->engine, req, err);
+
+       pm_runtime_mark_last_busy(dd->dev);
+       pm_runtime_put_autosuspend(dd->dev);
 }
 
 static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
@@ -592,7 +599,7 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
                                 struct ablkcipher_request *req)
 {
        if (req)
-               return crypto_transfer_request_to_engine(dd->engine, req);
+               return crypto_transfer_cipher_request_to_engine(dd->engine, req);
 
        return 0;
 }
@@ -602,7 +609,7 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 {
        struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
                        crypto_ablkcipher_reqtfm(req));
-       struct omap_aes_dev *dd = omap_aes_find_dev(ctx);
+       struct omap_aes_dev *dd = ctx->dd;
        struct omap_aes_reqctx *rctx;
 
        if (!dd)
@@ -648,7 +655,7 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 {
        struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
                        crypto_ablkcipher_reqtfm(req));
-       struct omap_aes_dev *dd = omap_aes_find_dev(ctx);
+       struct omap_aes_dev *dd = ctx->dd;
 
        if (!dd)
                return -ENODEV;
@@ -696,11 +703,29 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
                        crypto_ablkcipher_reqtfm(req));
        struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
        struct omap_aes_dev *dd;
+       int ret;
 
        pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
                  !!(mode & FLAGS_ENCRYPT),
                  !!(mode & FLAGS_CBC));
 
+       if (req->nbytes < 200) {
+               SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+               skcipher_request_set_tfm(subreq, ctx->fallback);
+               skcipher_request_set_callback(subreq, req->base.flags, NULL,
+                                             NULL);
+               skcipher_request_set_crypt(subreq, req->src, req->dst,
+                                          req->nbytes, req->info);
+
+               if (mode & FLAGS_ENCRYPT)
+                       ret = crypto_skcipher_encrypt(subreq);
+               else
+                       ret = crypto_skcipher_decrypt(subreq);
+
+               skcipher_request_zero(subreq);
+               return ret;
+       }
        dd = omap_aes_find_dev(ctx);
        if (!dd)
                return -ENODEV;
@@ -716,6 +741,7 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
                           unsigned int keylen)
 {
        struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+       int ret;
 
        if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
                   keylen != AES_KEYSIZE_256)
@@ -726,6 +752,14 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
        memcpy(ctx->key, key, keylen);
        ctx->keylen = keylen;
 
+       crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+                                                CRYPTO_TFM_REQ_MASK);
+       /* The fallback must accept the key too: report its error, if any. */
+       ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -761,22 +795,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
 
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
-       struct omap_aes_dev *dd = NULL;
-       int err;
+       const char *name = crypto_tfm_alg_name(tfm);
+       const u32 flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
+       struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_skcipher *blk;
 
-       /* Find AES device, currently picks the first device */
-       spin_lock_bh(&list_lock);
-       list_for_each_entry(dd, &dev_list, list) {
-               break;
-       }
-       spin_unlock_bh(&list_lock);
+       blk = crypto_alloc_skcipher(name, 0, flags);
+       if (IS_ERR(blk))
+               return PTR_ERR(blk);
 
-       err = pm_runtime_get_sync(dd->dev);
-       if (err < 0) {
-               dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
-                       __func__, err);
-               return err;
-       }
+       ctx->fallback = blk;
 
        tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 
@@ -785,16 +813,12 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 {
-       struct omap_aes_dev *dd = NULL;
+       struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       /* Find AES device, currently picks the first device */
-       spin_lock_bh(&list_lock);
-       list_for_each_entry(dd, &dev_list, list) {
-               break;
-       }
-       spin_unlock_bh(&list_lock);
+       if (ctx->fallback)
+               crypto_free_skcipher(ctx->fallback);
 
-       pm_runtime_put_sync(dd->dev);
+       ctx->fallback = NULL;
 }
 
 /* ********************** ALGS ************************************ */
@@ -806,7 +830,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
        .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                 CRYPTO_ALG_ASYNC,
+                                 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
        .cra_blocksize          = AES_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct omap_aes_ctx),
        .cra_alignmask          = 0,
@@ -828,7 +852,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
        .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                 CRYPTO_ALG_ASYNC,
+                                 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
        .cra_blocksize          = AES_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct omap_aes_ctx),
        .cra_alignmask          = 0,
@@ -854,7 +878,7 @@ static struct crypto_alg algs_ctr[] = {
        .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                 CRYPTO_ALG_ASYNC,
+                                 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
        .cra_blocksize          = AES_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct omap_aes_ctx),
        .cra_alignmask          = 0,
@@ -1140,6 +1164,9 @@ static int omap_aes_probe(struct platform_device *pdev)
        }
        dd->phys_base = res.start;
 
+       pm_runtime_use_autosuspend(dev);
+       pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
+
        pm_runtime_enable(dev);
        err = pm_runtime_get_sync(dev);
        if (err < 0) {
@@ -1186,6 +1213,19 @@ static int omap_aes_probe(struct platform_device *pdev)
        list_add_tail(&dd->list, &dev_list);
        spin_unlock(&list_lock);
 
+       /* Initialize crypto engine */
+       dd->engine = crypto_engine_alloc_init(dev, 1);
+       if (!dd->engine) {
+               err = -ENOMEM;
+               goto err_engine;
+       }
+
+       dd->engine->prepare_cipher_request = omap_aes_prepare_req;
+       dd->engine->cipher_one_request = omap_aes_crypt_req;
+       err = crypto_engine_start(dd->engine);
+       if (err)
+               goto err_engine;
+
        for (i = 0; i < dd->pdata->algs_info_size; i++) {
                if (!dd->pdata->algs_info[i].registered) {
                        for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
@@ -1203,26 +1243,17 @@ static int omap_aes_probe(struct platform_device *pdev)
                }
        }
 
-       /* Initialize crypto engine */
-       dd->engine = crypto_engine_alloc_init(dev, 1);
-       if (!dd->engine)
-               goto err_algs;
-
-       dd->engine->prepare_request = omap_aes_prepare_req;
-       dd->engine->crypt_one_request = omap_aes_crypt_req;
-       err = crypto_engine_start(dd->engine);
-       if (err)
-               goto err_engine;
-
        return 0;
-err_engine:
-       crypto_engine_exit(dd->engine);
 err_algs:
        for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
                for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
                        crypto_unregister_alg(
                                        &dd->pdata->algs_info[i].algs_list[j]);
 
+err_engine:
+       if (dd->engine)
+               crypto_engine_exit(dd->engine);
+
        omap_aes_dma_cleanup(dd);
 err_irq:
        tasklet_kill(&dd->done_task);
index 5691434..a6f6553 100644 (file)
@@ -39,6 +39,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/des.h>
 #include <crypto/algapi.h>
+#include <crypto/engine.h>
 
 #define DST_MAXBURST                   2
 
@@ -506,7 +507,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
        pr_debug("err: %d\n", err);
 
        pm_runtime_put(dd->dev);
-       crypto_finalize_request(dd->engine, req, err);
+       crypto_finalize_cipher_request(dd->engine, req, err);
 }
 
 static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
@@ -574,7 +575,7 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
                                 struct ablkcipher_request *req)
 {
        if (req)
-               return crypto_transfer_request_to_engine(dd->engine, req);
+               return crypto_transfer_cipher_request_to_engine(dd->engine, req);
 
        return 0;
 }
@@ -1078,6 +1079,19 @@ static int omap_des_probe(struct platform_device *pdev)
        list_add_tail(&dd->list, &dev_list);
        spin_unlock(&list_lock);
 
+       /* Initialize des crypto engine */
+       dd->engine = crypto_engine_alloc_init(dev, 1);
+       if (!dd->engine) {
+               err = -ENOMEM;
+               goto err_engine;
+       }
+
+       dd->engine->prepare_cipher_request = omap_des_prepare_req;
+       dd->engine->cipher_one_request = omap_des_crypt_req;
+       err = crypto_engine_start(dd->engine);
+       if (err)
+               goto err_engine;
+
        for (i = 0; i < dd->pdata->algs_info_size; i++) {
                for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
                        algp = &dd->pdata->algs_info[i].algs_list[j];
@@ -1093,27 +1107,18 @@ static int omap_des_probe(struct platform_device *pdev)
                }
        }
 
-       /* Initialize des crypto engine */
-       dd->engine = crypto_engine_alloc_init(dev, 1);
-       if (!dd->engine)
-               goto err_algs;
-
-       dd->engine->prepare_request = omap_des_prepare_req;
-       dd->engine->crypt_one_request = omap_des_crypt_req;
-       err = crypto_engine_start(dd->engine);
-       if (err)
-               goto err_engine;
-
        return 0;
 
-err_engine:
-       crypto_engine_exit(dd->engine);
 err_algs:
        for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
                for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
                        crypto_unregister_alg(
                                        &dd->pdata->algs_info[i].algs_list[j]);
 
+err_engine:
+       if (dd->engine)
+               crypto_engine_exit(dd->engine);
+
        omap_des_dma_cleanup(dd);
 err_irq:
        tasklet_kill(&dd->done_task);
index 7fe4eef..d0b16e5 100644 (file)
 #define FLAGS_DMA_READY                6
 #define FLAGS_AUTO_XOR         7
 #define FLAGS_BE32_SHA1                8
+#define FLAGS_SGS_COPIED       9
+#define FLAGS_SGS_ALLOCED      10
 /* context flags */
 #define FLAGS_FINUP            16
-#define FLAGS_SG               17
 
 #define FLAGS_MODE_SHIFT       18
 #define FLAGS_MODE_MASK                (SHA_REG_MODE_ALGO_MASK << FLAGS_MODE_SHIFT)
 #define OMAP_ALIGN_MASK                (sizeof(u32)-1)
 #define OMAP_ALIGNED           __attribute__((aligned(sizeof(u32))))
 
-#define BUFLEN                 PAGE_SIZE
+#define BUFLEN                 SHA512_BLOCK_SIZE
+#define OMAP_SHA_DMA_THRESHOLD 256
 
 struct omap_sham_dev;
 
@@ -147,12 +149,12 @@ struct omap_sham_reqctx {
        size_t                  digcnt;
        size_t                  bufcnt;
        size_t                  buflen;
-       dma_addr_t              dma_addr;
 
        /* walk state */
        struct scatterlist      *sg;
-       struct scatterlist      sgl;
-       unsigned int            offset; /* offset in current sg */
+       struct scatterlist      sgl[2];
+       int                     offset; /* offset in current sg */
+       int                     sg_len;
        unsigned int            total;  /* total request */
 
        u8                      buffer[0] OMAP_ALIGNED;
@@ -223,6 +225,7 @@ struct omap_sham_dev {
        struct dma_chan         *dma_lch;
        struct tasklet_struct   done_task;
        u8                      polling_mode;
+       u8                      xmit_buf[BUFLEN];
 
        unsigned long           flags;
        struct crypto_queue     queue;
@@ -510,12 +513,14 @@ static int omap_sham_poll_irq_omap4(struct omap_sham_dev *dd)
                              SHA_REG_IRQSTATUS_INPUT_RDY);
 }
 
-static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
-                             size_t length, int final)
+static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, size_t length,
+                             int final)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
        int count, len32, bs32, offset = 0;
-       const u32 *buffer = (const u32 *)buf;
+       const u32 *buffer;
+       int mlen;
+       struct sg_mapping_iter mi;
 
        dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
                                                ctx->digcnt, length, final);
@@ -525,6 +530,7 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
 
        /* should be non-zero before next lines to disable clocks later */
        ctx->digcnt += length;
+       ctx->total -= length;
 
        if (final)
                set_bit(FLAGS_FINAL, &dd->flags); /* catch last interrupt */
@@ -534,16 +540,35 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
        len32 = DIV_ROUND_UP(length, sizeof(u32));
        bs32 = get_block_size(ctx) / sizeof(u32);
 
+       sg_miter_start(&mi, ctx->sg, ctx->sg_len,
+                      SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+
+       mlen = 0;
+
        while (len32) {
                if (dd->pdata->poll_irq(dd))
                        return -ETIMEDOUT;
 
-               for (count = 0; count < min(len32, bs32); count++, offset++)
+               for (count = 0; count < min(len32, bs32); count++, offset++) {
+                       if (!mlen) {
+                               sg_miter_next(&mi);
+                               mlen = mi.length;
+                               if (!mlen) {
+                                       pr_err("sg miter failure.\n");
+                                       return -EINVAL;
+                               }
+                               offset = 0;
+                               buffer = mi.addr;
+                       }
                        omap_sham_write(dd, SHA_REG_DIN(dd, count),
                                        buffer[offset]);
+                       mlen -= 4;
+               }
                len32 -= min(len32, bs32);
        }
 
+       sg_miter_stop(&mi);
+
        return -EINPROGRESS;
 }
 
@@ -555,22 +580,27 @@ static void omap_sham_dma_callback(void *param)
        tasklet_schedule(&dd->done_task);
 }
 
-static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
-                             size_t length, int final, int is_sg)
+static int omap_sham_xmit_dma(struct omap_sham_dev *dd, size_t length,
+                             int final)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
        struct dma_async_tx_descriptor *tx;
        struct dma_slave_config cfg;
-       int len32, ret, dma_min = get_block_size(ctx);
+       int ret;
 
        dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
                                                ctx->digcnt, length, final);
 
+       if (!dma_map_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE)) {
+               dev_err(dd->dev, "dma_map_sg error\n");
+               return -EINVAL;
+       }
+
        memset(&cfg, 0, sizeof(cfg));
 
        cfg.dst_addr = dd->phys_base + SHA_REG_DIN(dd, 0);
        cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       cfg.dst_maxburst = dma_min / DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.dst_maxburst = get_block_size(ctx) / DMA_SLAVE_BUSWIDTH_4_BYTES;
 
        ret = dmaengine_slave_config(dd->dma_lch, &cfg);
        if (ret) {
@@ -578,30 +608,12 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
                return ret;
        }
 
-       len32 = DIV_ROUND_UP(length, dma_min) * dma_min;
-
-       if (is_sg) {
-               /*
-                * The SG entry passed in may not have the 'length' member
-                * set correctly so use a local SG entry (sgl) with the
-                * proper value for 'length' instead.  If this is not done,
-                * the dmaengine may try to DMA the incorrect amount of data.
-                */
-               sg_init_table(&ctx->sgl, 1);
-               sg_assign_page(&ctx->sgl, sg_page(ctx->sg));
-               ctx->sgl.offset = ctx->sg->offset;
-               sg_dma_len(&ctx->sgl) = len32;
-               sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
-
-               tx = dmaengine_prep_slave_sg(dd->dma_lch, &ctx->sgl, 1,
-                       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-       } else {
-               tx = dmaengine_prep_slave_single(dd->dma_lch, dma_addr, len32,
-                       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-       }
+       tx = dmaengine_prep_slave_sg(dd->dma_lch, ctx->sg, ctx->sg_len,
+                                    DMA_MEM_TO_DEV,
+                                    DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
        if (!tx) {
-               dev_err(dd->dev, "prep_slave_sg/single() failed\n");
+               dev_err(dd->dev, "prep_slave_sg failed\n");
                return -EINVAL;
        }
 
@@ -611,6 +623,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
        dd->pdata->write_ctrl(dd, length, final, 1);
 
        ctx->digcnt += length;
+       ctx->total -= length;
 
        if (final)
                set_bit(FLAGS_FINAL, &dd->flags); /* catch last interrupt */
@@ -625,189 +638,257 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
        return -EINPROGRESS;
 }
 
-static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx,
-                               const u8 *data, size_t length)
+static int omap_sham_copy_sg_lists(struct omap_sham_reqctx *ctx,
+                                  struct scatterlist *sg, int bs, int new_len)
 {
-       size_t count = min(length, ctx->buflen - ctx->bufcnt);
+       int n = sg_nents(sg);
+       struct scatterlist *tmp;
+       int offset = ctx->offset;
 
-       count = min(count, ctx->total);
-       if (count <= 0)
-               return 0;
-       memcpy(ctx->buffer + ctx->bufcnt, data, count);
-       ctx->bufcnt += count;
+       if (ctx->bufcnt)
+               n++;
 
-       return count;
-}
+       ctx->sg = kmalloc_array(n, sizeof(*sg), GFP_KERNEL);
+       if (!ctx->sg)
+               return -ENOMEM;
 
-static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
-{
-       size_t count;
-       const u8 *vaddr;
+       sg_init_table(ctx->sg, n);
 
-       while (ctx->sg) {
-               vaddr = kmap_atomic(sg_page(ctx->sg));
-               vaddr += ctx->sg->offset;
+       tmp = ctx->sg;
 
-               count = omap_sham_append_buffer(ctx,
-                               vaddr + ctx->offset,
-                               ctx->sg->length - ctx->offset);
+       ctx->sg_len = 0;
 
-               kunmap_atomic((void *)vaddr);
+       if (ctx->bufcnt) {
+               sg_set_buf(tmp, ctx->dd->xmit_buf, ctx->bufcnt);
+               tmp = sg_next(tmp);
+               ctx->sg_len++;
+       }
 
-               if (!count)
-                       break;
-               ctx->offset += count;
-               ctx->total -= count;
-               if (ctx->offset == ctx->sg->length) {
-                       ctx->sg = sg_next(ctx->sg);
-                       if (ctx->sg)
-                               ctx->offset = 0;
-                       else
-                               ctx->total = 0;
+       while (sg && new_len) {
+               int len = sg->length - offset;
+
+               if (offset) {
+                       offset -= sg->length;
+                       if (offset < 0)
+                               offset = 0;
+               }
+
+               if (new_len < len)
+                       len = new_len;
+
+               if (len > 0) {
+                       new_len -= len;
+                       sg_set_page(tmp, sg_page(sg), len, sg->offset);
+                       if (new_len <= 0)
+                               sg_mark_end(tmp);
+                       tmp = sg_next(tmp);
+                       ctx->sg_len++;
                }
+
+               sg = sg_next(sg);
        }
 
+       set_bit(FLAGS_SGS_ALLOCED, &ctx->dd->flags);
+
+       ctx->bufcnt = 0;
+
        return 0;
 }
 
-static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
-                                       struct omap_sham_reqctx *ctx,
-                                       size_t length, int final)
+static int omap_sham_copy_sgs(struct omap_sham_reqctx *ctx,
+                             struct scatterlist *sg, int bs, int new_len)
 {
-       int ret;
+       int pages;
+       void *buf;
+       int len;
 
-       ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
-                                      DMA_TO_DEVICE);
-       if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
-               dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
-               return -EINVAL;
+       len = new_len + ctx->bufcnt;
+
+       pages = get_order(ctx->total);
+
+       buf = (void *)__get_free_pages(GFP_ATOMIC, pages);
+       if (!buf) {
+               pr_err("Couldn't allocate pages for unaligned cases.\n");
+               return -ENOMEM;
        }
 
-       ctx->flags &= ~BIT(FLAGS_SG);
+       if (ctx->bufcnt)
+               memcpy(buf, ctx->dd->xmit_buf, ctx->bufcnt);
 
-       ret = omap_sham_xmit_dma(dd, ctx->dma_addr, length, final, 0);
-       if (ret != -EINPROGRESS)
-               dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
-                                DMA_TO_DEVICE);
+       scatterwalk_map_and_copy(buf + ctx->bufcnt, sg, ctx->offset,
+                                ctx->total - ctx->bufcnt, 0);
+       sg_init_table(ctx->sgl, 1);
+       sg_set_buf(ctx->sgl, buf, len);
+       ctx->sg = ctx->sgl;
+       set_bit(FLAGS_SGS_COPIED, &ctx->dd->flags);
+       ctx->sg_len = 1;
+       ctx->bufcnt = 0;
+       ctx->offset = 0;
 
-       return ret;
+       return 0;
 }
 
-static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
+static int omap_sham_align_sgs(struct scatterlist *sg,
+                              int nbytes, int bs, bool final,
+                              struct omap_sham_reqctx *rctx)
 {
-       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-       unsigned int final;
-       size_t count;
+       int n = 0;
+       bool aligned = true;
+       bool list_ok = true;
+       struct scatterlist *sg_tmp = sg;
+       int new_len;
+       int offset = rctx->offset;
 
-       omap_sham_append_sg(ctx);
+       if (!sg || !sg->length || !nbytes)
+               return 0;
+
+       new_len = nbytes;
 
-       final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+       if (offset)
+               list_ok = false;
 
-       dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
-                                        ctx->bufcnt, ctx->digcnt, final);
+       if (final)
+               new_len = DIV_ROUND_UP(new_len, bs) * bs;
+       else
+               new_len = new_len / bs * bs;
 
-       if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
-               count = ctx->bufcnt;
-               ctx->bufcnt = 0;
-               return omap_sham_xmit_dma_map(dd, ctx, count, final);
+       while (nbytes > 0 && sg_tmp) {
+               n++;
+
+               if (offset < sg_tmp->length) {
+                       if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) {
+                               aligned = false;
+                               break;
+                       }
+
+                       if (!IS_ALIGNED(sg_tmp->length - offset, bs)) {
+                               aligned = false;
+                               break;
+                       }
+               }
+
+               if (offset) {
+                       offset -= sg_tmp->length;
+                       if (offset < 0) {
+                               nbytes += offset;
+                               offset = 0;
+                       }
+               } else {
+                       nbytes -= sg_tmp->length;
+               }
+
+               sg_tmp = sg_next(sg_tmp);
+
+               if (nbytes < 0) {
+                       list_ok = false;
+                       break;
+               }
        }
 
+       if (!aligned)
+               return omap_sham_copy_sgs(rctx, sg, bs, new_len);
+       else if (!list_ok)
+               return omap_sham_copy_sg_lists(rctx, sg, bs, new_len);
+
+       rctx->sg_len = n;
+       rctx->sg = sg;
+
        return 0;
 }
 
-/* Start address alignment */
-#define SG_AA(sg)      (IS_ALIGNED(sg->offset, sizeof(u32)))
-/* SHA1 block size alignment */
-#define SG_SA(sg, bs)  (IS_ALIGNED(sg->length, bs))
-
-static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
+static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 {
-       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-       unsigned int length, final, tail;
-       struct scatterlist *sg;
-       int ret, bs;
+       struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+       int bs;
+       int ret;
+       int nbytes;
+       bool final = rctx->flags & BIT(FLAGS_FINUP);
+       int xmit_len, hash_later;
 
-       if (!ctx->total)
+       if (!req)
                return 0;
 
-       if (ctx->bufcnt || ctx->offset)
-               return omap_sham_update_dma_slow(dd);
-
-       /*
-        * Don't use the sg interface when the transfer size is less
-        * than the number of elements in a DMA frame.  Otherwise,
-        * the dmaengine infrastructure will calculate that it needs
-        * to transfer 0 frames which ultimately fails.
-        */
-       if (ctx->total < get_block_size(ctx))
-               return omap_sham_update_dma_slow(dd);
-
-       dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
-                       ctx->digcnt, ctx->bufcnt, ctx->total);
+       bs = get_block_size(rctx);
 
-       sg = ctx->sg;
-       bs = get_block_size(ctx);
+       if (update)
+               nbytes = req->nbytes;
+       else
+               nbytes = 0;
 
-       if (!SG_AA(sg))
-               return omap_sham_update_dma_slow(dd);
+       rctx->total = nbytes + rctx->bufcnt;
 
-       if (!sg_is_last(sg) && !SG_SA(sg, bs))
-               /* size is not BLOCK_SIZE aligned */
-               return omap_sham_update_dma_slow(dd);
+       if (!rctx->total)
+               return 0;
 
-       length = min(ctx->total, sg->length);
+       if (nbytes && (!IS_ALIGNED(rctx->bufcnt, bs))) {
+               int len = bs - rctx->bufcnt % bs;
 
-       if (sg_is_last(sg)) {
-               if (!(ctx->flags & BIT(FLAGS_FINUP))) {
-                       /* not last sg must be BLOCK_SIZE aligned */
-                       tail = length & (bs - 1);
-                       /* without finup() we need one block to close hash */
-                       if (!tail)
-                               tail = bs;
-                       length -= tail;
-               }
+               if (len > nbytes)
+                       len = nbytes;
+               scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, req->src,
+                                        0, len, 0);
+               rctx->bufcnt += len;
+               nbytes -= len;
+               rctx->offset = len;
        }
 
-       if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
-               dev_err(dd->dev, "dma_map_sg  error\n");
-               return -EINVAL;
-       }
+       if (rctx->bufcnt)
+               memcpy(rctx->dd->xmit_buf, rctx->buffer, rctx->bufcnt);
 
-       ctx->flags |= BIT(FLAGS_SG);
+       ret = omap_sham_align_sgs(req->src, nbytes, bs, final, rctx);
+       if (ret)
+               return ret;
 
-       ctx->total -= length;
-       ctx->offset = length; /* offset where to start slow */
+       xmit_len = rctx->total;
 
-       final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+       if (!IS_ALIGNED(xmit_len, bs)) {
+               if (final)
+                       xmit_len = DIV_ROUND_UP(xmit_len, bs) * bs;
+               else
+                       xmit_len = xmit_len / bs * bs;
+       }
 
-       ret = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final, 1);
-       if (ret != -EINPROGRESS)
-               dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+       hash_later = rctx->total - xmit_len;
+       if (hash_later < 0)
+               hash_later = 0;
 
-       return ret;
-}
+       if (rctx->bufcnt && nbytes) {
+               /* have data from previous operation and current */
+               sg_init_table(rctx->sgl, 2);
+               sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, rctx->bufcnt);
 
-static int omap_sham_update_cpu(struct omap_sham_dev *dd)
-{
-       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-       int bufcnt, final;
+               sg_chain(rctx->sgl, 2, req->src);
 
-       if (!ctx->total)
-               return 0;
+               rctx->sg = rctx->sgl;
 
-       omap_sham_append_sg(ctx);
+               rctx->sg_len++;
+       } else if (rctx->bufcnt) {
+               /* have buffered data only */
+               sg_init_table(rctx->sgl, 1);
+               sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, xmit_len);
 
-       final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+               rctx->sg = rctx->sgl;
 
-       dev_dbg(dd->dev, "cpu: bufcnt: %u, digcnt: %d, final: %d\n",
-               ctx->bufcnt, ctx->digcnt, final);
+               rctx->sg_len = 1;
+       }
 
-       if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
-               bufcnt = ctx->bufcnt;
-               ctx->bufcnt = 0;
-               return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final);
+       if (hash_later) {
+               if (req->nbytes) {
+                       scatterwalk_map_and_copy(rctx->buffer, req->src,
+                                                req->nbytes - hash_later,
+                                                hash_later, 0);
+               } else {
+                       memcpy(rctx->buffer, rctx->buffer + xmit_len,
+                              hash_later);
+               }
+               rctx->bufcnt = hash_later;
+       } else {
+               rctx->bufcnt = 0;
        }
 
+       if (!final)
+               rctx->total = xmit_len;
+
        return 0;
 }
 
@@ -815,18 +896,9 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
 
+       dma_unmap_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE);
 
-       if (ctx->flags & BIT(FLAGS_SG)) {
-               dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
-               if (ctx->sg->length == ctx->offset) {
-                       ctx->sg = sg_next(ctx->sg);
-                       if (ctx->sg)
-                               ctx->offset = 0;
-               }
-       } else {
-               dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
-                                DMA_TO_DEVICE);
-       }
+       clear_bit(FLAGS_DMA_ACTIVE, &dd->flags);
 
        return 0;
 }
@@ -887,6 +959,8 @@ static int omap_sham_init(struct ahash_request *req)
 
        ctx->bufcnt = 0;
        ctx->digcnt = 0;
+       ctx->total = 0;
+       ctx->offset = 0;
        ctx->buflen = BUFLEN;
 
        if (tctx->flags & BIT(FLAGS_HMAC)) {
@@ -909,14 +983,19 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
        struct ahash_request *req = dd->req;
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        int err;
+       bool final = ctx->flags & BIT(FLAGS_FINUP);
 
        dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
                 ctx->total, ctx->digcnt, (ctx->flags & BIT(FLAGS_FINUP)) != 0);
 
+       if (ctx->total < get_block_size(ctx) ||
+           ctx->total < OMAP_SHA_DMA_THRESHOLD)
+               ctx->flags |= BIT(FLAGS_CPU);
+
        if (ctx->flags & BIT(FLAGS_CPU))
-               err = omap_sham_update_cpu(dd);
+               err = omap_sham_xmit_cpu(dd, ctx->total, final);
        else
-               err = omap_sham_update_dma_start(dd);
+               err = omap_sham_xmit_dma(dd, ctx->total, final);
 
        /* wait for dma completion before can take more data */
        dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
@@ -930,7 +1009,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        int err = 0, use_dma = 1;
 
-       if ((ctx->bufcnt <= get_block_size(ctx)) || dd->polling_mode)
+       if ((ctx->total <= get_block_size(ctx)) || dd->polling_mode)
                /*
                 * faster to handle last block with cpu or
                 * use cpu when dma is not present.
@@ -938,9 +1017,9 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
                use_dma = 0;
 
        if (use_dma)
-               err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1);
+               err = omap_sham_xmit_dma(dd, ctx->total, 1);
        else
-               err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
+               err = omap_sham_xmit_cpu(dd, ctx->total, 1);
 
        ctx->bufcnt = 0;
 
@@ -988,6 +1067,17 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        struct omap_sham_dev *dd = ctx->dd;
 
+       if (test_bit(FLAGS_SGS_COPIED, &dd->flags))
+               free_pages((unsigned long)sg_virt(ctx->sg),
+                          get_order(ctx->sg->length));
+
+       if (test_bit(FLAGS_SGS_ALLOCED, &dd->flags))
+               kfree(ctx->sg);
+
+       ctx->sg = NULL;
+
+       dd->flags &= ~(BIT(FLAGS_SGS_ALLOCED) | BIT(FLAGS_SGS_COPIED));
+
        if (!err) {
                dd->pdata->copy_hash(req, 1);
                if (test_bit(FLAGS_FINAL, &dd->flags))
@@ -1005,9 +1095,6 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 
        if (req->base.complete)
                req->base.complete(&req->base, err);
-
-       /* handle new request */
-       tasklet_schedule(&dd->done_task);
 }
 
 static int omap_sham_handle_queue(struct omap_sham_dev *dd,
@@ -1018,6 +1105,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
        unsigned long flags;
        int err = 0, ret = 0;
 
+retry:
        spin_lock_irqsave(&dd->lock, flags);
        if (req)
                ret = ahash_enqueue_request(&dd->queue, req);
@@ -1041,6 +1129,10 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
        dd->req = req;
        ctx = ahash_request_ctx(req);
 
+       err = omap_sham_prepare_request(req, ctx->op == OP_UPDATE);
+       if (err)
+               goto err1;
+
        dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
                                                ctx->op, req->nbytes);
 
@@ -1061,11 +1153,19 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
                err = omap_sham_final_req(dd);
        }
 err1:
-       if (err != -EINPROGRESS)
+       dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+       if (err != -EINPROGRESS) {
                /* done_task will not finish it, so do it here */
                omap_sham_finish_req(req, err);
+               req = NULL;
 
-       dev_dbg(dd->dev, "exit, err: %d\n", err);
+               /*
+                * Execute the next request immediately if there is
+                * anything in the queue.
+                */
+               goto retry;
+       }
 
        return ret;
 }
@@ -1085,34 +1185,15 @@ static int omap_sham_update(struct ahash_request *req)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        struct omap_sham_dev *dd = ctx->dd;
-       int bs = get_block_size(ctx);
 
        if (!req->nbytes)
                return 0;
 
-       ctx->total = req->nbytes;
-       ctx->sg = req->src;
-       ctx->offset = 0;
-
-       if (ctx->flags & BIT(FLAGS_FINUP)) {
-               if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) {
-                       /*
-                       * OMAP HW accel works only with buffers >= 9
-                       * will switch to bypass in final()
-                       * final has the same request and data
-                       */
-                       omap_sham_append_sg(ctx);
-                       return 0;
-               } else if ((ctx->bufcnt + ctx->total <= bs) ||
-                          dd->polling_mode) {
-                       /*
-                        * faster to use CPU for short transfers or
-                        * use cpu when dma is not present.
-                        */
-                       ctx->flags |= BIT(FLAGS_CPU);
-               }
-       } else if (ctx->bufcnt + ctx->total < ctx->buflen) {
-               omap_sham_append_sg(ctx);
+       if (ctx->total + req->nbytes < ctx->buflen) {
+               scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, req->src,
+                                        0, req->nbytes, 0);
+               ctx->bufcnt += req->nbytes;
+               ctx->total += req->nbytes;
                return 0;
        }
 
@@ -1137,9 +1218,20 @@ static int omap_sham_final_shash(struct ahash_request *req)
 {
        struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int offset = 0;
+
+       /*
+        * If we are running HMAC on hardware with limited support, skip
+        * the ipad at the beginning of the buffer when falling back to
+        * the software algorithm.
+        */
+       if (test_bit(FLAGS_HMAC, &ctx->flags) &&
+           !test_bit(FLAGS_AUTO_XOR, &ctx->dd->flags))
+               offset = get_block_size(ctx);
 
        return omap_sham_shash_digest(tctx->fallback, req->base.flags,
-                                     ctx->buffer, ctx->bufcnt, req->result);
+                                     ctx->buffer + offset,
+                                     ctx->bufcnt - offset, req->result);
 }
 
 static int omap_sham_final(struct ahash_request *req)
@@ -1154,10 +1246,11 @@ static int omap_sham_final(struct ahash_request *req)
        /*
         * OMAP HW accel works only with buffers >= 9.
         * HMAC is always >= 9 because ipad == block size.
-        * If buffersize is less than 240, we use fallback SW encoding,
-        * as using DMA + HW in this case doesn't provide any benefit.
+        * If no data has been hashed by HW yet and the buffer holds less
+        * than OMAP_SHA_DMA_THRESHOLD bytes, use the SW fallback, as
+        * DMA + HW provides no benefit in this case.
         */
-       if ((ctx->digcnt + ctx->bufcnt) < 240)
+       if (!ctx->digcnt && ctx->bufcnt < OMAP_SHA_DMA_THRESHOLD)
                return omap_sham_final_shash(req);
        else if (ctx->bufcnt)
                return omap_sham_enqueue(req, OP_FINAL);
@@ -1323,6 +1416,25 @@ static void omap_sham_cra_exit(struct crypto_tfm *tfm)
        }
 }
 
+static int omap_sham_export(struct ahash_request *req, void *out)
+{
+       struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+
+       memcpy(out, rctx, sizeof(*rctx) + rctx->bufcnt);
+
+       return 0;
+}
+
+static int omap_sham_import(struct ahash_request *req, const void *in)
+{
+       struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+       const struct omap_sham_reqctx *ctx_in = in;
+
+       memcpy(rctx, in, sizeof(*rctx) + ctx_in->bufcnt);
+
+       return 0;
+}
+
 static struct ahash_alg algs_sha1_md5[] = {
 {
        .init           = omap_sham_init,
@@ -1341,7 +1453,7 @@ static struct ahash_alg algs_sha1_md5[] = {
                                                CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = SHA1_BLOCK_SIZE,
                .cra_ctxsize            = sizeof(struct omap_sham_ctx),
-               .cra_alignmask          = 0,
+               .cra_alignmask          = OMAP_ALIGN_MASK,
                .cra_module             = THIS_MODULE,
                .cra_init               = omap_sham_cra_init,
                .cra_exit               = omap_sham_cra_exit,
@@ -1440,7 +1552,7 @@ static struct ahash_alg algs_sha224_sha256[] = {
                                                CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = SHA224_BLOCK_SIZE,
                .cra_ctxsize            = sizeof(struct omap_sham_ctx),
-               .cra_alignmask          = 0,
+               .cra_alignmask          = OMAP_ALIGN_MASK,
                .cra_module             = THIS_MODULE,
                .cra_init               = omap_sham_cra_init,
                .cra_exit               = omap_sham_cra_exit,
@@ -1462,7 +1574,7 @@ static struct ahash_alg algs_sha224_sha256[] = {
                                                CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = SHA256_BLOCK_SIZE,
                .cra_ctxsize            = sizeof(struct omap_sham_ctx),
-               .cra_alignmask          = 0,
+               .cra_alignmask          = OMAP_ALIGN_MASK,
                .cra_module             = THIS_MODULE,
                .cra_init               = omap_sham_cra_init,
                .cra_exit               = omap_sham_cra_exit,
@@ -1535,7 +1647,7 @@ static struct ahash_alg algs_sha384_sha512[] = {
                                                CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = SHA384_BLOCK_SIZE,
                .cra_ctxsize            = sizeof(struct omap_sham_ctx),
-               .cra_alignmask          = 0,
+               .cra_alignmask          = OMAP_ALIGN_MASK,
                .cra_module             = THIS_MODULE,
                .cra_init               = omap_sham_cra_init,
                .cra_exit               = omap_sham_cra_exit,
@@ -1557,7 +1669,7 @@ static struct ahash_alg algs_sha384_sha512[] = {
                                                CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = SHA512_BLOCK_SIZE,
                .cra_ctxsize            = sizeof(struct omap_sham_ctx),
-               .cra_alignmask          = 0,
+               .cra_alignmask          = OMAP_ALIGN_MASK,
                .cra_module             = THIS_MODULE,
                .cra_init               = omap_sham_cra_init,
                .cra_exit               = omap_sham_cra_exit,
@@ -1624,12 +1736,8 @@ static void omap_sham_done_task(unsigned long data)
        }
 
        if (test_bit(FLAGS_CPU, &dd->flags)) {
-               if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) {
-                       /* hash or semi-hash ready */
-                       err = omap_sham_update_cpu(dd);
-                       if (err != -EINPROGRESS)
-                               goto finish;
-               }
+               if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
+                       goto finish;
        } else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
                if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
                        omap_sham_update_dma_stop(dd);
@@ -1641,8 +1749,6 @@ static void omap_sham_done_task(unsigned long data)
                if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) {
                        /* hash or semi-hash ready */
                        clear_bit(FLAGS_DMA_READY, &dd->flags);
-                       err = omap_sham_update_dma_start(dd);
-                       if (err != -EINPROGRESS)
                                goto finish;
                }
        }
@@ -1653,6 +1759,10 @@ finish:
        dev_dbg(dd->dev, "update done: err: %d\n", err);
        /* finish curent request */
        omap_sham_finish_req(dd->req, err);
+
+       /* If we are not busy, process next req */
+       if (!test_bit(FLAGS_BUSY, &dd->flags))
+               omap_sham_handle_queue(dd, NULL);
 }
 
 static irqreturn_t omap_sham_irq_common(struct omap_sham_dev *dd)
@@ -1977,8 +2087,14 @@ static int omap_sham_probe(struct platform_device *pdev)
 
        for (i = 0; i < dd->pdata->algs_info_size; i++) {
                for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
-                       err = crypto_register_ahash(
-                                       &dd->pdata->algs_info[i].algs_list[j]);
+                       struct ahash_alg *alg;
+
+                       alg = &dd->pdata->algs_info[i].algs_list[j];
+                       alg->export = omap_sham_export;
+                       alg->import = omap_sham_import;
+                       alg->halg.statesize = sizeof(struct omap_sham_reqctx) +
+                                             BUFLEN;
+                       err = crypto_register_ahash(alg);
                        if (err)
                                goto err_algs;
 
index 2f2681d..afc9a0a 100644 (file)
@@ -55,7 +55,7 @@
 #define ADF_C3XXX_MAX_ACCELERATORS 3
 #define ADF_C3XXX_MAX_ACCELENGINES 6
 #define ADF_C3XXX_ACCELERATORS_REG_OFFSET 16
-#define ADF_C3XXX_ACCELERATORS_MASK 0x3
+#define ADF_C3XXX_ACCELERATORS_MASK 0x7
 #define ADF_C3XXX_ACCELENGINES_MASK 0x3F
 #define ADF_C3XXX_ETR_MAX_BANKS 16
 #define ADF_C3XXX_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
index ce7c462..3744b22 100644 (file)
@@ -146,6 +146,7 @@ struct adf_admin_comms {
        dma_addr_t phy_addr;
        dma_addr_t const_tbl_addr;
        void *virt_addr;
+       void *virt_tbl_addr;
        void __iomem *mailbox_addr;
        struct mutex lock;      /* protects adf_admin_comms struct */
 };
@@ -251,17 +252,19 @@ int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
                return -ENOMEM;
        }
 
-       admin->const_tbl_addr = dma_map_single(&GET_DEV(accel_dev),
-                                              (void *) const_tab, 1024,
-                                              DMA_TO_DEVICE);
-
-       if (unlikely(dma_mapping_error(&GET_DEV(accel_dev),
-                                      admin->const_tbl_addr))) {
+       admin->virt_tbl_addr = dma_zalloc_coherent(&GET_DEV(accel_dev),
+                                                  PAGE_SIZE,
+                                                  &admin->const_tbl_addr,
+                                                  GFP_KERNEL);
+       if (!admin->virt_tbl_addr) {
+               dev_err(&GET_DEV(accel_dev), "Failed to allocate const_tbl\n");
                dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
                                  admin->virt_addr, admin->phy_addr);
                kfree(admin);
                return -ENOMEM;
        }
+
+       memcpy(admin->virt_tbl_addr, const_tab, sizeof(const_tab));
        reg_val = (u64)admin->phy_addr;
        ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGUR_OFFSET, reg_val >> 32);
        ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGLR_OFFSET, reg_val);
@@ -282,9 +285,10 @@ void adf_exit_admin_comms(struct adf_accel_dev *accel_dev)
        if (admin->virt_addr)
                dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
                                  admin->virt_addr, admin->phy_addr);
+       if (admin->virt_tbl_addr)
+               dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
+                                 admin->virt_tbl_addr, admin->const_tbl_addr);
 
-       dma_unmap_single(&GET_DEV(accel_dev), admin->const_tbl_addr, 1024,
-                        DMA_TO_DEVICE);
        mutex_destroy(&admin->lock);
        kfree(admin);
        accel_dev->admin = NULL;
index 9b961b3..e2454d9 100644 (file)
@@ -967,10 +967,6 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
        struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle;
        unsigned int ae;
 
-       obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(uint64_t),
-                                       GFP_KERNEL);
-       if (!obj_handle->uword_buf)
-               return -ENOMEM;
        obj_handle->encap_uof_obj.beg_uof = obj_handle->obj_hdr->file_buff;
        obj_handle->encap_uof_obj.obj_hdr = (struct icp_qat_uof_objhdr *)
                                             obj_handle->obj_hdr->file_buff;
@@ -982,6 +978,10 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
                pr_err("QAT: UOF incompatible\n");
                return -EINVAL;
        }
+       obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(uint64_t),
+                                       GFP_KERNEL);
+       if (!obj_handle->uword_buf)
+               return -ENOMEM;
        obj_handle->ustore_phy_size = ICP_QAT_UCLO_MAX_USTORE;
        if (!obj_handle->obj_hdr->file_buff ||
            !qat_uclo_map_str_table(obj_handle->obj_hdr, ICP_QAT_UOF_STRT,
index af50825..d0f80c6 100644 (file)
@@ -304,11 +304,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
        usleep_range(10, 20);
        reset_control_deassert(crypto_info->rst);
 
-       err = devm_add_action(dev, rk_crypto_action, crypto_info);
-       if (err) {
-               reset_control_assert(crypto_info->rst);
+       err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info);
+       if (err)
                goto err_crypto;
-       }
 
        spin_lock_init(&crypto_info->lock);
 
index 3830d7c..90efd10 100644 (file)
@@ -29,7 +29,8 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
        u32 tx_cnt = 0;
        u32 spaces;
        u32 v;
-       int i, err = 0;
+       int err = 0;
+       unsigned int i;
        unsigned int ileft = areq->nbytes;
        unsigned int oleft = areq->nbytes;
        unsigned int todo;
@@ -139,7 +140,8 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
        u32 tx_cnt = 0;
        u32 v;
        u32 spaces;
-       int i, err = 0;
+       int err = 0;
+       unsigned int i;
        unsigned int ileft = areq->nbytes;
        unsigned int oleft = areq->nbytes;
        unsigned int todo;
index 107cd2a..3ac6c6c 100644 (file)
@@ -172,45 +172,45 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 },
 {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
        .alg.crypto = {
-                       .cra_name = "cbc(des3_ede)",
-                       .cra_driver_name = "cbc-des3-sun4i-ss",
-                       .cra_priority = 300,
-                       .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-                       .cra_ctxsize = sizeof(struct sun4i_req_ctx),
-                       .cra_module = THIS_MODULE,
-                       .cra_alignmask = 3,
-                       .cra_type = &crypto_ablkcipher_type,
-                       .cra_init = sun4i_ss_cipher_init,
-                       .cra_u.ablkcipher = {
-                               .min_keysize    = DES3_EDE_KEY_SIZE,
-                               .max_keysize    = DES3_EDE_KEY_SIZE,
-                               .ivsize         = DES3_EDE_BLOCK_SIZE,
-                               .setkey         = sun4i_ss_des3_setkey,
-                               .encrypt        = sun4i_ss_cbc_des3_encrypt,
-                               .decrypt        = sun4i_ss_cbc_des3_decrypt,
+               .cra_name = "cbc(des3_ede)",
+               .cra_driver_name = "cbc-des3-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_req_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_u.ablkcipher = {
+                       .min_keysize    = DES3_EDE_KEY_SIZE,
+                       .max_keysize    = DES3_EDE_KEY_SIZE,
+                       .ivsize         = DES3_EDE_BLOCK_SIZE,
+                       .setkey         = sun4i_ss_des3_setkey,
+                       .encrypt        = sun4i_ss_cbc_des3_encrypt,
+                       .decrypt        = sun4i_ss_cbc_des3_decrypt,
                }
        }
 },
 {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
        .alg.crypto = {
-                       .cra_name = "ecb(des3_ede)",
-                       .cra_driver_name = "ecb-des3-sun4i-ss",
-                       .cra_priority = 300,
-                       .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-                       .cra_ctxsize = sizeof(struct sun4i_req_ctx),
-                       .cra_module = THIS_MODULE,
-                       .cra_alignmask = 3,
-                       .cra_type = &crypto_ablkcipher_type,
-                       .cra_init = sun4i_ss_cipher_init,
-                       .cra_u.ablkcipher = {
-                               .min_keysize    = DES3_EDE_KEY_SIZE,
-                               .max_keysize    = DES3_EDE_KEY_SIZE,
-                               .ivsize         = DES3_EDE_BLOCK_SIZE,
-                               .setkey         = sun4i_ss_des3_setkey,
-                               .encrypt        = sun4i_ss_ecb_des3_encrypt,
-                               .decrypt        = sun4i_ss_ecb_des3_decrypt,
+               .cra_name = "ecb(des3_ede)",
+               .cra_driver_name = "ecb-des3-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_req_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_u.ablkcipher = {
+                       .min_keysize    = DES3_EDE_KEY_SIZE,
+                       .max_keysize    = DES3_EDE_KEY_SIZE,
+                       .ivsize         = DES3_EDE_BLOCK_SIZE,
+                       .setkey         = sun4i_ss_des3_setkey,
+                       .encrypt        = sun4i_ss_ecb_des3_encrypt,
+                       .decrypt        = sun4i_ss_ecb_des3_decrypt,
                }
        }
 },
index ff80314..0de2f62 100644 (file)
 
 int sun4i_hash_crainit(struct crypto_tfm *tfm)
 {
+       struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
+       struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+       struct sun4i_ss_alg_template *algt;
+
+       memset(op, 0, sizeof(struct sun4i_tfm_ctx));
+
+       algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
+       op->ss = algt->ss;
+
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct sun4i_req_ctx));
        return 0;
@@ -32,13 +41,10 @@ int sun4i_hash_init(struct ahash_request *areq)
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
        struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
        struct sun4i_ss_alg_template *algt;
-       struct sun4i_ss_ctx *ss;
 
        memset(op, 0, sizeof(struct sun4i_req_ctx));
 
        algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
-       ss = algt->ss;
-       op->ss = algt->ss;
        op->mode = algt->mode;
 
        return 0;
@@ -129,6 +135,9 @@ int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
        return 0;
 }
 
+#define SS_HASH_UPDATE 1
+#define SS_HASH_FINAL 2
+
 /*
  * sun4i_hash_update: update hash engine
  *
@@ -156,7 +165,7 @@ int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
  * write remaining data in op->buf
  * final state op->len=56
  */
-int sun4i_hash_update(struct ahash_request *areq)
+static int sun4i_hash(struct ahash_request *areq)
 {
        u32 v, ivmode = 0;
        unsigned int i = 0;
@@ -167,8 +176,9 @@ int sun4i_hash_update(struct ahash_request *areq)
         */
 
        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
-       struct sun4i_ss_ctx *ss = op->ss;
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
+       struct sun4i_ss_ctx *ss = tfmctx->ss;
        unsigned int in_i = 0; /* advancement in the current SG */
        unsigned int end;
        /*
@@ -180,22 +190,30 @@ int sun4i_hash_update(struct ahash_request *areq)
        u32 spaces, rx_cnt = SS_RX_DEFAULT;
        size_t copied = 0;
        struct sg_mapping_iter mi;
+       unsigned int j = 0;
+       int zeros;
+       unsigned int index, padlen;
+       __be64 bits;
+       u32 bf[32];
+       u32 wb = 0;
+       unsigned int nwait, nbw = 0;
+       struct scatterlist *in_sg = areq->src;
 
        dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
                __func__, crypto_tfm_alg_name(areq->base.tfm),
                op->byte_count, areq->nbytes, op->mode,
                op->len, op->hash[0]);
 
-       if (areq->nbytes == 0)
+       if (unlikely(areq->nbytes == 0) && (op->flags & SS_HASH_FINAL) == 0)
                return 0;
 
        /* protect against overflow */
-       if (areq->nbytes > UINT_MAX - op->len) {
+       if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
                dev_err(ss->dev, "Cannot process too large request\n");
                return -EINVAL;
        }
 
-       if (op->len + areq->nbytes < 64) {
+       if (op->len + areq->nbytes < 64 && (op->flags & SS_HASH_FINAL) == 0) {
                /* linearize data to op->buf */
                copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
                                            op->buf + op->len, areq->nbytes, 0);
@@ -203,14 +221,6 @@ int sun4i_hash_update(struct ahash_request *areq)
                return 0;
        }
 
-       end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
-
-       if (end > areq->nbytes || areq->nbytes - end > 63) {
-               dev_err(ss->dev, "ERROR: Bound error %u %u\n",
-                       end, areq->nbytes);
-               return -EINVAL;
-       }
-
        spin_lock_bh(&ss->slock);
 
        /*
@@ -225,6 +235,34 @@ int sun4i_hash_update(struct ahash_request *areq)
        /* Enable the device */
        writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
 
+       if ((op->flags & SS_HASH_UPDATE) == 0)
+               goto hash_final;
+
+       /* start of handling data */
+       if ((op->flags & SS_HASH_FINAL) == 0) {
+               end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
+
+               if (end > areq->nbytes || areq->nbytes - end > 63) {
+                       dev_err(ss->dev, "ERROR: Bound error %u %u\n",
+                               end, areq->nbytes);
+                       err = -EINVAL;
+                       goto release_ss;
+               }
+       } else {
+               /* Since we have the flag final, we can go up to modulo 4 */
+               end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
+       }
+
+       /* TODO if SGlen % 4 and op->len == 0 then DMA */
+       i = 1;
+       while (in_sg && i == 1) {
+               if ((in_sg->length % 4) != 0)
+                       i = 0;
+               in_sg = sg_next(in_sg);
+       }
+       if (i == 1 && op->len == 0)
+               dev_dbg(ss->dev, "We can DMA\n");
+
        i = 0;
        sg_miter_start(&mi, areq->src, sg_nents(areq->src),
                       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
@@ -285,7 +323,11 @@ int sun4i_hash_update(struct ahash_request *areq)
                        }
                }
        } while (i < end);
-       /* final linear */
+
+       /*
+        * Now we have written to the device all that we can,
+        * store the remaining bytes in op->buf
+        */
        if ((areq->nbytes - i) < 64) {
                while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
                        /* how many bytes we can read from current SG */
@@ -304,13 +346,21 @@ int sun4i_hash_update(struct ahash_request *areq)
 
        sg_miter_stop(&mi);
 
+       /*
+        * End of data process
+        * Now if we have the flag final go to finalize part
+        * If not, store the partial hash
+        */
+       if ((op->flags & SS_HASH_FINAL) > 0)
+               goto hash_final;
+
        writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
        i = 0;
        do {
                v = readl(ss->base + SS_CTL);
                i++;
        } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
-       if (i >= SS_TIMEOUT) {
+       if (unlikely(i >= SS_TIMEOUT)) {
                dev_err_ratelimited(ss->dev,
                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
                                    i, SS_TIMEOUT, v, areq->nbytes);
@@ -318,56 +368,24 @@ int sun4i_hash_update(struct ahash_request *areq)
                goto release_ss;
        }
 
-       /* get the partial hash only if something was written */
        for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
                op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
 
-release_ss:
-       writel(0, ss->base + SS_CTL);
-       spin_unlock_bh(&ss->slock);
-       return err;
-}
+       goto release_ss;
 
 /*
- * sun4i_hash_final: finalize hashing operation
+ * hash_final: finalize hashing operation
  *
  * If we have some remaining bytes, we write them.
  * Then ask the SS for finalizing the hashing operation
  *
  * I do not check RX FIFO size in this function since the size is 32
  * after each enabling and this function neither write more than 32 words.
+ * If we come from the update part, we cannot have more than
+ * 3 remaining bytes to write and SS is fast enough to not care about it.
  */
-int sun4i_hash_final(struct ahash_request *areq)
-{
-       u32 v, ivmode = 0;
-       unsigned int i;
-       unsigned int j = 0;
-       int zeros, err = 0;
-       unsigned int index, padlen;
-       __be64 bits;
-       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
-       struct sun4i_ss_ctx *ss = op->ss;
-       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-       u32 bf[32];
-       u32 wb = 0;
-       unsigned int nwait, nbw = 0;
-
-       dev_dbg(ss->dev, "%s: byte=%llu len=%u mode=%x wl=%u h=%x",
-               __func__, op->byte_count, areq->nbytes, op->mode,
-               op->len, op->hash[0]);
 
-       spin_lock_bh(&ss->slock);
-
-       /*
-        * if we have already written something,
-        * restore the partial hash state
-        */
-       if (op->byte_count > 0) {
-               ivmode = SS_IV_ARBITRARY;
-               for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
-                       writel(op->hash[i], ss->base + SS_IV0 + i * 4);
-       }
-       writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
+hash_final:
 
        /* write the remaining words of the wait buffer */
        if (op->len > 0) {
@@ -428,7 +446,7 @@ int sun4i_hash_final(struct ahash_request *areq)
 
        /*
         * Wait for SS to finish the hash.
-        * The timeout could happen only in case of bad overcloking
+        * The timeout could happen only in case of bad overclocking
         * or driver bug.
         */
        i = 0;
@@ -436,7 +454,7 @@ int sun4i_hash_final(struct ahash_request *areq)
                v = readl(ss->base + SS_CTL);
                i++;
        } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
-       if (i >= SS_TIMEOUT) {
+       if (unlikely(i >= SS_TIMEOUT)) {
                dev_err_ratelimited(ss->dev,
                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
                                    i, SS_TIMEOUT, v, areq->nbytes);
@@ -463,30 +481,41 @@ release_ss:
        return err;
 }
 
+int sun4i_hash_final(struct ahash_request *areq)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+
+       op->flags = SS_HASH_FINAL;
+       return sun4i_hash(areq);
+}
+
+int sun4i_hash_update(struct ahash_request *areq)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+
+       op->flags = SS_HASH_UPDATE;
+       return sun4i_hash(areq);
+}
+
 /* sun4i_hash_finup: finalize hashing operation after an update */
 int sun4i_hash_finup(struct ahash_request *areq)
 {
-       int err;
-
-       err = sun4i_hash_update(areq);
-       if (err != 0)
-               return err;
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 
-       return sun4i_hash_final(areq);
+       op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
+       return sun4i_hash(areq);
 }
 
 /* combo of init/update/final functions */
 int sun4i_hash_digest(struct ahash_request *areq)
 {
        int err;
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 
        err = sun4i_hash_init(areq);
        if (err != 0)
                return err;
 
-       err = sun4i_hash_update(areq);
-       if (err != 0)
-               return err;
-
-       return sun4i_hash_final(areq);
+       op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
+       return sun4i_hash(areq);
 }
index 8e9c05f..f04c0f8 100644 (file)
@@ -163,7 +163,7 @@ struct sun4i_req_ctx {
        u32 hash[5]; /* for storing SS_IVx register */
        char buf[64];
        unsigned int len;
-       struct sun4i_ss_ctx *ss;
+       int flags;
 };
 
 int sun4i_hash_crainit(struct crypto_tfm *tfm);
index a83ead1..c3d524e 100644 (file)
@@ -1,6 +1,7 @@
 config CRYPTO_DEV_VMX_ENCRYPT
        tristate "Encryption acceleration support on P8 CPU"
        depends on CRYPTO_DEV_VMX
+       select CRYPTO_GHASH
        default m
        help
          Support for VMX cryptographic acceleration instructions on Power8 CPU.
index 6c999cb..27a94a1 100644 (file)
 #include <linux/hardirq.h>
 #include <asm/switch_to.h>
 #include <crypto/aes.h>
+#include <crypto/ghash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/hash.h>
 #include <crypto/b128ops.h>
 
 #define IN_INTERRUPT in_interrupt()
 
-#define GHASH_BLOCK_SIZE (16)
-#define GHASH_DIGEST_SIZE (16)
-#define GHASH_KEY_LEN (16)
-
 void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
 void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
 void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
@@ -55,16 +52,11 @@ struct p8_ghash_desc_ctx {
 
 static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
 {
-       const char *alg;
+       const char *alg = "ghash-generic";
        struct crypto_shash *fallback;
        struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
        struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       if (!(alg = crypto_tfm_alg_name(tfm))) {
-               printk(KERN_ERR "Failed to get algorithm name.\n");
-               return -ENOENT;
-       }
-
        fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(fallback)) {
                printk(KERN_ERR
@@ -78,10 +70,18 @@ static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
        crypto_shash_set_flags(fallback,
                               crypto_shash_get_flags((struct crypto_shash
                                                       *) tfm));
-       ctx->fallback = fallback;
 
-       shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx)
-           + crypto_shash_descsize(fallback);
+       /* Check if the descsize defined in the algorithm is still enough. */
+       if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
+           + crypto_shash_descsize(fallback)) {
+               printk(KERN_ERR
+                      "Desc size of the fallback implementation (%s) does not match the expected value: %lu vs %u\n",
+                      alg,
+                      shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
+                      crypto_shash_descsize(fallback));
+               return -EINVAL;
+       }
+       ctx->fallback = fallback;
 
        return 0;
 }
@@ -113,7 +113,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
 {
        struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
 
-       if (keylen != GHASH_KEY_LEN)
+       if (keylen != GHASH_BLOCK_SIZE)
                return -EINVAL;
 
        preempt_disable();
@@ -211,7 +211,8 @@ struct shash_alg p8_ghash_alg = {
        .update = p8_ghash_update,
        .final = p8_ghash_final,
        .setkey = p8_ghash_setkey,
-       .descsize = sizeof(struct p8_ghash_desc_ctx),
+       .descsize = sizeof(struct p8_ghash_desc_ctx)
+               + sizeof(struct ghash_desc_ctx),
        .base = {
                 .cra_name = "ghash",
                 .cra_driver_name = "p8_ghash",
index cffc1c0..c232729 100644 (file)
@@ -834,6 +834,17 @@ static void quirk_amd_ioapic(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_VIPER_7410,   quirk_amd_ioapic);
 #endif /* CONFIG_X86_IO_APIC */
 
+#if defined(CONFIG_ARM64) && defined(CONFIG_PCI_ATS)
+
+static void quirk_cavium_sriov_rnm_link(struct pci_dev *dev)
+{
+       /* Fix for improper SRIOV configuration on Cavium cn88xx  RNM device */
+       if (dev->subsystem_device == 0xa118)
+               dev->sriov->link = dev->devfn;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CAVIUM, 0xa018, quirk_cavium_sriov_rnm_link);
+#endif
+
 /*
  * Some settings of MMRBC can lead to data corruption so block changes.
  * See AMD 8131 HyperTransport PCI-X Tunnel Revision Guide
index 8637cdf..404e955 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/crypto.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/kthread.h>
 #include <linux/skbuff.h>
 
 struct crypto_aead;
@@ -129,75 +128,6 @@ struct ablkcipher_walk {
        unsigned int            blocksize;
 };
 
-#define ENGINE_NAME_LEN        30
-/*
- * struct crypto_engine - crypto hardware engine
- * @name: the engine name
- * @idling: the engine is entering idle state
- * @busy: request pump is busy
- * @running: the engine is on working
- * @cur_req_prepared: current request is prepared
- * @list: link with the global crypto engine list
- * @queue_lock: spinlock to syncronise access to request queue
- * @queue: the crypto queue of the engine
- * @rt: whether this queue is set to run as a realtime task
- * @prepare_crypt_hardware: a request will soon arrive from the queue
- * so the subsystem requests the driver to prepare the hardware
- * by issuing this call
- * @unprepare_crypt_hardware: there are currently no more requests on the
- * queue so the subsystem notifies the driver that it may relax the
- * hardware by issuing this call
- * @prepare_request: do some prepare if need before handle the current request
- * @unprepare_request: undo any work done by prepare_message()
- * @crypt_one_request: do encryption for current request
- * @kworker: thread struct for request pump
- * @kworker_task: pointer to task for request pump kworker thread
- * @pump_requests: work struct for scheduling work to the request pump
- * @priv_data: the engine private data
- * @cur_req: the current request which is on processing
- */
-struct crypto_engine {
-       char                    name[ENGINE_NAME_LEN];
-       bool                    idling;
-       bool                    busy;
-       bool                    running;
-       bool                    cur_req_prepared;
-
-       struct list_head        list;
-       spinlock_t              queue_lock;
-       struct crypto_queue     queue;
-
-       bool                    rt;
-
-       int (*prepare_crypt_hardware)(struct crypto_engine *engine);
-       int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
-
-       int (*prepare_request)(struct crypto_engine *engine,
-                              struct ablkcipher_request *req);
-       int (*unprepare_request)(struct crypto_engine *engine,
-                                struct ablkcipher_request *req);
-       int (*crypt_one_request)(struct crypto_engine *engine,
-                                struct ablkcipher_request *req);
-
-       struct kthread_worker           kworker;
-       struct task_struct              *kworker_task;
-       struct kthread_work             pump_requests;
-
-       void                            *priv_data;
-       struct ablkcipher_request       *cur_req;
-};
-
-int crypto_transfer_request(struct crypto_engine *engine,
-                           struct ablkcipher_request *req, bool need_pump);
-int crypto_transfer_request_to_engine(struct crypto_engine *engine,
-                                     struct ablkcipher_request *req);
-void crypto_finalize_request(struct crypto_engine *engine,
-                            struct ablkcipher_request *req, int err);
-int crypto_engine_start(struct crypto_engine *engine);
-int crypto_engine_stop(struct crypto_engine *engine);
-struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
-int crypto_engine_exit(struct crypto_engine *engine);
-
 extern const struct crypto_type crypto_ablkcipher_type;
 extern const struct crypto_type crypto_blkcipher_type;
 
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
new file mode 100644 (file)
index 0000000..04eb5c7
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Crypto engine API
+ *
+ * Copyright (c) 2016 Baolin Wang <baolin.wang@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ENGINE_H
+#define _CRYPTO_ENGINE_H
+
+#include <linux/crypto.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+
+#define ENGINE_NAME_LEN        30
+/*
+ * struct crypto_engine - crypto hardware engine
+ * @name: the engine name
+ * @idling: the engine is entering idle state
+ * @busy: request pump is busy
+ * @running: the engine is on working
+ * @cur_req_prepared: current request is prepared
+ * @list: link with the global crypto engine list
+ * @queue_lock: spinlock to synchronise access to request queue
+ * @queue: the crypto queue of the engine
+ * @rt: whether this queue is set to run as a realtime task
+ * @prepare_crypt_hardware: a request will soon arrive from the queue
+ * so the subsystem requests the driver to prepare the hardware
+ * by issuing this call
+ * @unprepare_crypt_hardware: there are currently no more requests on the
+ * queue so the subsystem notifies the driver that it may relax the
+ * hardware by issuing this call
+ * @prepare_cipher_request: do any preparation needed before handling the current request
+ * @unprepare_cipher_request: undo any work done by prepare_cipher_request()
+ * @cipher_one_request: do encryption for current request
+ * @prepare_hash_request: do any preparation needed before handling the current request
+ * @unprepare_hash_request: undo any work done by prepare_hash_request()
+ * @hash_one_request: do hash for current request
+ * @kworker: thread struct for request pump
+ * @kworker_task: pointer to task for request pump kworker thread
+ * @pump_requests: work struct for scheduling work to the request pump
+ * @priv_data: the engine private data
+ * @cur_req: the current request which is on processing
+ */
+struct crypto_engine {
+       char                    name[ENGINE_NAME_LEN];
+       bool                    idling;
+       bool                    busy;
+       bool                    running;
+       bool                    cur_req_prepared;
+
+       struct list_head        list;
+       spinlock_t              queue_lock;
+       struct crypto_queue     queue;
+
+       bool                    rt;
+
+       int (*prepare_crypt_hardware)(struct crypto_engine *engine);
+       int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
+
+       int (*prepare_cipher_request)(struct crypto_engine *engine,
+                                     struct ablkcipher_request *req);
+       int (*unprepare_cipher_request)(struct crypto_engine *engine,
+                                       struct ablkcipher_request *req);
+       int (*prepare_hash_request)(struct crypto_engine *engine,
+                                   struct ahash_request *req);
+       int (*unprepare_hash_request)(struct crypto_engine *engine,
+                                     struct ahash_request *req);
+       int (*cipher_one_request)(struct crypto_engine *engine,
+                                 struct ablkcipher_request *req);
+       int (*hash_one_request)(struct crypto_engine *engine,
+                               struct ahash_request *req);
+
+       struct kthread_worker           kworker;
+       struct task_struct              *kworker_task;
+       struct kthread_work             pump_requests;
+
+       void                            *priv_data;
+       struct crypto_async_request     *cur_req;
+};
+
+int crypto_transfer_cipher_request(struct crypto_engine *engine,
+                                  struct ablkcipher_request *req,
+                                  bool need_pump);
+int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
+                                            struct ablkcipher_request *req);
+int crypto_transfer_hash_request(struct crypto_engine *engine,
+                                struct ahash_request *req, bool need_pump);
+int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
+                                          struct ahash_request *req);
+void crypto_finalize_cipher_request(struct crypto_engine *engine,
+                                   struct ablkcipher_request *req, int err);
+void crypto_finalize_hash_request(struct crypto_engine *engine,
+                                 struct ahash_request *req, int err);
+int crypto_engine_start(struct crypto_engine *engine);
+int crypto_engine_stop(struct crypto_engine *engine);
+struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
+int crypto_engine_exit(struct crypto_engine *engine);
+
+#endif /* _CRYPTO_ENGINE_H */
diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h
new file mode 100644 (file)
index 0000000..2a61c9b
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Common values for GHASH algorithms
+ */
+
+#ifndef __CRYPTO_GHASH_H__
+#define __CRYPTO_GHASH_H__
+
+#include <linux/types.h>
+#include <crypto/gf128mul.h>
+
+#define GHASH_BLOCK_SIZE       16
+#define GHASH_DIGEST_SIZE      16
+
+struct ghash_ctx {
+       struct gf128mul_4k *gf128;
+};
+
+struct ghash_desc_ctx {
+       u8 buffer[GHASH_BLOCK_SIZE];
+       u32 bytes;
+};
+
+#endif
index 7c2bb27..a765333 100644 (file)
@@ -238,9 +238,6 @@ struct ccp_xts_aes_engine {
 };
 
 /***** SHA engine *****/
-#define CCP_SHA_BLOCKSIZE               SHA256_BLOCK_SIZE
-#define CCP_SHA_CTXSIZE                 SHA256_DIGEST_SIZE
-
 /**
  * ccp_sha_type - type of SHA operation
  *
index 4f7d8f4..34a0dc1 100644 (file)
@@ -29,7 +29,9 @@
  *                     Returns the number of lower random bytes in "data".
  *                     Must not be NULL.    *OBSOLETE*
  * @read:              New API. drivers can fill up to max bytes of data
- *                     into the buffer. The buffer is aligned for any type.
+ *                     into the buffer. The buffer is aligned for any type
+ *                     and max is guaranteed to be >= that alignment
+ *                     (either 4 or 8 depending on architecture).
  * @priv:              Private data, for use by the RNG driver.
  * @quality:           Estimation of true entropy in RNG's bitstream
  *                     (per mill).