arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions
authorArd Biesheuvel <ard.biesheuvel@linaro.org>
Wed, 26 Mar 2014 19:53:05 +0000 (20:53 +0100)
committerArd Biesheuvel <ard.biesheuvel@linaro.org>
Wed, 14 May 2014 17:04:07 +0000 (10:04 -0700)
This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the
GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the
optional PMULL/PMULL2 instructions (polynomial multiply long, what Intel calls
carry-less multiplication).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/Kconfig
arch/arm64/crypto/Makefile
arch/arm64/crypto/ghash-ce-core.S [new file with mode: 0644]
arch/arm64/crypto/ghash-ce-glue.c [new file with mode: 0644]

index eb1e997..0c50859 100644 (file)
@@ -18,4 +18,10 @@ config CRYPTO_SHA2_ARM64_CE
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_HASH
 
+
+config CRYPTO_GHASH_ARM64_CE
+       tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+       depends on ARM64 && KERNEL_MODE_NEON
+       select CRYPTO_HASH
+
 endif
index 0b3885a..e8c81a0 100644 (file)
@@ -13,3 +13,6 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
 
 obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644 (file)
index 0000000..b9e6eaf
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Based on arch/x86/crypto/ghash-clmulni-intel_asm.S
+ *
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *           Vinodh Gopal
+ *           Erdinc Ozturk
+ *           Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       DATA    .req    v0      // running digest; low half of the product
+       SHASH   .req    v1      // pre-shifted hash key H (see ghash_setkey)
+       IN1     .req    v2      // incoming block; dead once xor'ed into DATA
+       T1      .req    v2      // scratch; aliases IN1, which is dead by then
+       T2      .req    v3      // scratch
+       T3      .req    v4      // scratch
+       VZR     .req    v5      // always-zero vector; v0-v5 = 6 NEON regs total,
+                               // matching kernel_neon_begin_partial(6) in the glue
+
+       .text
+       .arch           armv8-a+crypto
+
+       /*
+        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+        *                         struct ghash_key const *k, const char *head)
+        */
+ENTRY(pmull_ghash_update)
+       /*
+        * Arguments:
+        *   w0: number of 16 byte blocks to consume from x2
+        *   x1: u64 dg[2], the running digest; loaded here, stored back at exit
+        *   x2: source data, advanced by 16 bytes per block
+        *   x3: struct ghash_key holding the pre-shifted hash key
+        *   x4: optional 'head' block, hashed before x2 but not counted in w0,
+        *       or NULL
+        */
+       ld1             {DATA.16b}, [x1]
+       ld1             {SHASH.16b}, [x3]
+       eor             VZR.16b, VZR.16b, VZR.16b       // zero vector for the EXTs below
+
+       /* do the head block first, if supplied */
+       cbz             x4, 0f
+       ld1             {IN1.2d}, [x4]
+       b               1f                              // skip the w0 decrement for the head block
+
+0:     ld1             {IN1.2d}, [x2], #16
+       sub             w0, w0, #1
+1:     ext             IN1.16b, IN1.16b, IN1.16b, #8   // swap the 64-bit halves
+CPU_LE(        rev64           IN1.16b, IN1.16b        )
+       eor             DATA.16b, DATA.16b, IN1.16b     // fold the block into the digest
+
+       /* multiply DATA by SHASH in GF(2^128) */
+       /* Karatsuba: 3 PMULLs instead of 4, using (a1^a0) and (b1^b0) */
+       ext             T2.16b, DATA.16b, DATA.16b, #8
+       ext             T3.16b, SHASH.16b, SHASH.16b, #8
+       eor             T2.16b, T2.16b, DATA.16b        // T2 = a1 ^ a0 (in both halves)
+       eor             T3.16b, T3.16b, SHASH.16b       // T3 = b1 ^ b0 (in both halves)
+
+       pmull2          T1.1q, SHASH.2d, DATA.2d        // a1 * b1
+       pmull           DATA.1q, SHASH.1d, DATA.1d      // a0 * b0
+       pmull           T2.1q, T2.1d, T3.1d             // (a1 + a0)(b1 + b0)
+       eor             T2.16b, T2.16b, T1.16b          // (a0 * b1) + (a1 * b0)
+       eor             T2.16b, T2.16b, DATA.16b
+
+       /* fold the 128-bit middle term into the 256-bit product */
+       ext             T3.16b, VZR.16b, T2.16b, #8
+       ext             T2.16b, T2.16b, VZR.16b, #8
+       eor             DATA.16b, DATA.16b, T3.16b
+       eor             T1.16b, T1.16b, T2.16b  // <T1:DATA> is result of
+                                               // carry-less multiplication
+
+       /*
+        * Two-phase shift-and-xor reduction of the 256-bit product modulo
+        * the GHASH polynomial, mirroring the x86 ghash-clmulni code this
+        * file is based on.
+        */
+       /* first phase of the reduction */
+       shl             T3.2d, DATA.2d, #1
+       eor             T3.16b, T3.16b, DATA.16b
+       shl             T3.2d, T3.2d, #5
+       eor             T3.16b, T3.16b, DATA.16b
+       shl             T3.2d, T3.2d, #57               // net: D<<63 ^ D<<62 ^ D<<57 per lane
+       ext             T2.16b, VZR.16b, T3.16b, #8
+       ext             T3.16b, T3.16b, VZR.16b, #8
+       eor             DATA.16b, DATA.16b, T2.16b
+       eor             T1.16b, T1.16b, T3.16b
+
+       /* second phase of the reduction */
+       ushr            T2.2d, DATA.2d, #5
+       eor             T2.16b, T2.16b, DATA.16b
+       ushr            T2.2d, T2.2d, #1
+       eor             T2.16b, T2.16b, DATA.16b
+       ushr            T2.2d, T2.2d, #1                // net: D>>7 ^ D>>2 ^ D>>1 per lane
+       eor             T1.16b, T1.16b, T2.16b
+       eor             DATA.16b, DATA.16b, T1.16b      // DATA = reduced 128-bit result
+
+       cbnz            w0, 0b                          // loop over remaining blocks
+
+       st1             {DATA.16b}, [x1]                // write the updated digest back
+       ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644 (file)
index 0000000..b92baf3
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE       16
+#define GHASH_DIGEST_SIZE      16
+
+/*
+ * Hash key as consumed by the asm: H pre-multiplied by 'x' in GF(2^128)
+ * by ghash_setkey(), stored as two 64-bit halves.
+ */
+struct ghash_key {
+       u64 a;  /* loaded as a single 128-bit vector by the asm */
+       u64 b;
+};
+
+/* Per-request hash state. */
+struct ghash_desc_ctx {
+       u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];      /* running digest */
+       u8 buf[GHASH_BLOCK_SIZE];                       /* buffered partial block */
+       u32 count;                                      /* total bytes hashed */
+};
+
+/* NEON core routine in ghash-ce-core.S; caller must enable the NEON unit. */
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+                                  struct ghash_key const *k, const char *head);
+
+/* Reset the per-request state; the key in the tfm context is untouched. */
+static int ghash_init(struct shash_desc *desc)
+{
+       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       *ctx = (struct ghash_desc_ctx){};
+       return 0;
+}
+
+/*
+ * Buffer input into 16 byte blocks; full blocks (plus any previously
+ * buffered partial block, passed as 'head') go to the NEON core routine.
+ * Trailing bytes that do not fill a block are kept in ctx->buf.
+ */
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+                       unsigned int len)
+{
+       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+       unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+       ctx->count += len;
+
+       if ((partial + len) >= GHASH_BLOCK_SIZE) {
+               struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+               int blocks;
+
+               if (partial) {
+                       int p = GHASH_BLOCK_SIZE - partial;
+
+                       /* complete the buffered block; hashed as 'head' below */
+                       memcpy(ctx->buf + partial, src, p);
+                       src += p;
+                       len -= p;
+               }
+
+               blocks = len / GHASH_BLOCK_SIZE;
+               len %= GHASH_BLOCK_SIZE;
+
+               /* the asm uses only v0-v5, so preserve just 6 NEON registers */
+               kernel_neon_begin_partial(6);
+               pmull_ghash_update(blocks, ctx->digest, src, key,
+                                  partial ? ctx->buf : NULL);
+               kernel_neon_end();
+               src += blocks * GHASH_BLOCK_SIZE;
+               partial = 0;    /* buffer consumed: leftover must land at buf[0] */
+       }
+       if (len)
+               memcpy(ctx->buf + partial, src, len);
+       return 0;
+}
+
+/* Pad and hash any buffered partial block, then emit the 16 byte digest. */
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+       unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+       if (partial) {
+               struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+               /* zero pad the final block before hashing it */
+               memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+               kernel_neon_begin_partial(6);
+               pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+               kernel_neon_end();
+       }
+       /* the asm keeps the digest halves swapped; undo that on output */
+       put_unaligned_be64(ctx->digest[1], dst);
+       put_unaligned_be64(ctx->digest[0], dst + 8);
+
+       /* wipe the per-request state */
+       *ctx = (struct ghash_desc_ctx){};
+       return 0;
+}
+
+/*
+ * Install the 16 byte hash key H.  H is pre-multiplied by 'x' in GF(2^128)
+ * here so the asm core does not have to do it per block (same scheme as the
+ * x86 clmulni driver this is based on).
+ */
+static int ghash_setkey(struct crypto_shash *tfm,
+                       const u8 *inkey, unsigned int keylen)
+{
+       struct ghash_key *key = crypto_shash_ctx(tfm);
+       u64 a, b;
+
+       if (keylen != GHASH_BLOCK_SIZE) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       /* perform multiplication by 'x' in GF(2^128) */
+       b = get_unaligned_be64(inkey);          /* first 8 key bytes */
+       a = get_unaligned_be64(inkey + 8);      /* last 8 key bytes */
+
+       /* 128-bit left shift with carry from b into a */
+       key->a = (a << 1) | (b >> 63);
+       key->b = (b << 1) | (a >> 63);
+
+       /* fold the shifted-out bit back in via the reduction constant */
+       if (b >> 63)
+               key->b ^= 0xc200000000000000UL;
+
+       return 0;
+}
+
+/*
+ * shash algorithm descriptor.  cra_priority 200 makes this preferred over
+ * lower-priority implementations of "ghash".
+ */
+static struct shash_alg ghash_alg = {
+       .digestsize     = GHASH_DIGEST_SIZE,
+       .init           = ghash_init,
+       .update         = ghash_update,
+       .final          = ghash_final,
+       .setkey         = ghash_setkey,
+       .descsize       = sizeof(struct ghash_desc_ctx),
+       .base           = {
+               .cra_name               = "ghash",
+               .cra_driver_name        = "ghash-ce",
+               .cra_priority           = 200,
+               .cra_flags              = CRYPTO_ALG_TYPE_SHASH,
+               .cra_blocksize          = GHASH_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct ghash_key),
+               .cra_module             = THIS_MODULE,
+       },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+       return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+       crypto_unregister_shash(&ghash_alg);
+}
+
+/* auto-load only on CPUs that implement the PMULL instructions */
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);