X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=blobdiff_plain;f=arch%2Farm%2Fcrypto%2Fsha1-armv7-neon.S;h=2468fade49cf3f3d2b14dfa875b06113c8640c99;hp=50013c0e2864d8a24ca7efc6a8da3f1e8b37f9c6;hb=8a1e377e55f2dca5c689926313beeaa8ac2adb22;hpb=b10778a00d40b3d9fdaaf5891e802794781ff71c diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S index 50013c0e2864..2468fade49cf 100644 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -9,10 +9,9 @@ */ #include - +#include .syntax unified -.code 32 .fpu neon .text @@ -61,13 +60,13 @@ #define RT3 r12 #define W0 q0 -#define W1 q1 +#define W1 q7 #define W2 q2 #define W3 q3 #define W4 q4 -#define W5 q5 -#define W6 q6 -#define W7 q7 +#define W5 q6 +#define W6 q5 +#define W7 q1 #define tmp0 q8 #define tmp1 q9 @@ -79,6 +78,11 @@ #define qK3 q14 #define qK4 q15 +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ARM_LE(code...) +#else +#define ARM_LE(code...) code +#endif /* Round function macros. */ @@ -150,45 +154,45 @@ #define W_PRECALC_00_15() \ add RWK, sp, #(WK_offs(0)); \ \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ - vrev32.8 W0, tmp0; /* big => little */ \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ + vld1.32 {W6, W5}, [RDATA]!; \ vadd.u32 tmp0, W0, curK; \ - vrev32.8 W7, tmp1; /* big => little */ \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ vadd.u32 tmp1, W7, curK; \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ vadd.u32 tmp2, W6, curK; \ vst1.32 {tmp0, tmp1}, [RWK]!; \ vadd.u32 tmp3, W5, curK; \ vst1.32 {tmp2, tmp3}, [RWK]; \ #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ add RWK, sp, #(WK_offs(0)); \ #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W0, tmp0; /* big => little */ \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W6, W5}, [RDATA]!; \ #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp0, W0, curK; \ #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W7, tmp1; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp1, W7, curK; \ #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp2, W6, curK; \