lib/GCD.c: use binary GCD algorithm instead of Euclidean

author Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>

Sat, 21 May 2016 00:03:57 +0000 (17:03 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
author Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Sat, 21 May 2016 00:03:57 +0000 (17:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
diff --git a/arch/Kconfig b/arch/Kconfig

index 8f84fd2..b16e74e 100644 (file)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -647,4 +647,7 @@ config COMPAT_OLD_SIGACTION
  config ARCH_NO_COHERENT_DMA_MMAP
         bool
  
+config CPU_NO_EFFICIENT_FFS
+       def_bool n
+
  source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig

index fe99f89..7f312d8 100644 (file)
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -26,6 +26,7 @@ config ALPHA
         select MODULES_USE_ELF_RELA
         select ODD_RT_SIGACTION
         select OLD_SIGSUSPEND
+       select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
         help
           The Alpha is a 64-bit general-purpose processor designed and
           marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig

index 8894f7e..0dcbacf 100644 (file)
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -107,6 +107,7 @@ choice
  
  config ISA_ARCOMPACT
         bool "ARCompact ISA"
+       select CPU_NO_EFFICIENT_FFS
         help
           The original ARC ISA of ARC600/700 cores
  
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig

index 5534766..cb569b6 100644 (file)
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -421,18 +421,21 @@ config CPU_32v3
         select CPU_USE_DOMAINS if MMU
         select NEED_KUSER_HELPERS
         select TLS_REG_EMUL if SMP || !MMU
+       select CPU_NO_EFFICIENT_FFS
  
  config CPU_32v4
         bool
         select CPU_USE_DOMAINS if MMU
         select NEED_KUSER_HELPERS
         select TLS_REG_EMUL if SMP || !MMU
+       select CPU_NO_EFFICIENT_FFS
  
  config CPU_32v4T
         bool
         select CPU_USE_DOMAINS if MMU
         select NEED_KUSER_HELPERS
         select TLS_REG_EMUL if SMP || !MMU
+       select CPU_NO_EFFICIENT_FFS
  
  config CPU_32v5
         bool
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig

index 986ea84..aa232de 100644 (file)
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -20,6 +20,7 @@ config H8300
         select HAVE_KERNEL_GZIP
         select HAVE_KERNEL_LZO
         select HAVE_ARCH_KGDB
+       select CPU_NO_EFFICIENT_FFS
  
  config RWSEM_GENERIC_SPINLOCK
         def_bool y
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig

index c82b292..3cc8498 100644 (file)
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -17,6 +17,7 @@ config M32R
         select ARCH_USES_GETTIMEOFFSET
         select MODULES_USE_ELF_RELA
         select HAVE_DEBUG_STACKOVERFLOW
+       select CPU_NO_EFFICIENT_FFS
  
  config SBUS
         bool
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu

index c1beb5a..8ace920 100644 (file)
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -40,6 +40,7 @@ config M68000
         select CPU_HAS_NO_MULDIV64
         select CPU_HAS_NO_UNALIGNED
         select GENERIC_CSUM
+       select CPU_NO_EFFICIENT_FFS
         help
           The Freescale (was Motorola) 68000 CPU is the first generation of
           the well known M68K family of processors. The CPU core as well as
@@ -51,6 +52,7 @@ config MCPU32
         bool
         select CPU_HAS_NO_BITFIELDS
         select CPU_HAS_NO_UNALIGNED
+       select CPU_NO_EFFICIENT_FFS
         help
           The Freescale (was then Motorola) CPU32 is a CPU core that is
           based on the 68020 processor. For the most part it is used in
@@ -130,6 +132,7 @@ config M5206
         depends on !MMU
         select COLDFIRE_SW_A7
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5206 processor support.
  
@@ -138,6 +141,7 @@ config M5206e
         depends on !MMU
         select COLDFIRE_SW_A7
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5206e processor support.
  
@@ -163,6 +167,7 @@ config M5249
         depends on !MMU
         select COLDFIRE_SW_A7
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5249 processor support.
  
@@ -171,6 +176,7 @@ config M525x
         depends on !MMU
         select COLDFIRE_SW_A7
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Freescale (Motorola) Coldfire 5251/5253 processor support.
  
@@ -189,6 +195,7 @@ config M5272
         depends on !MMU
         select COLDFIRE_SW_A7
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5272 processor support.
  
@@ -217,6 +224,7 @@ config M5307
         select COLDFIRE_SW_A7
         select HAVE_CACHE_CB
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5307 processor support.
  
@@ -242,6 +250,7 @@ config M5407
         select COLDFIRE_SW_A7
         select HAVE_CACHE_CB
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Motorola ColdFire 5407 processor support.
  
@@ -251,6 +260,7 @@ config M547x
         select MMU_COLDFIRE if MMU
         select HAVE_CACHE_CB
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support.
  
@@ -260,6 +270,7 @@ config M548x
         select M54xx
         select HAVE_CACHE_CB
         select HAVE_MBAR
+       select CPU_NO_EFFICIENT_FFS
         help
           Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support.
  
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig

index e47a08d..5b7a45d 100644 (file)
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -30,6 +30,7 @@ config METAG
         select OF
         select OF_EARLY_FLATTREE
         select SPARSE_IRQ
+       select CPU_NO_EFFICIENT_FFS
  
  config STACKTRACE_SUPPORT
         def_bool y
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig

index 3d793b5..f17c3a4 100644 (file)
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -32,6 +32,7 @@ config MICROBLAZE
         select OF_EARLY_FLATTREE
         select TRACING_SUPPORT
         select VIRT_TO_BUS
+       select CPU_NO_EFFICIENT_FFS
  
  config SWAP
         def_bool n
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h

index e6f19fc..e961c8a 100644 (file)
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -204,6 +204,16 @@
  #endif
  #endif
  
+/* __builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r */
+#if !((defined(cpu_has_mips32r1) && cpu_has_mips32r1) || \
+         (defined(cpu_has_mips32r2) && cpu_has_mips32r2) || \
+         (defined(cpu_has_mips32r6) && cpu_has_mips32r6) || \
+         (defined(cpu_has_mips64r1) && cpu_has_mips64r1) || \
+         (defined(cpu_has_mips64r2) && cpu_has_mips64r2) || \
+         (defined(cpu_has_mips64r6) && cpu_has_mips64r6))
+#define CPU_NO_EFFICIENT_FFS 1
+#endif
+
  #ifndef cpu_has_mips_1
  # define cpu_has_mips_1                (!cpu_has_mips_r6)
  #endif
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig

index 87ca653..51a56c8 100644 (file)
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -15,6 +15,7 @@ config NIOS2
         select SOC_BUS
         select SPARSE_IRQ
         select USB_ARCH_HAS_HCD if USB_SUPPORT
+       select CPU_NO_EFFICIENT_FFS
  
  config GENERIC_CSUM
         def_bool y
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig

index e118c02..142cb05 100644 (file)
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -25,6 +25,7 @@ config OPENRISC
         select MODULES_USE_ELF_RELA
         select HAVE_DEBUG_STACKOVERFLOW
         select OR1K_PIC
+       select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
  
  config MMU
         def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig

index 88cfaa8..3d498a6 100644 (file)
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -32,6 +32,7 @@ config PARISC
         select HAVE_ARCH_AUDITSYSCALL
         select HAVE_ARCH_SECCOMP_FILTER
         select ARCH_NO_COHERENT_DMA_MMAP
+       select CPU_NO_EFFICIENT_FFS
  
         help
           The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig

index 1c3c43d..a8c2590 100644 (file)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@ config S390
         select HAVE_ARCH_AUDITSYSCALL
         select HAVE_ARCH_EARLY_PFN_TO_NID
         select HAVE_ARCH_JUMP_LABEL
+       select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
         select HAVE_ARCH_SECCOMP_FILTER
         select HAVE_ARCH_SOFT_DIRTY
         select HAVE_ARCH_TRACEHOOK
diff --git a/arch/score/Kconfig b/arch/score/Kconfig

index 366e1b5..507d631 100644 (file)
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -14,6 +14,7 @@ config SCORE
         select VIRT_TO_BUS
         select MODULES_USE_ELF_REL
         select CLONE_BACKWARDS
+       select CPU_NO_EFFICIENT_FFS
  
  choice
         prompt "System type"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig

index f625434..e803a83 100644 (file)
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -20,6 +20,7 @@ config SUPERH
         select PERF_USE_VMALLOC
         select HAVE_DEBUG_KMEMLEAK
         select HAVE_KERNEL_GZIP
+       select CPU_NO_EFFICIENT_FFS
         select HAVE_KERNEL_BZIP2
         select HAVE_KERNEL_LZMA
         select HAVE_KERNEL_XZ
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig

index 1012f7f..546293d 100644 (file)
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
         select ODD_RT_SIGACTION
         select OLD_SIGSUSPEND
         select ARCH_HAS_SG_CHAIN
+       select CPU_NO_EFFICIENT_FFS
  
  config SPARC32
         def_bool !64BIT
diff --git a/lib/gcd.c b/lib/gcd.c

index 3657f12..135ee64 100644 (file)
--- a/lib/gcd.c
+++ b/lib/gcd.c
@@ -2,20 +2,77 @@
  #include <linux/gcd.h>
  #include <linux/export.h>
  
-/* Greatest common divisor */
+/*
+ * This implements the binary GCD algorithm. (Often attributed to Stein,
+ * but as Knuth has noted, appears in a first-century Chinese math text.)
+ *
+ * This is faster than the division-based algorithm even on x86, which
+ * has decent hardware division.
+ */
+
+#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS)
+
+/* If __ffs is available, the even/odd algorithm benchmarks slower. */
  unsigned long gcd(unsigned long a, unsigned long b)
  {
-       unsigned long r;
+       unsigned long r = a | b;
+
+       if (!a || !b)
+               return r;
  
-       if (a < b)
-               swap(a, b);
+       b >>= __ffs(b);
+       if (b == 1)
+               return r & -r;
  
-       if (!b)
-               return a;
-       while ((r = a % b) != 0) {
-               a = b;
-               b = r;
+       for (;;) {
+               a >>= __ffs(a);
+               if (a == 1)
+                       return r & -r;
+               if (a == b)
+                       return a << __ffs(r);
+
+               if (a < b)
+                       swap(a, b);
+               a -= b;
         }
-       return b;
  }
+
+#else
+
+/* If normalization is done by loops, the even/odd algorithm is a win. */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+       unsigned long r = a | b;
+
+       if (!a || !b)
+               return r;
+
+       /* Isolate lsbit of r */
+       r &= -r;
+
+       while (!(b & r))
+               b >>= 1;
+       if (b == r)
+               return r;
+
+       for (;;) {
+               while (!(a & r))
+                       a >>= 1;
+               if (a == r)
+                       return r;
+               if (a == b)
+                       return a;
+
+               if (a < b)
+                       swap(a, b);
+               a -= b;
+               a >>= 1;
+               if (a & r)
+                       a += b;
+               a >>= 1;
+       }
+}
+
+#endif
+
  EXPORT_SYMBOL_GPL(gcd);
author	Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
	Sat, 21 May 2016 00:03:57 +0000 (17:03 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
arch/Kconfig		patch | blob | history
arch/alpha/Kconfig		patch | blob | history
arch/arc/Kconfig		patch | blob | history
arch/arm/mm/Kconfig		patch | blob | history
arch/h8300/Kconfig		patch | blob | history
arch/m32r/Kconfig		patch | blob | history
arch/m68k/Kconfig.cpu		patch | blob | history
arch/metag/Kconfig		patch | blob | history
arch/microblaze/Kconfig		patch | blob | history
arch/mips/include/asm/cpu-features.h		patch | blob | history
arch/nios2/Kconfig		patch | blob | history
arch/openrisc/Kconfig		patch | blob | history
arch/parisc/Kconfig		patch | blob | history
arch/s390/Kconfig		patch | blob | history
arch/score/Kconfig		patch | blob | history
arch/sh/Kconfig		patch | blob | history
arch/sparc/Kconfig		patch | blob | history
lib/gcd.c		patch | blob | history