arch/arm/lib/div64.S

   1 /*
   2  *  linux/arch/arm/lib/div64.S
   3  *
   4  *  Optimized computation of 64-bit dividend / 32-bit divisor
   5  *
   6  *  Author:     Nicolas Pitre
   7  *  Created:    Oct 5, 2003
   8  *  Copyright:  Monta Vista Software, Inc.
   9  *
  10  *  This program is free software; you can redistribute it and/or modify
  11  *  it under the terms of the GNU General Public License version 2 as
  12  *  published by the Free Software Foundation.
  13  */
  14
  15 #include <linux/linkage.h>
  16 #include <asm/assembler.h>
  17 #include <asm/unwind.h>
  18 #include <asm/export.h>
  19
  20 #ifdef __ARMEB__        /* __ARMEB__ defined: upper words are in r0 and r2 */
  21 #define xh r0           /* xh-xl: 64-bit dividend pair, xh = upper 32 bits */
  22 #define xl r1
  23 #define yh r2           /* yh-yl: 64-bit result pair, yh = upper 32 bits */
  24 #define yl r3
  25 #else                   /* otherwise: lower words are in r0 and r2 */
  26 #define xl r0
  27 #define xh r1
  28 #define yl r2
  29 #define yh r3
  30 #endif
  31
  32 /*
  33  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  34  *
  35  * Note: Calling convention is totally non standard for optimal code.
  36  *       This is meant to be used by do_div() from include/asm/div64.h only.
  37  *
  38  * Input parameters:
  39  *      xh-xl   = dividend (clobbered)
  40  *      r4      = divisor (preserved)
  41  *
  42  * Output values:
  43  *      yh-yl   = result
  44  *      xh      = remainder
  45  *
  46  * Clobbered regs: xl, ip (the condition flags are modified as well)
  47  */
  48
  49 ENTRY(__do_div64)
  50 UNWIND(.fnstart)
  51
  52         @ Test for easy paths first.
  53         subs    ip, r4, #1              @ ip = divisor - 1, flags feed the bls
  54         bls     9f                      @ divisor is 0 or 1
  55         tst     ip, r4                  @ divisor & (divisor - 1)
  56         beq     8f                      @ divisor is power of 2
  57
  58         @ See if we need to handle upper 32-bit result.
  59         cmp     xh, r4
  60         mov     yh, #0                  @ upper result word starts at 0
  61         blo     3f                      @ xh < divisor: upper result stays 0
  62
  63         @ Align divisor with upper part of dividend.
  64         @ The aligned divisor is stored in yl preserving the original.
  65         @ The bit position is stored in ip.
  66
  67 #if __LINUX_ARM_ARCH__ >= 5
  68
  69         clz     yl, r4
  70         clz     ip, xh
  71         sub     yl, yl, ip              @ yl = clz(divisor) - clz(xh) = shift
  72         mov     ip, #1
  73         mov     ip, ip, lsl yl          @ ip = 1 << shift (result bit position)
  74         mov     yl, r4, lsl yl          @ yl = divisor << shift
  75
  76 #else
  77
  78         mov     yl, r4
  79         mov     ip, #1
  80 1:      cmp     yl, #0x80000000         @ stop once bit 31 of yl is set...
  81         cmpcc   yl, xh                  @ ...or once yl >= xh
  82         movcc   yl, yl, lsl #1          @ otherwise shift the divisor copy
  83         movcc   ip, ip, lsl #1          @ and the bit position together
  84         bcc     1b
  85
  86 #endif
  87
  88         @ The division loop for needed upper bit positions.
  89         @ Break out early if dividend reaches 0.
  90 2:      cmp     xh, yl
  91         orrcs   yh, yh, ip              @ xh >= yl: set this result bit
  92         subcss  xh, xh, yl              @ and subtract; Z set if xh is now 0
  93         movnes  ip, ip, lsr #1          @ xh != 0: next bit, Z set if none left
  94         mov     yl, yl, lsr #1          @ halve the aligned divisor, flags kept
  95         bne     2b
  96
  97         @ See if we need to handle lower 32-bit result.
  98 3:      cmp     xh, #0
  99         mov     yl, #0                  @ lower result word starts at 0
 100         cmpeq   xl, r4                  @ only when xh == 0: xl against divisor
 101         movlo   xh, xl                  @ xh == 0 and xl < divisor: xl is the
 102         retlo   lr                      @ remainder, return right away
 103
 104         @ The division loop for lower bit positions.
 105         @ Here we shift remainder bits leftwards rather than moving the
 106         @ divisor for comparisons, considering the carry-out bit as well.
 107         mov     ip, #0x80000000         @ ip = current result bit, from bit 31
 108 4:      movs    xl, xl, lsl #1          @ shift xh:xl left by one bit, the
 109         adcs    xh, xh, xh              @ carry links both words; C = bit out
 110         beq     6f                      @ xh became 0: see label 6
 111         cmpcc   xh, r4                  @ no carry-out: compare with divisor
 112 5:      orrcs   yl, yl, ip              @ carry-out or xh >= divisor: set bit
 113         subcs   xh, xh, r4
 114         movs    ip, ip, lsr #1          @ next bit position, Z set after bit 0
 115         bne     4b
 116         ret     lr
 117
 118         @ The top part of remainder became zero.  If carry is set
 119         @ (the 33rd bit) this is a false positive so resume the loop.
 120         @ Otherwise, if lower part is also null then we are done.
 121 6:      bcs     5b                      @ carry set: go set the result bit
 122         cmp     xl, #0
 123         reteq   lr
 124
 125         @ We still have remainder bits in the low part.  Bring them up.
 126
 127 #if __LINUX_ARM_ARCH__ >= 5
 128
 129         clz     xh, xl                  @ we know xh is zero here so...
 130         add     xh, xh, #1              @ count that shifts the top set bit out
 131         mov     xl, xl, lsl xh
 132         mov     ip, ip, lsr xh          @ skip as many result bit positions
 133
 134 #else
 135
 136 7:      movs    xl, xl, lsl #1
 137         mov     ip, ip, lsr #1
 138         bcc     7b                      @ until a set bit is shifted out of xl
 139
 140 #endif
 141
 142         @ Current remainder is now 1.  It is worthless to compare with
 143         @ divisor at this point since divisor can not be smaller than 3 here.
 144         @ If possible, branch for another shift in the division loop.
 145         @ If no bit position left then we are done.
 146         movs    ip, ip, lsr #1
 147         mov     xh, #1
 148         bne     4b
 149         ret     lr
 150
 151 8:      @ Division by a power of 2: determine what that divisor order is
 152         @ then simply shift values around
 153
 154 #if __LINUX_ARM_ARCH__ >= 5
 155
 156         clz     ip, r4
 157         rsb     ip, ip, #31             @ ip = 31 - clz(divisor) = order
 158
 159 #else
 160
 161         mov     yl, r4                  @ yl = scratch copy, narrowed below
 162         cmp     r4, #(1 << 16)
 163         mov     ip, #0                  @ ip accumulates the order
 164         movhs   yl, yl, lsr #16
 165         movhs   ip, #16
 166
 167         cmp     yl, #(1 << 8)
 168         movhs   yl, yl, lsr #8
 169         addhs   ip, ip, #8
 170
 171         cmp     yl, #(1 << 4)
 172         movhs   yl, yl, lsr #4
 173         addhs   ip, ip, #4
 174
 175         cmp     yl, #(1 << 2)
 176         addhi   ip, ip, #3              @ yl is 8 here: three more
 177         addls   ip, ip, yl, lsr #1      @ yl is 1, 2 or 4: add 0, 1 or 2
 178
 179 #endif
 180
 181         mov     yh, xh, lsr ip          @ result = dividend >> order
 182         mov     yl, xl, lsr ip
 183         rsb     ip, ip, #32             @ ip = 32 - order
 184  ARM(   orr     yl, yl, xh, lsl ip      )
 185  THUMB( lsl     xh, xh, ip              )
 186  THUMB( orr     yl, yl, xh              )
 187         mov     xh, xl, lsl ip          @ keep only the low order bits of xl:
 188         mov     xh, xh, lsr ip          @ that is the remainder
 189         ret     lr
 190
 191         @ eq -> division by 1: obvious enough...
 192 9:      moveq   yl, xl
 193         moveq   yh, xh
 194         moveq   xh, #0
 195         reteq   lr                      @ ne (divisor is 0): go on to Ldiv0_64
 196 UNWIND(.fnend)
 197
 198 UNWIND(.fnstart)
 199 UNWIND(.pad #4)
 200 UNWIND(.save {lr})
 201 Ldiv0_64:
 202         @ Division by 0:
 203         str     lr, [sp, #-8]!          @ save lr, moving sp down by 8
 204         bl      __div0                  @ defined outside this file
 205
 206         @ as wrong as it could be...
 207         mov     yl, #0                  @ result = 0
 208         mov     yh, #0
 209         mov     xh, #0                  @ remainder = 0
 210         ldr     pc, [sp], #8            @ return via saved lr, sp back up by 8
 211
 212 UNWIND(.fnend)
 213 ENDPROC(__do_div64)
 214 EXPORT_SYMBOL(__do_div64)