Merge branch 'for-linville' of git://github.com/kvalo/ath
[cascardo/linux.git] / arch / arm / lib / csumpartialcopygeneric.S
1 /*
2  *  linux/arch/arm/lib/csumpartialcopygeneric.S
3  *
4  *  Copyright (C) 1995-2001 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <asm/assembler.h>
11
12 /*
13  * unsigned int
14  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...)
15  *  r0 = src, r1 = dst, r2 = len, r3 = sum
16  *  Returns : r0 = checksum
17  *
18  * Note that 'tst' and 'teq' preserve the carry flag.
19  */
20
21 src     .req    r0
22 dst     .req    r1
23 len     .req    r2
24 sum     .req    r3
25
26 .Lzero:         mov     r0, sum                 @ len == 0: hand back the incoming sum unchanged
27                 load_regs                       @ NOTE(review): macro supplied by the including file, presumably restores registers and returns - confirm
28
29                 /*
30                  * Align an unaligned destination pointer.  We know that
31                  * we have >= 8 bytes here, so we don't need to check
32                  * the length.  Note that the source pointer hasn't been
33                  * aligned yet.
34                  */
35 .Ldst_unaligned:                        @ reached with blne from the main path, returns through lr
36                 tst     dst, #1                 @ is dst on an odd address?
37                 beq     .Ldst_16bit             @ no: one two-byte copy will align it
38
39                 load1b  ip                      @ NOTE(review): macro from the including file, presumably ip = next src byte - confirm
40                 sub     len, len, #1            @ non-flag-setting, so C survives for the adcs chain
41                 adcs    sum, sum, ip, put_byte_1        @ update checksum
42                 strb    ip, [dst], #1           @ store the byte and advance dst
43                 tst     dst, #2                 @ Z set when dst has reached a 32-bit boundary
44                 reteq   lr                      @ dst is now 32bit aligned
45
46 .Ldst_16bit:    load2b  r8, ip                  @ NOTE(review): presumably r8, ip = next two src bytes - confirm
47                 sub     len, len, #2            @ again a plain sub to keep C intact
48                 adcs    sum, sum, r8, put_byte_0        @ add the first byte of the pair
49                 strb    r8, [dst], #1
50                 adcs    sum, sum, ip, put_byte_1        @ add the second byte of the pair
51                 strb    ip, [dst], #1
52                 ret     lr                      @ dst is now 32bit aligned
53
54                 /*
55                  * Handle 0 to 7 bytes, with any alignment of source and
56                  * destination pointers.  Note that when we get here, C = 0
57                  */
58 .Lless8:        teq     len, #0                 @ check for zero count
59                 beq     .Lzero                  @ nothing to copy: return sum as is
60
61                 /* we must have at least one byte. */
62                 tst     dst, #1                 @ dst 16-bit aligned
63                 beq     .Lless8_aligned         @ yes: go straight to the two-byte loop test
64
65                 /* Align dst */
66                 load1b  ip                      @ NOTE(review): macro from the including file, presumably ip = next src byte - confirm
67                 sub     len, len, #1            @ plain sub, C is left for the adcs below
68                 adcs    sum, sum, ip, put_byte_1        @ update checksum
69                 strb    ip, [dst], #1           @ store the byte and advance dst
70                 tst     len, #6                 @ at least two bytes remaining?
71                 beq     .Lless8_byteonly        @ no: at most one byte is left
72
73 1:              load2b  r8, ip                  @ copy two bytes per iteration
74                 sub     len, len, #2            @ plain sub keeps the carry chain going
75                 adcs    sum, sum, r8, put_byte_0
76                 strb    r8, [dst], #1
77                 adcs    sum, sum, ip, put_byte_1
78                 strb    ip, [dst], #1
79 .Lless8_aligned:
80                 tst     len, #6                 @ loop while bits 1-2 of len are set (len < 8 here)
81                 bne     1b
82 .Lless8_byteonly:
83                 tst     len, #1                 @ one odd byte remaining?
84                 beq     .Ldone                  @ no: finish, C is folded in at .Ldone
85                 load1b  r8
86                 adcs    sum, sum, r8, put_byte_0        @ update checksum
87                 strb    r8, [dst], #1           @ store the final byte
88                 b       .Ldone
89
90 FN_ENTRY
91                 save_regs                       @ NOTE(review): macro from the including file, .Ldone expects the original dst at [sp, #0] - confirm
92
93                 cmp     len, #8                 @ Ensure that we have at least
94                 blo     .Lless8                 @ 8 bytes to copy.
95
96                 adds    sum, sum, #0            @ C = 0
97                 tst     dst, #3                 @ Test destination alignment
98                 blne    .Ldst_unaligned         @ align destination, return here
99
100                 /*
101                  * Ok, the dst pointer is now 32bit aligned, and we know
102                  * that we must have more than 4 bytes to copy.  Note
103                  * that C contains the carry from the dst alignment above.
104                  */
105
106                 tst     src, #3                 @ Test source alignment
107                 bne     .Lsrc_not_aligned       @ src is misaligned relative to dst: use the shifting copies
108
109                 /* Routine for src & dst aligned */
110
111                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
112                 beq     2f                      @ fewer than 16 bytes left: skip the block loop
113
114 1:              load4l  r4, r5, r6, r7          @ NOTE(review): macro from the including file, presumably loads four words from src - confirm
115                 stmia   dst!, {r4, r5, r6, r7}  @ store 16 bytes and advance dst
116                 adcs    sum, sum, r4            @ accumulate each word, chaining the carry
117                 adcs    sum, sum, r5
118                 adcs    sum, sum, r6
119                 adcs    sum, sum, r7
120                 sub     ip, ip, #16             @ count down without touching the flags
121                 teq     ip, #0                  @ teq leaves C alone for the next adcs
122                 bne     1b
123
124 2:              ands    ip, len, #12            @ ip = bits 2-3 of len: 0, 4, 8 or 12 bytes of whole words remain
125                 beq     4f                      @ no whole words remain
126                 tst     ip, #8
127                 beq     3f                      @ only a single word remains
128                 load2l  r4, r5                  @ NOTE(review): presumably loads two words from src - confirm
129                 stmia   dst!, {r4, r5}
130                 adcs    sum, sum, r4
131                 adcs    sum, sum, r5
132                 tst     ip, #4                  @ one more word after the pair?
133                 beq     4f
134
135 3:              load1l  r4                      @ NOTE(review): presumably loads one word from src - confirm
136                 str     r4, [dst], #4
137                 adcs    sum, sum, r4
138
139 4:              ands    len, len, #3            @ len = trailing byte count, 0 to 3
140                 beq     .Ldone
141                 load1l  r4                      @ fetch one more word, only 1 to 3 of its bytes are used
142                 tst     len, #2
143                 mov     r5, r4, get_byte_0      @ r5 = first trailing byte, mov keeps the flags from tst
144                 beq     .Lexit                  @ exactly one byte left
145                 adcs    sum, sum, r4, lspush #16        @ NOTE(review): lspush is defined elsewhere, presumably this adds only the first two bytes - confirm
146                 strb    r5, [dst], #1           @ store the first trailing byte
147                 mov     r5, r4, get_byte_1
148                 strb    r5, [dst], #1           @ store the second trailing byte
149                 mov     r5, r4, get_byte_2      @ r5 = third byte in case len is 3
150 .Lexit:         tst     len, #1                 @ odd byte pending in r5?
151                 strneb  r5, [dst], #1           @ if so store it
152                 andne   r5, r5, #255            @ keep only that byte for the checksum
153                 adcnes  sum, sum, r5, put_byte_0        @ and add it in
154
155                 /*
156                  * If the dst pointer was not 16-bit aligned, we
157                  * need to rotate the checksum here to get around
158                  * the inefficient byte manipulations in the
159                  * architecture independent code.
160                  */
161 .Ldone:         adc     r0, sum, #0             @ fold the last carry into the result
162                 ldr     sum, [sp, #0]           @ dst
163                 tst     sum, #1                 @ was the original dst odd?
164                 movne   r0, r0, ror #8          @ yes: rotate the result by one byte
165                 load_regs                       @ NOTE(review): macro from the including file, presumably restores registers and returns - confirm
166
167 .Lsrc_not_aligned:
168                 adc     sum, sum, #0            @ include C from dst alignment
169                 and     ip, src, #3             @ ip = byte offset of src within its word, 1 to 3 here
170                 bic     src, src, #3            @ round src down to a word boundary
171                 load1l  r5                      @ NOTE(review): macro from the including file, presumably r5 = word at src with src advanced - confirm
172                 cmp     ip, #2                  @ dispatch on the offset
173                 beq     .Lsrc2_aligned          @ offset 2
174                 bhi     .Lsrc3_aligned          @ offset 3, offset 1 falls through
175                 mov     r4, r5, lspull #8               @ C = 0
176                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
177                 beq     2f                      @ fewer than 16 bytes: skip the block loop
178 1:              load4l  r5, r6, r7, r8          @ r4 carries the bytes left over from the previous word
179                 orr     r4, r4, r5, lspush #24  @ NOTE(review): lspull and lspush are defined elsewhere, presumably endian-aware shifts - confirm
180                 mov     r5, r5, lspull #8       @ rest of r5 starts the next output word
181                 orr     r5, r5, r6, lspush #24
182                 mov     r6, r6, lspull #8
183                 orr     r6, r6, r7, lspush #24
184                 mov     r7, r7, lspull #8
185                 orr     r7, r7, r8, lspush #24
186                 stmia   dst!, {r4, r5, r6, r7}  @ store four realigned words
187                 adcs    sum, sum, r4            @ checksum the words exactly as stored
188                 adcs    sum, sum, r5
189                 adcs    sum, sum, r6
190                 adcs    sum, sum, r7
191                 mov     r4, r8, lspull #8       @ leftover of r8 becomes the pending bytes
192                 sub     ip, ip, #16             @ plain sub and teq keep C for the next adcs
193                 teq     ip, #0
194                 bne     1b
195 2:              ands    ip, len, #12            @ 0, 4, 8 or 12 bytes of whole words remain
196                 beq     4f
197                 tst     ip, #8
198                 beq     3f                      @ only a single word remains
199                 load2l  r5, r6
200                 orr     r4, r4, r5, lspush #24
201                 mov     r5, r5, lspull #8
202                 orr     r5, r5, r6, lspush #24
203                 stmia   dst!, {r4, r5}          @ store two realigned words
204                 adcs    sum, sum, r4
205                 adcs    sum, sum, r5
206                 mov     r4, r6, lspull #8       @ keep the leftover of r6 pending
207                 tst     ip, #4                  @ one more word after the pair?
208                 beq     4f
209 3:              load1l  r5
210                 orr     r4, r4, r5, lspush #24
211                 str     r4, [dst], #4
212                 adcs    sum, sum, r4
213                 mov     r4, r5, lspull #8
214 4:              ands    len, len, #3            @ len = trailing byte count, 0 to 3
215                 beq     .Ldone
216                 mov     r5, r4, get_byte_0      @ trailing bytes are taken from r4, no further load on this path
217                 tst     len, #2
218                 beq     .Lexit                  @ exactly one byte left, it is in r5
219                 adcs    sum, sum, r4, lspush #16        @ NOTE(review): presumably adds only the first two pending bytes - confirm
220                 strb    r5, [dst], #1
221                 mov     r5, r4, get_byte_1
222                 strb    r5, [dst], #1
223                 mov     r5, r4, get_byte_2      @ r5 = third byte in case len is 3
224                 b       .Lexit
225
226 .Lsrc2_aligned: mov     r4, r5, lspull #16      @ source offset 2: r4 = leftover of the first word (lspull is defined elsewhere - confirm)
227                 adds    sum, sum, #0            @ adding zero cannot carry, so this clears C
228                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
229                 beq     2f                      @ fewer than 16 bytes: skip the block loop
230 1:              load4l  r5, r6, r7, r8          @ NOTE(review): macro from the including file, presumably loads four words from src - confirm
231                 orr     r4, r4, r5, lspush #16  @ pending half plus the leading half of the next word
232                 mov     r5, r5, lspull #16      @ other half of r5 starts the next output word
233                 orr     r5, r5, r6, lspush #16
234                 mov     r6, r6, lspull #16
235                 orr     r6, r6, r7, lspush #16
236                 mov     r7, r7, lspull #16
237                 orr     r7, r7, r8, lspush #16
238                 stmia   dst!, {r4, r5, r6, r7}  @ store four realigned words
239                 adcs    sum, sum, r4            @ checksum the words exactly as stored
240                 adcs    sum, sum, r5
241                 adcs    sum, sum, r6
242                 adcs    sum, sum, r7
243                 mov     r4, r8, lspull #16      @ leftover of r8 becomes the pending half
244                 sub     ip, ip, #16             @ plain sub and teq keep C for the next adcs
245                 teq     ip, #0
246                 bne     1b
247 2:              ands    ip, len, #12            @ 0, 4, 8 or 12 bytes of whole words remain
248                 beq     4f
249                 tst     ip, #8
250                 beq     3f                      @ only a single word remains
251                 load2l  r5, r6
252                 orr     r4, r4, r5, lspush #16
253                 mov     r5, r5, lspull #16
254                 orr     r5, r5, r6, lspush #16
255                 stmia   dst!, {r4, r5}          @ store two realigned words
256                 adcs    sum, sum, r4
257                 adcs    sum, sum, r5
258                 mov     r4, r6, lspull #16
259                 tst     ip, #4                  @ one more word after the pair?
260                 beq     4f
261 3:              load1l  r5
262                 orr     r4, r4, r5, lspush #16
263                 str     r4, [dst], #4
264                 adcs    sum, sum, r4
265                 mov     r4, r5, lspull #16
266 4:              ands    len, len, #3            @ len = trailing byte count, 0 to 3
267                 beq     .Ldone
268                 mov     r5, r4, get_byte_0      @ r5 = first pending byte
269                 tst     len, #2
270                 beq     .Lexit                  @ exactly one byte left, it is in r5
271                 adcs    sum, sum, r4            @ add the pending r4 to the sum unshifted
272                 strb    r5, [dst], #1
273                 mov     r5, r4, get_byte_1
274                 strb    r5, [dst], #1
275                 tst     len, #1                 @ a third byte wanted?
276                 beq     .Ldone
277                 load1b  r5                      @ the third byte is not in r4, fetch it from src
278                 b       .Lexit                  @ .Lexit stores r5 and adds it to the sum
279
280 .Lsrc3_aligned: mov     r4, r5, lspull #24      @ source offset 3: r4 = leftover of the first word (lspull is defined elsewhere - confirm)
281                 adds    sum, sum, #0            @ adding zero cannot carry, so this clears C
282                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
283                 beq     2f                      @ fewer than 16 bytes: skip the block loop
284 1:              load4l  r5, r6, r7, r8          @ NOTE(review): macro from the including file, presumably loads four words from src - confirm
285                 orr     r4, r4, r5, lspush #8   @ pending part plus the leading part of the next word
286                 mov     r5, r5, lspull #24      @ leftover of r5 starts the next output word
287                 orr     r5, r5, r6, lspush #8
288                 mov     r6, r6, lspull #24
289                 orr     r6, r6, r7, lspush #8
290                 mov     r7, r7, lspull #24
291                 orr     r7, r7, r8, lspush #8
292                 stmia   dst!, {r4, r5, r6, r7}  @ store four realigned words
293                 adcs    sum, sum, r4            @ checksum the words exactly as stored
294                 adcs    sum, sum, r5
295                 adcs    sum, sum, r6
296                 adcs    sum, sum, r7
297                 mov     r4, r8, lspull #24      @ leftover of r8 becomes the pending part
298                 sub     ip, ip, #16             @ plain sub and teq keep C for the next adcs
299                 teq     ip, #0
300                 bne     1b
301 2:              ands    ip, len, #12            @ 0, 4, 8 or 12 bytes of whole words remain
302                 beq     4f
303                 tst     ip, #8
304                 beq     3f                      @ only a single word remains
305                 load2l  r5, r6
306                 orr     r4, r4, r5, lspush #8
307                 mov     r5, r5, lspull #24
308                 orr     r5, r5, r6, lspush #8
309                 stmia   dst!, {r4, r5}          @ store two realigned words
310                 adcs    sum, sum, r4
311                 adcs    sum, sum, r5
312                 mov     r4, r6, lspull #24
313                 tst     ip, #4                  @ one more word after the pair?
314                 beq     4f
315 3:              load1l  r5
316                 orr     r4, r4, r5, lspush #8
317                 str     r4, [dst], #4
318                 adcs    sum, sum, r4
319                 mov     r4, r5, lspull #24
320 4:              ands    len, len, #3            @ len = trailing byte count, 0 to 3
321                 beq     .Ldone
322                 mov     r5, r4, get_byte_0      @ r5 = first pending byte
323                 tst     len, #2
324                 beq     .Lexit                  @ exactly one byte left, it is in r5
325                 strb    r5, [dst], #1           @ store the pending byte
326                 adcs    sum, sum, r4            @ and add the pending r4 to the sum
327                 load1l  r4                      @ the remaining bytes come from a freshly loaded word
328                 mov     r5, r4, get_byte_0
329                 strb    r5, [dst], #1           @ store the second trailing byte
330                 adcs    sum, sum, r4, lspush #24        @ NOTE(review): presumably adds just that byte, moved to the other byte lane - confirm
331                 mov     r5, r4, get_byte_1      @ r5 = third byte in case len is 3
332                 b       .Lexit
333 FN_EXIT