arch/powerpc/lib/copyuser_64.S
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld         /* Shift towards low-numbered address. */
#define sHd srd         /* Shift towards high-numbered address. */
#else
#define sLd srd         /* Shift towards low-numbered address. */
#define sHd sld         /* Shift towards high-numbered address. */
#endif
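/*
 * Note: the unaligned-source code below assembles each destination
 * doubleword from a pair of aligned source doublewords using these
 * macros.  "Towards the low-numbered address" is a left shift when the
 * register holds big-endian byte order but a right shift for
 * little-endian, hence the swapped definitions above.
 */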

        .align  7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        b       __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
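/*
 * C-level contract (a sketch of how callers use this, not new ABI):
 * __copy_tofrom_user(to, from, n) returns the number of bytes NOT
 * copied, so 0 means complete success.  The feature section above is
 * patched at boot: CPUs with CPU_FTR_VMX_COPY branch to the VMX-enabled
 * __copy_tofrom_user_power7; everything else falls through to the base
 * implementation below.
 */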
_GLOBAL(__copy_tofrom_user_base)
        /* first check for a whole page copy on a page boundary */
        cmpldi  cr1,r5,16
        cmpdi   cr6,r5,4096
        or      r0,r3,r4
        neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
        andi.   r0,r0,4095
        std     r3,-24(r1)
        crand   cr0*4+2,cr0*4+2,cr6*4+2
        std     r4,-16(r1)
        std     r5,-8(r1)
        dcbt    0,r4
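/*
 * The original dest, src and count were just saved in the stack red zone
 * at -24(r1), -16(r1) and -8(r1); the exception handlers at the end of
 * this file reload them to work out how many bytes were left uncopied.
 */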
        beq     .Lcopy_page_4K
        andi.   r6,r6,7
        PPC_MTOCRF(0x01,r5)
        blt     cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing, the only CPU with that combination of feature
 * bits is POWER6.
 */
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        bne     .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
        addi    r3,r3,-16
BEGIN_FTR_SECTION
        andi.   r0,r4,7
        bne     .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
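/*
 * On CPUs with CPU_FTR_UNALIGNED_LD_STD the source-alignment test above
 * is patched out and unaligned sources take the doubleword loop below
 * anyway.  The loop is software-pipelined: it moves 32 bytes per
 * iteration, with each iteration's stores using data loaded earlier so
 * the loads stay ahead of the stores that consume them.
 */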
        blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
        srdi    r0,r5,5
        cmpdi   cr1,r0,0
20:     ld      r7,0(r4)
220:    ld      r6,8(r4)
        addi    r4,r4,16
        mtctr   r0
        andi.   r0,r5,0x10
        beq     22f
        addi    r3,r3,16
        addi    r4,r4,-16
        mr      r9,r7
        mr      r8,r6
        beq     cr1,72f
21:     ld      r7,16(r4)
221:    ld      r6,24(r4)
        addi    r4,r4,32
70:     std     r9,0(r3)
270:    std     r8,8(r3)
22:     ld      r9,0(r4)
222:    ld      r8,8(r4)
71:     std     r7,16(r3)
271:    std     r6,24(r3)
        addi    r3,r3,32
        bdnz    21b
72:     std     r9,0(r3)
272:    std     r8,8(r3)
        andi.   r5,r5,0xf
        beq+    3f
        addi    r4,r4,16
.Ldo_tail:
        addi    r3,r3,16
        bf      cr7*4+0,246f
244:    ld      r9,0(r4)
        addi    r4,r4,8
245:    std     r9,0(r3)
        addi    r3,r3,8
246:    bf      cr7*4+1,1f
23:     lwz     r9,0(r4)
        addi    r4,r4,4
73:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
44:     lhz     r9,0(r4)
        addi    r4,r4,2
74:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
45:     lbz     r9,0(r4)
75:     stb     r9,0(r3)
3:      li      r3,0
        blr

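/*
 * Unaligned-source technique (derived from the code below): round the
 * source pointer down to an 8-byte boundary, then rebuild each output
 * doubleword from two adjacent aligned doublewords.  With off = src & 7,
 * r10 = 8*off and r11 = 64 - r10, the merge step is roughly (C-style
 * sketch, illustration only):
 *
 *      dst_dword = sLd(a, r10) | sHd(b, r11);
 *
 * where a is the earlier aligned source doubleword, b the next one, and
 * sLd/sHd are the endian-dependent shifts defined above.
 */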
.Lsrc_unaligned:
        srdi    r6,r5,3
        addi    r5,r5,-16
        subf    r4,r0,r4
        srdi    r7,r5,4
        sldi    r10,r0,3
        cmpldi  cr6,r6,3
        andi.   r5,r5,7
        mtctr   r7
        subfic  r11,r10,64
        add     r5,r5,r0
        bt      cr7*4+0,28f

24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
25:     ld      r0,8(r4)
        sLd     r6,r9,r10
26:     ldu     r9,16(r4)
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
27:     ld      r0,8(r4)
        b       2f

28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
29:     ldu     r9,8(r4)
        sLd     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
30:     ld      r0,8(r4)
        sHd     r12,r9,r11
        sLd     r6,r9,r10
31:     ldu     r9,16(r4)
        or      r12,r8,r12
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f

1:      or      r7,r7,r6
32:     ld      r0,8(r4)
76:     std     r12,8(r3)
2:      sHd     r12,r9,r11
        sLd     r6,r9,r10
33:     ldu     r9,16(r4)
        or      r12,r8,r12
77:     stdu    r7,16(r3)
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        bdnz    1b

78:     std     r12,8(r3)
        or      r7,r7,r6
79:     std     r7,16(r3)
5:      sHd     r12,r9,r11
        or      r12,r8,r12
80:     std     r12,24(r3)
        bne     6f
        li      r3,0
        blr
6:      cmpwi   cr1,r5,8
        addi    r3,r3,32
        sLd     r9,r9,r10
        ble     cr1,7f
34:     ld      r0,8(r4)
        sHd     r7,r0,r11
        or      r9,r7,r9
7:
        bf      cr7*4+1,1f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,32
#endif
94:     stw     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,32
#endif
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,16
#endif
95:     sth     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,16
#endif
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,8
#endif
96:     stb     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,8
#endif
3:      li      r3,0
        blr

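/*
 * Destination-alignment fixup: r6 holds the number of bytes (0-7) up to
 * the next 8-byte boundary of the destination.  Copy a byte, halfword
 * and/or word as the low bits of r6 dictate, then rejoin the aligned
 * path with both pointers advanced past the fixup bytes.
 */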
.Ldst_unaligned:
        PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
        subf    r5,r6,r5
        li      r7,0
        cmpldi  cr1,r5,16
        bf      cr7*4+3,1f
35:     lbz     r0,0(r4)
81:     stb     r0,0(r3)
        addi    r7,r7,1
1:      bf      cr7*4+2,2f
36:     lhzx    r0,r7,r4
82:     sthx    r0,r7,r3
        addi    r7,r7,2
2:      bf      cr7*4+1,3f
37:     lwzx    r0,r7,r4
83:     stwx    r0,r7,r3
3:      PPC_MTOCRF(0x01,r5)
        add     r4,r6,r4
        add     r3,r6,r3
        b       .Ldst_aligned

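/*
 * Short copy (< 16 bytes): cr7 already holds the low four bits of the
 * length (from the PPC_MTOCRF above), so each bf below skips the 8-, 4-,
 * 2- or 1-byte chunk that the corresponding length bit selects.
 */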
.Lshort_copy:
        bf      cr7*4+0,1f
38:     lwz     r0,0(r4)
39:     lwz     r9,4(r4)
        addi    r4,r4,8
84:     stw     r0,0(r3)
85:     stw     r9,4(r3)
        addi    r3,r3,8
1:      bf      cr7*4+1,2f
40:     lwz     r0,0(r4)
        addi    r4,r4,4
86:     stw     r0,0(r3)
        addi    r3,r3,4
2:      bf      cr7*4+2,3f
41:     lhz     r0,0(r4)
        addi    r4,r4,2
87:     sth     r0,0(r3)
        addi    r3,r3,2
3:      bf      cr7*4+3,4f
42:     lbz     r0,0(r4)
88:     stb     r0,0(r3)
4:      li      r3,0
        blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

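/*
 * Fixup-label convention (observable in the __ex_table below): the
 * handler for a faulting access labelled N is labelled N+100, e.g. a
 * fault at 20: is fixed up at 120:.  The handlers below mostly just
 * adjust r3 to point at the first byte that was not stored before
 * falling into the common recovery code.
 */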
136:
137:
        add     r3,r3,r7
        b       1f
130:
131:
        addi    r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
        addi    r3,r3,8
132:
        addi    r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:      ld      r6,-24(r1)
        ld      r4,-16(r1)
        ld      r5,-8(r1)
        subf    r6,r6,r3
        add     r4,r4,r6
        subf    r5,r6,r5        /* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
        mtctr   r5
43:     lbz     r0,0(r4)
        addi    r4,r4,1
89:     stb     r0,0(r3)
        addi    r3,r3,1
        bdnz    43b
        li      r3,0            /* huh? all copied successfully this time? */
        blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:    mfctr   r5
        li      r0,0
        mr      r4,r3
        mr      r3,r5           /* return the number of bytes not copied */
1:      andi.   r9,r4,7
        beq     3f
90:     stb     r0,0(r4)
        addic.  r5,r5,-1
        addi    r4,r4,1
        bne     1b
        blr
3:      cmpldi  cr1,r5,8
        srdi    r9,r5,3
        andi.   r5,r5,7
        blt     cr1,93f
        mtctr   r9
91:     std     r0,0(r4)
        addi    r4,r4,8
        bdnz    91b
93:     beqlr
        mtctr   r5
92:     stb     r0,0(r4)
        addi    r4,r4,1
        bdnz    92b
        blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
        add     r3,r3,r7
        b       1f
371:
180:
        addi    r3,r3,8
171:
177:
179:
        addi    r3,r3,8
370:
372:
176:
178:
        addi    r3,r3,4
185:
        addi    r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
        ld      r6,-24(r1)
        ld      r5,-8(r1)
        add     r6,r6,r5
        subf    r3,r3,r6        /* #bytes not copied */
190:
191:
192:
        blr                     /* #bytes not copied in r3 */

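/*
 * Each __ex_table entry pairs the address of a faulting load/store with
 * the address of its fixup handler; entries are two 64-bit values, hence
 * the .align 3.  The generic exception code searches this table when an
 * access to user memory faults.
 */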
        .section __ex_table,"a"
        .align  3
        .llong  20b,120b
        .llong  220b,320b
        .llong  21b,121b
        .llong  221b,321b
        .llong  70b,170b
        .llong  270b,370b
        .llong  22b,122b
        .llong  222b,322b
        .llong  71b,171b
        .llong  271b,371b
        .llong  72b,172b
        .llong  272b,372b
        .llong  244b,344b
        .llong  245b,345b
        .llong  23b,123b
        .llong  73b,173b
        .llong  44b,144b
        .llong  74b,174b
        .llong  45b,145b
        .llong  75b,175b
        .llong  24b,124b
        .llong  25b,125b
        .llong  26b,126b
        .llong  27b,127b
        .llong  28b,128b
        .llong  29b,129b
        .llong  30b,130b
        .llong  31b,131b
        .llong  32b,132b
        .llong  76b,176b
        .llong  33b,133b
        .llong  77b,177b
        .llong  78b,178b
        .llong  79b,179b
        .llong  80b,180b
        .llong  34b,134b
        .llong  94b,194b
        .llong  95b,195b
        .llong  96b,196b
        .llong  35b,135b
        .llong  81b,181b
        .llong  36b,136b
        .llong  82b,182b
        .llong  37b,137b
        .llong  83b,183b
        .llong  38b,138b
        .llong  39b,139b
        .llong  84b,184b
        .llong  85b,185b
        .llong  40b,140b
        .llong  86b,186b
        .llong  41b,141b
        .llong  87b,187b
        .llong  42b,142b
        .llong  88b,188b
        .llong  43b,143b
        .llong  89b,189b
        .llong  90b,190b
        .llong  91b,191b
        .llong  92b,192b

        .text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
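/*
 * The loads in the main loop below are spread across six streams spaced
 * 128 bytes apart (offsets 0, 128, 256, 384, 512 and 640 from r4), which
 * is presumably meant to engage several hardware prefetch streams at
 * once on POWER4-class cores.  r20-r31 are saved in the stack red zone
 * so all twelve can serve as copy registers.
 */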
.Lcopy_page_4K:
        std     r31,-32(1)
        std     r30,-40(1)
        std     r29,-48(1)
        std     r28,-56(1)
        std     r27,-64(1)
        std     r26,-72(1)
        std     r25,-80(1)
        std     r24,-88(1)
        std     r23,-96(1)
        std     r22,-104(1)
        std     r21,-112(1)
        std     r20,-120(1)
        li      r5,4096/32 - 1
        addi    r3,r3,-8
        li      r0,5
0:      addi    r5,r5,-24
        mtctr   r0
20:     ld      r22,640(4)
21:     ld      r21,512(4)
22:     ld      r20,384(4)
23:     ld      r11,256(4)
24:     ld      r9,128(4)
25:     ld      r7,0(4)
26:     ld      r25,648(4)
27:     ld      r24,520(4)
28:     ld      r23,392(4)
29:     ld      r10,264(4)
30:     ld      r8,136(4)
31:     ldu     r6,8(4)
        cmpwi   r5,24
1:
32:     std     r22,648(3)
33:     std     r21,520(3)
34:     std     r20,392(3)
35:     std     r11,264(3)
36:     std     r9,136(3)
37:     std     r7,8(3)
38:     ld      r28,648(4)
39:     ld      r27,520(4)
40:     ld      r26,392(4)
41:     ld      r31,264(4)
42:     ld      r30,136(4)
43:     ld      r29,8(4)
44:     std     r25,656(3)
45:     std     r24,528(3)
46:     std     r23,400(3)
47:     std     r10,272(3)
48:     std     r8,144(3)
49:     std     r6,16(3)
50:     ld      r22,656(4)
51:     ld      r21,528(4)
52:     ld      r20,400(4)
53:     ld      r11,272(4)
54:     ld      r9,144(4)
55:     ld      r7,16(4)
56:     std     r28,664(3)
57:     std     r27,536(3)
58:     std     r26,408(3)
59:     std     r31,280(3)
60:     std     r30,152(3)
61:     stdu    r29,24(3)
62:     ld      r25,664(4)
63:     ld      r24,536(4)
64:     ld      r23,408(4)
65:     ld      r10,280(4)
66:     ld      r8,152(4)
67:     ldu     r6,24(4)
        bdnz    1b
68:     std     r22,648(3)
69:     std     r21,520(3)
70:     std     r20,392(3)
71:     std     r11,264(3)
72:     std     r9,136(3)
73:     std     r7,8(3)
74:     addi    r4,r4,640
75:     addi    r3,r3,648
        bge     0b
        mtctr   r5
76:     ld      r7,0(4)
77:     ld      r8,8(4)
78:     ldu     r9,16(4)
3:
79:     ld      r10,8(4)
80:     std     r7,8(3)
81:     ld      r7,16(4)
82:     std     r8,16(3)
83:     ld      r8,24(4)
84:     std     r9,24(3)
85:     ldu     r9,32(4)
86:     stdu    r10,32(3)
        bdnz    3b
4:
87:     ld      r10,8(4)
88:     std     r7,8(3)
89:     std     r8,16(3)
90:     std     r9,24(3)
91:     std     r10,32(3)
9:      ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        li      r3,0
        blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
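/*
 * Any fault in the page-copy loop lands at 100: below, which restores
 * the non-volatile registers and the original pointers from the red
 * zone, then redoes the whole 4096-byte copy via the careful
 * .Ldst_aligned path, whose accesses all have fine-grained fixups.
 */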
100:    ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        ld      r3,-24(r1)
        ld      r4,-16(r1)
        li      r5,4096
        b       .Ldst_aligned

        .section __ex_table,"a"
        .align  3
        .llong  20b,100b
        .llong  21b,100b
        .llong  22b,100b
        .llong  23b,100b
        .llong  24b,100b
        .llong  25b,100b
        .llong  26b,100b
        .llong  27b,100b
        .llong  28b,100b
        .llong  29b,100b
        .llong  30b,100b
        .llong  31b,100b
        .llong  32b,100b
        .llong  33b,100b
        .llong  34b,100b
        .llong  35b,100b
        .llong  36b,100b
        .llong  37b,100b
        .llong  38b,100b
        .llong  39b,100b
        .llong  40b,100b
        .llong  41b,100b
        .llong  42b,100b
        .llong  43b,100b
        .llong  44b,100b
        .llong  45b,100b
        .llong  46b,100b
        .llong  47b,100b
        .llong  48b,100b
        .llong  49b,100b
        .llong  50b,100b
        .llong  51b,100b
        .llong  52b,100b
        .llong  53b,100b
        .llong  54b,100b
        .llong  55b,100b
        .llong  56b,100b
        .llong  57b,100b
        .llong  58b,100b
        .llong  59b,100b
        .llong  60b,100b
        .llong  61b,100b
        .llong  62b,100b
        .llong  63b,100b
        .llong  64b,100b
        .llong  65b,100b
        .llong  66b,100b
        .llong  67b,100b
        .llong  68b,100b
        .llong  69b,100b
        .llong  70b,100b
        .llong  71b,100b
        .llong  72b,100b
        .llong  73b,100b
        .llong  74b,100b
        .llong  75b,100b
        .llong  76b,100b
        .llong  77b,100b
        .llong  78b,100b
        .llong  79b,100b
        .llong  80b,100b
        .llong  81b,100b
        .llong  82b,100b
        .llong  83b,100b
        .llong  84b,100b
        .llong  85b,100b
        .llong  86b,100b
        .llong  87b,100b
        .llong  88b,100b
        .llong  89b,100b
        .llong  90b,100b
        .llong  91b,100b
EXPORT_SYMBOL(__copy_tofrom_user)