arch/tile/kernel/unaligned.c
1 /*
2  * Copyright 2013 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * A code-rewriter that handles unaligned exception.
15  */
16
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/thread_info.h>
21 #include <linux/uaccess.h>
22 #include <linux/mman.h>
23 #include <linux/types.h>
24 #include <linux/err.h>
25 #include <linux/module.h>
26 #include <linux/compat.h>
27 #include <linux/prctl.h>
28 #include <asm/cacheflush.h>
29 #include <asm/traps.h>
30 #include <asm/uaccess.h>
31 #include <asm/unaligned.h>
32 #include <arch/abi.h>
33 #include <arch/spr_def.h>
34 #include <arch/opcode.h>
35
36
37 /*
38  * This file handles unaligned exceptions for TILE-Gx. The TILEPro unaligned
39  * exception is handled in single_step.c instead.
40  */
41
42 int unaligned_printk;
43
44 static int __init setup_unaligned_printk(char *str)
45 {
46         long val;
47         if (kstrtol(str, 0, &val) != 0)
48                 return 0;
49         unaligned_printk = val;
50         pr_info("Printk for each unaligned data access is %s\n",
51                 unaligned_printk ? "enabled" : "disabled");
52         return 1;
53 }
54 __setup("unaligned_printk=", setup_unaligned_printk);
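/* Usage: pass "unaligned_printk=1" on the kernel command line to enable it. */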
55
56 unsigned int unaligned_fixup_count;
57
58 #ifdef __tilegx__
59
60 /*
61  * Unaligned-data JIT fixup code fragment. Reserved space is 128 bytes.
62  * The 1st 64-bit word saves the fault PC address, the 2nd word is the fault
63  * instruction bundle, followed by 14 JIT bundles.
64  */
65
66 struct unaligned_jit_fragment {
67         unsigned long       pc;
68         tilegx_bundle_bits  bundle;
69         tilegx_bundle_bits  insn[14];
70 };
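/*
 * For reference: 8 bytes (pc) + 8 bytes (bundle) + 14 * 8 bytes (insn)
 * = 128 bytes, matching the reserved space noted above.
 */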
71
72 /*
73  * Check if a nop or fnop is at the bundle's X0 pipeline.
74  */
75
76 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
77 {
78         return (((get_UnaryOpcodeExtension_X0(bundle) ==
79                   NOP_UNARY_OPCODE_X0) &&
80                  (get_RRROpcodeExtension_X0(bundle) ==
81                   UNARY_RRR_0_OPCODE_X0) &&
82                  (get_Opcode_X0(bundle) ==
83                   RRR_0_OPCODE_X0)) ||
84                 ((get_UnaryOpcodeExtension_X0(bundle) ==
85                   FNOP_UNARY_OPCODE_X0) &&
86                  (get_RRROpcodeExtension_X0(bundle) ==
87                   UNARY_RRR_0_OPCODE_X0) &&
88                  (get_Opcode_X0(bundle) ==
89                   RRR_0_OPCODE_X0)));
90 }
91
92 /*
93  * Check if a nop or fnop is at the bundle's X1 pipeline.
94  */
95
96 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
97 {
98         return (((get_UnaryOpcodeExtension_X1(bundle) ==
99                   NOP_UNARY_OPCODE_X1) &&
100                  (get_RRROpcodeExtension_X1(bundle) ==
101                   UNARY_RRR_0_OPCODE_X1) &&
102                  (get_Opcode_X1(bundle) ==
103                   RRR_0_OPCODE_X1)) ||
104                 ((get_UnaryOpcodeExtension_X1(bundle) ==
105                   FNOP_UNARY_OPCODE_X1) &&
106                  (get_RRROpcodeExtension_X1(bundle) ==
107                   UNARY_RRR_0_OPCODE_X1) &&
108                  (get_Opcode_X1(bundle) ==
109                   RRR_0_OPCODE_X1)));
110 }
111
112 /*
113  * Check if a nop or fnop is at the bundle's Y0 pipeline.
114  */
115
116 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
117 {
118         return (((get_UnaryOpcodeExtension_Y0(bundle) ==
119                   NOP_UNARY_OPCODE_Y0) &&
120                  (get_RRROpcodeExtension_Y0(bundle) ==
121                   UNARY_RRR_1_OPCODE_Y0) &&
122                  (get_Opcode_Y0(bundle) ==
123                   RRR_1_OPCODE_Y0)) ||
124                 ((get_UnaryOpcodeExtension_Y0(bundle) ==
125                   FNOP_UNARY_OPCODE_Y0) &&
126                  (get_RRROpcodeExtension_Y0(bundle) ==
127                   UNARY_RRR_1_OPCODE_Y0) &&
128                  (get_Opcode_Y0(bundle) ==
129                   RRR_1_OPCODE_Y0)));
130 }
131
132 /*
133  * Check if a nop or fnop is at the bundle's Y1 pipeline.
134  */
135
136 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
137 {
138         return (((get_UnaryOpcodeExtension_Y1(bundle) ==
139                   NOP_UNARY_OPCODE_Y1) &&
140                  (get_RRROpcodeExtension_Y1(bundle) ==
141                   UNARY_RRR_1_OPCODE_Y1) &&
142                  (get_Opcode_Y1(bundle) ==
143                   RRR_1_OPCODE_Y1)) ||
144                 ((get_UnaryOpcodeExtension_Y1(bundle) ==
145                   FNOP_UNARY_OPCODE_Y1) &&
146                  (get_RRROpcodeExtension_Y1(bundle) ==
147                   UNARY_RRR_1_OPCODE_Y1) &&
148                  (get_Opcode_Y1(bundle) ==
149                   RRR_1_OPCODE_Y1)));
150 }
151
152 /*
153  * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
154  */
155
156 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
157 {
158         return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
159 }
160
161 /*
162  * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
163  */
164
165 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
166 {
167         return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
168 }
169
170 /*
171  * Find the destination and source registers of the faulting unaligned
172  * access instruction at X1 or Y2. Also allocate up to 3 scratch registers,
173  * clob1, clob2 and clob3, which are guaranteed to differ from any register
174  * used in the fault bundle. r_alias returns whether instructions other than
175  * the unaligned load/store share a register with ra, rb or rd.
176  */
177
178 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
179                       uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
180                       uint64_t *clob3, bool *r_alias)
181 {
182         int i;
183         uint64_t reg;
184         uint64_t reg_map = 0, alias_reg_map = 0, map;
185         bool alias = false;
186
187         /*
188          * Parse the fault bundle, find the potentially used registers and
189          * mark the corresponding bits in reg_map and alias_reg_map. These 2
190          * bitmaps are used to find the scratch registers and to determine
191          * whether there is register aliasing.
192          */
193         if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
194
195                 reg = get_SrcA_Y2(bundle);
196                 reg_map |= 1ULL << reg;
197                 *ra = reg;
198                 reg = get_SrcBDest_Y2(bundle);
199                 reg_map |= 1ULL << reg;
200
201                 if (rd) {
202                         /* Load. */
203                         *rd = reg;
204                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
205                 } else {
206                         /* Store. */
207                         *rb = reg;
208                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
209                 }
210
211                 if (!is_bundle_y1_nop(bundle)) {
212                         reg = get_SrcA_Y1(bundle);
213                         reg_map |= (1ULL << reg);
214                         map = (1ULL << reg);
215
216                         reg = get_SrcB_Y1(bundle);
217                         reg_map |= (1ULL << reg);
218                         map |= (1ULL << reg);
219
220                         reg = get_Dest_Y1(bundle);
221                         reg_map |= (1ULL << reg);
222                         map |= (1ULL << reg);
223
224                         if (map & alias_reg_map)
225                                 alias = true;
226                 }
227
228                 if (!is_bundle_y0_nop(bundle)) {
229                         reg = get_SrcA_Y0(bundle);
230                         reg_map |= (1ULL << reg);
231                         map = (1ULL << reg);
232
233                         reg = get_SrcB_Y0(bundle);
234                         reg_map |= (1ULL << reg);
235                         map |= (1ULL << reg);
236
237                         reg = get_Dest_Y0(bundle);
238                         reg_map |= (1ULL << reg);
239                         map |= (1ULL << reg);
240
241                         if (map & alias_reg_map)
242                                 alias = true;
243                 }
244         } else  { /* X Mode Bundle. */
245
246                 reg = get_SrcA_X1(bundle);
247                 reg_map |= (1ULL << reg);
248                 *ra = reg;
249                 if (rd) {
250                         /* Load. */
251                         reg = get_Dest_X1(bundle);
252                         reg_map |= (1ULL << reg);
253                         *rd = reg;
254                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
255                 } else {
256                         /* Store. */
257                         reg = get_SrcB_X1(bundle);
258                         reg_map |= (1ULL << reg);
259                         *rb = reg;
260                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
261                 }
262
263                 if (!is_bundle_x0_nop(bundle)) {
264                         reg = get_SrcA_X0(bundle);
265                         reg_map |= (1ULL << reg);
266                         map = (1ULL << reg);
267
268                         reg = get_SrcB_X0(bundle);
269                         reg_map |= (1ULL << reg);
270                         map |= (1ULL << reg);
271
272                         reg = get_Dest_X0(bundle);
273                         reg_map |= (1ULL << reg);
274                         map |= (1ULL << reg);
275
276                         if (map & alias_reg_map)
277                                 alias = true;
278                 }
279         }
280
281         /*
282          * "alias" indicates whether the registers of the unaligned access
283          * collide with other registers in the same bundle. We simply test the
284          * all-register-operand (RRR) case and ignore the immediate forms. If a
285          * bundle has no register alias, we can do the fixup in a simpler and
286          * faster manner, so if an immediate field happens to match a register
287          * number, we may end up falling back to the generic handling.
288          */
289
290         *r_alias = alias;
291
292         /* Flip bits on reg_map. */
293         reg_map ^= -1ULL;
294
295         /* Scan the lower TREG_SP (54) bits of reg_map to find 3 set bits. */
296         for (i = 0; i < TREG_SP; i++) {
297                 if (reg_map & (0x1ULL << i)) {
298                         if (*clob1 == -1) {
299                                 *clob1 = i;
300                         } else if (*clob2 == -1) {
301                                 *clob2 = i;
302                         } else if (*clob3 == -1) {
303                                 *clob3 = i;
304                                 return;
305                         }
306                 }
307         }
308 }
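/*
 * For illustration: if the fault bundle references only r1 and r2, the
 * complemented reg_map has bits 0, 3, 4, ... set, so the loop above picks
 * r0, r3 and r4 as clob1/2/3 -- the three lowest-numbered registers below
 * TREG_SP that the bundle does not use.
 */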
309
310 /*
311  * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
312  * is unexpected.
313  */
314
315 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
316                        uint64_t clob1, uint64_t clob2,  uint64_t clob3)
317 {
318         bool unexpected = false;
319         if ((ra >= 56) && (ra != TREG_ZERO))
320                 unexpected = true;
321
322         if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
323                 unexpected = true;
324
325         if (rd != -1) {
326                 if ((rd >= 56) && (rd != TREG_ZERO))
327                         unexpected = true;
328         } else {
329                 if ((rb >= 56) && (rb != TREG_ZERO))
330                         unexpected = true;
331         }
332         return unexpected;
333 }
334
335
336 #define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
337 #define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
338 #define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
339 #define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
340 #define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
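/*
 * Each mask selects one pipeline's slot within a 64-bit bundle, e.g.
 * GX_INSN_X0_MASK covers bits [30:0] and GX_INSN_X1_MASK bits [61:31];
 * they are used below to keep only the wanted slot of a template bundle.
 */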
341
342 #ifdef __LITTLE_ENDIAN
343 #define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
344 #else
345 #define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
346 #endif /* __LITTLE_ENDIAN */
347
348 /*
349  * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
350  * section. The corresponding static function jit_x#_###(.) generates a
351  * partial or whole bundle based on the template and the given arguments.
352  */
353
354 #define __JIT_CODE(_X_)                                         \
355         asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
356              _X_"\n"                                            \
357              ".popsection\n")
358
359 __JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
360 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
361 {
362         extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
363         return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
364                 create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
365 }
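/*
 * For illustration: jit_x1_mtspr(SPR_EX_CONTEXT_0_0, clob2) masks the
 * {mtspr 0, r0} template above down to its X1 slot via GX_INSN_X1_MASK,
 * then ORs in the encoded SPR number and source register, yielding the
 * equivalent of {mtspr SPR_EX_CONTEXT_0_0, clob2}.
 */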
366
367 __JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
368 static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
369 {
370         extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
371         return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
372                 create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
373 }
374
375 __JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
376 static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
377 {
378         extern  tilegx_bundle_bits __unalign_jit_x0_addi;
379         return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
380                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
381                 create_Imm8_X0(imm8);
382 }
383
384 __JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
385 static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
386 {
387         extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
388         return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
389                 create_Dest_X1(rd) | create_SrcA_X1(ra);
390 }
391
392 __JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
393 static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
394 {
395         extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
396         return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
397                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
398                 create_SrcB_X0(rb);
399 }
400
401 __JIT_CODE("__unalign_jit_x1_iret:   {iret}");
402 static tilegx_bundle_bits  jit_x1_iret(void)
403 {
404         extern  tilegx_bundle_bits __unalign_jit_x1_iret;
405         return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
406 }
407
408 __JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
409 static tilegx_bundle_bits  jit_x0_fnop(void)
410 {
411         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
412         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
413 }
414
415 static tilegx_bundle_bits  jit_x1_fnop(void)
416 {
417         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
418         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
419 }
420
421 __JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
422 static tilegx_bundle_bits  jit_y2_dummy(void)
423 {
424         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
425         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
426 }
427
428 static tilegx_bundle_bits  jit_y1_fnop(void)
429 {
430         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
431         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
432 }
433
434 __JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
435 static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
436 {
437         extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
438         return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
439                 (~create_SrcA_X1(-1)) &
440                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
441                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
442 }
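/*
 * The template above encodes r1 in its SrcA slot, so that field is first
 * cleared with ~create_SrcA_X1(-1) before the caller's ra is ORed in; the
 * st_add and ld_add templates below get similar treatment for their r1
 * fields.
 */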
443
444 __JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
445 static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
446 {
447         extern  tilegx_bundle_bits __unalign_jit_x1_st;
448         return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
449                 create_SrcA_X1(ra) | create_SrcB_X1(rb);
450 }
451
452 __JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
453 static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
454 {
455         extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
456         return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
457                 (~create_SrcA_X1(-1)) &
458                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
459                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
460 }
461
462 __JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
463 static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
464 {
465         extern  tilegx_bundle_bits __unalign_jit_x1_ld;
466         return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
467                 create_Dest_X1(rd) | create_SrcA_X1(ra);
468 }
469
470 __JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
471 static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
472 {
473         extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
474         return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
475                 (~create_Dest_X1(-1)) &
476                 GX_INSN_X1_MASK) | create_Dest_X1(rd) |
477                 create_SrcA_X1(ra) | create_Imm8_X1(imm8);
478 }
479
480 __JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
481 static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
482 {
483         extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
484         return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
485                 GX_INSN_X0_MASK) |
486                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
487                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
488 }
489
490 __JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
491 static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
492 {
493         extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
494         return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
495                 GX_INSN_X0_MASK) |
496                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
497                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
498 }
499
500 __JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
501 static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
502 {
503         extern  tilegx_bundle_bits __unalign_jit_x1_addi;
504         return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
505                 create_Dest_X1(rd) | create_SrcA_X1(ra) |
506                 create_Imm8_X1(imm8);
507 }
508
509 __JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
510 static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
511 {
512         extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
513         return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
514                 GX_INSN_X0_MASK) |
515                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
516                 create_ShAmt_X0(imm6);
517 }
518
519 __JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
520 static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
521 {
522         extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
523         return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
524                 GX_INSN_X0_MASK) |
525                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
526                 create_ShAmt_X0(imm6);
527 }
528
529 __JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
530 static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
531 {
532         extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
533         return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
534                 GX_INSN_X1_MASK) |
535                 create_SrcA_X1(ra) | create_BrOff_X1(broff);
536 }
537
538 #undef __JIT_CODE
539
540 /*
541  * This function generates the unaligned-access fixup JIT.
542  *
543  * We first find the unaligned load/store instruction's destination and
544  * source registers (ra, rb and rd), plus 3 scratch registers, by calling
545  * find_regs(). The 3 scratch clobbers must not alias any register used in
546  * the fault bundle. Then the fault bundle is analyzed to determine whether
547  * it is a load or store, its operand width, and any branch or address
548  * increment. Finally the generated JIT is copied to the user-space JIT area.
549  */
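/*
 * For illustration, a simple unaligned "ld rd, ra" (rd != ra, no alias, no
 * branch or post-increment) produces a fragment along these lines: save a
 * clobber register on the user stack, "ldna" the two aligned doublewords
 * that straddle the address, merge them with "dblalign" into rd, restore
 * the clobber, then "iret" back to the bundle after the fault.
 */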
550
551 static
552 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
553                     int align_ctl)
554 {
555         struct thread_info *info = current_thread_info();
556         struct unaligned_jit_fragment frag;
557         struct unaligned_jit_fragment *jit_code_area;
558         tilegx_bundle_bits bundle_2 = 0;
559         /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
560         bool     bundle_2_enable = true;
561         uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
562         /*
563          * Indicates whether the unaligned access
564          * instruction's registers collide with
565          * other registers in the same bundle.
566          */
567         bool     alias = false;
568         bool     load_n_store = true;
569         bool     load_store_signed = false;
570         unsigned int  load_store_size = 8;
571         bool     y1_br = false;  /* True, for a branch in same bundle at Y1.*/
572         int      y1_br_reg = 0;
573         /* True for link operation. i.e. jalr or lnk at Y1 */
574         bool     y1_lr = false;
575         int      y1_lr_reg = 0;
576         bool     x1_add = false;/* True, for load/store ADD instruction at X1*/
577         int      x1_add_imm8 = 0;
578         bool     unexpected = false;
579         int      n = 0, k;
580
581         jit_code_area =
582                 (struct unaligned_jit_fragment *)(info->unalign_jit_base);
583
584         memset((void *)&frag, 0, sizeof(frag));
585
586         /* 0: X mode, Otherwise: Y mode. */
587         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
588                 unsigned int mod, opcode;
589
590                 if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
591                     get_RRROpcodeExtension_Y1(bundle) ==
592                     UNARY_RRR_1_OPCODE_Y1) {
593
594                         opcode = get_UnaryOpcodeExtension_Y1(bundle);
595
596                         /*
597                          * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
598                          * pipeline.
599                          */
600                         switch (opcode) {
601                         case JALR_UNARY_OPCODE_Y1:
602                         case JALRP_UNARY_OPCODE_Y1:
603                                 y1_lr = true;
604                                 y1_lr_reg = 55; /* Link register. */
605                                 /* FALLTHROUGH */
606                         case JR_UNARY_OPCODE_Y1:
607                         case JRP_UNARY_OPCODE_Y1:
608                                 y1_br = true;
609                                 y1_br_reg = get_SrcA_Y1(bundle);
610                                 break;
611                         case LNK_UNARY_OPCODE_Y1:
612                                 /* "lnk" at Y1 pipeline. */
613                                 y1_lr = true;
614                                 y1_lr_reg = get_Dest_Y1(bundle);
615                                 break;
616                         }
617                 }
618
619                 opcode = get_Opcode_Y2(bundle);
620                 mod = get_Mode(bundle);
621
622                 /*
623                  * bundle_2 is the bundle after replacing Y2 with the dummy
624                  * operation "ld zero, sp".
625                  */
626                 bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
627
628                 /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
629                 if (y1_br || y1_lr) {
630                         bundle_2 &= ~(GX_INSN_Y1_MASK);
631                         bundle_2 |= jit_y1_fnop();
632                 }
633
634                 if (is_y0_y1_nop(bundle_2))
635                         bundle_2_enable = false;
636
637                 if (mod == MODE_OPCODE_YC2) {
638                         /* Store. */
639                         load_n_store = false;
640                         load_store_size = 1 << opcode;
641                         load_store_signed = false;
642                         find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
643                                   &clob3, &alias);
644                         if (load_store_size > 8)
645                                 unexpected = true;
646                 } else {
647                         /* Load. */
648                         load_n_store = true;
649                         if (mod == MODE_OPCODE_YB2) {
650                                 switch (opcode) {
651                                 case LD_OPCODE_Y2:
652                                         load_store_signed = false;
653                                         load_store_size = 8;
654                                         break;
655                                 case LD4S_OPCODE_Y2:
656                                         load_store_signed = true;
657                                         load_store_size = 4;
658                                         break;
659                                 case LD4U_OPCODE_Y2:
660                                         load_store_signed = false;
661                                         load_store_size = 4;
662                                         break;
663                                 default:
664                                         unexpected = true;
665                                 }
666                         } else if (mod == MODE_OPCODE_YA2) {
667                                 if (opcode == LD2S_OPCODE_Y2) {
668                                         load_store_signed = true;
669                                         load_store_size = 2;
670                                 } else if (opcode == LD2U_OPCODE_Y2) {
671                                         load_store_signed = false;
672                                         load_store_size = 2;
673                                 } else
674                                         unexpected = true;
675                         } else
676                                 unexpected = true;
677                         find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
678                                   &clob3, &alias);
679                 }
680         } else {
681                 unsigned int opcode;
682
683                 /* bundle_2 is the bundle after replacing X1 with "fnop". */
684                 bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
685
686                 if (is_x0_x1_nop(bundle_2))
687                         bundle_2_enable = false;
688
689                 if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
690                         opcode = get_UnaryOpcodeExtension_X1(bundle);
691
692                         if (get_RRROpcodeExtension_X1(bundle) ==
693                             UNARY_RRR_0_OPCODE_X1) {
694                                 load_n_store = true;
695                                 find_regs(bundle, &rd, &ra, &rb, &clob1,
696                                           &clob2, &clob3, &alias);
697
698                                 switch (opcode) {
699                                 case LD_UNARY_OPCODE_X1:
700                                         load_store_signed = false;
701                                         load_store_size = 8;
702                                         break;
703                                 case LD4S_UNARY_OPCODE_X1:
704                                         load_store_signed = true;
705                                         /* FALLTHROUGH */
706                                 case LD4U_UNARY_OPCODE_X1:
707                                         load_store_size = 4;
708                                         break;
709
710                                 case LD2S_UNARY_OPCODE_X1:
711                                         load_store_signed = true;
712                                         /* FALLTHROUGH */
713                                 case LD2U_UNARY_OPCODE_X1:
714                                         load_store_size = 2;
715                                         break;
716                                 default:
717                                         unexpected = true;
718                                 }
719                         } else {
720                                 load_n_store = false;
721                                 load_store_signed = false;
722                                 find_regs(bundle, 0, &ra, &rb,
723                                           &clob1, &clob2, &clob3,
724                                           &alias);
725
726                                 opcode = get_RRROpcodeExtension_X1(bundle);
727                                 switch (opcode) {
728                                 case ST_RRR_0_OPCODE_X1:
729                                         load_store_size = 8;
730                                         break;
731                                 case ST4_RRR_0_OPCODE_X1:
732                                         load_store_size = 4;
733                                         break;
734                                 case ST2_RRR_0_OPCODE_X1:
735                                         load_store_size = 2;
736                                         break;
737                                 default:
738                                         unexpected = true;
739                                 }
740                         }
741                 } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
742                         load_n_store = true;
743                         opcode = get_Imm8OpcodeExtension_X1(bundle);
744                         switch (opcode) {
745                         case LD_ADD_IMM8_OPCODE_X1:
746                                 load_store_size = 8;
747                                 break;
748
749                         case LD4S_ADD_IMM8_OPCODE_X1:
750                                 load_store_signed = true;
751                                 /* FALLTHROUGH */
752                         case LD4U_ADD_IMM8_OPCODE_X1:
753                                 load_store_size = 4;
754                                 break;
755
756                         case LD2S_ADD_IMM8_OPCODE_X1:
757                                 load_store_signed = true;
758                                 /* FALLTHROUGH */
759                         case LD2U_ADD_IMM8_OPCODE_X1:
760                                 load_store_size = 2;
761                                 break;
762
763                         case ST_ADD_IMM8_OPCODE_X1:
764                                 load_n_store = false;
765                                 load_store_size = 8;
766                                 break;
767                         case ST4_ADD_IMM8_OPCODE_X1:
768                                 load_n_store = false;
769                                 load_store_size = 4;
770                                 break;
771                         case ST2_ADD_IMM8_OPCODE_X1:
772                                 load_n_store = false;
773                                 load_store_size = 2;
774                                 break;
775                         default:
776                                 unexpected = true;
777                         }
778
779                         if (!unexpected) {
780                                 x1_add = true;
781                                 if (load_n_store)
782                                         x1_add_imm8 = get_Imm8_X1(bundle);
783                                 else
784                                         x1_add_imm8 = get_Dest_Imm8_X1(bundle);
785                         }
786
787                         find_regs(bundle, load_n_store ? (&rd) : NULL,
788                                   &ra, &rb, &clob1, &clob2, &clob3, &alias);
789                 } else
790                         unexpected = true;
791         }
792
793         /*
794          * Sanity-check the register numbers extracted from the fault bundle.
795          */
796         if (check_regs(rd, ra, rb, clob1, clob2, clob3))
797                 unexpected = true;
798
799         /* Give warning if register ra has an aligned address. */
800         if (!unexpected)
801                 WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
802
803
804         /*
805          * If the fault came from kernel space, we only need to take care of
806          * the unaligned "get_user/put_user" macros defined in "uaccess.h".
807          * Basically, we handle a bundle like this:
808          * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
809          * (refer to "arch/tile/include/asm/uaccess.h" for details).
810          * For either a load or a store, a byte-wise operation is performed
811          * by calling get_user() or put_user(). If the macro returns a
812          * non-zero value, that value is written to rx, otherwise rx is set
813          * to zero. Finally, make pc point to the next bundle and return.
814          */
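        /*
         * For example, an unaligned 4-byte get_user() compiles to a bundle
         * like {ld4s rd, ra; movei rx, 0}; the code below reads the 4 bytes
         * one at a time with get_user(), assembles and sign-extends them into
         * rd, records the error code in rx, and advances pc by one bundle.
         */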
815
816         if (EX1_PL(regs->ex1) != USER_PL) {
817
818                 unsigned long rx = 0;
819                 unsigned long x = 0, ret = 0;
820
821                 if (y1_br || y1_lr || x1_add ||
822                     (load_store_signed !=
823                      (load_n_store && load_store_size == 4))) {
824                         /* Bail on branch, link, load/store add, or bad sign-ext. */
825                         unexpected = true;
826                 } else if (!unexpected) {
827                         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
828                                 /*
829                                  * Fault bundle is Y mode.
830                                  * Check if the Y1 and Y0 is the form of
831                                  * { movei rx, 0; nop/fnop }, if yes,
832                                  * find the rx.
833                                  */
834
835                                 if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
836                                     && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
837                                     (get_Imm8_Y1(bundle) == 0) &&
838                                     is_bundle_y0_nop(bundle)) {
839                                         rx = get_Dest_Y1(bundle);
840                                 } else if ((get_Opcode_Y0(bundle) ==
841                                             ADDI_OPCODE_Y0) &&
842                                            (get_SrcA_Y0(bundle) == TREG_ZERO) &&
843                                            (get_Imm8_Y0(bundle) == 0) &&
844                                            is_bundle_y1_nop(bundle)) {
845                                         rx = get_Dest_Y0(bundle);
846                                 } else {
847                                         unexpected = true;
848                                 }
849                         } else {
850                                 /*
851                                  * Fault bundle is X mode.
852                                  * Check if the X0 is 'movei rx, 0',
853                                  * if yes, find the rx.
854                                  */
855
856                                 if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
857                                     && (get_Imm8OpcodeExtension_X0(bundle) ==
858                                         ADDI_IMM8_OPCODE_X0) &&
859                                     (get_SrcA_X0(bundle) == TREG_ZERO) &&
860                                     (get_Imm8_X0(bundle) == 0)) {
861                                         rx = get_Dest_X0(bundle);
862                                 } else {
863                                         unexpected = true;
864                                 }
865                         }
866
867                         /* rx should be less than 56. */
868                         if (!unexpected && (rx >= 56))
869                                 unexpected = true;
870                 }
871
872                 if (!search_exception_tables(regs->pc)) {
873                         /* No fixup in the exception tables for the pc. */
874                         unexpected = true;
875                 }
876
877                 if (unexpected) {
878                         /* Unexpected unalign kernel fault. */
879                         struct task_struct *tsk = validate_current();
880
881                         bust_spinlocks(1);
882
883                         show_regs(regs);
884
885                         if (unlikely(tsk->pid < 2)) {
886                                 panic("Kernel unalign fault running %s!",
887                                       tsk->pid ? "init" : "the idle task");
888                         }
889 #ifdef SUPPORT_DIE
890                         die("Oops", regs);
891 #endif
892                         bust_spinlocks(1);
893
894                         do_group_exit(SIGKILL);
895
896                 } else {
897                         unsigned long i, b = 0;
898                         unsigned char *ptr =
899                                 (unsigned char *)regs->regs[ra];
900                         if (load_n_store) {
901                                 /* handle get_user(x, ptr) */
902                                 for (i = 0; i < load_store_size; i++) {
903                                         ret = get_user(b, ptr++);
904                                         if (!ret) {
905                                                 /* Success! update x. */
906 #ifdef __LITTLE_ENDIAN
907                                                 x |= (b << (8 * i));
908 #else
909                                                 x <<= 8;
910                                                 x |= b;
911 #endif /* __LITTLE_ENDIAN */
912                                         } else {
913                                                 x = 0;
914                                                 break;
915                                         }
916                                 }
917
918                                 /* Sign-extend 4-byte loads. */
919                                 if (load_store_size == 4)
920                                         x = (long)(int)x;
921
922                                 /* Set register rd. */
923                                 regs->regs[rd] = x;
924
925                                 /* Set register rx. */
926                                 regs->regs[rx] = ret;
927
928                                 /* Bump pc. */
929                                 regs->pc += 8;
930
931                         } else {
932                                 /* Handle put_user(x, ptr) */
933                                 x = regs->regs[rb];
934 #ifdef __LITTLE_ENDIAN
935                                 b = x;
936 #else
937                                 /*
938                                  * Swap x in order to store x from low
939                                  * to high memory same as the
940                                  * little-endian case.
941                                  */
942                                 switch (load_store_size) {
943                                 case 8:
944                                         b = swab64(x);
945                                         break;
946                                 case 4:
947                                         b = swab32(x);
948                                         break;
949                                 case 2:
950                                         b = swab16(x);
951                                         break;
952                                 }
953 #endif /* __LITTLE_ENDIAN */
954                                 for (i = 0; i < load_store_size; i++) {
955                                         ret = put_user(b, ptr++);
956                                         if (ret)
957                                                 break;
958                                         /* Success! shift 1 byte. */
959                                         b >>= 8;
960                                 }
961                                 /* Set register rx. */
962                                 regs->regs[rx] = ret;
963
964                                 /* Bump pc. */
965                                 regs->pc += 8;
966                         }
967                 }
968
969                 unaligned_fixup_count++;
970
971                 if (unaligned_printk) {
972                         pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
973                                 current->comm, current->pid, regs->regs[ra]);
974                 }
975
976                 /* Done! Return to the exception handler. */
977                 return;
978         }
979
980         if ((align_ctl == 0) || unexpected) {
981                 siginfo_t info = {
982                         .si_signo = SIGBUS,
983                         .si_code = BUS_ADRALN,
984                         .si_addr = (unsigned char __user *)0
985                 };
986                 if (unaligned_printk)
987                         pr_info("Unalign bundle: unexp @%llx, %llx\n",
988                                 (unsigned long long)regs->pc,
989                                 (unsigned long long)bundle);
990
991                 if (ra < 56) {
992                         unsigned long uaa = (unsigned long)regs->regs[ra];
993                         /* Set bus Address. */
994                         info.si_addr = (unsigned char __user *)uaa;
995                 }
996
997                 unaligned_fixup_count++;
998
999                 trace_unhandled_signal("unaligned fixup trap", regs,
1000                                        (unsigned long)info.si_addr, SIGBUS);
1001                 force_sig_info(info.si_signo, &info, current);
1002                 return;
1003         }
1004
1005 #ifdef __LITTLE_ENDIAN
1006 #define UA_FIXUP_ADDR_DELTA          1
1007 #define UA_FIXUP_BFEXT_START(_B_)    0
1008 #define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1009 #else /* __BIG_ENDIAN */
1010 #define UA_FIXUP_ADDR_DELTA          -1
1011 #define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1012 #define UA_FIXUP_BFEXT_END(_B_)      63
1013 #endif /* __LITTLE_ENDIAN */
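/*
 * For a 4-byte value these evaluate to bits [31:0] on little-endian and to
 * bits [63:32] on big-endian, i.e. the field that bfexts/bfextu must
 * extract after the dblalign merge below.
 */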
1014
1015
1016
1017         if ((ra != rb) && (rd != TREG_SP) && !alias &&
1018             !y1_br && !y1_lr && !x1_add) {
1019                 /*
1020                  * Simple case: ra != rb and no register alias found,
1021                  * and no branch or link. This covers the majority of
1022                  * cases. We can do a little better for this simple case
1023                  * than with the generic scheme below.
1024                  */
1025                 if (!load_n_store) {
1026                         /*
1027                          * Simple store: ra != rb, no scratch register needed.
1028                          * Just store and rotate right one byte at a time.
1029                          */
1030 #ifdef __BIG_ENDIAN
1031                         frag.insn[n++] =
1032                                 jit_x0_addi(ra, ra, load_store_size - 1) |
1033                                 jit_x1_fnop();
1034 #endif /* __BIG_ENDIAN */
1035                         for (k = 0; k < load_store_size; k++) {
1036                                 /* Store a byte. */
1037                                 frag.insn[n++] =
1038                                         jit_x0_rotli(rb, rb, 56) |
1039                                         jit_x1_st1_add(ra, rb,
1040                                                        UA_FIXUP_ADDR_DELTA);
1041                         }
1042 #ifdef __BIG_ENDIAN
1043                         frag.insn[n] = jit_x1_addi(ra, ra, 1);
1044 #else
1045                         frag.insn[n] = jit_x1_addi(ra, ra,
1046                                                    -1 * load_store_size);
1047 #endif /* __LITTLE_ENDIAN */
1048
1049                         if (load_store_size == 8) {
1050                                 frag.insn[n] |= jit_x0_fnop();
1051                         } else if (load_store_size == 4) {
1052                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1053                         } else { /* = 2 */
1054                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1055                         }
1056                         n++;
1057                         if (bundle_2_enable)
1058                                 frag.insn[n++] = bundle_2;
1059                         frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1060                 } else {
1061                         if (rd == ra) {
1062                                 /* Use two clobber registers: clob1/2. */
1063                                 frag.insn[n++] =
1064                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1065                                         jit_x1_fnop();
1066                                 frag.insn[n++] =
1067                                         jit_x0_addi(clob1, ra, 7) |
1068                                         jit_x1_st_add(TREG_SP, clob1, -8);
1069                                 frag.insn[n++] =
1070                                         jit_x0_addi(clob2, ra, 0) |
1071                                         jit_x1_st(TREG_SP, clob2);
1072                                 frag.insn[n++] =
1073                                         jit_x0_fnop() |
1074                                         jit_x1_ldna(rd, ra);
1075                                 frag.insn[n++] =
1076                                         jit_x0_fnop() |
1077                                         jit_x1_ldna(clob1, clob1);
1078                                 /*
1079                                  * Note: we must make sure that rd is not
1080                                  * sp. Recover clob1/2 from the stack.
1081                                  */
1082                                 frag.insn[n++] =
1083                                         jit_x0_dblalign(rd, clob1, clob2) |
1084                                         jit_x1_ld_add(clob2, TREG_SP, 8);
1085                                 frag.insn[n++] =
1086                                         jit_x0_fnop() |
1087                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1088                         } else {
1089                                 /* Use one clobber register: clob1 only. */
1090                                 frag.insn[n++] =
1091                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1092                                         jit_x1_fnop();
1093                                 frag.insn[n++] =
1094                                         jit_x0_addi(clob1, ra, 7) |
1095                                         jit_x1_st(TREG_SP, clob1);
1096                                 frag.insn[n++] =
1097                                         jit_x0_fnop() |
1098                                         jit_x1_ldna(rd, ra);
1099                                 frag.insn[n++] =
1100                                         jit_x0_fnop() |
1101                                         jit_x1_ldna(clob1, clob1);
1102                                 /*
1103                                  * Note: we must make sure that rd is not
1104                                  * sp. Recover clob1 from the stack.
1105                                  */
1106                                 frag.insn[n++] =
1107                                         jit_x0_dblalign(rd, clob1, ra) |
1108                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1109                         }
1110
1111                         if (bundle_2_enable)
1112                                 frag.insn[n++] = bundle_2;
1113                         /*
1114                          * For a non-8-byte load, extract the relevant bytes
1115                          * and sign-extend if needed.
1116                          */
1117                         if (load_store_size == 4) {
1118                                 if (load_store_signed)
1119                                         frag.insn[n++] =
1120                                                 jit_x0_bfexts(
1121                                                         rd, rd,
1122                                                         UA_FIXUP_BFEXT_START(4),
1123                                                         UA_FIXUP_BFEXT_END(4)) |
1124                                                 jit_x1_fnop();
1125                                 else
1126                                         frag.insn[n++] =
1127                                                 jit_x0_bfextu(
1128                                                         rd, rd,
1129                                                         UA_FIXUP_BFEXT_START(4),
1130                                                         UA_FIXUP_BFEXT_END(4)) |
1131                                                 jit_x1_fnop();
1132                         } else if (load_store_size == 2) {
1133                                 if (load_store_signed)
1134                                         frag.insn[n++] =
1135                                                 jit_x0_bfexts(
1136                                                         rd, rd,
1137                                                         UA_FIXUP_BFEXT_START(2),
1138                                                         UA_FIXUP_BFEXT_END(2)) |
1139                                                 jit_x1_fnop();
1140                                 else
1141                                         frag.insn[n++] =
1142                                                 jit_x0_bfextu(
1143                                                         rd, rd,
1144                                                         UA_FIXUP_BFEXT_START(2),
1145                                                         UA_FIXUP_BFEXT_END(2)) |
1146                                                 jit_x1_fnop();
1147                         }
1148
1149                         frag.insn[n++] =
1150                                 jit_x0_fnop()  |
1151                                 jit_x1_iret();
1152                 }
1153         } else if (!load_n_store) {
1154
1155                 /*
1156                  * Generic memory store cases: use 3 clobber registers.
1157                  *
1158                  * Allocate space for saving clob2, clob1 and clob3 on the
1159                  * user's stack. Register clob3 points to where clob2 is
1160                  * saved, followed by clob1 and clob3 from high to low memory.
1161                  */
1162                 frag.insn[n++] =
1163                         jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1164                         jit_x1_fnop();
1165                 frag.insn[n++] =
1166                         jit_x0_addi(clob3, TREG_SP, 16)  |
1167                         jit_x1_st_add(TREG_SP, clob3, 8);
1168 #ifdef __LITTLE_ENDIAN
1169                 frag.insn[n++] =
1170                         jit_x0_addi(clob1, ra, 0)   |
1171                         jit_x1_st_add(TREG_SP, clob1, 8);
1172 #else
1173                 frag.insn[n++] =
1174                         jit_x0_addi(clob1, ra, load_store_size - 1)   |
1175                         jit_x1_st_add(TREG_SP, clob1, 8);
1176 #endif
1177                 if (load_store_size == 8) {
1178                         /*
1179                          * We store one byte at a time, not for speed but for
1180                          * compact code. After each store the source register is
1181                          * rotated right one byte, so it is unchanged after 8 stores.
1182                          */
1183                         frag.insn[n++] =
1184                                 jit_x0_addi(clob2, TREG_ZERO, 7)     |
1185                                 jit_x1_st_add(TREG_SP, clob2, 16);
1186                         frag.insn[n++] =
1187                                 jit_x0_rotli(rb, rb, 56)      |
1188                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1189                         frag.insn[n++] =
1190                                 jit_x0_addi(clob2, clob2, -1) |
1191                                 jit_x1_bnezt(clob2, -1);
1192                         frag.insn[n++] =
1193                                 jit_x0_fnop()                 |
1194                                 jit_x1_addi(clob2, y1_br_reg, 0);
1195                 } else if (load_store_size == 4) {
1196                         frag.insn[n++] =
1197                                 jit_x0_addi(clob2, TREG_ZERO, 3)     |
1198                                 jit_x1_st_add(TREG_SP, clob2, 16);
1199                         frag.insn[n++] =
1200                                 jit_x0_rotli(rb, rb, 56)      |
1201                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1202                         frag.insn[n++] =
1203                                 jit_x0_addi(clob2, clob2, -1) |
1204                                 jit_x1_bnezt(clob2, -1);
1205                         /*
1206                          * Same as the 8-byte case, but rb needs another
1207                          * 4-byte rotate to be recovered after a 4-byte store.
1208                          */
1209                         frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1210                                 jit_x1_addi(clob2, y1_br_reg, 0);
1211                 } else { /* =2 */
1212                         frag.insn[n++] =
1213                                 jit_x0_addi(clob2, rb, 0)     |
1214                                 jit_x1_st_add(TREG_SP, clob2, 16);
1215                         for (k = 0; k < 2; k++) {
1216                                 frag.insn[n++] =
1217                                         jit_x0_shrui(rb, rb, 8)  |
1218                                         jit_x1_st1_add(clob1, rb,
1219                                                        UA_FIXUP_ADDR_DELTA);
1220                         }
1221                         frag.insn[n++] =
1222                                 jit_x0_addi(rb, clob2, 0)       |
1223                                 jit_x1_addi(clob2, y1_br_reg, 0);
1224                 }
1225
1226                 if (bundle_2_enable)
1227                         frag.insn[n++] = bundle_2;
1228
1229                 if (y1_lr) {
1230                         frag.insn[n++] =
1231                                 jit_x0_fnop()                    |
1232                                 jit_x1_mfspr(y1_lr_reg,
1233                                              SPR_EX_CONTEXT_0_0);
1234                 }
1235                 if (y1_br) {
1236                         frag.insn[n++] =
1237                                 jit_x0_fnop()                    |
1238                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1239                                              clob2);
1240                 }
1241                 if (x1_add) {
1242                         frag.insn[n++] =
1243                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1244                                 jit_x1_ld_add(clob2, clob3, -8);
1245                 } else {
1246                         frag.insn[n++] =
1247                                 jit_x0_fnop()                    |
1248                                 jit_x1_ld_add(clob2, clob3, -8);
1249                 }
1250                 frag.insn[n++] =
1251                         jit_x0_fnop()   |
1252                         jit_x1_ld_add(clob1, clob3, -8);
1253                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1254                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1255
1256         } else {
1257                 /*
1258                  * Generic memory load cases.
1259                  *
1260                  * Allocate space for saving clob1, clob2 and clob3 on the
1261                  * user's stack. Register clob3 points to where clob1 is
1262                  * saved, followed by clob2 and clob3 from high to low memory.
1263                  */
1264
1265                 frag.insn[n++] =
1266                         jit_x0_addi(TREG_SP, TREG_SP, -32) |
1267                         jit_x1_fnop();
1268                 frag.insn[n++] =
1269                         jit_x0_addi(clob3, TREG_SP, 16) |
1270                         jit_x1_st_add(TREG_SP, clob3, 8);
1271                 frag.insn[n++] =
1272                         jit_x0_addi(clob2, ra, 0) |
1273                         jit_x1_st_add(TREG_SP, clob2, 8);
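                /*
                 * clob2 keeps a copy of ra (the unaligned address) so that the
                 * address is still available for dblalign below even if rd
                 * happens to be the same register as ra.
                 */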
1274
1275                 if (y1_br) {
1276                         frag.insn[n++] =
1277                                 jit_x0_addi(clob1, y1_br_reg, 0) |
1278                                 jit_x1_st_add(TREG_SP, clob1, 16);
1279                 } else {
1280                         frag.insn[n++] =
1281                                 jit_x0_fnop() |
1282                                 jit_x1_st_add(TREG_SP, clob1, 16);
1283                 }
1284
1285                 if (bundle_2_enable)
1286                         frag.insn[n++] = bundle_2;
1287
1288                 if (y1_lr) {
1289                         frag.insn[n++] =
1290                                 jit_x0_fnop()  |
1291                                 jit_x1_mfspr(y1_lr_reg,
1292                                              SPR_EX_CONTEXT_0_0);
1293                 }
1294
1295                 if (y1_br) {
1296                         frag.insn[n++] =
1297                                 jit_x0_fnop() |
1298                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1299                                              clob1);
1300                 }
1301
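                /*
                 * The unaligned load itself, sketched: ldna fetches the
                 * naturally aligned 8 bytes containing the start of the
                 * access into rd, and the aligned 8 bytes containing its end
                 * (address + 7) into clob1; dblalign then combines the two
                 * words, using the low bits of the address in clob2, so rd
                 * ends up holding the 8 bytes starting at the unaligned
                 * address.
                 */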
1302                 frag.insn[n++] =
1303                         jit_x0_addi(clob1, clob2, 7)      |
1304                         jit_x1_ldna(rd, clob2);
1305                 frag.insn[n++] =
1306                         jit_x0_fnop()                     |
1307                         jit_x1_ldna(clob1, clob1);
1308                 frag.insn[n++] =
1309                         jit_x0_dblalign(rd, clob1, clob2) |
1310                         jit_x1_ld_add(clob1, clob3, -8);
1311                 if (x1_add) {
1312                         frag.insn[n++] =
1313                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1314                                 jit_x1_ld_add(clob2, clob3, -8);
1315                 } else {
1316                         frag.insn[n++] =
1317                                 jit_x0_fnop()  |
1318                                 jit_x1_ld_add(clob2, clob3, -8);
1319                 }
1320
1321                 frag.insn[n++] =
1322                         jit_x0_fnop() |
1323                         jit_x1_ld(clob3, clob3);
1324
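                /*
                 * For 4- and 2-byte loads only the low bytes of rd are
                 * meaningful, so extract them with a bit-field extract:
                 * bfexts sign-extends the result, bfextu zero-extends it.
                 * 8-byte loads need no extension.
                 */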
1325                 if (load_store_size == 4) {
1326                         if (load_store_signed)
1327                                 frag.insn[n++] =
1328                                         jit_x0_bfexts(
1329                                                 rd, rd,
1330                                                 UA_FIXUP_BFEXT_START(4),
1331                                                 UA_FIXUP_BFEXT_END(4)) |
1332                                         jit_x1_fnop();
1333                         else
1334                                 frag.insn[n++] =
1335                                         jit_x0_bfextu(
1336                                                 rd, rd,
1337                                                 UA_FIXUP_BFEXT_START(4),
1338                                                 UA_FIXUP_BFEXT_END(4)) |
1339                                         jit_x1_fnop();
1340                 } else if (load_store_size == 2) {
1341                         if (load_store_signed)
1342                                 frag.insn[n++] =
1343                                         jit_x0_bfexts(
1344                                                 rd, rd,
1345                                                 UA_FIXUP_BFEXT_START(2),
1346                                                 UA_FIXUP_BFEXT_END(2)) |
1347                                         jit_x1_fnop();
1348                         else
1349                                 frag.insn[n++] =
1350                                         jit_x0_bfextu(
1351                                                 rd, rd,
1352                                                 UA_FIXUP_BFEXT_START(2),
1353                                                 UA_FIXUP_BFEXT_END(2)) |
1354                                         jit_x1_fnop();
1355                 }
1356
1357                 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1358         }
1359
1360         /* Max JIT bundle count is 14. */
1361         WARN_ON(n > 14);
1362
1363         if (!unexpected) {
1364                 int status = 0;
1365                 int idx = (regs->pc >> 3) &
1366                         ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
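                /*
                 * idx picks a slot in the per-thread JIT page: the low bits of
                 * the bundle-aligned fault PC select one of the
                 * (PAGE_SIZE >> UNALIGN_JIT_SHIFT) fixed-size fragments, so
                 * different fault PCs may map to (and overwrite) the same slot.
                 */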
1367
1368                 frag.pc = regs->pc;
1369                 frag.bundle = bundle;
1370
1371                 if (unaligned_printk) {
1372                         pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1373                                 current->comm, current->pid,
1374                                 (unsigned long)frag.pc,
1375                                 (unsigned long)frag.bundle,
1376                                 (int)alias, (int)rd, (int)ra,
1377                                 (int)rb, (int)bundle_2_enable,
1378                                 (int)y1_lr, (int)y1_br, (int)x1_add);
1379
1380                         for (k = 0; k < n; k += 2)
1381                                 pr_info("[%d] %016llx %016llx\n",
1382                                         k, (unsigned long long)frag.insn[k],
1383                                         (unsigned long long)frag.insn[k+1]);
1384                 }
1385
1386                 /* Swap bundle byte order for big-endian systems. */
1387 #ifdef __BIG_ENDIAN
1388                 frag.bundle = GX_INSN_BSWAP(frag.bundle);
1389                 for (k = 0; k < n; k++)
1390                         frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1391 #endif /* __BIG_ENDIAN */
1392
1393                 status = copy_to_user((void __user *)&jit_code_area[idx],
1394                                       &frag, sizeof(frag));
1395                 if (status) {
1396                         /* Failed to copy the JIT into userland; send SIGSEGV. */
1397                         siginfo_t info = {
1398                                 .si_signo = SIGSEGV,
1399                                 .si_code = SEGV_MAPERR,
1400                                 .si_addr = (void __user *)&jit_code_area[idx]
1401                         };
1402
1403                         pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1404                                 current->pid, current->comm,
1405                                 (unsigned long long)&jit_code_area[idx]);
1406
1407                         trace_unhandled_signal("segfault in unalign fixup",
1408                                                regs,
1409                                                (unsigned long)info.si_addr,
1410                                                SIGSEGV);
1411                         force_sig_info(info.si_signo, &info, current);
1412                         return;
1413                 }
1414
1415
1416                 /* Do a cheap, racy increment; the count need not be exact. */
1417                 unaligned_fixup_count++;
1418                 __flush_icache_range((unsigned long)&jit_code_area[idx],
1419                                      (unsigned long)&jit_code_area[idx] +
1420                                      sizeof(frag));
1421
1422                 /* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1423                 __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1424                 __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1425
1426                 /* Point pc at the start of the new JIT fragment. */
1427                 regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1428                 /* Set ICS in SPR_EX_CONTEXT_K_1. */
1429                 regs->ex1 = PL_ICS_EX1(USER_PL, 1);
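                /*
                 * Return path, roughly: the handler irets into the JIT
                 * fragment with ICS set; the fragment then uses
                 * SPR_EX_CONTEXT_0_0/1, loaded above with pc + 8 and the user
                 * protection level, for its own final iret back into the
                 * user's program.
                 */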
1430         }
1431 }
1432
1433
1434 /*
1435  * C function to generate the unaligned-data JIT. Called from the
1436  * unaligned-data interrupt handler.
1437  *
1438  * First check whether unaligned fixup is disabled, the exception did not
1439  * come from user space, or the sp register points to an unaligned address;
1440  * if so, generate a SIGBUS. Then map a page into user space as the JIT
1441  * area if it is not mapped yet, generate the JIT code by calling
1442  * jit_bundle_gen(), and return to the exception handler.
1443  *
1444  * The exception handler will "iret" to the newly generated JIT code after
1445  * restoring the caller-saved registers. In turn, the JIT code will perform
1446  * another "iret" to resume the user's program.
1447  */
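/*
 * Rough flow, as a sketch:
 *
 *   unaligned trap -> do_unaligned() -> jit_bundle_gen()
 *     -> the exception handler "iret"s into the JIT fragment (user mode)
 *     -> the fragment emulates the access and "iret"s to the next bundle
 */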
1448
1449 void do_unaligned(struct pt_regs *regs, int vecnum)
1450 {
1451         tilegx_bundle_bits __user  *pc;
1452         tilegx_bundle_bits bundle;
1453         struct thread_info *info = current_thread_info();
1454         int align_ctl;
1455
1456         /* Check the per-process unaligned fixup control (set via prctl). */
1457         align_ctl = unaligned_fixup;
1458         switch (task_thread_info(current)->align_ctl) {
1459         case PR_UNALIGN_NOPRINT:
1460                 align_ctl = 1;
1461                 break;
1462         case PR_UNALIGN_SIGBUS:
1463                 align_ctl = 0;
1464                 break;
1465         }
1466
1467         /* Enable interrupts in order to access userland. */
1468         local_irq_enable();
1469
1470         /*
1471          * If the fault came from kernel space, there are two choices:
1472          * (a) unaligned_fixup < 1: first apply the get_user/put_user
1473          *     exception fixup to return -EFAULT; if none, panic the kernel.
1474          * (b) unaligned_fixup >= 1: try to fix the unaligned access if it
1475          *     was triggered by the get_user()/put_user() macros; panic the
1476          *     kernel if it is not fixable.
1477          */
1478
1479         if (EX1_PL(regs->ex1) != USER_PL) {
1480
1481                 if (align_ctl < 1) {
1482                         unaligned_fixup_count++;
1483                         /* The exception came from the kernel; try to fix it up. */
1484                         if (fixup_exception(regs)) {
1485                                 if (unaligned_printk)
1486                                         pr_info("Unalign fixup: %d %llx @%llx\n",
1487                                                 (int)unaligned_fixup,
1488                                                 (unsigned long long)regs->ex1,
1489                                                 (unsigned long long)regs->pc);
1490                                 return;
1491                         }
1492                         /* Not fixable. Go panic. */
1493                         panic("Unalign exception in Kernel. pc=%lx",
1494                               regs->pc);
1495                         return;
1496                 } else {
1497                         /*
1498                          * Try to fix the exception. If we can't, panic the
1499                          * kernel.
1500                          */
1501                         bundle = GX_INSN_BSWAP(
1502                                 *((tilegx_bundle_bits *)(regs->pc)));
1503                         jit_bundle_gen(regs, bundle, align_ctl);
1504                         return;
1505                 }
1506         }
1507
1508         /*
1509          * If the fault came from user space with ICS set, the stack pointer
1510          * is not 8-byte aligned, or fixup is disabled, trigger SIGBUS.
1511          */
1512         if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1513                 siginfo_t info = {
1514                         .si_signo = SIGBUS,
1515                         .si_code = BUS_ADRALN,
1516                         .si_addr = (unsigned char __user *)0
1517                 };
1518
1519                 if (unaligned_printk)
1520                         pr_info("Unalign fixup: %d %llx @%llx\n",
1521                                 (int)unaligned_fixup,
1522                                 (unsigned long long)regs->ex1,
1523                                 (unsigned long long)regs->pc);
1524
1525                 unaligned_fixup_count++;
1526
1527                 trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1528                 force_sig_info(info.si_signo, &info, current);
1529                 return;
1530         }
1531
1532
1533         /* Read the bundle that caused the exception. */
1534         pc = (tilegx_bundle_bits __user *)(regs->pc);
1535         if (get_user(bundle, pc) != 0) {
1536                 /* We should never normally get here; pc is a valid user address. */
1537                 siginfo_t info = {
1538                         .si_signo = SIGSEGV,
1539                         .si_code = SEGV_MAPERR,
1540                         .si_addr = (void __user *)pc
1541                 };
1542                 pr_err("Couldn't read instruction at %p trying to step\n", pc);
1543                 trace_unhandled_signal("segfault in unalign fixup", regs,
1544                                        (unsigned long)info.si_addr, SIGSEGV);
1545                 force_sig_info(info.si_signo, &info, current);
1546                 return;
1547         }
1548
1549         if (!info->unalign_jit_base) {
1550                 void __user *user_page;
1551
1552                 /*
1553                  * Allocate a page in userland.
1554                  * For 64-bit processes we try to place the mapping far
1555                  * from anything else that might be going on (specifically
1556                  * 64 GB below the top of the user address space).  If it
1557                  * happens not to be possible to put it there, it's OK;
1558                  * the kernel will choose another location and we'll
1559                  * remember it for later.
1560                  */
1561                 if (is_compat_task())
1562                         user_page = NULL;
1563                 else
1564                         user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1565                                 (current->pid << PAGE_SHIFT);
1566
1567                 user_page = (void __user *) vm_mmap(NULL,
1568                                                     (unsigned long)user_page,
1569                                                     PAGE_SIZE,
1570                                                     PROT_EXEC | PROT_READ |
1571                                                     PROT_WRITE,
1572 #ifdef CONFIG_HOMECACHE
1573                                                     MAP_CACHE_HOME_TASK |
1574 #endif
1575                                                     MAP_PRIVATE |
1576                                                     MAP_ANONYMOUS,
1577                                                     0);
1578
1579                 if (IS_ERR((void __force *)user_page)) {
1580                         pr_err("Out of kernel pages trying do_mmap\n");
1581                         return;
1582                 }
1583
1584                 /* Save the address in the thread_info struct */
1585                 info->unalign_jit_base = user_page;
1586                 if (unaligned_printk)
1587                         pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1588                                 raw_smp_processor_id(), current->pid,
1589                                 (unsigned long long)user_page);
1590         }
1591
1592         /* Generate unalign JIT */
1593         jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1594 }
1595
1596 #endif /* __tilegx__ */