/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

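/*
 * VCPU statistics exported to user space via debugfs: exit reasons,
 * delivered interrupts and intercepted instructions (one file per entry).
 */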
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

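/*
 * Number of 64-bit words in the facility mask that KVM applies on top of
 * the host facility list; checked against the architected mask size.
 */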
unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

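/*
 * Register the gmap invalidation notifiers (regular and vSIE) and the
 * TOD-clock epoch delta notifier used for clock synchronization.
 */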
int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

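/*
 * Test whether PERFORM LOCKED OPERATION subfunction @nr is available.
 * Bit 0x100 selects the "test bit" form, so the parameter registers are
 * ignored; condition code 0 means the subfunction is installed.
 */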
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc = 3; /* subfunction not available */

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

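/*
 * Probe the host for available subfunctions (PLO, PTFF, CPACF) and for the
 * SIE features that can be offered to guests; vSIE-related features are
 * only allowed when the "nested" module parameter is set and the required
 * facilities are present.
 */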
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages being detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        default:
                r = 0;
        }
        return r;
}

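/*
 * Walk all pages of the memslot and propagate the dirty state from the
 * host page tables into the KVM dirty bitmap, rescheduling as needed.
 */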
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (test_and_clear_guest_dirty(gmap->mm, address))
                        mark_page_dirty(kvm, cur_gfn);
                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

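/* Request interception of operation exceptions on every vcpu. */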
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

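/*
 * The epoch extension (TOD high word) is accepted but must be zero; only
 * the low part of the TOD clock is programmable here.
 */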
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

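/*
 * Set the guest CPU model (cpuid, IBC and facility list). This is only
 * allowed before the first vcpu is created; the requested IBC value is
 * clamped to the range reported by the SCLP.
 */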
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;
        int ret = -EBUSY;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (!atomic_read(&kvm->online_vcpus)) {
                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                            KVM_S390_VM_CPU_FEAT_NR_BITS);
                ret = 0;
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

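/* Read guest storage keys into a user-supplied buffer. */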
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Is this guest using storage keys? */
        if (!mm_use_skey(current->mm))
                return KVM_S390_GET_SKEYS_NONE;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kmalloc_array(args->count, sizeof(uint8_t),
                             GFP_KERNEL | __GFP_NOWARN);
        if (!keys)
                keys = vmalloc(sizeof(uint8_t) * args->count);
        if (!keys)
                return -ENOMEM;

        down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                r = get_guest_storage_key(current->mm, hva, &keys[i]);
                if (r)
                        break;
        }
        up_read(&current->mm->mmap_sem);

        if (!r) {
                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
                                 sizeof(uint8_t) * args->count);
                if (r)
                        r = -EFAULT;
        }

        kvfree(keys);
        return r;
}

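/*
 * Write guest storage keys from a user-supplied buffer, enabling storage
 * key handling for the guest on first use.
 */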
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kmalloc_array(args->count, sizeof(uint8_t),
                             GFP_KERNEL | __GFP_NOWARN);
        if (!keys)
                keys = vmalloc(sizeof(uint8_t) * args->count);
        if (!keys)
                return -ENOMEM;

        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
                           sizeof(uint8_t) * args->count);
        if (r) {
                r = -EFAULT;
                goto out;
        }

        /* Enable storage key handling for the guest */
        r = s390_enable_skey();
        if (r)
                goto out;

        down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                /* Lowest order bit is reserved */
                if (keys[i] & 0x01) {
                        r = -EINVAL;
                        break;
                }

                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
                if (r)
                        break;
        }
        up_read(&current->mm->mmap_sem);
out:
        kvfree(keys);
        return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        struct kvm_device_attr attr;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        case KVM_ENABLE_CAP: {
                struct kvm_enable_cap cap;
                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
        case KVM_CREATE_IRQCHIP: {
                struct kvm_irq_routing_entry routing;

                r = -EINVAL;
                if (kvm->arch.use_irqchip) {
                        /* Set up dummy routing. */
                        memset(&routing, 0, sizeof(routing));
                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
                }
                break;
        }
        case KVM_SET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_set_attr(kvm, &attr);
                break;
        }
        case KVM_GET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_get_attr(kvm, &attr);
                break;
        }
        case KVM_HAS_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
        case KVM_S390_GET_SKEYS: {
                struct kvm_s390_skeys args;

                r = -EFAULT;
                if (copy_from_user(&args, argp,
                                   sizeof(struct kvm_s390_skeys)))
                        break;
                r = kvm_s390_get_skeys(kvm, &args);
                break;
        }
        case KVM_S390_SET_SKEYS: {
                struct kvm_s390_skeys args;

                r = -EFAULT;
                if (copy_from_user(&args, argp,
                                   sizeof(struct kvm_s390_skeys)))
                        break;
                r = kvm_s390_set_skeys(kvm, &args);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}

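/*
 * Issue PQAP(QCI) to retrieve the 128-byte AP configuration info block;
 * returns the condition code of the instruction.
 */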
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}

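/* APXA support is indicated in the QCI info block (byte 0, mask 0x40). */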
static int kvm_s390_apxa_installed(void)
{
        u8 config[128];
        int cc;

        if (test_facility(12)) {
                cc = kvm_s390_query_ap_config(config);

                if (cc)
                        pr_err("PQAP(QCI) failed with cc=%d", cc);
                else
                        return config[0] & 0x40;
        }

        return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

        if (kvm_s390_apxa_installed())
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
        else
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

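/* Default guest cpuid: the host cpuid with the version field forced to 0xff. */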
static u64 kvm_s390_get_initial_cpuid(void)
{
        struct cpuid cpuid;

        get_cpu_id(&cpuid);
        cpuid.version = 0xff;
        return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
        if (!test_kvm_facility(kvm, 76))
                return;

        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
        kvm_s390_set_crycb_format(kvm);

        /* Enable AES/DEA protected key functions by default */
        kvm->arch.crypto.aes_kw = 1;
        kvm->arch.crypto.dea_kw = 1;
        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

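/* Free the SCA backing the VM; basic and extended SCAs are allocated differently. */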
1377 static void sca_dispose(struct kvm *kvm)
1378 {
1379         if (kvm->arch.use_esca)
1380                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1381         else
1382                 free_page((unsigned long)(kvm->arch.sca));
1383         kvm->arch.sca = NULL;
1384 }
1385
1386 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1387 {
1388         gfp_t alloc_flags = GFP_KERNEL;
1389         int i, rc;
1390         char debug_name[16];
1391         static unsigned long sca_offset;
1392
1393         rc = -EINVAL;
1394 #ifdef CONFIG_KVM_S390_UCONTROL
1395         if (type & ~KVM_VM_S390_UCONTROL)
1396                 goto out_err;
1397         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1398                 goto out_err;
1399 #else
1400         if (type)
1401                 goto out_err;
1402 #endif
1403
1404         rc = s390_enable_sie();
1405         if (rc)
1406                 goto out_err;
1407
1408         rc = -ENOMEM;
1409
1410         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1411
1412         kvm->arch.use_esca = 0; /* start with basic SCA */
1413         if (!sclp.has_64bscao)
1414                 alloc_flags |= GFP_DMA;
1415         rwlock_init(&kvm->arch.sca_lock);
1416         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1417         if (!kvm->arch.sca)
1418                 goto out_err;
1419         spin_lock(&kvm_lock);
1420         sca_offset += 16;
1421         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1422                 sca_offset = 0;
1423         kvm->arch.sca = (struct bsca_block *)
1424                         ((char *) kvm->arch.sca + sca_offset);
1425         spin_unlock(&kvm_lock);
1426
1427         sprintf(debug_name, "kvm-%u", current->pid);
1428
1429         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1430         if (!kvm->arch.dbf)
1431                 goto out_err;
1432
1433         kvm->arch.sie_page2 =
1434              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1435         if (!kvm->arch.sie_page2)
1436                 goto out_err;
1437
1438         /* Populate the facility mask initially. */
1439         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1440                S390_ARCH_FAC_LIST_SIZE_BYTE);
1441         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1442                 if (i < kvm_s390_fac_list_mask_size())
1443                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1444                 else
1445                         kvm->arch.model.fac_mask[i] = 0UL;
1446         }
1447
1448         /* Populate the facility list initially. */
1449         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1450         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1451                S390_ARCH_FAC_LIST_SIZE_BYTE);
1452
1453         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1454         set_kvm_facility(kvm->arch.model.fac_list, 74);
1455
1456         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1457         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1458
1459         kvm_s390_crypto_init(kvm);
1460
1461         spin_lock_init(&kvm->arch.float_int.lock);
1462         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1463                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1464         init_waitqueue_head(&kvm->arch.ipte_wq);
1465         mutex_init(&kvm->arch.ipte_mutex);
1466
1467         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1468         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1469
1470         if (type & KVM_VM_S390_UCONTROL) {
1471                 kvm->arch.gmap = NULL;
1472                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1473         } else {
1474                 if (sclp.hamax == U64_MAX)
1475                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1476                 else
1477                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1478                                                     sclp.hamax + 1);
1479                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1480                 if (!kvm->arch.gmap)
1481                         goto out_err;
1482                 kvm->arch.gmap->private = kvm;
1483                 kvm->arch.gmap->pfault_enabled = 0;
1484         }
1485
1486         kvm->arch.css_support = 0;
1487         kvm->arch.use_irqchip = 0;
1488         kvm->arch.epoch = 0;
1489
1490         spin_lock_init(&kvm->arch.start_stop_lock);
1491         kvm_s390_vsie_init(kvm);
1492         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1493
1494         return 0;
1495 out_err:
1496         free_page((unsigned long)kvm->arch.sie_page2);
1497         debug_unregister(kvm->arch.dbf);
1498         sca_dispose(kvm);
1499         KVM_EVENT(3, "creation of vm failed: %d", rc);
1500         return rc;
1501 }
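
/*
 * Editorial note (not part of the original source): a minimal userspace
 * sketch of the path that ends up in this function.  KVM_CREATE_VM carries
 * the "type" argument checked above; on s390 it is either 0 or, with
 * CONFIG_KVM_S390_UCONTROL and CAP_SYS_ADMIN, KVM_VM_S390_UCONTROL.
 * Error handling omitted.
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
 *	int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *	// or: ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
 */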
1502
1503 bool kvm_arch_has_vcpu_debugfs(void)
1504 {
1505         return false;
1506 }
1507
1508 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1509 {
1510         return 0;
1511 }
1512
1513 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1514 {
1515         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1516         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1517         kvm_s390_clear_local_irqs(vcpu);
1518         kvm_clear_async_pf_completion_queue(vcpu);
1519         if (!kvm_is_ucontrol(vcpu->kvm))
1520                 sca_del_vcpu(vcpu);
1521
1522         if (kvm_is_ucontrol(vcpu->kvm))
1523                 gmap_remove(vcpu->arch.gmap);
1524
1525         if (vcpu->kvm->arch.use_cmma)
1526                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1527         free_page((unsigned long)(vcpu->arch.sie_block));
1528
1529         kvm_vcpu_uninit(vcpu);
1530         kmem_cache_free(kvm_vcpu_cache, vcpu);
1531 }
1532
1533 static void kvm_free_vcpus(struct kvm *kvm)
1534 {
1535         unsigned int i;
1536         struct kvm_vcpu *vcpu;
1537
1538         kvm_for_each_vcpu(i, vcpu, kvm)
1539                 kvm_arch_vcpu_destroy(vcpu);
1540
1541         mutex_lock(&kvm->lock);
1542         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1543                 kvm->vcpus[i] = NULL;
1544
1545         atomic_set(&kvm->online_vcpus, 0);
1546         mutex_unlock(&kvm->lock);
1547 }
1548
1549 void kvm_arch_destroy_vm(struct kvm *kvm)
1550 {
1551         kvm_free_vcpus(kvm);
1552         sca_dispose(kvm);
1553         debug_unregister(kvm->arch.dbf);
1554         free_page((unsigned long)kvm->arch.sie_page2);
1555         if (!kvm_is_ucontrol(kvm))
1556                 gmap_remove(kvm->arch.gmap);
1557         kvm_s390_destroy_adapters(kvm);
1558         kvm_s390_clear_float_irqs(kvm);
1559         kvm_s390_vsie_destroy(kvm);
1560         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1561 }
1562
1563 /* Section: vcpu related */
1564 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1565 {
1566         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1567         if (!vcpu->arch.gmap)
1568                 return -ENOMEM;
1569         vcpu->arch.gmap->private = vcpu->kvm;
1570
1571         return 0;
1572 }
1573
1574 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1575 {
1576         if (!kvm_s390_use_sca_entries())
1577                 return;
1578         read_lock(&vcpu->kvm->arch.sca_lock);
1579         if (vcpu->kvm->arch.use_esca) {
1580                 struct esca_block *sca = vcpu->kvm->arch.sca;
1581
1582                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1583                 sca->cpu[vcpu->vcpu_id].sda = 0;
1584         } else {
1585                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1586
1587                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1588                 sca->cpu[vcpu->vcpu_id].sda = 0;
1589         }
1590         read_unlock(&vcpu->kvm->arch.sca_lock);
1591 }
1592
1593 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1594 {
1595         if (!kvm_s390_use_sca_entries()) {
1596                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1597
1598                 /* we still need the basic sca for the ipte control */
1599                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1600                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1601         }
1602         read_lock(&vcpu->kvm->arch.sca_lock);
1603         if (vcpu->kvm->arch.use_esca) {
1604                 struct esca_block *sca = vcpu->kvm->arch.sca;
1605
1606                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1607                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1608                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1609                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1610                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1611         } else {
1612                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1613
1614                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1615                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1616                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1618         }
1619         read_unlock(&vcpu->kvm->arch.sca_lock);
1620 }
1621
1622 /* Basic SCA to Extended SCA data copy routines */
1623 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1624 {
1625         d->sda = s->sda;
1626         d->sigp_ctrl.c = s->sigp_ctrl.c;
1627         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1628 }
1629
1630 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1631 {
1632         int i;
1633
1634         d->ipte_control = s->ipte_control;
1635         d->mcn[0] = s->mcn;
1636         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1637                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1638 }
1639
1640 static int sca_switch_to_extended(struct kvm *kvm)
1641 {
1642         struct bsca_block *old_sca = kvm->arch.sca;
1643         struct esca_block *new_sca;
1644         struct kvm_vcpu *vcpu;
1645         unsigned int vcpu_idx;
1646         u32 scaol, scaoh;
1647
1648         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1649         if (!new_sca)
1650                 return -ENOMEM;
1651
1652         scaoh = (u32)((u64)(new_sca) >> 32);
1653         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1654
1655         kvm_s390_vcpu_block_all(kvm);
1656         write_lock(&kvm->arch.sca_lock);
1657
1658         sca_copy_b_to_e(new_sca, old_sca);
1659
1660         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1661                 vcpu->arch.sie_block->scaoh = scaoh;
1662                 vcpu->arch.sie_block->scaol = scaol;
1663                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1664         }
1665         kvm->arch.sca = new_sca;
1666         kvm->arch.use_esca = 1;
1667
1668         write_unlock(&kvm->arch.sca_lock);
1669         kvm_s390_vcpu_unblock_all(kvm);
1670
1671         free_page((unsigned long)old_sca);
1672
1673         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1674                  old_sca, kvm->arch.sca);
1675         return 0;
1676 }
1677
1678 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1679 {
1680         int rc;
1681
1682         if (!kvm_s390_use_sca_entries()) {
1683                 if (id < KVM_MAX_VCPUS)
1684                         return true;
1685                 return false;
1686         }
1687         if (id < KVM_S390_BSCA_CPU_SLOTS)
1688                 return true;
1689         if (!sclp.has_esca || !sclp.has_64bscao)
1690                 return false;
1691
1692         mutex_lock(&kvm->lock);
1693         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1694         mutex_unlock(&kvm->lock);
1695
1696         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1697 }
1698
1699 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1700 {
1701         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1702         kvm_clear_async_pf_completion_queue(vcpu);
1703         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1704                                     KVM_SYNC_GPRS |
1705                                     KVM_SYNC_ACRS |
1706                                     KVM_SYNC_CRS |
1707                                     KVM_SYNC_ARCH0 |
1708                                     KVM_SYNC_PFAULT;
1709         kvm_s390_set_prefix(vcpu, 0);
1710         if (test_kvm_facility(vcpu->kvm, 64))
1711                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1712         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1713          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1714          */
1715         if (MACHINE_HAS_VX)
1716                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1717         else
1718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1719
1720         if (kvm_is_ucontrol(vcpu->kvm))
1721                 return __kvm_ucontrol_vcpu_init(vcpu);
1722
1723         return 0;
1724 }
1725
1726 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1727 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1728 {
1729         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1730         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1731         vcpu->arch.cputm_start = get_tod_clock_fast();
1732         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1733 }
1734
1735 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1736 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 {
1738         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1739         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1740         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1741         vcpu->arch.cputm_start = 0;
1742         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1743 }
1744
1745 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1746 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1747 {
1748         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1749         vcpu->arch.cputm_enabled = true;
1750         __start_cpu_timer_accounting(vcpu);
1751 }
1752
1753 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1754 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1755 {
1756         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1757         __stop_cpu_timer_accounting(vcpu);
1758         vcpu->arch.cputm_enabled = false;
1759 }
1760
1761 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1762 {
1763         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1764         __enable_cpu_timer_accounting(vcpu);
1765         preempt_enable();
1766 }
1767
1768 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1769 {
1770         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1771         __disable_cpu_timer_accounting(vcpu);
1772         preempt_enable();
1773 }
1774
1775 /* set the cpu timer - may only be called from the VCPU thread itself */
1776 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1777 {
1778         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1779         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1780         if (vcpu->arch.cputm_enabled)
1781                 vcpu->arch.cputm_start = get_tod_clock_fast();
1782         vcpu->arch.sie_block->cputm = cputm;
1783         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1784         preempt_enable();
1785 }
1786
1787 /* update and get the cpu timer - can also be called from other VCPU threads */
1788 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1789 {
1790         unsigned int seq;
1791         __u64 value;
1792
1793         if (unlikely(!vcpu->arch.cputm_enabled))
1794                 return vcpu->arch.sie_block->cputm;
1795
1796         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1797         do {
1798                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1799                 /*
1800                  * If the writer would ever execute a read in the critical
1801                  * section, e.g. in irq context, we have a deadlock.
1802                  */
1803                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1804                 value = vcpu->arch.sie_block->cputm;
1805                 /* if cputm_start is 0, accounting is being started/stopped */
1806                 if (likely(vcpu->arch.cputm_start))
1807                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1808         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1809         preempt_enable();
1810         return value;
1811 }
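
/*
 * Editorial note: the seqcount usage above, in isolation.  The VCPU thread
 * (the only writer) brackets cputm updates with raw_write_seqcount_begin()
 * and raw_write_seqcount_end(), which makes the sequence odd while an
 * update is in flight; readers on other threads sample the sequence, read
 * cputm/cputm_start, and retry via read_seqcount_retry() if it changed.
 * A stripped-down reader sketch, where sc stands for the protecting
 * seqcount_t and sample_shared_state() for the actual reads:
 *
 *	unsigned int seq;
 *	__u64 value;
 *	do {
 *		seq = raw_read_seqcount(&sc);
 *		value = sample_shared_state();
 *	} while (read_seqcount_retry(&sc, seq & ~1));
 */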
1812
1813 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1814 {
1815         /* Save host register state */
1816         save_fpu_regs();
1817         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1818         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1819
1820         if (MACHINE_HAS_VX)
1821                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1822         else
1823                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1824         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1825         if (test_fp_ctl(current->thread.fpu.fpc))
1826                 /* User space provided an invalid FPC, let's clear it */
1827                 current->thread.fpu.fpc = 0;
1828
1829         save_access_regs(vcpu->arch.host_acrs);
1830         restore_access_regs(vcpu->run->s.regs.acrs);
1831         gmap_enable(vcpu->arch.enabled_gmap);
1832         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1833         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1834                 __start_cpu_timer_accounting(vcpu);
1835         vcpu->cpu = cpu;
1836 }
1837
1838 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1839 {
1840         vcpu->cpu = -1;
1841         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1842                 __stop_cpu_timer_accounting(vcpu);
1843         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1844         vcpu->arch.enabled_gmap = gmap_get_enabled();
1845         gmap_disable(vcpu->arch.enabled_gmap);
1846
1847         /* Save guest register state */
1848         save_fpu_regs();
1849         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1850
1851         /* Restore host register state */
1852         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1853         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1854
1855         save_access_regs(vcpu->run->s.regs.acrs);
1856         restore_access_regs(vcpu->arch.host_acrs);
1857 }
1858
1859 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1860 {
1861         /* this equals the initial cpu reset in the POP, but we don't switch to ESA */
1862         vcpu->arch.sie_block->gpsw.mask = 0UL;
1863         vcpu->arch.sie_block->gpsw.addr = 0UL;
1864         kvm_s390_set_prefix(vcpu, 0);
1865         kvm_s390_set_cpu_timer(vcpu, 0);
1866         vcpu->arch.sie_block->ckc       = 0UL;
1867         vcpu->arch.sie_block->todpr     = 0;
1868         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1869         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1870         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1871         /* make sure the new fpc will be lazily loaded */
1872         save_fpu_regs();
1873         current->thread.fpu.fpc = 0;
1874         vcpu->arch.sie_block->gbea = 1;
1875         vcpu->arch.sie_block->pp = 0;
1876         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1877         kvm_clear_async_pf_completion_queue(vcpu);
1878         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1879                 kvm_s390_vcpu_stop(vcpu);
1880         kvm_s390_clear_local_irqs(vcpu);
1881 }
1882
1883 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1884 {
1885         mutex_lock(&vcpu->kvm->lock);
1886         preempt_disable();
1887         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1888         preempt_enable();
1889         mutex_unlock(&vcpu->kvm->lock);
1890         if (!kvm_is_ucontrol(vcpu->kvm)) {
1891                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1892                 sca_add_vcpu(vcpu);
1893         }
1894         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1895                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1896         /* make vcpu_load load the right gmap on the first trigger */
1897         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1898 }
1899
1900 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1901 {
1902         if (!test_kvm_facility(vcpu->kvm, 76))
1903                 return;
1904
1905         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1906
1907         if (vcpu->kvm->arch.crypto.aes_kw)
1908                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1909         if (vcpu->kvm->arch.crypto.dea_kw)
1910                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1911
1912         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1913 }
1914
1915 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1916 {
1917         free_page(vcpu->arch.sie_block->cbrlo);
1918         vcpu->arch.sie_block->cbrlo = 0;
1919 }
1920
1921 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1922 {
1923         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1924         if (!vcpu->arch.sie_block->cbrlo)
1925                 return -ENOMEM;
1926
1927         vcpu->arch.sie_block->ecb2 |= 0x80;
1928         vcpu->arch.sie_block->ecb2 &= ~0x08;
1929         return 0;
1930 }
1931
1932 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1933 {
1934         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1935
1936         vcpu->arch.sie_block->ibc = model->ibc;
1937         if (test_kvm_facility(vcpu->kvm, 7))
1938                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1939 }
1940
1941 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1942 {
1943         int rc = 0;
1944
1945         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1946                                                     CPUSTAT_SM |
1947                                                     CPUSTAT_STOPPED);
1948
1949         if (test_kvm_facility(vcpu->kvm, 78))
1950                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1951         else if (test_kvm_facility(vcpu->kvm, 8))
1952                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1953
1954         kvm_s390_vcpu_setup_model(vcpu);
1955
1956         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1957         if (MACHINE_HAS_ESOP)
1958                 vcpu->arch.sie_block->ecb |= 0x02;
1959         if (test_kvm_facility(vcpu->kvm, 9))
1960                 vcpu->arch.sie_block->ecb |= 0x04;
1961         if (test_kvm_facility(vcpu->kvm, 73))
1962                 vcpu->arch.sie_block->ecb |= 0x10;
1963
1964         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1965                 vcpu->arch.sie_block->ecb2 |= 0x08;
1966         vcpu->arch.sie_block->eca = 0x1002000U;
1967         if (sclp.has_cei)
1968                 vcpu->arch.sie_block->eca |= 0x80000000U;
1969         if (sclp.has_ib)
1970                 vcpu->arch.sie_block->eca |= 0x40000000U;
1971         if (sclp.has_siif)
1972                 vcpu->arch.sie_block->eca |= 1;
1973         if (sclp.has_sigpif)
1974                 vcpu->arch.sie_block->eca |= 0x10000000U;
1975         if (test_kvm_facility(vcpu->kvm, 129)) {
1976                 vcpu->arch.sie_block->eca |= 0x00020000;
1977                 vcpu->arch.sie_block->ecd |= 0x20000000;
1978         }
1979         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1980         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1981
1982         if (vcpu->kvm->arch.use_cmma) {
1983                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1984                 if (rc)
1985                         return rc;
1986         }
1987         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1988         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1989
1990         kvm_s390_vcpu_crypto_setup(vcpu);
1991
1992         return rc;
1993 }
1994
1995 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1996                                       unsigned int id)
1997 {
1998         struct kvm_vcpu *vcpu;
1999         struct sie_page *sie_page;
2000         int rc = -EINVAL;
2001
2002         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2003                 goto out;
2004
2005         rc = -ENOMEM;
2006
2007         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2008         if (!vcpu)
2009                 goto out;
2010
2011         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2012         if (!sie_page)
2013                 goto out_free_cpu;
2014
2015         vcpu->arch.sie_block = &sie_page->sie_block;
2016         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2017
2018         /* the real guest size will always be smaller than msl */
2019         vcpu->arch.sie_block->mso = 0;
2020         vcpu->arch.sie_block->msl = sclp.hamax;
2021
2022         vcpu->arch.sie_block->icpua = id;
2023         spin_lock_init(&vcpu->arch.local_int.lock);
2024         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2025         vcpu->arch.local_int.wq = &vcpu->wq;
2026         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2027         seqcount_init(&vcpu->arch.cputm_seqcount);
2028
2029         rc = kvm_vcpu_init(vcpu, kvm, id);
2030         if (rc)
2031                 goto out_free_sie_block;
2032         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2033                  vcpu->arch.sie_block);
2034         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2035
2036         return vcpu;
2037 out_free_sie_block:
2038         free_page((unsigned long)(vcpu->arch.sie_block));
2039 out_free_cpu:
2040         kmem_cache_free(kvm_vcpu_cache, vcpu);
2041 out:
2042         return ERR_PTR(rc);
2043 }
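
/*
 * Editorial note: userspace reaches this function through the
 * KVM_CREATE_VCPU ioctl on the VM file descriptor; the last argument is
 * the vcpu id validated by sca_can_add_vcpu() above.  Illustrative call
 * (error handling omitted):
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 */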
2044
2045 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2046 {
2047         return kvm_s390_vcpu_has_irq(vcpu, 0);
2048 }
2049
2050 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2051 {
2052         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2053         exit_sie(vcpu);
2054 }
2055
2056 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2057 {
2058         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2059 }
2060
2061 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2062 {
2063         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2064         exit_sie(vcpu);
2065 }
2066
2067 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2068 {
2069         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2070 }
2071
2072 /*
2073  * Kick a guest cpu out of SIE and wait until SIE is not running.
2074  * If the CPU is not running (e.g. waiting as idle) the function will
2075  * return immediately. */
2076 void exit_sie(struct kvm_vcpu *vcpu)
2077 {
2078         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2079         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2080                 cpu_relax();
2081 }
2082
2083 /* Kick a guest cpu out of SIE to process a request synchronously */
2084 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2085 {
2086         kvm_make_request(req, vcpu);
2087         kvm_s390_vcpu_request(vcpu);
2088 }
2089
2090 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2091                               unsigned long end)
2092 {
2093         struct kvm *kvm = gmap->private;
2094         struct kvm_vcpu *vcpu;
2095         unsigned long prefix;
2096         int i;
2097
2098         if (gmap_is_shadow(gmap))
2099                 return;
2100         if (start >= 1UL << 31)
2101                 /* We are only interested in prefix pages */
2102                 return;
2103         kvm_for_each_vcpu(i, vcpu, kvm) {
2104                 /* match against both prefix pages */
2105                 prefix = kvm_s390_get_prefix(vcpu);
2106                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2107                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2108                                    start, end);
2109                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2110                 }
2111         }
2112 }
2113
2114 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2115 {
2116         /* kvm common code refers to this, but never calls it */
2117         BUG();
2118         return 0;
2119 }
2120
2121 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2122                                            struct kvm_one_reg *reg)
2123 {
2124         int r = -EINVAL;
2125
2126         switch (reg->id) {
2127         case KVM_REG_S390_TODPR:
2128                 r = put_user(vcpu->arch.sie_block->todpr,
2129                              (u32 __user *)reg->addr);
2130                 break;
2131         case KVM_REG_S390_EPOCHDIFF:
2132                 r = put_user(vcpu->arch.sie_block->epoch,
2133                              (u64 __user *)reg->addr);
2134                 break;
2135         case KVM_REG_S390_CPU_TIMER:
2136                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2137                              (u64 __user *)reg->addr);
2138                 break;
2139         case KVM_REG_S390_CLOCK_COMP:
2140                 r = put_user(vcpu->arch.sie_block->ckc,
2141                              (u64 __user *)reg->addr);
2142                 break;
2143         case KVM_REG_S390_PFTOKEN:
2144                 r = put_user(vcpu->arch.pfault_token,
2145                              (u64 __user *)reg->addr);
2146                 break;
2147         case KVM_REG_S390_PFCOMPARE:
2148                 r = put_user(vcpu->arch.pfault_compare,
2149                              (u64 __user *)reg->addr);
2150                 break;
2151         case KVM_REG_S390_PFSELECT:
2152                 r = put_user(vcpu->arch.pfault_select,
2153                              (u64 __user *)reg->addr);
2154                 break;
2155         case KVM_REG_S390_PP:
2156                 r = put_user(vcpu->arch.sie_block->pp,
2157                              (u64 __user *)reg->addr);
2158                 break;
2159         case KVM_REG_S390_GBEA:
2160                 r = put_user(vcpu->arch.sie_block->gbea,
2161                              (u64 __user *)reg->addr);
2162                 break;
2163         default:
2164                 break;
2165         }
2166
2167         return r;
2168 }
2169
2170 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2171                                            struct kvm_one_reg *reg)
2172 {
2173         int r = -EINVAL;
2174         __u64 val;
2175
2176         switch (reg->id) {
2177         case KVM_REG_S390_TODPR:
2178                 r = get_user(vcpu->arch.sie_block->todpr,
2179                              (u32 __user *)reg->addr);
2180                 break;
2181         case KVM_REG_S390_EPOCHDIFF:
2182                 r = get_user(vcpu->arch.sie_block->epoch,
2183                              (u64 __user *)reg->addr);
2184                 break;
2185         case KVM_REG_S390_CPU_TIMER:
2186                 r = get_user(val, (u64 __user *)reg->addr);
2187                 if (!r)
2188                         kvm_s390_set_cpu_timer(vcpu, val);
2189                 break;
2190         case KVM_REG_S390_CLOCK_COMP:
2191                 r = get_user(vcpu->arch.sie_block->ckc,
2192                              (u64 __user *)reg->addr);
2193                 break;
2194         case KVM_REG_S390_PFTOKEN:
2195                 r = get_user(vcpu->arch.pfault_token,
2196                              (u64 __user *)reg->addr);
2197                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2198                         kvm_clear_async_pf_completion_queue(vcpu);
2199                 break;
2200         case KVM_REG_S390_PFCOMPARE:
2201                 r = get_user(vcpu->arch.pfault_compare,
2202                              (u64 __user *)reg->addr);
2203                 break;
2204         case KVM_REG_S390_PFSELECT:
2205                 r = get_user(vcpu->arch.pfault_select,
2206                              (u64 __user *)reg->addr);
2207                 break;
2208         case KVM_REG_S390_PP:
2209                 r = get_user(vcpu->arch.sie_block->pp,
2210                              (u64 __user *)reg->addr);
2211                 break;
2212         case KVM_REG_S390_GBEA:
2213                 r = get_user(vcpu->arch.sie_block->gbea,
2214                              (u64 __user *)reg->addr);
2215                 break;
2216         default:
2217                 break;
2218         }
2219
2220         return r;
2221 }
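
/*
 * Editorial note: the two handlers above back the generic KVM_GET_ONE_REG
 * and KVM_SET_ONE_REG vcpu ioctls.  An illustrative userspace read of the
 * guest CPU timer (assumes a valid vcpu_fd):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */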
2222
2223 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2224 {
2225         kvm_s390_vcpu_initial_reset(vcpu);
2226         return 0;
2227 }
2228
2229 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2230 {
2231         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2232         return 0;
2233 }
2234
2235 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2236 {
2237         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2238         return 0;
2239 }
2240
2241 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2242                                   struct kvm_sregs *sregs)
2243 {
2244         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2245         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2246         restore_access_regs(vcpu->run->s.regs.acrs);
2247         return 0;
2248 }
2249
2250 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2251                                   struct kvm_sregs *sregs)
2252 {
2253         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2254         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2255         return 0;
2256 }
2257
2258 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2259 {
2260         /* make sure the new values will be lazily loaded */
2261         save_fpu_regs();
2262         if (test_fp_ctl(fpu->fpc))
2263                 return -EINVAL;
2264         current->thread.fpu.fpc = fpu->fpc;
2265         if (MACHINE_HAS_VX)
2266                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2267                                  (freg_t *) fpu->fprs);
2268         else
2269                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2270         return 0;
2271 }
2272
2273 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2274 {
2275         /* make sure we have the latest values */
2276         save_fpu_regs();
2277         if (MACHINE_HAS_VX)
2278                 convert_vx_to_fp((freg_t *) fpu->fprs,
2279                                  (__vector128 *) vcpu->run->s.regs.vrs);
2280         else
2281                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2282         fpu->fpc = current->thread.fpu.fpc;
2283         return 0;
2284 }
2285
2286 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2287 {
2288         int rc = 0;
2289
2290         if (!is_vcpu_stopped(vcpu))
2291                 rc = -EBUSY;
2292         else {
2293                 vcpu->run->psw_mask = psw.mask;
2294                 vcpu->run->psw_addr = psw.addr;
2295         }
2296         return rc;
2297 }
2298
2299 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2300                                   struct kvm_translation *tr)
2301 {
2302         return -EINVAL; /* not implemented yet */
2303 }
2304
2305 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2306                               KVM_GUESTDBG_USE_HW_BP | \
2307                               KVM_GUESTDBG_ENABLE)
2308
2309 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2310                                         struct kvm_guest_debug *dbg)
2311 {
2312         int rc = 0;
2313
2314         vcpu->guest_debug = 0;
2315         kvm_s390_clear_bp_data(vcpu);
2316
2317         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2318                 return -EINVAL;
2319         if (!sclp.has_gpere)
2320                 return -EINVAL;
2321
2322         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2323                 vcpu->guest_debug = dbg->control;
2324                 /* enforce guest PER */
2325                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2326
2327                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2328                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2329         } else {
2330                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2331                 vcpu->arch.guestdbg.last_bp = 0;
2332         }
2333
2334         if (rc) {
2335                 vcpu->guest_debug = 0;
2336                 kvm_s390_clear_bp_data(vcpu);
2337                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2338         }
2339
2340         return rc;
2341 }
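
/*
 * Editorial note: an illustrative userspace use of the handler above,
 * enabling guest PER single-stepping.  Only the flags listed in
 * VALID_GUESTDBG_FLAGS are accepted:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */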
2342
2343 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2344                                     struct kvm_mp_state *mp_state)
2345 {
2346         /* CHECK_STOP and LOAD are not supported yet */
2347         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2348                                        KVM_MP_STATE_OPERATING;
2349 }
2350
2351 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2352                                     struct kvm_mp_state *mp_state)
2353 {
2354         int rc = 0;
2355
2356         /* user space knows about this interface - let it control the state */
2357         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2358
2359         switch (mp_state->mp_state) {
2360         case KVM_MP_STATE_STOPPED:
2361                 kvm_s390_vcpu_stop(vcpu);
2362                 break;
2363         case KVM_MP_STATE_OPERATING:
2364                 kvm_s390_vcpu_start(vcpu);
2365                 break;
2366         case KVM_MP_STATE_LOAD:
2367         case KVM_MP_STATE_CHECK_STOP:
2368                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2369         default:
2370                 rc = -ENXIO;
2371         }
2372
2373         return rc;
2374 }
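
/*
 * Editorial note: the MP state handlers above are driven from userspace
 * via KVM_GET_MP_STATE / KVM_SET_MP_STATE; using them also switches the
 * VM to user-controlled CPU state handling.  Illustrative stop request:
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */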
2375
2376 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2377 {
2378         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2379 }
2380
2381 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2382 {
2383 retry:
2384         kvm_s390_vcpu_request_handled(vcpu);
2385         if (!vcpu->requests)
2386                 return 0;
2387         /*
2388          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2389          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2390          * This ensures that the ipte instruction for this request has
2391          * already finished. We might race against a second unmapper that
2392          * wants to set the blocking bit. Let's just retry the request loop.
2393          */
2394         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2395                 int rc;
2396                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2397                                           kvm_s390_get_prefix(vcpu),
2398                                           PAGE_SIZE * 2, PROT_WRITE);
2399                 if (rc) {
2400                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2401                         return rc;
2402                 }
2403                 goto retry;
2404         }
2405
2406         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2407                 vcpu->arch.sie_block->ihcpu = 0xffff;
2408                 goto retry;
2409         }
2410
2411         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2412                 if (!ibs_enabled(vcpu)) {
2413                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2414                         atomic_or(CPUSTAT_IBS,
2415                                         &vcpu->arch.sie_block->cpuflags);
2416                 }
2417                 goto retry;
2418         }
2419
2420         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2421                 if (ibs_enabled(vcpu)) {
2422                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2423                         atomic_andnot(CPUSTAT_IBS,
2424                                           &vcpu->arch.sie_block->cpuflags);
2425                 }
2426                 goto retry;
2427         }
2428
2429         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2430                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2431                 goto retry;
2432         }
2433
2434         /* nothing to do, just clear the request */
2435         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2436
2437         return 0;
2438 }
2439
2440 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2441 {
2442         struct kvm_vcpu *vcpu;
2443         int i;
2444
2445         mutex_lock(&kvm->lock);
2446         preempt_disable();
2447         kvm->arch.epoch = tod - get_tod_clock();
2448         kvm_s390_vcpu_block_all(kvm);
2449         kvm_for_each_vcpu(i, vcpu, kvm)
2450                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2451         kvm_s390_vcpu_unblock_all(kvm);
2452         preempt_enable();
2453         mutex_unlock(&kvm->lock);
2454 }
2455
2456 /**
2457  * kvm_arch_fault_in_page - fault-in guest page if necessary
2458  * @vcpu: The corresponding virtual cpu
2459  * @gpa: Guest physical address
2460  * @writable: Whether the page should be writable or not
2461  *
2462  * Make sure that a guest page has been faulted-in on the host.
2463  *
2464  * Return: Zero on success, negative error code otherwise.
2465  */
2466 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2467 {
2468         return gmap_fault(vcpu->arch.gmap, gpa,
2469                           writable ? FAULT_FLAG_WRITE : 0);
2470 }
2471
2472 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2473                                       unsigned long token)
2474 {
2475         struct kvm_s390_interrupt inti;
2476         struct kvm_s390_irq irq;
2477
2478         if (start_token) {
2479                 irq.u.ext.ext_params2 = token;
2480                 irq.type = KVM_S390_INT_PFAULT_INIT;
2481                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2482         } else {
2483                 inti.type = KVM_S390_INT_PFAULT_DONE;
2484                 inti.parm64 = token;
2485                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2486         }
2487 }
2488
2489 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2490                                      struct kvm_async_pf *work)
2491 {
2492         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2493         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2494 }
2495
2496 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2497                                  struct kvm_async_pf *work)
2498 {
2499         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2500         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2501 }
2502
2503 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2504                                struct kvm_async_pf *work)
2505 {
2506         /* s390 will always inject the page directly */
2507 }
2508
2509 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2510 {
2511         /*
2512          * s390 will always inject the page directly,
2513          * but we still want check_async_completion to clean up
2514          */
2515         return true;
2516 }
2517
2518 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2519 {
2520         hva_t hva;
2521         struct kvm_arch_async_pf arch;
2522         int rc;
2523
2524         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2525                 return 0;
2526         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2527             vcpu->arch.pfault_compare)
2528                 return 0;
2529         if (psw_extint_disabled(vcpu))
2530                 return 0;
2531         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2532                 return 0;
2533         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2534                 return 0;
2535         if (!vcpu->arch.gmap->pfault_enabled)
2536                 return 0;
2537
2538         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2539         hva += current->thread.gmap_addr & ~PAGE_MASK;
2540         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2541                 return 0;
2542
2543         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2544         return rc;
2545 }
2546
2547 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2548 {
2549         int rc, cpuflags;
2550
2551         /*
2552          * On s390 notifications for arriving pages will be delivered directly
2553          * to the guest but the housekeeping for completed pfaults is
2554          * handled outside the worker.
2555          */
2556         kvm_check_async_pf_completion(vcpu);
2557
2558         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2559         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2560
2561         if (need_resched())
2562                 schedule();
2563
2564         if (test_cpu_flag(CIF_MCCK_PENDING))
2565                 s390_handle_mcck();
2566
2567         if (!kvm_is_ucontrol(vcpu->kvm)) {
2568                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2569                 if (rc)
2570                         return rc;
2571         }
2572
2573         rc = kvm_s390_handle_requests(vcpu);
2574         if (rc)
2575                 return rc;
2576
2577         if (guestdbg_enabled(vcpu)) {
2578                 kvm_s390_backup_guest_per_regs(vcpu);
2579                 kvm_s390_patch_guest_per_regs(vcpu);
2580         }
2581
2582         vcpu->arch.sie_block->icptcode = 0;
2583         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2584         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2585         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2586
2587         return 0;
2588 }
2589
2590 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2591 {
2592         struct kvm_s390_pgm_info pgm_info = {
2593                 .code = PGM_ADDRESSING,
2594         };
2595         u8 opcode, ilen;
2596         int rc;
2597
2598         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2599         trace_kvm_s390_sie_fault(vcpu);
2600
2601         /*
2602          * We want to inject an addressing exception, which is defined as a
2603          * suppressing or terminating exception. However, since we came here
2604          * by a DAT access exception, the PSW still points to the faulting
2605          * instruction since DAT exceptions are nullifying. So we've got
2606          * to look up the current opcode to get the length of the instruction
2607          * to be able to forward the PSW.
2608          */
2609         rc = read_guest_instr(vcpu, &opcode, 1);
2610         ilen = insn_length(opcode);
2611         if (rc < 0) {
2612                 return rc;
2613         } else if (rc) {
2614                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2615                  * Forward by arbitrary ilc, injection will take care of
2616                  * nullification if necessary.
2617                  */
2618                 pgm_info = vcpu->arch.pgm;
2619                 ilen = 4;
2620         }
2621         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2622         kvm_s390_forward_psw(vcpu, ilen);
2623         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2624 }
2625
2626 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2627 {
2628         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2629                    vcpu->arch.sie_block->icptcode);
2630         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2631
2632         if (guestdbg_enabled(vcpu))
2633                 kvm_s390_restore_guest_per_regs(vcpu);
2634
2635         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2636         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2637
2638         if (vcpu->arch.sie_block->icptcode > 0) {
2639                 int rc = kvm_handle_sie_intercept(vcpu);
2640
2641                 if (rc != -EOPNOTSUPP)
2642                         return rc;
2643                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2644                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2645                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2646                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2647                 return -EREMOTE;
2648         } else if (exit_reason != -EFAULT) {
2649                 vcpu->stat.exit_null++;
2650                 return 0;
2651         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2652                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2653                 vcpu->run->s390_ucontrol.trans_exc_code =
2654                                                 current->thread.gmap_addr;
2655                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2656                 return -EREMOTE;
2657         } else if (current->thread.gmap_pfault) {
2658                 trace_kvm_s390_major_guest_pfault(vcpu);
2659                 current->thread.gmap_pfault = 0;
2660                 if (kvm_arch_setup_async_pf(vcpu))
2661                         return 0;
2662                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2663         }
2664         return vcpu_post_run_fault_in_sie(vcpu);
2665 }
2666
2667 static int __vcpu_run(struct kvm_vcpu *vcpu)
2668 {
2669         int rc, exit_reason;
2670
2671         /*
2672          * We try to hold kvm->srcu during most of vcpu_run (except when
2673          * running the guest), so that memslots (and other stuff) are protected.
2674          */
2675         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2676
2677         do {
2678                 rc = vcpu_pre_run(vcpu);
2679                 if (rc)
2680                         break;
2681
2682                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2683                 /*
2684                  * As PF_VCPU will be used in the fault handler, there must be
2685                  * no uaccess between guest_enter and guest_exit.
2686                  */
2687                 local_irq_disable();
2688                 guest_enter_irqoff();
2689                 __disable_cpu_timer_accounting(vcpu);
2690                 local_irq_enable();
2691                 exit_reason = sie64a(vcpu->arch.sie_block,
2692                                      vcpu->run->s.regs.gprs);
2693                 local_irq_disable();
2694                 __enable_cpu_timer_accounting(vcpu);
2695                 guest_exit_irqoff();
2696                 local_irq_enable();
2697                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2698
2699                 rc = vcpu_post_run(vcpu, exit_reason);
2700         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2701
2702         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2703         return rc;
2704 }
2705
2706 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2707 {
2708         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2709         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2710         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2711                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2712         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2713                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2714                 /* some control register changes require a tlb flush */
2715                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2716         }
2717         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2718                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2719                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2720                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2721                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2722                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2723         }
2724         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2725                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2726                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2727                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2728                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2729                         kvm_clear_async_pf_completion_queue(vcpu);
2730         }
2731         /*
2732          * If userspace sets the riccb (e.g. after migration) to a valid state,
2733          * we should enable RI here instead of doing the lazy enablement.
2734          */
2735         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2736             test_kvm_facility(vcpu->kvm, 64)) {
2737                 struct runtime_instr_cb *riccb =
2738                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2739
2740                 if (riccb->valid)
2741                         vcpu->arch.sie_block->ecb3 |= 0x01;
2742         }
2743
2744         kvm_run->kvm_dirty_regs = 0;
2745 }
2746
2747 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2748 {
2749         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2750         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2751         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2752         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2753         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2754         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2755         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2756         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2757         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2758         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2759         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2760         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2761 }
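
/*
 * Editorial note: sync_regs()/store_regs() implement the kvm_run register
 * synchronization.  Userspace flags the pieces it modified in
 * kvm_run->kvm_dirty_regs before KVM_RUN and reads back the fields named
 * in kvm_run->kvm_valid_regs afterwards.  Illustrative prefix update,
 * where run is the mmap()ed kvm_run and new_prefix a hypothetical value:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */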
2762
2763 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2764 {
2765         int rc;
2766         sigset_t sigsaved;
2767
2768         if (guestdbg_exit_pending(vcpu)) {
2769                 kvm_s390_prepare_debug_exit(vcpu);
2770                 return 0;
2771         }
2772
2773         if (vcpu->sigset_active)
2774                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2775
2776         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2777                 kvm_s390_vcpu_start(vcpu);
2778         } else if (is_vcpu_stopped(vcpu)) {
2779                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2780                                    vcpu->vcpu_id);
2781                 return -EINVAL;
2782         }
2783
2784         sync_regs(vcpu, kvm_run);
2785         enable_cpu_timer_accounting(vcpu);
2786
2787         might_fault();
2788         rc = __vcpu_run(vcpu);
2789
2790         if (signal_pending(current) && !rc) {
2791                 kvm_run->exit_reason = KVM_EXIT_INTR;
2792                 rc = -EINTR;
2793         }
2794
2795         if (guestdbg_exit_pending(vcpu) && !rc)  {
2796                 kvm_s390_prepare_debug_exit(vcpu);
2797                 rc = 0;
2798         }
2799
2800         if (rc == -EREMOTE) {
2801                 /* userspace support is needed, kvm_run has been prepared */
2802                 rc = 0;
2803         }
2804
2805         disable_cpu_timer_accounting(vcpu);
2806         store_regs(vcpu, kvm_run);
2807
2808         if (vcpu->sigset_active)
2809                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2810
2811         vcpu->stat.exit_userspace++;
2812         return rc;
2813 }
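
/*
 * Editorial note: a minimal userspace run loop around the ioctl handled
 * above (assumes kvm_fd and vcpu_fd; error handling omitted):
 *
 *	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	// intercept left for userspace to handle
 *	}
 */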
2814
2815 /*
2816  * store status at address
2817  * we have two special cases:
2818  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2819  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2820  */
2821 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2822 {
2823         unsigned char archmode = 1;
2824         freg_t fprs[NUM_FPRS];
2825         unsigned int px;
2826         u64 clkcomp, cputm;
2827         int rc;
2828
2829         px = kvm_s390_get_prefix(vcpu);
2830         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2831                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2832                         return -EFAULT;
2833                 gpa = 0;
2834         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2835                 if (write_guest_real(vcpu, 163, &archmode, 1))
2836                         return -EFAULT;
2837                 gpa = px;
2838         } else
2839                 gpa -= __LC_FPREGS_SAVE_AREA;
2840
2841         /* manually convert vector registers if necessary */
2842         if (MACHINE_HAS_VX) {
2843                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2844                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2845                                      fprs, 128);
2846         } else {
2847                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2848                                      vcpu->run->s.regs.fprs, 128);
2849         }
2850         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2851                               vcpu->run->s.regs.gprs, 128);
2852         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2853                               &vcpu->arch.sie_block->gpsw, 16);
2854         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2855                               &px, 4);
2856         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2857                               &vcpu->run->s.regs.fpc, 4);
2858         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2859                               &vcpu->arch.sie_block->todpr, 4);
2860         cputm = kvm_s390_get_cpu_timer(vcpu);
2861         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2862                               &cputm, 8);
2863         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2864         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2865                               &clkcomp, 8);
2866         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2867                               &vcpu->run->s.regs.acrs, 64);
2868         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2869                               &vcpu->arch.sie_block->gcr, 128);
2870         return rc ? -EFAULT : 0;
2871 }
2872
2873 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2874 {
2875         /*
2876          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2877          * copying in vcpu load/put. Let's update our copies before we save
2878          * it into the save area.
2879          */
2880         save_fpu_regs();
2881         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2882         save_access_regs(vcpu->run->s.regs.acrs);
2883
2884         return kvm_s390_store_status_unloaded(vcpu, addr);
2885 }
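
/*
 * Editorial note: kvm_s390_vcpu_store_status() is what the vcpu ioctl
 * KVM_S390_STORE_STATUS ends up calling; the address argument may be a
 * real address or one of the special values handled above.  Illustrative
 * call storing into the prefix area:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */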
2886
2887 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2888 {
2889         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2890         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2891 }
2892
2893 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2894 {
2895         unsigned int i;
2896         struct kvm_vcpu *vcpu;
2897
2898         kvm_for_each_vcpu(i, vcpu, kvm) {
2899                 __disable_ibs_on_vcpu(vcpu);
2900         }
2901 }
2902
2903 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2904 {
2905         if (!sclp.has_ibs)
2906                 return;
2907         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2908         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2909 }
2910
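/*
 * Move the vcpu out of the STOPPED state. IBS is only kept enabled
 * while no more than one vcpu of the VM is running.
 */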
2911 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2912 {
2913         int i, online_vcpus, started_vcpus = 0;
2914
2915         if (!is_vcpu_stopped(vcpu))
2916                 return;
2917
2918         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2919         /* Only one cpu at a time may enter/leave the STOPPED state. */
2920         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2921         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2922
2923         for (i = 0; i < online_vcpus; i++) {
2924                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2925                         started_vcpus++;
2926         }
2927
2928         if (started_vcpus == 0) {
2929                 /* we're the only active VCPU -> speed it up */
2930                 __enable_ibs_on_vcpu(vcpu);
2931         } else if (started_vcpus == 1) {
2932                 /*
2933                  * As we are starting a second VCPU, we have to disable
2934                  * the IBS facility on all VCPUs to remove potentially
2935                  * outstanding ENABLE requests.
2936                  */
2937                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2938         }
2939
2940         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2941         /*
2942          * Another VCPU might have used IBS while we were offline.
2943          * Let's play safe and flush the VCPU at startup.
2944          */
2945         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2946         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2947         return;
2948 }
2949
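/*
 * Move the vcpu into the STOPPED state. If exactly one started vcpu
 * is left afterwards, IBS is enabled for it.
 */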
2950 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2951 {
2952         int i, online_vcpus, started_vcpus = 0;
2953         struct kvm_vcpu *started_vcpu = NULL;
2954
2955         if (is_vcpu_stopped(vcpu))
2956                 return;
2957
2958         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2959         /* Only one cpu at a time may enter/leave the STOPPED state. */
2960         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2961         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2962
2963         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2964         kvm_s390_clear_stop_irq(vcpu);
2965
2966         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2967         __disable_ibs_on_vcpu(vcpu);
2968
2969         for (i = 0; i < online_vcpus; i++) {
2970                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2971                         started_vcpus++;
2972                         started_vcpu = vcpu->kvm->vcpus[i];
2973                 }
2974         }
2975
2976         if (started_vcpus == 1) {
2977                 /*
2978                  * As we only have one VCPU left, we want to enable the
2979                  * IBS facility for that VCPU to speed it up.
2980                  */
2981                 __enable_ibs_on_vcpu(started_vcpu);
2982         }
2983
2984         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2985         return;
2986 }
2987
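/* Handle the vcpu variant of the KVM_ENABLE_CAP ioctl. */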
2988 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2989                                      struct kvm_enable_cap *cap)
2990 {
2991         int r;
2992
2993         if (cap->flags)
2994                 return -EINVAL;
2995
2996         switch (cap->cap) {
2997         case KVM_CAP_S390_CSS_SUPPORT:
2998                 if (!vcpu->kvm->arch.css_support) {
2999                         vcpu->kvm->arch.css_support = 1;
3000                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3001                         trace_kvm_s390_enable_css(vcpu->kvm);
3002                 }
3003                 r = 0;
3004                 break;
3005         default:
3006                 r = -EINVAL;
3007                 break;
3008         }
3009         return r;
3010 }
3011
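/*
 * Handle KVM_S390_MEM_OP: read from or write to guest logical memory
 * via a temporary kernel buffer, or only check the access when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set.
 */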
3012 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3013                                   struct kvm_s390_mem_op *mop)
3014 {
3015         void __user *uaddr = (void __user *)mop->buf;
3016         void *tmpbuf = NULL;
3017         int r, srcu_idx;
3018         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3019                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3020
3021         if (mop->flags & ~supported_flags)
3022                 return -EINVAL;
3023
3024         if (mop->size > MEM_OP_MAX_SIZE)
3025                 return -E2BIG;
3026
3027         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3028                 tmpbuf = vmalloc(mop->size);
3029                 if (!tmpbuf)
3030                         return -ENOMEM;
3031         }
3032
3033         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3034
3035         switch (mop->op) {
3036         case KVM_S390_MEMOP_LOGICAL_READ:
3037                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3038                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3039                                             mop->size, GACC_FETCH);
3040                         break;
3041                 }
3042                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3043                 if (r == 0) {
3044                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3045                                 r = -EFAULT;
3046                 }
3047                 break;
3048         case KVM_S390_MEMOP_LOGICAL_WRITE:
3049                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3050                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3051                                             mop->size, GACC_STORE);
3052                         break;
3053                 }
3054                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3055                         r = -EFAULT;
3056                         break;
3057                 }
3058                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3059                 break;
3060         default:
3061                 r = -EINVAL;
3062         }
3063
3064         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3065
3066         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3067                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3068
3069         vfree(tmpbuf);
3070         return r;
3071 }
3072
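/* Dispatcher for the s390 specific vcpu ioctls. */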
3073 long kvm_arch_vcpu_ioctl(struct file *filp,
3074                          unsigned int ioctl, unsigned long arg)
3075 {
3076         struct kvm_vcpu *vcpu = filp->private_data;
3077         void __user *argp = (void __user *)arg;
3078         int idx;
3079         long r;
3080
3081         switch (ioctl) {
3082         case KVM_S390_IRQ: {
3083                 struct kvm_s390_irq s390irq;
3084
3085                 r = -EFAULT;
3086                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3087                         break;
3088                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3089                 break;
3090         }
3091         case KVM_S390_INTERRUPT: {
3092                 struct kvm_s390_interrupt s390int;
3093                 struct kvm_s390_irq s390irq;
3094
3095                 r = -EFAULT;
3096                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3097                         break;
3098                 if (s390int_to_s390irq(&s390int, &s390irq))
3099                         return -EINVAL;
3100                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3101                 break;
3102         }
3103         case KVM_S390_STORE_STATUS:
3104                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3105                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3106                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3107                 break;
3108         case KVM_S390_SET_INITIAL_PSW: {
3109                 psw_t psw;
3110
3111                 r = -EFAULT;
3112                 if (copy_from_user(&psw, argp, sizeof(psw)))
3113                         break;
3114                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3115                 break;
3116         }
3117         case KVM_S390_INITIAL_RESET:
3118                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3119                 break;
3120         case KVM_SET_ONE_REG:
3121         case KVM_GET_ONE_REG: {
3122                 struct kvm_one_reg reg;
3123                 r = -EFAULT;
3124                 if (copy_from_user(&reg, argp, sizeof(reg)))
3125                         break;
3126                 if (ioctl == KVM_SET_ONE_REG)
3127                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3128                 else
3129                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3130                 break;
3131         }
3132 #ifdef CONFIG_KVM_S390_UCONTROL
3133         case KVM_S390_UCAS_MAP: {
3134                 struct kvm_s390_ucas_mapping ucasmap;
3135
3136                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3137                         r = -EFAULT;
3138                         break;
3139                 }
3140
3141                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3142                         r = -EINVAL;
3143                         break;
3144                 }
3145
3146                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3147                                      ucasmap.vcpu_addr, ucasmap.length);
3148                 break;
3149         }
3150         case KVM_S390_UCAS_UNMAP: {
3151                 struct kvm_s390_ucas_mapping ucasmap;
3152
3153                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3154                         r = -EFAULT;
3155                         break;
3156                 }
3157
3158                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3159                         r = -EINVAL;
3160                         break;
3161                 }
3162
3163                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3164                         ucasmap.length);
3165                 break;
3166         }
3167 #endif
3168         case KVM_S390_VCPU_FAULT: {
3169                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3170                 break;
3171         }
3172         case KVM_ENABLE_CAP:
3173         {
3174                 struct kvm_enable_cap cap;
3175                 r = -EFAULT;
3176                 if (copy_from_user(&cap, argp, sizeof(cap)))
3177                         break;
3178                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3179                 break;
3180         }
3181         case KVM_S390_MEM_OP: {
3182                 struct kvm_s390_mem_op mem_op;
3183
3184                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3185                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3186                 else
3187                         r = -EFAULT;
3188                 break;
3189         }
3190         case KVM_S390_SET_IRQ_STATE: {
3191                 struct kvm_s390_irq_state irq_state;
3192
3193                 r = -EFAULT;
3194                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3195                         break;
3196                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3197                     irq_state.len == 0 ||
3198                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3199                         r = -EINVAL;
3200                         break;
3201                 }
3202                 r = kvm_s390_set_irq_state(vcpu,
3203                                            (void __user *) irq_state.buf,
3204                                            irq_state.len);
3205                 break;
3206         }
3207         case KVM_S390_GET_IRQ_STATE: {
3208                 struct kvm_s390_irq_state irq_state;
3209
3210                 r = -EFAULT;
3211                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3212                         break;
3213                 if (irq_state.len == 0) {
3214                         r = -EINVAL;
3215                         break;
3216                 }
3217                 r = kvm_s390_get_irq_state(vcpu,
3218                                            (__u8 __user *)  irq_state.buf,
3219                                            irq_state.len);
3220                 break;
3221         }
3222         default:
3223                 r = -ENOTTY;
3224         }
3225         return r;
3226 }
3227
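/*
 * Handle faults on the vcpu mmap area: user controlled VMs may map the
 * SIE control block at KVM_S390_SIE_PAGE_OFFSET; everything else gets
 * SIGBUS.
 */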
3228 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3229 {
3230 #ifdef CONFIG_KVM_S390_UCONTROL
3231         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3232                  && (kvm_is_ucontrol(vcpu->kvm))) {
3233                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3234                 get_page(vmf->page);
3235                 return 0;
3236         }
3237 #endif
3238         return VM_FAULT_SIGBUS;
3239 }
3240
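/* Nothing needs to be allocated per memslot on s390. */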
3241 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3242                             unsigned long npages)
3243 {
3244         return 0;
3245 }
3246
3247 /* Section: memory related */
3248 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3249                                    struct kvm_memory_slot *memslot,
3250                                    const struct kvm_userspace_memory_region *mem,
3251                                    enum kvm_mr_change change)
3252 {
3253         /* A few sanity checks. Memory slots must start and end on a segment
3254            boundary (1 MB). The memory in userland may be fragmented across
3255            different vmas. It is okay to mmap() and munmap() memory in this
3256            slot at any time after this call. */
3257
3258         if (mem->userspace_addr & 0xffffful)
3259                 return -EINVAL;
3260
3261         if (mem->memory_size & 0xffffful)
3262                 return -EINVAL;
3263
3264         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3265                 return -EINVAL;
3266
3267         return 0;
3268 }
3269
3270 void kvm_arch_commit_memory_region(struct kvm *kvm,
3271                                 const struct kvm_userspace_memory_region *mem,
3272                                 const struct kvm_memory_slot *old,
3273                                 const struct kvm_memory_slot *new,
3274                                 enum kvm_mr_change change)
3275 {
3276         int rc;
3277
3278         /* If the basics of the memslot do not change, we do not want
3279          * to update the gmap. Every update causes several unnecessary
3280          * segment translation exceptions. This is usually handled just
3281          * fine by the normal fault handler + gmap, but it will also
3282          * cause faults on the prefix page of running guest CPUs.
3283          */
3284         if (old->userspace_addr == mem->userspace_addr &&
3285             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3286             old->npages * PAGE_SIZE == mem->memory_size)
3287                 return;
3288
3289         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3290                 mem->guest_phys_addr, mem->memory_size);
3291         if (rc)
3292                 pr_warn("failed to commit memory region\n");
3293         return;
3294 }
3295
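/*
 * Derive, from the SCLP hmfai field, the mask of facility bits in
 * facility-list doubleword @i that may be made available to guests.
 */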
3296 static inline unsigned long nonhyp_mask(int i)
3297 {
3298         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3299
3300         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3301 }
3302
3303 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3304 {
3305         vcpu->valid_wakeup = false;
3306 }
3307
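/*
 * Module init: refuse to load when the SIEF2 facility is missing and
 * merge the host facilities allowed by nonhyp_mask() into
 * kvm_s390_fac_list_mask before registering with the KVM core.
 */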
3308 static int __init kvm_s390_init(void)
3309 {
3310         int i;
3311
3312         if (!sclp.has_sief2) {
3313                 pr_info("SIE not available\n");
3314                 return -ENODEV;
3315         }
3316
3317         for (i = 0; i < 16; i++)
3318                 kvm_s390_fac_list_mask[i] |=
3319                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3320
3321         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3322 }
3323
3324 static void __exit kvm_s390_exit(void)
3325 {
3326         kvm_exit();
3327 }
3328
3329 module_init(kvm_s390_init);
3330 module_exit(kvm_s390_exit);
3331
3332 /*
3333  * Enable autoloading of the kvm module.
3334  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3335  * since x86 takes a different approach.
3336  */
3337 #include <linux/miscdevice.h>
3338 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3339 MODULE_ALIAS("devname:kvm");