/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
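/*
 * Illustrative userspace sketch (not part of this file): the extensions
 * handled above are queried with KVM_CHECK_EXTENSION against /dev/kvm.
 * Error handling is minimal and the capability choice is an example:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int query_max_vcpus(void)
 *	{
 *		int kvm_fd = open("/dev/kvm", O_RDWR);
 *		int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 *		close(kvm_fd);
 *		return n; // BSCA or ESCA CPU slots, depending on sclp bits
 *	}
 */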
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
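/*
 * Illustrative userspace sketch (not part of this file): enabling one of
 * the VM capabilities handled above, e.g. KVM_CAP_S390_USER_SIGP, on an
 * already created VM file descriptor (vm_fd is assumed):
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int enable_user_sigp(int vm_fd)
 *	{
 *		struct kvm_enable_cap cap;
 *
 *		memset(&cap, 0, sizeof(cap));
 *		cap.cap = KVM_CAP_S390_USER_SIGP;
 *		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *	}
 */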
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
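/*
 * Illustrative userspace sketch (not part of this file): setting the
 * memory limit handled above via the KVM_SET_DEVICE_ATTR vm ioctl
 * (vm_fd and the 1 GiB value are assumptions for the example):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_mem_limit(int vm_fd)
 *	{
 *		__u64 limit = 1ULL << 30;
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_MEM_CTRL,
 *			.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *			.addr  = (__u64)(unsigned long) &limit,
 *		};
 *
 *		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 */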
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
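/*
 * Illustrative userspace sketch (not part of this file): programming the
 * guest TOD base handled above (vm_fd and the tod value are assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_guest_tod(int vm_fd, __u64 tod)
 *	{
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_TOD,
 *			.attr  = KVM_S390_VM_TOD_LOW,
 *			.addr  = (__u64)(unsigned long) &tod,
 *		};
 *
 *		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 */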
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
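/*
 * Illustrative userspace sketch (not part of this file): reading the first
 * 16 storage keys with the KVM_S390_GET_SKEYS vm ioctl implemented above
 * (vm_fd is assumed; a return of KVM_S390_GET_SKEYS_NONE means the guest
 * does not use storage keys):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int read_skeys(int vm_fd, __u8 *keys16)
 *	{
 *		struct kvm_s390_skeys args = {
 *			.start_gfn = 0,
 *			.count = 16,
 *			.skeydata_addr = (__u64)(unsigned long) keys16,
 *		};
 *
 *		return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	}
 */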
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
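/*
 * Illustrative userspace sketch (not part of this file): setting the CPU
 * timer through the ONE_REG interface handled above (vcpu_fd is assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_cpu_timer(int vcpu_fd, __u64 cputm)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id   = KVM_REG_S390_CPU_TIMER,
 *			.addr = (__u64)(unsigned long) &cputm,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *	}
 */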
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
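/*
 * Illustrative userspace sketch (not part of this file): stopping a VCPU
 * via the mp_state interface above; note that, as the comment in the
 * handler says, the first KVM_SET_MP_STATE call also hands CPU state
 * control to user space (vcpu_fd is assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int stop_vcpu(int vcpu_fd)
 *	{
 *		struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *		return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 *	}
 */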
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

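/*
 * The requests handled above are raised from other contexts with the vcpu
 * kicked out of SIE, e.g. (sketch, mirroring __enable_ibs_on_vcpu() below):
 *
 *	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
 *
 * so this request loop is guaranteed to run before the next SIE entry.
 */
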
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

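/*
 * The epoch is the offset that SIE adds to the host TOD clock to form the
 * guest TOD clock. Worked example with hypothetical values: for a host TOD
 * of 0x1000 and a requested guest TOD of 0x1800, the code above stores
 * epoch = 0x1800 - 0x1000 = 0x800 into every vcpu's SIE block.
 */
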
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);
	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

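/*
 * Illustrative userspace run loop (sketch; fd and variable names are
 * hypothetical, error handling omitted). "run" is the mmap'ed kvm_run page
 * of this vcpu that sync_regs()/store_regs() above shuttle state through:
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			// intercept data filled in by vcpu_post_run()
 *			break;
 *		case KVM_EXIT_INTR:
 *			break;
 *		}
 *	}
 */
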
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

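/*
 * Worked example: for gpa == KVM_S390_STORE_STATUS_NOADDR, gpa is set to 0
 * above, so the floating point registers land at absolute address
 * __LC_FPREGS_SAVE_AREA (0x1200), matching the function comment; the other
 * save areas follow at their architected lowcore offsets.
 */
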
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

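/*
 * IBS bookkeeping scenario (follows directly from the code above): with
 * only vcpu A running, A has IBS enabled; starting a second vcpu B first
 * disables IBS on all vcpus; stopping B again leaves A as the single
 * started vcpu, so A gets IBS back via __enable_ibs_on_vcpu(started_vcpu).
 */
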
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

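/*
 * Illustrative userspace sketch (fd and buffer names hypothetical): read
 * 256 bytes of guest logical memory through access register 0:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */
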
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

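/*
 * Worked example with a hypothetical hmfai value: for i == 0, the two
 * leftmost bits of sclp.hmfai select nonhyp_fai in 0..3. With
 * nonhyp_fai == 1 the function returns
 * 0x0000ffffffffffffUL >> 16 == 0x00000000ffffffffUL, masking the facility
 * bits reserved for the hypervisor out of the facility list that
 * kvm_s390_init() below hands to guests.
 */
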
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");