2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
32 #include <asm/pgtable.h>
34 #include <asm/switch_to.h>
40 #define KMSG_COMPONENT "kvm-s390"
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44 #define CREATE_TRACE_POINTS
46 #include "trace-s390.h"
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6fffbfcfdfc40UL,
122 0x005e800000000000UL,
125 unsigned long kvm_s390_fac_list_mask_size(void)
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
137 /* every s390 is virtualization enabled ;-) */
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
153 struct kvm_vcpu *vcpu;
155 unsigned long long *delta = v;
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
170 int kvm_arch_hardware_setup(void)
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
179 void kvm_arch_hardware_unsetup(void)
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
186 int kvm_arch_init(void *opaque)
188 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
192 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 debug_unregister(kvm_s390_dbf);
197 /* Register floating interrupt controller interface. */
198 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
201 void kvm_arch_exit(void)
203 debug_unregister(kvm_s390_dbf);
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 unsigned int ioctl, unsigned long arg)
210 if (ioctl == KVM_S390_ENABLE_SIE)
211 return s390_enable_sie();
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
220 case KVM_CAP_S390_PSW:
221 case KVM_CAP_S390_GMAP:
222 case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 case KVM_CAP_S390_UCONTROL:
226 case KVM_CAP_ASYNC_PF:
227 case KVM_CAP_SYNC_REGS:
228 case KVM_CAP_ONE_REG:
229 case KVM_CAP_ENABLE_CAP:
230 case KVM_CAP_S390_CSS_SUPPORT:
231 case KVM_CAP_IOEVENTFD:
232 case KVM_CAP_DEVICE_CTRL:
233 case KVM_CAP_ENABLE_CAP_VM:
234 case KVM_CAP_S390_IRQCHIP:
235 case KVM_CAP_VM_ATTRIBUTES:
236 case KVM_CAP_MP_STATE:
237 case KVM_CAP_S390_INJECT_IRQ:
238 case KVM_CAP_S390_USER_SIGP:
239 case KVM_CAP_S390_USER_STSI:
240 case KVM_CAP_S390_SKEYS:
241 case KVM_CAP_S390_IRQ_STATE:
244 case KVM_CAP_S390_MEM_OP:
247 case KVM_CAP_NR_VCPUS:
248 case KVM_CAP_MAX_VCPUS:
249 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250 : KVM_S390_BSCA_CPU_SLOTS;
252 case KVM_CAP_NR_MEMSLOTS:
253 r = KVM_USER_MEM_SLOTS;
255 case KVM_CAP_S390_COW:
256 r = MACHINE_HAS_ESOP;
258 case KVM_CAP_S390_VECTOR_REGISTERS:
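/*
 * Walk all pages of a memslot, translate each gfn to its host virtual
 * address and transfer the per-page dirty state from the gmap into the
 * memslot's dirty bitmap.
 */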
267 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
268 struct kvm_memory_slot *memslot)
270 gfn_t cur_gfn, last_gfn;
271 unsigned long address;
272 struct gmap *gmap = kvm->arch.gmap;
274 down_read(&gmap->mm->mmap_sem);
275 /* Loop over all guest pages */
276 last_gfn = memslot->base_gfn + memslot->npages;
277 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
278 address = gfn_to_hva_memslot(memslot, cur_gfn);
280 if (gmap_test_and_clear_dirty(address, gmap))
281 mark_page_dirty(kvm, cur_gfn);
283 up_read(&gmap->mm->mmap_sem);
286 /* Section: vm related */
287 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
290 * Get (and clear) the dirty memory log for a memory slot.
292 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
293 struct kvm_dirty_log *log)
297 struct kvm_memslots *slots;
298 struct kvm_memory_slot *memslot;
301 mutex_lock(&kvm->slots_lock);
304 if (log->slot >= KVM_USER_MEM_SLOTS)
307 slots = kvm_memslots(kvm);
308 memslot = id_to_memslot(slots, log->slot);
310 if (!memslot->dirty_bitmap)
313 kvm_s390_sync_dirty_log(kvm, memslot);
314 r = kvm_get_dirty_log(kvm, log, &is_dirty);
318 /* Clear the dirty log */
320 n = kvm_dirty_bitmap_bytes(memslot);
321 memset(memslot->dirty_bitmap, 0, n);
325 mutex_unlock(&kvm->slots_lock);
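/*
 * KVM_ENABLE_CAP handler for VM-wide capabilities.  Most of these just
 * set a flag in kvm->arch; vector register support also updates the
 * facility mask/list and must be enabled before any VCPU is created.
 */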
329 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
337 case KVM_CAP_S390_IRQCHIP:
338 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
339 kvm->arch.use_irqchip = 1;
342 case KVM_CAP_S390_USER_SIGP:
343 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
344 kvm->arch.user_sigp = 1;
347 case KVM_CAP_S390_VECTOR_REGISTERS:
348 mutex_lock(&kvm->lock);
349 if (atomic_read(&kvm->online_vcpus)) {
350 r = -EBUSY;
351 } else if (MACHINE_HAS_VX) {
352 set_kvm_facility(kvm->arch.model.fac->mask, 129);
353 set_kvm_facility(kvm->arch.model.fac->list, 129);
357 mutex_unlock(&kvm->lock);
358 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
359 r ? "(not available)" : "(success)");
361 case KVM_CAP_S390_USER_STSI:
362 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
363 kvm->arch.user_stsi = 1;
373 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
377 switch (attr->attr) {
378 case KVM_S390_VM_MEM_LIMIT_SIZE:
380 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
381 kvm->arch.mem_limit);
382 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
392 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
396 switch (attr->attr) {
397 case KVM_S390_VM_MEM_ENABLE_CMMA:
398 /* enable CMMA only for z10 and later (EDAT_1) */
400 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
404 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
405 mutex_lock(&kvm->lock);
406 if (atomic_read(&kvm->online_vcpus) == 0) {
407 kvm->arch.use_cmma = 1;
410 mutex_unlock(&kvm->lock);
412 case KVM_S390_VM_MEM_CLR_CMMA:
414 if (!kvm->arch.use_cmma)
417 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
418 mutex_lock(&kvm->lock);
419 idx = srcu_read_lock(&kvm->srcu);
420 s390_reset_cmma(kvm->arch.gmap->mm);
421 srcu_read_unlock(&kvm->srcu, idx);
422 mutex_unlock(&kvm->lock);
425 case KVM_S390_VM_MEM_LIMIT_SIZE: {
426 unsigned long new_limit;
428 if (kvm_is_ucontrol(kvm))
431 if (get_user(new_limit, (u64 __user *)attr->addr))
434 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
435 new_limit > kvm->arch.mem_limit)
441 /* gmap_alloc takes last usable address */
442 if (new_limit != KVM_S390_NO_MEM_LIMIT)
446 mutex_lock(&kvm->lock);
447 if (atomic_read(&kvm->online_vcpus) == 0) {
448 /* gmap_alloc will round the limit up */
449 struct gmap *new = gmap_alloc(current->mm, new_limit);
454 gmap_free(kvm->arch.gmap);
456 kvm->arch.gmap = new;
460 mutex_unlock(&kvm->lock);
461 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
462 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
463 (void *) kvm->arch.gmap->asce);
473 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
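/*
 * VM-wide crypto attribute handler: enabling AES/DEA key wrapping
 * generates fresh random wrapping key masks in the CRYCB, disabling
 * clears them.  Every VCPU is re-setup afterwards so its SIE block
 * reflects the new state.
 */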
475 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
477 struct kvm_vcpu *vcpu;
480 if (!test_kvm_facility(kvm, 76))
483 mutex_lock(&kvm->lock);
484 switch (attr->attr) {
485 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
486 get_random_bytes(
487 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
488 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
489 kvm->arch.crypto.aes_kw = 1;
490 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
492 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
493 get_random_bytes(
494 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
495 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
496 kvm->arch.crypto.dea_kw = 1;
497 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
499 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
500 kvm->arch.crypto.aes_kw = 0;
501 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
502 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
503 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
505 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
506 kvm->arch.crypto.dea_kw = 0;
507 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
508 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
509 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
512 mutex_unlock(&kvm->lock);
516 kvm_for_each_vcpu(i, vcpu, kvm) {
517 kvm_s390_vcpu_crypto_setup(vcpu);
520 mutex_unlock(&kvm->lock);
524 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
528 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
534 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
539 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
543 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
546 kvm_s390_set_tod_clock(kvm, gtod);
547 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
551 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
558 switch (attr->attr) {
559 case KVM_S390_VM_TOD_HIGH:
560 ret = kvm_s390_set_tod_high(kvm, attr);
562 case KVM_S390_VM_TOD_LOW:
563 ret = kvm_s390_set_tod_low(kvm, attr);
572 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
576 if (copy_to_user((void __user *)attr->addr, &gtod_high,
579 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
584 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
588 gtod = kvm_s390_get_tod_clock_fast(kvm);
589 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
591 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
596 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
603 switch (attr->attr) {
604 case KVM_S390_VM_TOD_HIGH:
605 ret = kvm_s390_get_tod_high(kvm, attr);
607 case KVM_S390_VM_TOD_LOW:
608 ret = kvm_s390_get_tod_low(kvm, attr);
617 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
619 struct kvm_s390_vm_cpu_processor *proc;
622 mutex_lock(&kvm->lock);
623 if (atomic_read(&kvm->online_vcpus)) {
627 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
632 if (!copy_from_user(proc, (void __user *)attr->addr,
633 sizeof(*proc))) {
634 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
635 sizeof(struct cpuid));
636 kvm->arch.model.ibc = proc->ibc;
637 memcpy(kvm->arch.model.fac->list, proc->fac_list,
638 S390_ARCH_FAC_LIST_SIZE_BYTE);
643 mutex_unlock(&kvm->lock);
647 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
651 switch (attr->attr) {
652 case KVM_S390_VM_CPU_PROCESSOR:
653 ret = kvm_s390_set_processor(kvm, attr);
659 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
661 struct kvm_s390_vm_cpu_processor *proc;
664 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
669 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
670 proc->ibc = kvm->arch.model.ibc;
671 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
672 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
679 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
681 struct kvm_s390_vm_cpu_machine *mach;
684 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
689 get_cpu_id((struct cpuid *) &mach->cpuid);
690 mach->ibc = sclp.ibc;
691 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
692 S390_ARCH_FAC_LIST_SIZE_BYTE);
693 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
694 S390_ARCH_FAC_LIST_SIZE_BYTE);
695 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
702 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
706 switch (attr->attr) {
707 case KVM_S390_VM_CPU_PROCESSOR:
708 ret = kvm_s390_get_processor(kvm, attr);
710 case KVM_S390_VM_CPU_MACHINE:
711 ret = kvm_s390_get_machine(kvm, attr);
717 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
721 switch (attr->group) {
722 case KVM_S390_VM_MEM_CTRL:
723 ret = kvm_s390_set_mem_control(kvm, attr);
725 case KVM_S390_VM_TOD:
726 ret = kvm_s390_set_tod(kvm, attr);
728 case KVM_S390_VM_CPU_MODEL:
729 ret = kvm_s390_set_cpu_model(kvm, attr);
731 case KVM_S390_VM_CRYPTO:
732 ret = kvm_s390_vm_set_crypto(kvm, attr);
742 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
746 switch (attr->group) {
747 case KVM_S390_VM_MEM_CTRL:
748 ret = kvm_s390_get_mem_control(kvm, attr);
750 case KVM_S390_VM_TOD:
751 ret = kvm_s390_get_tod(kvm, attr);
753 case KVM_S390_VM_CPU_MODEL:
754 ret = kvm_s390_get_cpu_model(kvm, attr);
764 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
768 switch (attr->group) {
769 case KVM_S390_VM_MEM_CTRL:
770 switch (attr->attr) {
771 case KVM_S390_VM_MEM_ENABLE_CMMA:
772 case KVM_S390_VM_MEM_CLR_CMMA:
773 case KVM_S390_VM_MEM_LIMIT_SIZE:
781 case KVM_S390_VM_TOD:
782 switch (attr->attr) {
783 case KVM_S390_VM_TOD_LOW:
784 case KVM_S390_VM_TOD_HIGH:
792 case KVM_S390_VM_CPU_MODEL:
793 switch (attr->attr) {
794 case KVM_S390_VM_CPU_PROCESSOR:
795 case KVM_S390_VM_CPU_MACHINE:
803 case KVM_S390_VM_CRYPTO:
804 switch (attr->attr) {
805 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
806 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
807 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
808 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
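/*
 * Read guest storage keys into a userspace buffer.  The keys are
 * staged in a kernel array (kmalloc with vmalloc fallback), one key
 * fetched per gfn after translating it to a host address.
 */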
824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
828 unsigned long curkey;
831 if (args->flags != 0)
834 /* Is this guest using storage keys? */
835 if (!mm_use_skey(current->mm))
836 return KVM_S390_GET_SKEYS_NONE;
838 /* Enforce sane limit on memory allocation */
839 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
842 keys = kmalloc_array(args->count, sizeof(uint8_t),
843 GFP_KERNEL | __GFP_NOWARN);
844 if (!keys)
845 keys = vmalloc(sizeof(uint8_t) * args->count);
849 for (i = 0; i < args->count; i++) {
850 hva = gfn_to_hva(kvm, args->start_gfn + i);
851 if (kvm_is_error_hva(hva)) {
856 curkey = get_guest_storage_key(current->mm, hva);
857 if (IS_ERR_VALUE(curkey)) {
864 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
865 sizeof(uint8_t) * args->count);
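/*
 * Write guest storage keys from a userspace buffer.  Storage key
 * handling is switched on lazily via s390_enable_skey(); keys with the
 * reserved low-order bit set are rejected.
 */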
873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
879 if (args->flags != 0)
882 /* Enforce sane limit on memory allocation */
883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
886 keys = kmalloc_array(args->count, sizeof(uint8_t),
887 GFP_KERNEL | __GFP_NOWARN);
888 if (!keys)
889 keys = vmalloc(sizeof(uint8_t) * args->count);
893 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
894 sizeof(uint8_t) * args->count);
900 /* Enable storage key handling for the guest */
901 r = s390_enable_skey();
905 for (i = 0; i < args->count; i++) {
906 hva = gfn_to_hva(kvm, args->start_gfn + i);
907 if (kvm_is_error_hva(hva)) {
912 /* Lowest order bit is reserved */
913 if (keys[i] & 0x01) {
918 r = set_guest_storage_key(current->mm, hva,
919 (unsigned long)keys[i], 0);
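/*
 * Dispatcher for VM-scoped ioctls.  For illustration, the device
 * attribute cases below are reached from userspace roughly like this
 * (a sketch of the userspace side only; vm_fd is assumed to be an
 * open VM file descriptor):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */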
928 long kvm_arch_vm_ioctl(struct file *filp,
929 unsigned int ioctl, unsigned long arg)
931 struct kvm *kvm = filp->private_data;
932 void __user *argp = (void __user *)arg;
933 struct kvm_device_attr attr;
937 case KVM_S390_INTERRUPT: {
938 struct kvm_s390_interrupt s390int;
941 if (copy_from_user(&s390int, argp, sizeof(s390int)))
943 r = kvm_s390_inject_vm(kvm, &s390int);
946 case KVM_ENABLE_CAP: {
947 struct kvm_enable_cap cap;
949 if (copy_from_user(&cap, argp, sizeof(cap)))
951 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
954 case KVM_CREATE_IRQCHIP: {
955 struct kvm_irq_routing_entry routing;
958 if (kvm->arch.use_irqchip) {
959 /* Set up dummy routing. */
960 memset(&routing, 0, sizeof(routing));
961 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
965 case KVM_SET_DEVICE_ATTR: {
967 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
969 r = kvm_s390_vm_set_attr(kvm, &attr);
972 case KVM_GET_DEVICE_ATTR: {
974 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
976 r = kvm_s390_vm_get_attr(kvm, &attr);
979 case KVM_HAS_DEVICE_ATTR: {
981 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
983 r = kvm_s390_vm_has_attr(kvm, &attr);
986 case KVM_S390_GET_SKEYS: {
987 struct kvm_s390_skeys args;
990 if (copy_from_user(&args, argp,
991 sizeof(struct kvm_s390_skeys)))
993 r = kvm_s390_get_skeys(kvm, &args);
996 case KVM_S390_SET_SKEYS: {
997 struct kvm_s390_skeys args;
1000 if (copy_from_user(&args, argp,
1001 sizeof(struct kvm_s390_skeys)))
1003 r = kvm_s390_set_skeys(kvm, &args);
1013 static int kvm_s390_query_ap_config(u8 *config)
1015 u32 fcn_code = 0x04000000UL;
1018 memset(config, 0, 128);
1022 ".long 0xb2af0000\n" /* PQAP(QCI) */
1028 : "r" (fcn_code), "r" (config)
1029 : "cc", "0", "2", "memory"
1035 static int kvm_s390_apxa_installed(void)
1040 if (test_facility(12)) {
1041 cc = kvm_s390_query_ap_config(config);
1044 pr_err("PQAP(QCI) failed with cc=%d", cc);
1046 return config[0] & 0x40;
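/*
 * Choose the CRYCB format: format 2 when the APXA facility is
 * installed (extended AP configuration), format 1 otherwise.
 */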
1052 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1054 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1056 if (kvm_s390_apxa_installed())
1057 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1059 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1062 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1065 cpu_id->version = 0xff;
1068 static int kvm_s390_crypto_init(struct kvm *kvm)
1070 if (!test_kvm_facility(kvm, 76))
1073 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1074 GFP_KERNEL | GFP_DMA);
1075 if (!kvm->arch.crypto.crycb)
1078 kvm_s390_set_crycb_format(kvm);
1080 /* Enable AES/DEA protected key functions by default */
1081 kvm->arch.crypto.aes_kw = 1;
1082 kvm->arch.crypto.dea_kw = 1;
1083 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1084 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1085 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1086 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1091 static void sca_dispose(struct kvm *kvm)
1093 if (kvm->arch.use_esca)
1094 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1096 free_page((unsigned long)(kvm->arch.sca));
1097 kvm->arch.sca = NULL;
1100 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1103 char debug_name[16];
1104 static unsigned long sca_offset;
1107 #ifdef CONFIG_KVM_S390_UCONTROL
1108 if (type & ~KVM_VM_S390_UCONTROL)
1110 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1117 rc = s390_enable_sie();
1123 kvm->arch.use_esca = 0; /* start with basic SCA */
1124 rwlock_init(&kvm->arch.sca_lock);
1125 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1128 spin_lock(&kvm_lock);
1129 sca_offset += 16;
1130 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1131 sca_offset = 0;
1132 kvm->arch.sca = (struct bsca_block *)
1133 ((char *) kvm->arch.sca + sca_offset);
1134 spin_unlock(&kvm_lock);
1136 sprintf(debug_name, "kvm-%u", current->pid);
1138 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1143 * The architectural maximum amount of facilities is 16 kbit. To store
1144 * this amount, 2 kbyte of memory is required. Thus we need a full
1145 * page to hold the guest facility list (arch.model.fac->list) and the
1146 * facility mask (arch.model.fac->mask). Its address size has to be
1147 * 31 bits and word aligned.
1149 kvm->arch.model.fac =
1150 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1151 if (!kvm->arch.model.fac)
1154 /* Populate the facility mask initially. */
1155 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1156 S390_ARCH_FAC_LIST_SIZE_BYTE);
1157 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1158 if (i < kvm_s390_fac_list_mask_size())
1159 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1160 else
1161 kvm->arch.model.fac->mask[i] = 0UL;
1164 /* Populate the facility list initially. */
1165 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1166 S390_ARCH_FAC_LIST_SIZE_BYTE);
1168 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1169 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1171 if (kvm_s390_crypto_init(kvm) < 0)
1174 spin_lock_init(&kvm->arch.float_int.lock);
1175 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1176 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1177 init_waitqueue_head(&kvm->arch.ipte_wq);
1178 mutex_init(&kvm->arch.ipte_mutex);
1180 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1181 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1183 if (type & KVM_VM_S390_UCONTROL) {
1184 kvm->arch.gmap = NULL;
1185 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1187 kvm->arch.mem_limit = TASK_MAX_SIZE;
1188 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1189 if (!kvm->arch.gmap)
1191 kvm->arch.gmap->private = kvm;
1192 kvm->arch.gmap->pfault_enabled = 0;
1195 kvm->arch.css_support = 0;
1196 kvm->arch.use_irqchip = 0;
1197 kvm->arch.epoch = 0;
1199 spin_lock_init(&kvm->arch.start_stop_lock);
1200 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1204 kfree(kvm->arch.crypto.crycb);
1205 free_page((unsigned long)kvm->arch.model.fac);
1206 debug_unregister(kvm->arch.dbf);
1208 KVM_EVENT(3, "creation of vm failed: %d", rc);
1212 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1214 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1215 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1216 kvm_s390_clear_local_irqs(vcpu);
1217 kvm_clear_async_pf_completion_queue(vcpu);
1218 if (!kvm_is_ucontrol(vcpu->kvm))
1222 if (kvm_is_ucontrol(vcpu->kvm))
1223 gmap_free(vcpu->arch.gmap);
1225 if (vcpu->kvm->arch.use_cmma)
1226 kvm_s390_vcpu_unsetup_cmma(vcpu);
1227 free_page((unsigned long)(vcpu->arch.sie_block));
1229 kvm_vcpu_uninit(vcpu);
1230 kmem_cache_free(kvm_vcpu_cache, vcpu);
1233 static void kvm_free_vcpus(struct kvm *kvm)
1236 struct kvm_vcpu *vcpu;
1238 kvm_for_each_vcpu(i, vcpu, kvm)
1239 kvm_arch_vcpu_destroy(vcpu);
1241 mutex_lock(&kvm->lock);
1242 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1243 kvm->vcpus[i] = NULL;
1245 atomic_set(&kvm->online_vcpus, 0);
1246 mutex_unlock(&kvm->lock);
1249 void kvm_arch_destroy_vm(struct kvm *kvm)
1251 kvm_free_vcpus(kvm);
1252 free_page((unsigned long)kvm->arch.model.fac);
1254 debug_unregister(kvm->arch.dbf);
1255 kfree(kvm->arch.crypto.crycb);
1256 if (!kvm_is_ucontrol(kvm))
1257 gmap_free(kvm->arch.gmap);
1258 kvm_s390_destroy_adapters(kvm);
1259 kvm_s390_clear_float_irqs(kvm);
1260 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1263 /* Section: vcpu related */
1264 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1266 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1267 if (!vcpu->arch.gmap)
1269 vcpu->arch.gmap->private = vcpu->kvm;
1274 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1276 read_lock(&vcpu->kvm->arch.sca_lock);
1277 if (vcpu->kvm->arch.use_esca) {
1278 struct esca_block *sca = vcpu->kvm->arch.sca;
1280 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1281 sca->cpu[vcpu->vcpu_id].sda = 0;
1283 struct bsca_block *sca = vcpu->kvm->arch.sca;
1285 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1286 sca->cpu[vcpu->vcpu_id].sda = 0;
1288 read_unlock(&vcpu->kvm->arch.sca_lock);
1291 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1293 read_lock(&vcpu->kvm->arch.sca_lock);
1294 if (vcpu->kvm->arch.use_esca) {
1295 struct esca_block *sca = vcpu->kvm->arch.sca;
1297 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1298 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1299 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1300 vcpu->arch.sie_block->ecb2 |= 0x04U;
1301 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1303 struct bsca_block *sca = vcpu->kvm->arch.sca;
1305 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1306 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1307 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1308 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1310 read_unlock(&vcpu->kvm->arch.sca_lock);
1313 /* Basic SCA to Extended SCA data copy routines */
1314 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1317 d->sigp_ctrl.c = s->sigp_ctrl.c;
1318 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1321 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1325 d->ipte_control = s->ipte_control;
1327 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1328 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
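/*
 * Replace the basic SCA (64 CPU slots) with an extended SCA (up to
 * 248 slots).  All VCPUs are blocked while the entries are copied and
 * the SCA origin in every SIE block is re-pointed under the write
 * side of sca_lock.
 */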
1331 static int sca_switch_to_extended(struct kvm *kvm)
1333 struct bsca_block *old_sca = kvm->arch.sca;
1334 struct esca_block *new_sca;
1335 struct kvm_vcpu *vcpu;
1336 unsigned int vcpu_idx;
1339 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1343 scaoh = (u32)((u64)(new_sca) >> 32);
1344 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1346 kvm_s390_vcpu_block_all(kvm);
1347 write_lock(&kvm->arch.sca_lock);
1349 sca_copy_b_to_e(new_sca, old_sca);
1351 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1352 vcpu->arch.sie_block->scaoh = scaoh;
1353 vcpu->arch.sie_block->scaol = scaol;
1354 vcpu->arch.sie_block->ecb2 |= 0x04U;
1356 kvm->arch.sca = new_sca;
1357 kvm->arch.use_esca = 1;
1359 write_unlock(&kvm->arch.sca_lock);
1360 kvm_s390_vcpu_unblock_all(kvm);
1362 free_page((unsigned long)old_sca);
1364 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1365 old_sca, kvm->arch.sca);
1369 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1373 if (id < KVM_S390_BSCA_CPU_SLOTS)
1378 mutex_lock(&kvm->lock);
1379 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1380 mutex_unlock(&kvm->lock);
1382 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1385 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1387 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1388 kvm_clear_async_pf_completion_queue(vcpu);
1389 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1395 if (test_kvm_facility(vcpu->kvm, 129))
1396 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1398 if (kvm_is_ucontrol(vcpu->kvm))
1399 return __kvm_ucontrol_vcpu_init(vcpu);
1405 * Backs up the current FP/VX register save area on a particular
1406 * destination. Used to switch between different register save
1409 static inline void save_fpu_to(struct fpu *dst)
1411 dst->fpc = current->thread.fpu.fpc;
1412 dst->regs = current->thread.fpu.regs;
1416 * Switches the FP/VX register save area from which to lazy
1417 * restore register contents.
1419 static inline void load_fpu_from(struct fpu *from)
1421 current->thread.fpu.fpc = from->fpc;
1422 current->thread.fpu.regs = from->regs;
1425 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1427 /* Save host register state */
1429 save_fpu_to(&vcpu->arch.host_fpregs);
1431 if (test_kvm_facility(vcpu->kvm, 129)) {
1432 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1434 * Use the register save area in the SIE-control block
1435 * for register restore and save in kvm_arch_vcpu_put()
1437 current->thread.fpu.vxrs =
1438 (__vector128 *)&vcpu->run->s.regs.vrs;
1440 load_fpu_from(&vcpu->arch.guest_fpregs);
1442 if (test_fp_ctl(current->thread.fpu.fpc))
1443 /* User space provided an invalid FPC, let's clear it */
1444 current->thread.fpu.fpc = 0;
1446 save_access_regs(vcpu->arch.host_acrs);
1447 restore_access_regs(vcpu->run->s.regs.acrs);
1448 gmap_enable(vcpu->arch.gmap);
1449 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1452 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1454 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1455 gmap_disable(vcpu->arch.gmap);
1459 if (test_kvm_facility(vcpu->kvm, 129))
1461 * kvm_arch_vcpu_load() set up the register save area to
1462 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1463 * are already saved. Only the floating-point control must be
1466 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1468 save_fpu_to(&vcpu->arch.guest_fpregs);
1469 load_fpu_from(&vcpu->arch.host_fpregs);
1471 save_access_regs(vcpu->run->s.regs.acrs);
1472 restore_access_regs(vcpu->arch.host_acrs);
1475 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1477 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1478 vcpu->arch.sie_block->gpsw.mask = 0UL;
1479 vcpu->arch.sie_block->gpsw.addr = 0UL;
1480 kvm_s390_set_prefix(vcpu, 0);
1481 vcpu->arch.sie_block->cputm = 0UL;
1482 vcpu->arch.sie_block->ckc = 0UL;
1483 vcpu->arch.sie_block->todpr = 0;
1484 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1485 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1486 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1487 vcpu->arch.guest_fpregs.fpc = 0;
1488 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1489 vcpu->arch.sie_block->gbea = 1;
1490 vcpu->arch.sie_block->pp = 0;
1491 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1492 kvm_clear_async_pf_completion_queue(vcpu);
1493 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1494 kvm_s390_vcpu_stop(vcpu);
1495 kvm_s390_clear_local_irqs(vcpu);
1498 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1500 mutex_lock(&vcpu->kvm->lock);
1502 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1504 mutex_unlock(&vcpu->kvm->lock);
1505 if (!kvm_is_ucontrol(vcpu->kvm)) {
1506 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1512 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1514 if (!test_kvm_facility(vcpu->kvm, 76))
1517 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1519 if (vcpu->kvm->arch.crypto.aes_kw)
1520 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1521 if (vcpu->kvm->arch.crypto.dea_kw)
1522 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1524 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1527 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1529 free_page(vcpu->arch.sie_block->cbrlo);
1530 vcpu->arch.sie_block->cbrlo = 0;
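/*
 * CMMA interpretation requires a zeroed buffer page (the CBRLO) per
 * VCPU; the ecb2 bits flipped below enable/adjust CMMA handling in
 * the SIE control block (bit semantics per the SIE documentation).
 */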
1533 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1535 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1536 if (!vcpu->arch.sie_block->cbrlo)
1539 vcpu->arch.sie_block->ecb2 |= 0x80;
1540 vcpu->arch.sie_block->ecb2 &= ~0x08;
1544 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1546 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1548 vcpu->arch.cpu_id = model->cpu_id;
1549 vcpu->arch.sie_block->ibc = model->ibc;
1550 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
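/*
 * One-time SIE control block setup for a new VCPU: initial CPU state
 * flags, the various interpretation/execution control bits (ecb, ecb2,
 * eca, ecd), optional CMMA, and the clock-comparator wakeup timer.
 */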
1553 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1557 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1561 if (test_kvm_facility(vcpu->kvm, 78))
1562 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1563 else if (test_kvm_facility(vcpu->kvm, 8))
1564 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1566 kvm_s390_vcpu_setup_model(vcpu);
1568 vcpu->arch.sie_block->ecb = 6;
1569 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1570 vcpu->arch.sie_block->ecb |= 0x10;
1572 vcpu->arch.sie_block->ecb2 = 8;
1573 vcpu->arch.sie_block->eca = 0xC1002000U;
1575 vcpu->arch.sie_block->eca |= 1;
1576 if (sclp.has_sigpif)
1577 vcpu->arch.sie_block->eca |= 0x10000000U;
1578 if (test_kvm_facility(vcpu->kvm, 129)) {
1579 vcpu->arch.sie_block->eca |= 0x00020000;
1580 vcpu->arch.sie_block->ecd |= 0x20000000;
1582 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1584 if (vcpu->kvm->arch.use_cmma) {
1585 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1589 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1590 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1592 kvm_s390_vcpu_crypto_setup(vcpu);
1597 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1600 struct kvm_vcpu *vcpu;
1601 struct sie_page *sie_page;
1604 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1609 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1613 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1617 vcpu->arch.sie_block = &sie_page->sie_block;
1618 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1620 vcpu->arch.sie_block->icpua = id;
1621 spin_lock_init(&vcpu->arch.local_int.lock);
1622 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1623 vcpu->arch.local_int.wq = &vcpu->wq;
1624 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1627 * Allocate a save area for floating-point registers. If the vector
1628 * extension is available, register contents are saved in the SIE
1629 * control block. The allocated save area is still required in
1630 * particular places, for example, in kvm_s390_vcpu_store_status().
1632 vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1633 GFP_KERNEL);
1634 if (!vcpu->arch.guest_fpregs.fprs)
1635 goto out_free_sie_block;
1637 rc = kvm_vcpu_init(vcpu, kvm, id);
1639 goto out_free_sie_block;
1640 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1641 vcpu->arch.sie_block);
1642 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1646 free_page((unsigned long)(vcpu->arch.sie_block));
1648 kmem_cache_free(kvm_vcpu_cache, vcpu);
1653 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1655 return kvm_s390_vcpu_has_irq(vcpu, 0);
1658 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1660 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1664 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1666 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1669 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1671 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1675 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1677 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1681 * Kick a guest cpu out of SIE and wait until SIE is not running.
1682 * If the CPU is not running (e.g. waiting as idle) the function will
1683 * return immediately. */
1684 void exit_sie(struct kvm_vcpu *vcpu)
1686 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1687 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1691 /* Kick a guest cpu out of SIE to process a request synchronously */
1692 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1694 kvm_make_request(req, vcpu);
1695 kvm_s390_vcpu_request(vcpu);
1698 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1701 struct kvm *kvm = gmap->private;
1702 struct kvm_vcpu *vcpu;
1704 kvm_for_each_vcpu(i, vcpu, kvm) {
1705 /* match against both prefix pages */
1706 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1707 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1708 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1713 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1715 /* kvm common code refers to this, but never calls it */
1720 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1721 struct kvm_one_reg *reg)
1726 case KVM_REG_S390_TODPR:
1727 r = put_user(vcpu->arch.sie_block->todpr,
1728 (u32 __user *)reg->addr);
1730 case KVM_REG_S390_EPOCHDIFF:
1731 r = put_user(vcpu->arch.sie_block->epoch,
1732 (u64 __user *)reg->addr);
1734 case KVM_REG_S390_CPU_TIMER:
1735 r = put_user(vcpu->arch.sie_block->cputm,
1736 (u64 __user *)reg->addr);
1738 case KVM_REG_S390_CLOCK_COMP:
1739 r = put_user(vcpu->arch.sie_block->ckc,
1740 (u64 __user *)reg->addr);
1742 case KVM_REG_S390_PFTOKEN:
1743 r = put_user(vcpu->arch.pfault_token,
1744 (u64 __user *)reg->addr);
1746 case KVM_REG_S390_PFCOMPARE:
1747 r = put_user(vcpu->arch.pfault_compare,
1748 (u64 __user *)reg->addr);
1750 case KVM_REG_S390_PFSELECT:
1751 r = put_user(vcpu->arch.pfault_select,
1752 (u64 __user *)reg->addr);
1754 case KVM_REG_S390_PP:
1755 r = put_user(vcpu->arch.sie_block->pp,
1756 (u64 __user *)reg->addr);
1758 case KVM_REG_S390_GBEA:
1759 r = put_user(vcpu->arch.sie_block->gbea,
1760 (u64 __user *)reg->addr);
1769 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1770 struct kvm_one_reg *reg)
1775 case KVM_REG_S390_TODPR:
1776 r = get_user(vcpu->arch.sie_block->todpr,
1777 (u32 __user *)reg->addr);
1779 case KVM_REG_S390_EPOCHDIFF:
1780 r = get_user(vcpu->arch.sie_block->epoch,
1781 (u64 __user *)reg->addr);
1783 case KVM_REG_S390_CPU_TIMER:
1784 r = get_user(vcpu->arch.sie_block->cputm,
1785 (u64 __user *)reg->addr);
1787 case KVM_REG_S390_CLOCK_COMP:
1788 r = get_user(vcpu->arch.sie_block->ckc,
1789 (u64 __user *)reg->addr);
1791 case KVM_REG_S390_PFTOKEN:
1792 r = get_user(vcpu->arch.pfault_token,
1793 (u64 __user *)reg->addr);
1794 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1795 kvm_clear_async_pf_completion_queue(vcpu);
1797 case KVM_REG_S390_PFCOMPARE:
1798 r = get_user(vcpu->arch.pfault_compare,
1799 (u64 __user *)reg->addr);
1801 case KVM_REG_S390_PFSELECT:
1802 r = get_user(vcpu->arch.pfault_select,
1803 (u64 __user *)reg->addr);
1805 case KVM_REG_S390_PP:
1806 r = get_user(vcpu->arch.sie_block->pp,
1807 (u64 __user *)reg->addr);
1809 case KVM_REG_S390_GBEA:
1810 r = get_user(vcpu->arch.sie_block->gbea,
1811 (u64 __user *)reg->addr);
1820 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1822 kvm_s390_vcpu_initial_reset(vcpu);
1826 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1828 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1832 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1834 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1838 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1839 struct kvm_sregs *sregs)
1841 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1842 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1843 restore_access_regs(vcpu->run->s.regs.acrs);
1847 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1848 struct kvm_sregs *sregs)
1850 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1851 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1855 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1857 if (test_fp_ctl(fpu->fpc))
1859 memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1860 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1862 load_fpu_from(&vcpu->arch.guest_fpregs);
1866 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1868 memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1869 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1873 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1877 if (!is_vcpu_stopped(vcpu))
1880 vcpu->run->psw_mask = psw.mask;
1881 vcpu->run->psw_addr = psw.addr;
1886 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1887 struct kvm_translation *tr)
1889 return -EINVAL; /* not implemented yet */
1892 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1893 KVM_GUESTDBG_USE_HW_BP | \
1894 KVM_GUESTDBG_ENABLE)
1896 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1897 struct kvm_guest_debug *dbg)
1901 vcpu->guest_debug = 0;
1902 kvm_s390_clear_bp_data(vcpu);
1904 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1907 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1908 vcpu->guest_debug = dbg->control;
1909 /* enforce guest PER */
1910 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1912 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1913 rc = kvm_s390_import_bp_data(vcpu, dbg);
1915 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1916 vcpu->arch.guestdbg.last_bp = 0;
1920 vcpu->guest_debug = 0;
1921 kvm_s390_clear_bp_data(vcpu);
1922 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1928 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1929 struct kvm_mp_state *mp_state)
1931 /* CHECK_STOP and LOAD are not supported yet */
1932 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1933 KVM_MP_STATE_OPERATING;
1936 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1937 struct kvm_mp_state *mp_state)
1941 /* user space knows about this interface - let it control the state */
1942 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1944 switch (mp_state->mp_state) {
1945 case KVM_MP_STATE_STOPPED:
1946 kvm_s390_vcpu_stop(vcpu);
1948 case KVM_MP_STATE_OPERATING:
1949 kvm_s390_vcpu_start(vcpu);
1951 case KVM_MP_STATE_LOAD:
1952 case KVM_MP_STATE_CHECK_STOP:
1953 /* fall through - CHECK_STOP and LOAD are not supported yet */
1961 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1963 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1966 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1969 kvm_s390_vcpu_request_handled(vcpu);
1970 if (!vcpu->requests)
1973 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1974 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1975 * This ensures that the ipte instruction for this request has
1976 * already finished. We might race against a second unmapper that
1977 * wants to set the blocking bit. Let's just retry the request loop.
1979 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1981 rc = gmap_ipte_notify(vcpu->arch.gmap,
1982 kvm_s390_get_prefix(vcpu),
1989 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1990 vcpu->arch.sie_block->ihcpu = 0xffff;
1994 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1995 if (!ibs_enabled(vcpu)) {
1996 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1997 atomic_or(CPUSTAT_IBS,
1998 &vcpu->arch.sie_block->cpuflags);
2003 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2004 if (ibs_enabled(vcpu)) {
2005 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2006 atomic_andnot(CPUSTAT_IBS,
2007 &vcpu->arch.sie_block->cpuflags);
2012 /* nothing to do, just clear the request */
2013 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
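/*
 * The guest TOD clock is realized as an offset: guest TOD = host TOD +
 * epoch.  Setting the clock therefore stores tod - get_tod_clock() as
 * the new epoch and propagates it to all VCPUs while they are blocked.
 */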
2018 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2020 struct kvm_vcpu *vcpu;
2023 mutex_lock(&kvm->lock);
2025 kvm->arch.epoch = tod - get_tod_clock();
2026 kvm_s390_vcpu_block_all(kvm);
2027 kvm_for_each_vcpu(i, vcpu, kvm)
2028 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2029 kvm_s390_vcpu_unblock_all(kvm);
2031 mutex_unlock(&kvm->lock);
2035 * kvm_arch_fault_in_page - fault-in guest page if necessary
2036 * @vcpu: The corresponding virtual cpu
2037 * @gpa: Guest physical address
2038 * @writable: Whether the page should be writable or not
2040 * Make sure that a guest page has been faulted-in on the host.
2042 * Return: Zero on success, negative error code otherwise.
2044 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2046 return gmap_fault(vcpu->arch.gmap, gpa,
2047 writable ? FAULT_FLAG_WRITE : 0);
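/*
 * Pseudo-page-fault protocol: the INIT half tells the guest that a
 * page is being resolved asynchronously, the DONE half signals
 * completion.  Both are injected as (WARN-checked) interrupts.
 */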
2050 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2051 unsigned long token)
2053 struct kvm_s390_interrupt inti;
2054 struct kvm_s390_irq irq;
2057 irq.u.ext.ext_params2 = token;
2058 irq.type = KVM_S390_INT_PFAULT_INIT;
2059 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2061 inti.type = KVM_S390_INT_PFAULT_DONE;
2062 inti.parm64 = token;
2063 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2067 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2068 struct kvm_async_pf *work)
2070 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2071 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2074 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2075 struct kvm_async_pf *work)
2077 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2078 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2081 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2082 struct kvm_async_pf *work)
2084 /* s390 will always inject the page directly */
2087 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2090 * s390 will always inject the page directly,
2091 * but we still want check_async_completion to clean up
2096 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2099 struct kvm_arch_async_pf arch;
2102 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2104 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2105 vcpu->arch.pfault_compare)
2107 if (psw_extint_disabled(vcpu))
2109 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2111 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2113 if (!vcpu->arch.gmap->pfault_enabled)
2116 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2117 hva += current->thread.gmap_addr & ~PAGE_MASK;
2118 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2121 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2125 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2130 * On s390, notifications for arriving pages will be delivered directly
2131 * to the guest, but the housekeeping for completed pfaults is
2132 * handled outside the worker.
2134 kvm_check_async_pf_completion(vcpu);
2136 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2137 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2142 if (test_cpu_flag(CIF_MCCK_PENDING))
2145 if (!kvm_is_ucontrol(vcpu->kvm)) {
2146 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2151 rc = kvm_s390_handle_requests(vcpu);
2155 if (guestdbg_enabled(vcpu)) {
2156 kvm_s390_backup_guest_per_regs(vcpu);
2157 kvm_s390_patch_guest_per_regs(vcpu);
2160 vcpu->arch.sie_block->icptcode = 0;
2161 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2162 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2163 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2168 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2170 psw_t *psw = &vcpu->arch.sie_block->gpsw;
2174 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2175 trace_kvm_s390_sie_fault(vcpu);
2178 * We want to inject an addressing exception, which is defined as a
2179 * suppressing or terminating exception. However, since we came here
2180 * by a DAT access exception, the PSW still points to the faulting
2181 * instruction since DAT exceptions are nullifying. So we've got
2182 * to look up the current opcode to get the length of the instruction
2183 * to be able to forward the PSW.
2185 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2187 return kvm_s390_inject_prog_cond(vcpu, rc);
2188 psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2190 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
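/*
 * Triage after returning from SIE: a non-zero intercept code is fed to
 * the intercept handlers; an -EFAULT exit is either a ucontrol
 * translation exception, a pfault to resolve asynchronously, or a
 * fault taken on the SIE instruction itself.
 */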
2193 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2195 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2196 vcpu->arch.sie_block->icptcode);
2197 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2199 if (guestdbg_enabled(vcpu))
2200 kvm_s390_restore_guest_per_regs(vcpu);
2202 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2203 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2205 if (vcpu->arch.sie_block->icptcode > 0) {
2206 int rc = kvm_handle_sie_intercept(vcpu);
2208 if (rc != -EOPNOTSUPP)
2210 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2211 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2212 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2213 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2215 } else if (exit_reason != -EFAULT) {
2216 vcpu->stat.exit_null++;
2218 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2219 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2220 vcpu->run->s390_ucontrol.trans_exc_code =
2221 current->thread.gmap_addr;
2222 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2224 } else if (current->thread.gmap_pfault) {
2225 trace_kvm_s390_major_guest_pfault(vcpu);
2226 current->thread.gmap_pfault = 0;
2227 if (kvm_arch_setup_async_pf(vcpu))
2229 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2231 return vcpu_post_run_fault_in_sie(vcpu);
2234 static int __vcpu_run(struct kvm_vcpu *vcpu)
2236 int rc, exit_reason;
2239 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2240 * ning the guest), so that memslots (and other stuff) are protected
2242 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2245 rc = vcpu_pre_run(vcpu);
2249 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2251 * As PF_VCPU will be used in the fault handler, there must be no
2252 * uaccess between guest_enter and guest_exit.
2254 local_irq_disable();
2255 __kvm_guest_enter();
2257 exit_reason = sie64a(vcpu->arch.sie_block,
2258 vcpu->run->s.regs.gprs);
2259 local_irq_disable();
2262 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2264 rc = vcpu_post_run(vcpu, exit_reason);
2265 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2267 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
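/*
 * sync_regs()/store_regs() shuttle register state between the
 * userspace-visible kvm_run area and the SIE control block on entry
 * to and exit from KVM_RUN.
 */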
2271 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2273 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2274 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2275 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2276 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2277 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2278 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2279 /* some control register changes require a tlb flush */
2280 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2282 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2283 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2284 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2285 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2286 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2287 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2289 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2290 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2291 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2292 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2293 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2294 kvm_clear_async_pf_completion_queue(vcpu);
2296 kvm_run->kvm_dirty_regs = 0;
2299 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2301 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2302 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2303 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2304 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2305 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2306 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2307 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2308 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2309 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2310 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2311 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2312 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2315 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2320 if (guestdbg_exit_pending(vcpu)) {
2321 kvm_s390_prepare_debug_exit(vcpu);
2325 if (vcpu->sigset_active)
2326 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2328 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2329 kvm_s390_vcpu_start(vcpu);
2330 } else if (is_vcpu_stopped(vcpu)) {
2331 pr_err_ratelimited("can't run stopped vcpu %d\n",
2336 sync_regs(vcpu, kvm_run);
2339 rc = __vcpu_run(vcpu);
2341 if (signal_pending(current) && !rc) {
2342 kvm_run->exit_reason = KVM_EXIT_INTR;
2346 if (guestdbg_exit_pending(vcpu) && !rc) {
2347 kvm_s390_prepare_debug_exit(vcpu);
2351 if (rc == -EREMOTE) {
2352 /* userspace support is needed, kvm_run has been prepared */
2356 store_regs(vcpu, kvm_run);
2358 if (vcpu->sigset_active)
2359 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2361 vcpu->stat.exit_userspace++;
2366 * store status at address
2367 * we have two special cases:
2368 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2369 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2371 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2373 unsigned char archmode = 1;
2378 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2379 if (write_guest_abs(vcpu, 163, &archmode, 1))
2380 return -EFAULT;
2381 gpa = SAVE_AREA_BASE;
2382 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2383 if (write_guest_real(vcpu, 163, &archmode, 1))
2385 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2387 rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2388 vcpu->arch.guest_fpregs.fprs, 128);
2389 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2390 vcpu->run->s.regs.gprs, 128);
2391 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2392 &vcpu->arch.sie_block->gpsw, 16);
2393 px = kvm_s390_get_prefix(vcpu);
2394 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2396 rc |= write_guest_abs(vcpu,
2397 gpa + offsetof(struct save_area, fp_ctrl_reg),
2398 &vcpu->arch.guest_fpregs.fpc, 4);
2399 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2400 &vcpu->arch.sie_block->todpr, 4);
2401 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2402 &vcpu->arch.sie_block->cputm, 8);
2403 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2404 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2406 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2407 &vcpu->run->s.regs.acrs, 64);
2408 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2409 &vcpu->arch.sie_block->gcr, 128);
2410 return rc ? -EFAULT : 0;

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * which overlap with the floating-point registers are saved
		 * in the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
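
#if 0	/* illustrative userspace sketch, not part of this file */
/*
 * The ioctl argument for KVM_S390_STORE_STATUS is the guest absolute
 * address itself (not a pointer), or one of the two special values
 * handled in kvm_s390_store_status_unloaded() above.
 */
static int store_status_at_prefix(int vcpu_fd)
{
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
		     KVM_S390_STORE_STATUS_PREFIXED);
}
#endif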

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save them into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
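
#if 0	/* illustrative userspace sketch, not part of this file */
/*
 * One hedged way for userspace to reach kvm_s390_vcpu_stop(): once the
 * VMM manages CPU state itself (see kvm_s390_user_cpu_state_ctrl()),
 * KVM_SET_MP_STATE with KVM_MP_STATE_STOPPED stops the VCPU.
 */
static int stop_vcpu(int vcpu_fd)
{
	struct kvm_mp_state mp_state = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
}
#endif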

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
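
#if 0	/* illustrative userspace sketch, not part of this file */
/* Enabling the CSS-support capability handled above, per VCPU fd. */
static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
#endif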

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
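
#if 0	/* illustrative userspace sketch, not part of this file */
/*
 * Reading guest memory via the logical-address path above. A positive
 * return value is the program interruption code of a failed access;
 * with KVM_S390_MEMOP_F_INJECT_EXCEPTION set it is also injected.
 */
static int read_guest_logical(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op mop = {
		.gaddr	= gaddr,
		.buf	= (__u64)(unsigned long)buf,
		.size	= len,
		.op	= KVM_S390_MEMOP_LOGICAL_READ,
		.ar	= 0,	/* use access register 0 */
	};

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
}
#endif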

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
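
#if 0	/* illustrative userspace sketch, not part of this file */
/*
 * Saving the pending interrupt state, e.g. for migration. For the SET
 * direction the same structure is used and, as checked above, len must
 * then be a non-zero multiple of sizeof(struct kvm_s390_irq).
 */
static int get_irq_state(int vcpu_fd, struct kvm_s390_irq *irqs, int n)
{
	struct kvm_s390_irq_state irq_state = {
		.buf = (__u64)(unsigned long)irqs,
		.len = n * sizeof(*irqs),
	};

	return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}
#endif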

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
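
#if 0	/* illustrative userspace sketch, not part of this file */
/*
 * A memslot registration that satisfies the checks above: both the
 * user address and the size are aligned to the 1 MB segment boundary.
 */
static int add_memslot(int vm_fd, __u64 gpa, __u64 size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot		 = 0,
		.guest_phys_addr = gpa,
		.memory_size	 = size,	/* multiple of 1 MB */
		.userspace_addr	 = (__u64)(unsigned long)hva,	/* 1 MB aligned */
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
#endif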

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

static int __init kvm_s390_init(void)
{
	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");