KVM: s390: fix mismatch between user and in-kernel guest limit
[cascardo/linux.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
/* Maximum transfer size for KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536
/* Added to KVM_MAX_VCPUS when sizing VCPU_IRQS_MAX_BUF below. */
#define LOCAL_IRQS 32
/* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
#define VCPU_IRQS_MAX_BUF \
	((KVM_MAX_VCPUS + LOCAL_IRQS) * sizeof(struct kvm_s390_irq))

/*
 * Expands to two comma separated initializers: the offset of stat.x inside
 * struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56         { "userspace_handled", VCPU_STAT(exit_userspace) },
57         { "exit_null", VCPU_STAT(exit_null) },
58         { "exit_validity", VCPU_STAT(exit_validity) },
59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60         { "exit_external_request", VCPU_STAT(exit_external_request) },
61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62         { "exit_instruction", VCPU_STAT(exit_instruction) },
63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83         { "instruction_spx", VCPU_STAT(instruction_spx) },
84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85         { "instruction_stap", VCPU_STAT(instruction_stap) },
86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90         { "instruction_essa", VCPU_STAT(instruction_essa) },
91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110         { "diagnose_10", VCPU_STAT(diagnose_10) },
111         { "diagnose_44", VCPU_STAT(diagnose_44) },
112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113         { "diagnose_258", VCPU_STAT(diagnose_258) },
114         { "diagnose_308", VCPU_STAT(diagnose_308) },
115         { "diagnose_500", VCPU_STAT(diagnose_500) },
116         { NULL }
117 };
118
/*
 * Upper facilities limit for kvm, stored as an array of unsigned long words.
 * The number of words is reported by kvm_s390_fac_list_mask_size().
 */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,	/* word 0 */
	0x005e800000000000UL,	/* word 1 */
};
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
/* Section: not file related */

/*
 * Nothing has to be switched on here, because every s390 is virtualization
 * enabled ;-)  Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150                           void *v)
151 {
152         struct kvm *kvm;
153         struct kvm_vcpu *vcpu;
154         int i;
155         unsigned long long *delta = v;
156
157         list_for_each_entry(kvm, &vm_list, vm_list) {
158                 kvm->arch.epoch -= *delta;
159                 kvm_for_each_vcpu(i, vcpu, kvm) {
160                         vcpu->arch.sie_block->epoch -= *delta;
161                 }
162         }
163         return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167         .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172         gmap_notifier.notifier_call = kvm_gmap_notifier;
173         gmap_register_ipte_notifier(&gmap_notifier);
174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175                                        &kvm_clock_notifier);
176         return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181         gmap_unregister_ipte_notifier(&gmap_notifier);
182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183                                          &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189         if (!kvm_s390_dbf)
190                 return -ENOMEM;
191
192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193                 debug_unregister(kvm_s390_dbf);
194                 return -ENOMEM;
195         }
196
197         /* Register floating interrupt controller interface. */
198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203         debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208                         unsigned int ioctl, unsigned long arg)
209 {
210         if (ioctl == KVM_S390_ENABLE_SIE)
211                 return s390_enable_sie();
212         return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217         int r;
218
219         switch (ext) {
220         case KVM_CAP_S390_PSW:
221         case KVM_CAP_S390_GMAP:
222         case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224         case KVM_CAP_S390_UCONTROL:
225 #endif
226         case KVM_CAP_ASYNC_PF:
227         case KVM_CAP_SYNC_REGS:
228         case KVM_CAP_ONE_REG:
229         case KVM_CAP_ENABLE_CAP:
230         case KVM_CAP_S390_CSS_SUPPORT:
231         case KVM_CAP_IOEVENTFD:
232         case KVM_CAP_DEVICE_CTRL:
233         case KVM_CAP_ENABLE_CAP_VM:
234         case KVM_CAP_S390_IRQCHIP:
235         case KVM_CAP_VM_ATTRIBUTES:
236         case KVM_CAP_MP_STATE:
237         case KVM_CAP_S390_INJECT_IRQ:
238         case KVM_CAP_S390_USER_SIGP:
239         case KVM_CAP_S390_USER_STSI:
240         case KVM_CAP_S390_SKEYS:
241         case KVM_CAP_S390_IRQ_STATE:
242                 r = 1;
243                 break;
244         case KVM_CAP_S390_MEM_OP:
245                 r = MEM_OP_MAX_SIZE;
246                 break;
247         case KVM_CAP_NR_VCPUS:
248         case KVM_CAP_MAX_VCPUS:
249                 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250                                   : KVM_S390_BSCA_CPU_SLOTS;
251                 break;
252         case KVM_CAP_NR_MEMSLOTS:
253                 r = KVM_USER_MEM_SLOTS;
254                 break;
255         case KVM_CAP_S390_COW:
256                 r = MACHINE_HAS_ESOP;
257                 break;
258         case KVM_CAP_S390_VECTOR_REGISTERS:
259                 r = MACHINE_HAS_VX;
260                 break;
261         default:
262                 r = 0;
263         }
264         return r;
265 }
266
267 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
268                                         struct kvm_memory_slot *memslot)
269 {
270         gfn_t cur_gfn, last_gfn;
271         unsigned long address;
272         struct gmap *gmap = kvm->arch.gmap;
273
274         down_read(&gmap->mm->mmap_sem);
275         /* Loop over all guest pages */
276         last_gfn = memslot->base_gfn + memslot->npages;
277         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
278                 address = gfn_to_hva_memslot(memslot, cur_gfn);
279
280                 if (gmap_test_and_clear_dirty(address, gmap))
281                         mark_page_dirty(kvm, cur_gfn);
282         }
283         up_read(&gmap->mm->mmap_sem);
284 }
285
286 /* Section: vm related */
287 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
288
289 /*
290  * Get (and clear) the dirty memory log for a memory slot.
291  */
292 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
293                                struct kvm_dirty_log *log)
294 {
295         int r;
296         unsigned long n;
297         struct kvm_memslots *slots;
298         struct kvm_memory_slot *memslot;
299         int is_dirty = 0;
300
301         mutex_lock(&kvm->slots_lock);
302
303         r = -EINVAL;
304         if (log->slot >= KVM_USER_MEM_SLOTS)
305                 goto out;
306
307         slots = kvm_memslots(kvm);
308         memslot = id_to_memslot(slots, log->slot);
309         r = -ENOENT;
310         if (!memslot->dirty_bitmap)
311                 goto out;
312
313         kvm_s390_sync_dirty_log(kvm, memslot);
314         r = kvm_get_dirty_log(kvm, log, &is_dirty);
315         if (r)
316                 goto out;
317
318         /* Clear the dirty log */
319         if (is_dirty) {
320                 n = kvm_dirty_bitmap_bytes(memslot);
321                 memset(memslot->dirty_bitmap, 0, n);
322         }
323         r = 0;
324 out:
325         mutex_unlock(&kvm->slots_lock);
326         return r;
327 }
328
329 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
330 {
331         int r;
332
333         if (cap->flags)
334                 return -EINVAL;
335
336         switch (cap->cap) {
337         case KVM_CAP_S390_IRQCHIP:
338                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
339                 kvm->arch.use_irqchip = 1;
340                 r = 0;
341                 break;
342         case KVM_CAP_S390_USER_SIGP:
343                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
344                 kvm->arch.user_sigp = 1;
345                 r = 0;
346                 break;
347         case KVM_CAP_S390_VECTOR_REGISTERS:
348                 mutex_lock(&kvm->lock);
349                 if (atomic_read(&kvm->online_vcpus)) {
350                         r = -EBUSY;
351                 } else if (MACHINE_HAS_VX) {
352                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
353                         set_kvm_facility(kvm->arch.model.fac->list, 129);
354                         r = 0;
355                 } else
356                         r = -EINVAL;
357                 mutex_unlock(&kvm->lock);
358                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
359                          r ? "(not available)" : "(success)");
360                 break;
361         case KVM_CAP_S390_USER_STSI:
362                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
363                 kvm->arch.user_stsi = 1;
364                 r = 0;
365                 break;
366         default:
367                 r = -EINVAL;
368                 break;
369         }
370         return r;
371 }
372
373 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
374 {
375         int ret;
376
377         switch (attr->attr) {
378         case KVM_S390_VM_MEM_LIMIT_SIZE:
379                 ret = 0;
380                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
381                          kvm->arch.mem_limit);
382                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
383                         ret = -EFAULT;
384                 break;
385         default:
386                 ret = -ENXIO;
387                 break;
388         }
389         return ret;
390 }
391
392 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
393 {
394         int ret;
395         unsigned int idx;
396         switch (attr->attr) {
397         case KVM_S390_VM_MEM_ENABLE_CMMA:
398                 /* enable CMMA only for z10 and later (EDAT_1) */
399                 ret = -EINVAL;
400                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
401                         break;
402
403                 ret = -EBUSY;
404                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
405                 mutex_lock(&kvm->lock);
406                 if (atomic_read(&kvm->online_vcpus) == 0) {
407                         kvm->arch.use_cmma = 1;
408                         ret = 0;
409                 }
410                 mutex_unlock(&kvm->lock);
411                 break;
412         case KVM_S390_VM_MEM_CLR_CMMA:
413                 ret = -EINVAL;
414                 if (!kvm->arch.use_cmma)
415                         break;
416
417                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
418                 mutex_lock(&kvm->lock);
419                 idx = srcu_read_lock(&kvm->srcu);
420                 s390_reset_cmma(kvm->arch.gmap->mm);
421                 srcu_read_unlock(&kvm->srcu, idx);
422                 mutex_unlock(&kvm->lock);
423                 ret = 0;
424                 break;
425         case KVM_S390_VM_MEM_LIMIT_SIZE: {
426                 unsigned long new_limit;
427
428                 if (kvm_is_ucontrol(kvm))
429                         return -EINVAL;
430
431                 if (get_user(new_limit, (u64 __user *)attr->addr))
432                         return -EFAULT;
433
434                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
435                     new_limit > kvm->arch.mem_limit)
436                         return -E2BIG;
437
438                 if (!new_limit)
439                         return -EINVAL;
440
441                 /* gmap_alloc takes last usable address */
442                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
443                         new_limit -= 1;
444
445                 ret = -EBUSY;
446                 mutex_lock(&kvm->lock);
447                 if (atomic_read(&kvm->online_vcpus) == 0) {
448                         /* gmap_alloc will round the limit up */
449                         struct gmap *new = gmap_alloc(current->mm, new_limit);
450
451                         if (!new) {
452                                 ret = -ENOMEM;
453                         } else {
454                                 gmap_free(kvm->arch.gmap);
455                                 new->private = kvm;
456                                 kvm->arch.gmap = new;
457                                 ret = 0;
458                         }
459                 }
460                 mutex_unlock(&kvm->lock);
461                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
462                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
463                          (void *) kvm->arch.gmap->asce);
464                 break;
465         }
466         default:
467                 ret = -ENXIO;
468                 break;
469         }
470         return ret;
471 }
472
473 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
474
475 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
476 {
477         struct kvm_vcpu *vcpu;
478         int i;
479
480         if (!test_kvm_facility(kvm, 76))
481                 return -EINVAL;
482
483         mutex_lock(&kvm->lock);
484         switch (attr->attr) {
485         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
486                 get_random_bytes(
487                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
488                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
489                 kvm->arch.crypto.aes_kw = 1;
490                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
491                 break;
492         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
493                 get_random_bytes(
494                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
495                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
496                 kvm->arch.crypto.dea_kw = 1;
497                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
498                 break;
499         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
500                 kvm->arch.crypto.aes_kw = 0;
501                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
502                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
503                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
504                 break;
505         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
506                 kvm->arch.crypto.dea_kw = 0;
507                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
508                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
509                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
510                 break;
511         default:
512                 mutex_unlock(&kvm->lock);
513                 return -ENXIO;
514         }
515
516         kvm_for_each_vcpu(i, vcpu, kvm) {
517                 kvm_s390_vcpu_crypto_setup(vcpu);
518                 exit_sie(vcpu);
519         }
520         mutex_unlock(&kvm->lock);
521         return 0;
522 }
523
524 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
525 {
526         u8 gtod_high;
527
528         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
529                                            sizeof(gtod_high)))
530                 return -EFAULT;
531
532         if (gtod_high != 0)
533                 return -EINVAL;
534         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
535
536         return 0;
537 }
538
539 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
540 {
541         u64 gtod;
542
543         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
544                 return -EFAULT;
545
546         kvm_s390_set_tod_clock(kvm, gtod);
547         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
548         return 0;
549 }
550
551 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
552 {
553         int ret;
554
555         if (attr->flags)
556                 return -EINVAL;
557
558         switch (attr->attr) {
559         case KVM_S390_VM_TOD_HIGH:
560                 ret = kvm_s390_set_tod_high(kvm, attr);
561                 break;
562         case KVM_S390_VM_TOD_LOW:
563                 ret = kvm_s390_set_tod_low(kvm, attr);
564                 break;
565         default:
566                 ret = -ENXIO;
567                 break;
568         }
569         return ret;
570 }
571
572 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
573 {
574         u8 gtod_high = 0;
575
576         if (copy_to_user((void __user *)attr->addr, &gtod_high,
577                                          sizeof(gtod_high)))
578                 return -EFAULT;
579         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
580
581         return 0;
582 }
583
584 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
585 {
586         u64 gtod;
587
588         gtod = kvm_s390_get_tod_clock_fast(kvm);
589         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
590                 return -EFAULT;
591         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
592
593         return 0;
594 }
595
596 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
597 {
598         int ret;
599
600         if (attr->flags)
601                 return -EINVAL;
602
603         switch (attr->attr) {
604         case KVM_S390_VM_TOD_HIGH:
605                 ret = kvm_s390_get_tod_high(kvm, attr);
606                 break;
607         case KVM_S390_VM_TOD_LOW:
608                 ret = kvm_s390_get_tod_low(kvm, attr);
609                 break;
610         default:
611                 ret = -ENXIO;
612                 break;
613         }
614         return ret;
615 }
616
617 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
618 {
619         struct kvm_s390_vm_cpu_processor *proc;
620         int ret = 0;
621
622         mutex_lock(&kvm->lock);
623         if (atomic_read(&kvm->online_vcpus)) {
624                 ret = -EBUSY;
625                 goto out;
626         }
627         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
628         if (!proc) {
629                 ret = -ENOMEM;
630                 goto out;
631         }
632         if (!copy_from_user(proc, (void __user *)attr->addr,
633                             sizeof(*proc))) {
634                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
635                        sizeof(struct cpuid));
636                 kvm->arch.model.ibc = proc->ibc;
637                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
638                        S390_ARCH_FAC_LIST_SIZE_BYTE);
639         } else
640                 ret = -EFAULT;
641         kfree(proc);
642 out:
643         mutex_unlock(&kvm->lock);
644         return ret;
645 }
646
647 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
648 {
649         int ret = -ENXIO;
650
651         switch (attr->attr) {
652         case KVM_S390_VM_CPU_PROCESSOR:
653                 ret = kvm_s390_set_processor(kvm, attr);
654                 break;
655         }
656         return ret;
657 }
658
659 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
660 {
661         struct kvm_s390_vm_cpu_processor *proc;
662         int ret = 0;
663
664         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
665         if (!proc) {
666                 ret = -ENOMEM;
667                 goto out;
668         }
669         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
670         proc->ibc = kvm->arch.model.ibc;
671         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
672         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
673                 ret = -EFAULT;
674         kfree(proc);
675 out:
676         return ret;
677 }
678
679 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
680 {
681         struct kvm_s390_vm_cpu_machine *mach;
682         int ret = 0;
683
684         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
685         if (!mach) {
686                 ret = -ENOMEM;
687                 goto out;
688         }
689         get_cpu_id((struct cpuid *) &mach->cpuid);
690         mach->ibc = sclp.ibc;
691         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
692                S390_ARCH_FAC_LIST_SIZE_BYTE);
693         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
694                S390_ARCH_FAC_LIST_SIZE_BYTE);
695         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
696                 ret = -EFAULT;
697         kfree(mach);
698 out:
699         return ret;
700 }
701
702 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
703 {
704         int ret = -ENXIO;
705
706         switch (attr->attr) {
707         case KVM_S390_VM_CPU_PROCESSOR:
708                 ret = kvm_s390_get_processor(kvm, attr);
709                 break;
710         case KVM_S390_VM_CPU_MACHINE:
711                 ret = kvm_s390_get_machine(kvm, attr);
712                 break;
713         }
714         return ret;
715 }
716
717 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
718 {
719         int ret;
720
721         switch (attr->group) {
722         case KVM_S390_VM_MEM_CTRL:
723                 ret = kvm_s390_set_mem_control(kvm, attr);
724                 break;
725         case KVM_S390_VM_TOD:
726                 ret = kvm_s390_set_tod(kvm, attr);
727                 break;
728         case KVM_S390_VM_CPU_MODEL:
729                 ret = kvm_s390_set_cpu_model(kvm, attr);
730                 break;
731         case KVM_S390_VM_CRYPTO:
732                 ret = kvm_s390_vm_set_crypto(kvm, attr);
733                 break;
734         default:
735                 ret = -ENXIO;
736                 break;
737         }
738
739         return ret;
740 }
741
742 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
743 {
744         int ret;
745
746         switch (attr->group) {
747         case KVM_S390_VM_MEM_CTRL:
748                 ret = kvm_s390_get_mem_control(kvm, attr);
749                 break;
750         case KVM_S390_VM_TOD:
751                 ret = kvm_s390_get_tod(kvm, attr);
752                 break;
753         case KVM_S390_VM_CPU_MODEL:
754                 ret = kvm_s390_get_cpu_model(kvm, attr);
755                 break;
756         default:
757                 ret = -ENXIO;
758                 break;
759         }
760
761         return ret;
762 }
763
764 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
765 {
766         int ret;
767
768         switch (attr->group) {
769         case KVM_S390_VM_MEM_CTRL:
770                 switch (attr->attr) {
771                 case KVM_S390_VM_MEM_ENABLE_CMMA:
772                 case KVM_S390_VM_MEM_CLR_CMMA:
773                 case KVM_S390_VM_MEM_LIMIT_SIZE:
774                         ret = 0;
775                         break;
776                 default:
777                         ret = -ENXIO;
778                         break;
779                 }
780                 break;
781         case KVM_S390_VM_TOD:
782                 switch (attr->attr) {
783                 case KVM_S390_VM_TOD_LOW:
784                 case KVM_S390_VM_TOD_HIGH:
785                         ret = 0;
786                         break;
787                 default:
788                         ret = -ENXIO;
789                         break;
790                 }
791                 break;
792         case KVM_S390_VM_CPU_MODEL:
793                 switch (attr->attr) {
794                 case KVM_S390_VM_CPU_PROCESSOR:
795                 case KVM_S390_VM_CPU_MACHINE:
796                         ret = 0;
797                         break;
798                 default:
799                         ret = -ENXIO;
800                         break;
801                 }
802                 break;
803         case KVM_S390_VM_CRYPTO:
804                 switch (attr->attr) {
805                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
806                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
807                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
808                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
809                         ret = 0;
810                         break;
811                 default:
812                         ret = -ENXIO;
813                         break;
814                 }
815                 break;
816         default:
817                 ret = -ENXIO;
818                 break;
819         }
820
821         return ret;
822 }
823
824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
825 {
826         uint8_t *keys;
827         uint64_t hva;
828         unsigned long curkey;
829         int i, r = 0;
830
831         if (args->flags != 0)
832                 return -EINVAL;
833
834         /* Is this guest using storage keys? */
835         if (!mm_use_skey(current->mm))
836                 return KVM_S390_GET_SKEYS_NONE;
837
838         /* Enforce sane limit on memory allocation */
839         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
840                 return -EINVAL;
841
842         keys = kmalloc_array(args->count, sizeof(uint8_t),
843                              GFP_KERNEL | __GFP_NOWARN);
844         if (!keys)
845                 keys = vmalloc(sizeof(uint8_t) * args->count);
846         if (!keys)
847                 return -ENOMEM;
848
849         for (i = 0; i < args->count; i++) {
850                 hva = gfn_to_hva(kvm, args->start_gfn + i);
851                 if (kvm_is_error_hva(hva)) {
852                         r = -EFAULT;
853                         goto out;
854                 }
855
856                 curkey = get_guest_storage_key(current->mm, hva);
857                 if (IS_ERR_VALUE(curkey)) {
858                         r = curkey;
859                         goto out;
860                 }
861                 keys[i] = curkey;
862         }
863
864         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
865                          sizeof(uint8_t) * args->count);
866         if (r)
867                 r = -EFAULT;
868 out:
869         kvfree(keys);
870         return r;
871 }
872
873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
874 {
875         uint8_t *keys;
876         uint64_t hva;
877         int i, r = 0;
878
879         if (args->flags != 0)
880                 return -EINVAL;
881
882         /* Enforce sane limit on memory allocation */
883         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
884                 return -EINVAL;
885
886         keys = kmalloc_array(args->count, sizeof(uint8_t),
887                              GFP_KERNEL | __GFP_NOWARN);
888         if (!keys)
889                 keys = vmalloc(sizeof(uint8_t) * args->count);
890         if (!keys)
891                 return -ENOMEM;
892
893         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
894                            sizeof(uint8_t) * args->count);
895         if (r) {
896                 r = -EFAULT;
897                 goto out;
898         }
899
900         /* Enable storage key handling for the guest */
901         r = s390_enable_skey();
902         if (r)
903                 goto out;
904
905         for (i = 0; i < args->count; i++) {
906                 hva = gfn_to_hva(kvm, args->start_gfn + i);
907                 if (kvm_is_error_hva(hva)) {
908                         r = -EFAULT;
909                         goto out;
910                 }
911
912                 /* Lowest order bit is reserved */
913                 if (keys[i] & 0x01) {
914                         r = -EINVAL;
915                         goto out;
916                 }
917
918                 r = set_guest_storage_key(current->mm, hva,
919                                           (unsigned long)keys[i], 0);
920                 if (r)
921                         goto out;
922         }
923 out:
924         kvfree(keys);
925         return r;
926 }
927
928 long kvm_arch_vm_ioctl(struct file *filp,
929                        unsigned int ioctl, unsigned long arg)
930 {
931         struct kvm *kvm = filp->private_data;
932         void __user *argp = (void __user *)arg;
933         struct kvm_device_attr attr;
934         int r;
935
936         switch (ioctl) {
937         case KVM_S390_INTERRUPT: {
938                 struct kvm_s390_interrupt s390int;
939
940                 r = -EFAULT;
941                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
942                         break;
943                 r = kvm_s390_inject_vm(kvm, &s390int);
944                 break;
945         }
946         case KVM_ENABLE_CAP: {
947                 struct kvm_enable_cap cap;
948                 r = -EFAULT;
949                 if (copy_from_user(&cap, argp, sizeof(cap)))
950                         break;
951                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
952                 break;
953         }
954         case KVM_CREATE_IRQCHIP: {
955                 struct kvm_irq_routing_entry routing;
956
957                 r = -EINVAL;
958                 if (kvm->arch.use_irqchip) {
959                         /* Set up dummy routing. */
960                         memset(&routing, 0, sizeof(routing));
961                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
962                 }
963                 break;
964         }
965         case KVM_SET_DEVICE_ATTR: {
966                 r = -EFAULT;
967                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
968                         break;
969                 r = kvm_s390_vm_set_attr(kvm, &attr);
970                 break;
971         }
972         case KVM_GET_DEVICE_ATTR: {
973                 r = -EFAULT;
974                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
975                         break;
976                 r = kvm_s390_vm_get_attr(kvm, &attr);
977                 break;
978         }
979         case KVM_HAS_DEVICE_ATTR: {
980                 r = -EFAULT;
981                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
982                         break;
983                 r = kvm_s390_vm_has_attr(kvm, &attr);
984                 break;
985         }
986         case KVM_S390_GET_SKEYS: {
987                 struct kvm_s390_skeys args;
988
989                 r = -EFAULT;
990                 if (copy_from_user(&args, argp,
991                                    sizeof(struct kvm_s390_skeys)))
992                         break;
993                 r = kvm_s390_get_skeys(kvm, &args);
994                 break;
995         }
996         case KVM_S390_SET_SKEYS: {
997                 struct kvm_s390_skeys args;
998
999                 r = -EFAULT;
1000                 if (copy_from_user(&args, argp,
1001                                    sizeof(struct kvm_s390_skeys)))
1002                         break;
1003                 r = kvm_s390_set_skeys(kvm, &args);
1004                 break;
1005         }
1006         default:
1007                 r = -ENOTTY;
1008         }
1009
1010         return r;
1011 }
1012
1013 static int kvm_s390_query_ap_config(u8 *config)
1014 {
1015         u32 fcn_code = 0x04000000UL;
1016         u32 cc = 0;
1017
1018         memset(config, 0, 128);
1019         asm volatile(
1020                 "lgr 0,%1\n"
1021                 "lgr 2,%2\n"
1022                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1023                 "0: ipm %0\n"
1024                 "srl %0,28\n"
1025                 "1:\n"
1026                 EX_TABLE(0b, 1b)
1027                 : "+r" (cc)
1028                 : "r" (fcn_code), "r" (config)
1029                 : "cc", "0", "2", "memory"
1030         );
1031
1032         return cc;
1033 }
1034
1035 static int kvm_s390_apxa_installed(void)
1036 {
1037         u8 config[128];
1038         int cc;
1039
1040         if (test_facility(12)) {
1041                 cc = kvm_s390_query_ap_config(config);
1042
1043                 if (cc)
1044                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1045                 else
1046                         return config[0] & 0x40;
1047         }
1048
1049         return 0;
1050 }
1051
1052 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1053 {
1054         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1055
1056         if (kvm_s390_apxa_installed())
1057                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1058         else
1059                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1060 }
1061
1062 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1063 {
1064         get_cpu_id(cpu_id);
1065         cpu_id->version = 0xff;
1066 }
1067
1068 static int kvm_s390_crypto_init(struct kvm *kvm)
1069 {
1070         if (!test_kvm_facility(kvm, 76))
1071                 return 0;
1072
1073         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1074                                          GFP_KERNEL | GFP_DMA);
1075         if (!kvm->arch.crypto.crycb)
1076                 return -ENOMEM;
1077
1078         kvm_s390_set_crycb_format(kvm);
1079
1080         /* Enable AES/DEA protected key functions by default */
1081         kvm->arch.crypto.aes_kw = 1;
1082         kvm->arch.crypto.dea_kw = 1;
1083         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1084                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1085         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1086                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1087
1088         return 0;
1089 }
1090
1091 static void sca_dispose(struct kvm *kvm)
1092 {
1093         if (kvm->arch.use_esca)
1094                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1095         else
1096                 free_page((unsigned long)(kvm->arch.sca));
1097         kvm->arch.sca = NULL;
1098 }
1099
1100 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1101 {
1102         int i, rc;
1103         char debug_name[16];
1104         static unsigned long sca_offset;
1105
1106         rc = -EINVAL;
1107 #ifdef CONFIG_KVM_S390_UCONTROL
1108         if (type & ~KVM_VM_S390_UCONTROL)
1109                 goto out_err;
1110         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1111                 goto out_err;
1112 #else
1113         if (type)
1114                 goto out_err;
1115 #endif
1116
1117         rc = s390_enable_sie();
1118         if (rc)
1119                 goto out_err;
1120
1121         rc = -ENOMEM;
1122
1123         kvm->arch.use_esca = 0; /* start with basic SCA */
1124         rwlock_init(&kvm->arch.sca_lock);
1125         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1126         if (!kvm->arch.sca)
1127                 goto out_err;
1128         spin_lock(&kvm_lock);
1129         sca_offset += 16;
1130         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1131                 sca_offset = 0;
1132         kvm->arch.sca = (struct bsca_block *)
1133                         ((char *) kvm->arch.sca + sca_offset);
1134         spin_unlock(&kvm_lock);
1135
1136         sprintf(debug_name, "kvm-%u", current->pid);
1137
1138         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1139         if (!kvm->arch.dbf)
1140                 goto out_err;
1141
1142         /*
1143          * The architectural maximum amount of facilities is 16 kbit. To store
1144          * this amount, 2 kbyte of memory is required. Thus we need a full
1145          * page to hold the guest facility list (arch.model.fac->list) and the
1146          * facility mask (arch.model.fac->mask). Its address size has to be
1147          * 31 bits and word aligned.
1148          */
1149         kvm->arch.model.fac =
1150                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1151         if (!kvm->arch.model.fac)
1152                 goto out_err;
1153
1154         /* Populate the facility mask initially. */
1155         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1156                S390_ARCH_FAC_LIST_SIZE_BYTE);
1157         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1158                 if (i < kvm_s390_fac_list_mask_size())
1159                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1160                 else
1161                         kvm->arch.model.fac->mask[i] = 0UL;
1162         }
1163
1164         /* Populate the facility list initially. */
1165         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1166                S390_ARCH_FAC_LIST_SIZE_BYTE);
1167
1168         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1169         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1170
1171         if (kvm_s390_crypto_init(kvm) < 0)
1172                 goto out_err;
1173
1174         spin_lock_init(&kvm->arch.float_int.lock);
1175         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1176                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1177         init_waitqueue_head(&kvm->arch.ipte_wq);
1178         mutex_init(&kvm->arch.ipte_mutex);
1179
1180         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1181         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1182
1183         if (type & KVM_VM_S390_UCONTROL) {
1184                 kvm->arch.gmap = NULL;
1185                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1186         } else {
1187                 kvm->arch.mem_limit = TASK_MAX_SIZE;
1188                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1189                 if (!kvm->arch.gmap)
1190                         goto out_err;
1191                 kvm->arch.gmap->private = kvm;
1192                 kvm->arch.gmap->pfault_enabled = 0;
1193         }
1194
1195         kvm->arch.css_support = 0;
1196         kvm->arch.use_irqchip = 0;
1197         kvm->arch.epoch = 0;
1198
1199         spin_lock_init(&kvm->arch.start_stop_lock);
1200         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1201
1202         return 0;
1203 out_err:
1204         kfree(kvm->arch.crypto.crycb);
1205         free_page((unsigned long)kvm->arch.model.fac);
1206         debug_unregister(kvm->arch.dbf);
1207         sca_dispose(kvm);
1208         KVM_EVENT(3, "creation of vm failed: %d", rc);
1209         return rc;
1210 }
1211
1212 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1213 {
1214         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1215         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1216         kvm_s390_clear_local_irqs(vcpu);
1217         kvm_clear_async_pf_completion_queue(vcpu);
1218         if (!kvm_is_ucontrol(vcpu->kvm))
1219                 sca_del_vcpu(vcpu);
1220         smp_mb();
1221
1222         if (kvm_is_ucontrol(vcpu->kvm))
1223                 gmap_free(vcpu->arch.gmap);
1224
1225         if (vcpu->kvm->arch.use_cmma)
1226                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1227         free_page((unsigned long)(vcpu->arch.sie_block));
1228
1229         kvm_vcpu_uninit(vcpu);
1230         kmem_cache_free(kvm_vcpu_cache, vcpu);
1231 }
1232
1233 static void kvm_free_vcpus(struct kvm *kvm)
1234 {
1235         unsigned int i;
1236         struct kvm_vcpu *vcpu;
1237
1238         kvm_for_each_vcpu(i, vcpu, kvm)
1239                 kvm_arch_vcpu_destroy(vcpu);
1240
1241         mutex_lock(&kvm->lock);
1242         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1243                 kvm->vcpus[i] = NULL;
1244
1245         atomic_set(&kvm->online_vcpus, 0);
1246         mutex_unlock(&kvm->lock);
1247 }
1248
1249 void kvm_arch_destroy_vm(struct kvm *kvm)
1250 {
1251         kvm_free_vcpus(kvm);
1252         free_page((unsigned long)kvm->arch.model.fac);
1253         sca_dispose(kvm);
1254         debug_unregister(kvm->arch.dbf);
1255         kfree(kvm->arch.crypto.crycb);
1256         if (!kvm_is_ucontrol(kvm))
1257                 gmap_free(kvm->arch.gmap);
1258         kvm_s390_destroy_adapters(kvm);
1259         kvm_s390_clear_float_irqs(kvm);
1260         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1261 }
1262
1263 /* Section: vcpu related */
1264 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1265 {
1266         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1267         if (!vcpu->arch.gmap)
1268                 return -ENOMEM;
1269         vcpu->arch.gmap->private = vcpu->kvm;
1270
1271         return 0;
1272 }
1273
1274 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1275 {
1276         read_lock(&vcpu->kvm->arch.sca_lock);
1277         if (vcpu->kvm->arch.use_esca) {
1278                 struct esca_block *sca = vcpu->kvm->arch.sca;
1279
1280                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1281                 sca->cpu[vcpu->vcpu_id].sda = 0;
1282         } else {
1283                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1284
1285                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1286                 sca->cpu[vcpu->vcpu_id].sda = 0;
1287         }
1288         read_unlock(&vcpu->kvm->arch.sca_lock);
1289 }
1290
1291 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1292 {
1293         read_lock(&vcpu->kvm->arch.sca_lock);
1294         if (vcpu->kvm->arch.use_esca) {
1295                 struct esca_block *sca = vcpu->kvm->arch.sca;
1296
1297                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1298                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1299                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1300                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1301                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1302         } else {
1303                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1304
1305                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1306                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1307                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1308                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1309         }
1310         read_unlock(&vcpu->kvm->arch.sca_lock);
1311 }
1312
1313 /* Basic SCA to Extended SCA data copy routines */
1314 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1315 {
1316         d->sda = s->sda;
1317         d->sigp_ctrl.c = s->sigp_ctrl.c;
1318         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1319 }
1320
1321 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1322 {
1323         int i;
1324
1325         d->ipte_control = s->ipte_control;
1326         d->mcn[0] = s->mcn;
1327         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1328                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1329 }
1330
1331 static int sca_switch_to_extended(struct kvm *kvm)
1332 {
1333         struct bsca_block *old_sca = kvm->arch.sca;
1334         struct esca_block *new_sca;
1335         struct kvm_vcpu *vcpu;
1336         unsigned int vcpu_idx;
1337         u32 scaol, scaoh;
1338
1339         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1340         if (!new_sca)
1341                 return -ENOMEM;
1342
1343         scaoh = (u32)((u64)(new_sca) >> 32);
1344         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1345
1346         kvm_s390_vcpu_block_all(kvm);
1347         write_lock(&kvm->arch.sca_lock);
1348
1349         sca_copy_b_to_e(new_sca, old_sca);
1350
1351         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1352                 vcpu->arch.sie_block->scaoh = scaoh;
1353                 vcpu->arch.sie_block->scaol = scaol;
1354                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1355         }
1356         kvm->arch.sca = new_sca;
1357         kvm->arch.use_esca = 1;
1358
1359         write_unlock(&kvm->arch.sca_lock);
1360         kvm_s390_vcpu_unblock_all(kvm);
1361
1362         free_page((unsigned long)old_sca);
1363
1364         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1365                  old_sca, kvm->arch.sca);
1366         return 0;
1367 }
1368
1369 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1370 {
1371         int rc;
1372
1373         if (id < KVM_S390_BSCA_CPU_SLOTS)
1374                 return true;
1375         if (!sclp.has_esca)
1376                 return false;
1377
1378         mutex_lock(&kvm->lock);
1379         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1380         mutex_unlock(&kvm->lock);
1381
1382         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1383 }
1384
1385 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1386 {
1387         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1388         kvm_clear_async_pf_completion_queue(vcpu);
1389         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1390                                     KVM_SYNC_GPRS |
1391                                     KVM_SYNC_ACRS |
1392                                     KVM_SYNC_CRS |
1393                                     KVM_SYNC_ARCH0 |
1394                                     KVM_SYNC_PFAULT;
1395         if (test_kvm_facility(vcpu->kvm, 129))
1396                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1397
1398         if (kvm_is_ucontrol(vcpu->kvm))
1399                 return __kvm_ucontrol_vcpu_init(vcpu);
1400
1401         return 0;
1402 }
1403
1404 /*
1405  * Backs up the current FP/VX register save area on a particular
1406  * destination.  Used to switch between different register save
1407  * areas.
1408  */
1409 static inline void save_fpu_to(struct fpu *dst)
1410 {
1411         dst->fpc = current->thread.fpu.fpc;
1412         dst->regs = current->thread.fpu.regs;
1413 }
1414
1415 /*
1416  * Switches the FP/VX register save area from which to lazy
1417  * restore register contents.
1418  */
1419 static inline void load_fpu_from(struct fpu *from)
1420 {
1421         current->thread.fpu.fpc = from->fpc;
1422         current->thread.fpu.regs = from->regs;
1423 }
1424
1425 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1426 {
1427         /* Save host register state */
1428         save_fpu_regs();
1429         save_fpu_to(&vcpu->arch.host_fpregs);
1430
1431         if (test_kvm_facility(vcpu->kvm, 129)) {
1432                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1433                 /*
1434                  * Use the register save area in the SIE-control block
1435                  * for register restore and save in kvm_arch_vcpu_put()
1436                  */
1437                 current->thread.fpu.vxrs =
1438                         (__vector128 *)&vcpu->run->s.regs.vrs;
1439         } else
1440                 load_fpu_from(&vcpu->arch.guest_fpregs);
1441
1442         if (test_fp_ctl(current->thread.fpu.fpc))
1443                 /* User space provided an invalid FPC, let's clear it */
1444                 current->thread.fpu.fpc = 0;
1445
1446         save_access_regs(vcpu->arch.host_acrs);
1447         restore_access_regs(vcpu->run->s.regs.acrs);
1448         gmap_enable(vcpu->arch.gmap);
1449         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1450 }
1451
1452 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1453 {
1454         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1455         gmap_disable(vcpu->arch.gmap);
1456
1457         save_fpu_regs();
1458
1459         if (test_kvm_facility(vcpu->kvm, 129))
1460                 /*
1461                  * kvm_arch_vcpu_load() set up the register save area to
1462                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1463                  * are already saved.  Only the floating-point control must be
1464                  * copied.
1465                  */
1466                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1467         else
1468                 save_fpu_to(&vcpu->arch.guest_fpregs);
1469         load_fpu_from(&vcpu->arch.host_fpregs);
1470
1471         save_access_regs(vcpu->run->s.regs.acrs);
1472         restore_access_regs(vcpu->arch.host_acrs);
1473 }
1474
1475 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1476 {
1477         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1478         vcpu->arch.sie_block->gpsw.mask = 0UL;
1479         vcpu->arch.sie_block->gpsw.addr = 0UL;
1480         kvm_s390_set_prefix(vcpu, 0);
1481         vcpu->arch.sie_block->cputm     = 0UL;
1482         vcpu->arch.sie_block->ckc       = 0UL;
1483         vcpu->arch.sie_block->todpr     = 0;
1484         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1485         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1486         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1487         vcpu->arch.guest_fpregs.fpc = 0;
1488         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1489         vcpu->arch.sie_block->gbea = 1;
1490         vcpu->arch.sie_block->pp = 0;
1491         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1492         kvm_clear_async_pf_completion_queue(vcpu);
1493         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1494                 kvm_s390_vcpu_stop(vcpu);
1495         kvm_s390_clear_local_irqs(vcpu);
1496 }
1497
1498 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1499 {
1500         mutex_lock(&vcpu->kvm->lock);
1501         preempt_disable();
1502         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1503         preempt_enable();
1504         mutex_unlock(&vcpu->kvm->lock);
1505         if (!kvm_is_ucontrol(vcpu->kvm)) {
1506                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1507                 sca_add_vcpu(vcpu);
1508         }
1509
1510 }
1511
1512 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1513 {
1514         if (!test_kvm_facility(vcpu->kvm, 76))
1515                 return;
1516
1517         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1518
1519         if (vcpu->kvm->arch.crypto.aes_kw)
1520                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1521         if (vcpu->kvm->arch.crypto.dea_kw)
1522                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1523
1524         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1525 }
1526
1527 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1528 {
1529         free_page(vcpu->arch.sie_block->cbrlo);
1530         vcpu->arch.sie_block->cbrlo = 0;
1531 }
1532
1533 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1534 {
1535         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1536         if (!vcpu->arch.sie_block->cbrlo)
1537                 return -ENOMEM;
1538
1539         vcpu->arch.sie_block->ecb2 |= 0x80;
1540         vcpu->arch.sie_block->ecb2 &= ~0x08;
1541         return 0;
1542 }
1543
1544 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1545 {
1546         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1547
1548         vcpu->arch.cpu_id = model->cpu_id;
1549         vcpu->arch.sie_block->ibc = model->ibc;
1550         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1551 }
1552
1553 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1554 {
1555         int rc = 0;
1556
1557         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1558                                                     CPUSTAT_SM |
1559                                                     CPUSTAT_STOPPED);
1560
1561         if (test_kvm_facility(vcpu->kvm, 78))
1562                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1563         else if (test_kvm_facility(vcpu->kvm, 8))
1564                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1565
1566         kvm_s390_vcpu_setup_model(vcpu);
1567
1568         vcpu->arch.sie_block->ecb   = 6;
1569         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1570                 vcpu->arch.sie_block->ecb |= 0x10;
1571
1572         vcpu->arch.sie_block->ecb2  = 8;
1573         vcpu->arch.sie_block->eca   = 0xC1002000U;
1574         if (sclp.has_siif)
1575                 vcpu->arch.sie_block->eca |= 1;
1576         if (sclp.has_sigpif)
1577                 vcpu->arch.sie_block->eca |= 0x10000000U;
1578         if (test_kvm_facility(vcpu->kvm, 129)) {
1579                 vcpu->arch.sie_block->eca |= 0x00020000;
1580                 vcpu->arch.sie_block->ecd |= 0x20000000;
1581         }
1582         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1583
1584         if (vcpu->kvm->arch.use_cmma) {
1585                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1586                 if (rc)
1587                         return rc;
1588         }
1589         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1590         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1591
1592         kvm_s390_vcpu_crypto_setup(vcpu);
1593
1594         return rc;
1595 }
1596
1597 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1598                                       unsigned int id)
1599 {
1600         struct kvm_vcpu *vcpu;
1601         struct sie_page *sie_page;
1602         int rc = -EINVAL;
1603
1604         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1605                 goto out;
1606
1607         rc = -ENOMEM;
1608
1609         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1610         if (!vcpu)
1611                 goto out;
1612
1613         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1614         if (!sie_page)
1615                 goto out_free_cpu;
1616
1617         vcpu->arch.sie_block = &sie_page->sie_block;
1618         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1619
1620         vcpu->arch.sie_block->icpua = id;
1621         spin_lock_init(&vcpu->arch.local_int.lock);
1622         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1623         vcpu->arch.local_int.wq = &vcpu->wq;
1624         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1625
1626         /*
1627          * Allocate a save area for floating-point registers.  If the vector
1628          * extension is available, register contents are saved in the SIE
1629          * control block.  The allocated save area is still required in
1630          * particular places, for example, in kvm_s390_vcpu_store_status().
1631          */
1632         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1633                                                GFP_KERNEL);
1634         if (!vcpu->arch.guest_fpregs.fprs)
1635                 goto out_free_sie_block;
1636
1637         rc = kvm_vcpu_init(vcpu, kvm, id);
1638         if (rc)
1639                 goto out_free_sie_block;
1640         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1641                  vcpu->arch.sie_block);
1642         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1643
1644         return vcpu;
1645 out_free_sie_block:
1646         free_page((unsigned long)(vcpu->arch.sie_block));
1647 out_free_cpu:
1648         kmem_cache_free(kvm_vcpu_cache, vcpu);
1649 out:
1650         return ERR_PTR(rc);
1651 }
1652
/*
 * Report whether @vcpu is runnable; the answer is whatever
 * kvm_s390_vcpu_has_irq() returns when called with 0 as second argument.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
1657
1658 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1659 {
1660         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1661         exit_sie(vcpu);
1662 }
1663
1664 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1665 {
1666         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1667 }
1668
1669 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1670 {
1671         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1672         exit_sie(vcpu);
1673 }
1674
1675 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1676 {
1677         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1678 }
1679
1680 /*
1681  * Kick a guest cpu out of SIE and wait until SIE is not running.
1682  * If the CPU is not running (e.g. waiting as idle) the function will
1683  * return immediately. */
1684 void exit_sie(struct kvm_vcpu *vcpu)
1685 {
1686         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1687         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1688                 cpu_relax();
1689 }
1690
/*
 * Post request @req for @vcpu and kick the vcpu out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* Post the request first, then force the vcpu out of SIE. */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
1697
1698 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1699 {
1700         int i;
1701         struct kvm *kvm = gmap->private;
1702         struct kvm_vcpu *vcpu;
1703
1704         kvm_for_each_vcpu(i, vcpu, kvm) {
1705                 /* match against both prefix pages */
1706                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1707                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1708                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1709                 }
1710         }
1711 }
1712
/*
 * Referenced by the kvm common code but never called on s390; reaching
 * this function triggers BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	/* Only here to give the function a return value. */
	return 0;
}
1719
1720 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1721                                            struct kvm_one_reg *reg)
1722 {
1723         int r = -EINVAL;
1724
1725         switch (reg->id) {
1726         case KVM_REG_S390_TODPR:
1727                 r = put_user(vcpu->arch.sie_block->todpr,
1728                              (u32 __user *)reg->addr);
1729                 break;
1730         case KVM_REG_S390_EPOCHDIFF:
1731                 r = put_user(vcpu->arch.sie_block->epoch,
1732                              (u64 __user *)reg->addr);
1733                 break;
1734         case KVM_REG_S390_CPU_TIMER:
1735                 r = put_user(vcpu->arch.sie_block->cputm,
1736                              (u64 __user *)reg->addr);
1737                 break;
1738         case KVM_REG_S390_CLOCK_COMP:
1739                 r = put_user(vcpu->arch.sie_block->ckc,
1740                              (u64 __user *)reg->addr);
1741                 break;
1742         case KVM_REG_S390_PFTOKEN:
1743                 r = put_user(vcpu->arch.pfault_token,
1744                              (u64 __user *)reg->addr);
1745                 break;
1746         case KVM_REG_S390_PFCOMPARE:
1747                 r = put_user(vcpu->arch.pfault_compare,
1748                              (u64 __user *)reg->addr);
1749                 break;
1750         case KVM_REG_S390_PFSELECT:
1751                 r = put_user(vcpu->arch.pfault_select,
1752                              (u64 __user *)reg->addr);
1753                 break;
1754         case KVM_REG_S390_PP:
1755                 r = put_user(vcpu->arch.sie_block->pp,
1756                              (u64 __user *)reg->addr);
1757                 break;
1758         case KVM_REG_S390_GBEA:
1759                 r = put_user(vcpu->arch.sie_block->gbea,
1760                              (u64 __user *)reg->addr);
1761                 break;
1762         default:
1763                 break;
1764         }
1765
1766         return r;
1767 }
1768
1769 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1770                                            struct kvm_one_reg *reg)
1771 {
1772         int r = -EINVAL;
1773
1774         switch (reg->id) {
1775         case KVM_REG_S390_TODPR:
1776                 r = get_user(vcpu->arch.sie_block->todpr,
1777                              (u32 __user *)reg->addr);
1778                 break;
1779         case KVM_REG_S390_EPOCHDIFF:
1780                 r = get_user(vcpu->arch.sie_block->epoch,
1781                              (u64 __user *)reg->addr);
1782                 break;
1783         case KVM_REG_S390_CPU_TIMER:
1784                 r = get_user(vcpu->arch.sie_block->cputm,
1785                              (u64 __user *)reg->addr);
1786                 break;
1787         case KVM_REG_S390_CLOCK_COMP:
1788                 r = get_user(vcpu->arch.sie_block->ckc,
1789                              (u64 __user *)reg->addr);
1790                 break;
1791         case KVM_REG_S390_PFTOKEN:
1792                 r = get_user(vcpu->arch.pfault_token,
1793                              (u64 __user *)reg->addr);
1794                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1795                         kvm_clear_async_pf_completion_queue(vcpu);
1796                 break;
1797         case KVM_REG_S390_PFCOMPARE:
1798                 r = get_user(vcpu->arch.pfault_compare,
1799                              (u64 __user *)reg->addr);
1800                 break;
1801         case KVM_REG_S390_PFSELECT:
1802                 r = get_user(vcpu->arch.pfault_select,
1803                              (u64 __user *)reg->addr);
1804                 break;
1805         case KVM_REG_S390_PP:
1806                 r = get_user(vcpu->arch.sie_block->pp,
1807                              (u64 __user *)reg->addr);
1808                 break;
1809         case KVM_REG_S390_GBEA:
1810                 r = get_user(vcpu->arch.sie_block->gbea,
1811                              (u64 __user *)reg->addr);
1812                 break;
1813         default:
1814                 break;
1815         }
1816
1817         return r;
1818 }
1819
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
1825
1826 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1827 {
1828         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1829         return 0;
1830 }
1831
1832 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1833 {
1834         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1835         return 0;
1836 }
1837
1838 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1839                                   struct kvm_sregs *sregs)
1840 {
1841         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1842         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1843         restore_access_regs(vcpu->run->s.regs.acrs);
1844         return 0;
1845 }
1846
1847 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1848                                   struct kvm_sregs *sregs)
1849 {
1850         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1851         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1852         return 0;
1853 }
1854
1855 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1856 {
1857         if (test_fp_ctl(fpu->fpc))
1858                 return -EINVAL;
1859         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1860         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1861         save_fpu_regs();
1862         load_fpu_from(&vcpu->arch.guest_fpregs);
1863         return 0;
1864 }
1865
1866 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1867 {
1868         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1869         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1870         return 0;
1871 }
1872
1873 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1874 {
1875         int rc = 0;
1876
1877         if (!is_vcpu_stopped(vcpu))
1878                 rc = -EBUSY;
1879         else {
1880                 vcpu->run->psw_mask = psw.mask;
1881                 vcpu->run->psw_addr = psw.addr;
1882         }
1883         return rc;
1884 }
1885
1886 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1887                                   struct kvm_translation *tr)
1888 {
1889         return -EINVAL; /* not implemented yet */
1890 }
1891
1892 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1893                               KVM_GUESTDBG_USE_HW_BP | \
1894                               KVM_GUESTDBG_ENABLE)
1895
1896 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1897                                         struct kvm_guest_debug *dbg)
1898 {
1899         int rc = 0;
1900
1901         vcpu->guest_debug = 0;
1902         kvm_s390_clear_bp_data(vcpu);
1903
1904         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1905                 return -EINVAL;
1906
1907         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1908                 vcpu->guest_debug = dbg->control;
1909                 /* enforce guest PER */
1910                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1911
1912                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1913                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1914         } else {
1915                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1916                 vcpu->arch.guestdbg.last_bp = 0;
1917         }
1918
1919         if (rc) {
1920                 vcpu->guest_debug = 0;
1921                 kvm_s390_clear_bp_data(vcpu);
1922                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1923         }
1924
1925         return rc;
1926 }
1927
1928 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1929                                     struct kvm_mp_state *mp_state)
1930 {
1931         /* CHECK_STOP and LOAD are not supported yet */
1932         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1933                                        KVM_MP_STATE_OPERATING;
1934 }
1935
1936 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1937                                     struct kvm_mp_state *mp_state)
1938 {
1939         int rc = 0;
1940
1941         /* user space knows about this interface - let it control the state */
1942         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1943
1944         switch (mp_state->mp_state) {
1945         case KVM_MP_STATE_STOPPED:
1946                 kvm_s390_vcpu_stop(vcpu);
1947                 break;
1948         case KVM_MP_STATE_OPERATING:
1949                 kvm_s390_vcpu_start(vcpu);
1950                 break;
1951         case KVM_MP_STATE_LOAD:
1952         case KVM_MP_STATE_CHECK_STOP:
1953                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1954         default:
1955                 rc = -ENXIO;
1956         }
1957
1958         return rc;
1959 }
1960
1961 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1962 {
1963         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1964 }
1965
1966 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1967 {
1968 retry:
1969         kvm_s390_vcpu_request_handled(vcpu);
1970         if (!vcpu->requests)
1971                 return 0;
1972         /*
1973          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1974          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1975          * This ensures that the ipte instruction for this request has
1976          * already finished. We might race against a second unmapper that
1977          * wants to set the blocking bit. Lets just retry the request loop.
1978          */
1979         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1980                 int rc;
1981                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1982                                       kvm_s390_get_prefix(vcpu),
1983                                       PAGE_SIZE * 2);
1984                 if (rc)
1985                         return rc;
1986                 goto retry;
1987         }
1988
1989         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1990                 vcpu->arch.sie_block->ihcpu = 0xffff;
1991                 goto retry;
1992         }
1993
1994         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1995                 if (!ibs_enabled(vcpu)) {
1996                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1997                         atomic_or(CPUSTAT_IBS,
1998                                         &vcpu->arch.sie_block->cpuflags);
1999                 }
2000                 goto retry;
2001         }
2002
2003         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2004                 if (ibs_enabled(vcpu)) {
2005                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2006                         atomic_andnot(CPUSTAT_IBS,
2007                                           &vcpu->arch.sie_block->cpuflags);
2008                 }
2009                 goto retry;
2010         }
2011
2012         /* nothing to do, just clear the request */
2013         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2014
2015         return 0;
2016 }
2017
2018 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2019 {
2020         struct kvm_vcpu *vcpu;
2021         int i;
2022
2023         mutex_lock(&kvm->lock);
2024         preempt_disable();
2025         kvm->arch.epoch = tod - get_tod_clock();
2026         kvm_s390_vcpu_block_all(kvm);
2027         kvm_for_each_vcpu(i, vcpu, kvm)
2028                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2029         kvm_s390_vcpu_unblock_all(kvm);
2030         preempt_enable();
2031         mutex_unlock(&kvm->lock);
2032 }
2033
2034 /**
2035  * kvm_arch_fault_in_page - fault-in guest page if necessary
2036  * @vcpu: The corresponding virtual cpu
2037  * @gpa: Guest physical address
2038  * @writable: Whether the page should be writable or not
2039  *
2040  * Make sure that a guest page has been faulted-in on the host.
2041  *
2042  * Return: Zero on success, negative error code otherwise.
2043  */
2044 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2045 {
2046         return gmap_fault(vcpu->arch.gmap, gpa,
2047                           writable ? FAULT_FLAG_WRITE : 0);
2048 }
2049
2050 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2051                                       unsigned long token)
2052 {
2053         struct kvm_s390_interrupt inti;
2054         struct kvm_s390_irq irq;
2055
2056         if (start_token) {
2057                 irq.u.ext.ext_params2 = token;
2058                 irq.type = KVM_S390_INT_PFAULT_INIT;
2059                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2060         } else {
2061                 inti.type = KVM_S390_INT_PFAULT_DONE;
2062                 inti.parm64 = token;
2063                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2064         }
2065 }
2066
2067 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2068                                      struct kvm_async_pf *work)
2069 {
2070         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2071         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2072 }
2073
2074 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2075                                  struct kvm_async_pf *work)
2076 {
2077         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2078         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2079 }
2080
/*
 * Intentionally empty: s390 will always inject the page directly, so
 * there is nothing to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2086
2087 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2088 {
2089         /*
2090          * s390 will always inject the page directly,
2091          * but we still want check_async_completion to cleanup
2092          */
2093         return true;
2094 }
2095
2096 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2097 {
2098         hva_t hva;
2099         struct kvm_arch_async_pf arch;
2100         int rc;
2101
2102         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2103                 return 0;
2104         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2105             vcpu->arch.pfault_compare)
2106                 return 0;
2107         if (psw_extint_disabled(vcpu))
2108                 return 0;
2109         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2110                 return 0;
2111         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2112                 return 0;
2113         if (!vcpu->arch.gmap->pfault_enabled)
2114                 return 0;
2115
2116         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2117         hva += current->thread.gmap_addr & ~PAGE_MASK;
2118         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2119                 return 0;
2120
2121         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2122         return rc;
2123 }
2124
2125 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2126 {
2127         int rc, cpuflags;
2128
2129         /*
2130          * On s390 notifications for arriving pages will be delivered directly
2131          * to the guest but the house keeping for completed pfaults is
2132          * handled outside the worker.
2133          */
2134         kvm_check_async_pf_completion(vcpu);
2135
2136         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2137         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2138
2139         if (need_resched())
2140                 schedule();
2141
2142         if (test_cpu_flag(CIF_MCCK_PENDING))
2143                 s390_handle_mcck();
2144
2145         if (!kvm_is_ucontrol(vcpu->kvm)) {
2146                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2147                 if (rc)
2148                         return rc;
2149         }
2150
2151         rc = kvm_s390_handle_requests(vcpu);
2152         if (rc)
2153                 return rc;
2154
2155         if (guestdbg_enabled(vcpu)) {
2156                 kvm_s390_backup_guest_per_regs(vcpu);
2157                 kvm_s390_patch_guest_per_regs(vcpu);
2158         }
2159
2160         vcpu->arch.sie_block->icptcode = 0;
2161         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2162         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2163         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2164
2165         return 0;
2166 }
2167
2168 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2169 {
2170         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2171         u8 opcode;
2172         int rc;
2173
2174         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2175         trace_kvm_s390_sie_fault(vcpu);
2176
2177         /*
2178          * We want to inject an addressing exception, which is defined as a
2179          * suppressing or terminating exception. However, since we came here
2180          * by a DAT access exception, the PSW still points to the faulting
2181          * instruction since DAT exceptions are nullifying. So we've got
2182          * to look up the current opcode to get the length of the instruction
2183          * to be able to forward the PSW.
2184          */
2185         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2186         if (rc)
2187                 return kvm_s390_inject_prog_cond(vcpu, rc);
2188         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2189
2190         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2191 }
2192
2193 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2194 {
2195         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2196                    vcpu->arch.sie_block->icptcode);
2197         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2198
2199         if (guestdbg_enabled(vcpu))
2200                 kvm_s390_restore_guest_per_regs(vcpu);
2201
2202         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2203         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2204
2205         if (vcpu->arch.sie_block->icptcode > 0) {
2206                 int rc = kvm_handle_sie_intercept(vcpu);
2207
2208                 if (rc != -EOPNOTSUPP)
2209                         return rc;
2210                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2211                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2212                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2213                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2214                 return -EREMOTE;
2215         } else if (exit_reason != -EFAULT) {
2216                 vcpu->stat.exit_null++;
2217                 return 0;
2218         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2219                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2220                 vcpu->run->s390_ucontrol.trans_exc_code =
2221                                                 current->thread.gmap_addr;
2222                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2223                 return -EREMOTE;
2224         } else if (current->thread.gmap_pfault) {
2225                 trace_kvm_s390_major_guest_pfault(vcpu);
2226                 current->thread.gmap_pfault = 0;
2227                 if (kvm_arch_setup_async_pf(vcpu))
2228                         return 0;
2229                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2230         }
2231         return vcpu_post_run_fault_in_sie(vcpu);
2232 }
2233
2234 static int __vcpu_run(struct kvm_vcpu *vcpu)
2235 {
2236         int rc, exit_reason;
2237
2238         /*
2239          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2240          * ning the guest), so that memslots (and other stuff) are protected
2241          */
2242         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2243
2244         do {
2245                 rc = vcpu_pre_run(vcpu);
2246                 if (rc)
2247                         break;
2248
2249                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2250                 /*
2251                  * As PF_VCPU will be used in fault handler, between
2252                  * guest_enter and guest_exit should be no uaccess.
2253                  */
2254                 local_irq_disable();
2255                 __kvm_guest_enter();
2256                 local_irq_enable();
2257                 exit_reason = sie64a(vcpu->arch.sie_block,
2258                                      vcpu->run->s.regs.gprs);
2259                 local_irq_disable();
2260                 __kvm_guest_exit();
2261                 local_irq_enable();
2262                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2263
2264                 rc = vcpu_post_run(vcpu, exit_reason);
2265         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2266
2267         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2268         return rc;
2269 }
2270
2271 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2272 {
2273         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2274         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2275         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2276                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2277         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2278                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2279                 /* some control register changes require a tlb flush */
2280                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2281         }
2282         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2283                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2284                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2285                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2286                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2287                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2288         }
2289         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2290                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2291                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2292                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2293                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2294                         kvm_clear_async_pf_completion_queue(vcpu);
2295         }
2296         kvm_run->kvm_dirty_regs = 0;
2297 }
2298
2299 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2300 {
2301         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2302         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2303         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2304         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2305         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2306         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2307         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2308         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2309         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2310         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2311         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2312         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2313 }
2314
2315 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2316 {
2317         int rc;
2318         sigset_t sigsaved;
2319
2320         if (guestdbg_exit_pending(vcpu)) {
2321                 kvm_s390_prepare_debug_exit(vcpu);
2322                 return 0;
2323         }
2324
2325         if (vcpu->sigset_active)
2326                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2327
2328         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2329                 kvm_s390_vcpu_start(vcpu);
2330         } else if (is_vcpu_stopped(vcpu)) {
2331                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2332                                    vcpu->vcpu_id);
2333                 return -EINVAL;
2334         }
2335
2336         sync_regs(vcpu, kvm_run);
2337
2338         might_fault();
2339         rc = __vcpu_run(vcpu);
2340
2341         if (signal_pending(current) && !rc) {
2342                 kvm_run->exit_reason = KVM_EXIT_INTR;
2343                 rc = -EINTR;
2344         }
2345
2346         if (guestdbg_exit_pending(vcpu) && !rc)  {
2347                 kvm_s390_prepare_debug_exit(vcpu);
2348                 rc = 0;
2349         }
2350
2351         if (rc == -EREMOTE) {
2352                 /* userspace support is needed, kvm_run has been prepared */
2353                 rc = 0;
2354         }
2355
2356         store_regs(vcpu, kvm_run);
2357
2358         if (vcpu->sigset_active)
2359                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2360
2361         vcpu->stat.exit_userspace++;
2362         return rc;
2363 }
2364
2365 /*
2366  * store status at address
2367  * we use have two special cases:
2368  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2369  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2370  */
2371 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2372 {
2373         unsigned char archmode = 1;
2374         unsigned int px;
2375         u64 clkcomp;
2376         int rc;
2377
2378         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2379                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2380                         return -EFAULT;
2381                 gpa = SAVE_AREA_BASE;
2382         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2383                 if (write_guest_real(vcpu, 163, &archmode, 1))
2384                         return -EFAULT;
2385                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2386         }
2387         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2388                              vcpu->arch.guest_fpregs.fprs, 128);
2389         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2390                               vcpu->run->s.regs.gprs, 128);
2391         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2392                               &vcpu->arch.sie_block->gpsw, 16);
2393         px = kvm_s390_get_prefix(vcpu);
2394         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2395                               &px, 4);
2396         rc |= write_guest_abs(vcpu,
2397                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2398                               &vcpu->arch.guest_fpregs.fpc, 4);
2399         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2400                               &vcpu->arch.sie_block->todpr, 4);
2401         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2402                               &vcpu->arch.sie_block->cputm, 8);
2403         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2404         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2405                               &clkcomp, 8);
2406         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2407                               &vcpu->run->s.regs.acrs, 64);
2408         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2409                               &vcpu->arch.sie_block->gcr, 128);
2410         return rc ? -EFAULT : 0;
2411 }
2412
2413 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2414 {
2415         /*
2416          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2417          * copying in vcpu load/put. Lets update our copies before we save
2418          * it into the save area
2419          */
2420         save_fpu_regs();
2421         if (test_kvm_facility(vcpu->kvm, 129)) {
2422                 /*
2423                  * If the vector extension is available, the vector registers
2424                  * which overlaps with floating-point registers are saved in
2425                  * the SIE-control block.  Hence, extract the floating-point
2426                  * registers and the FPC value and store them in the
2427                  * guest_fpregs structure.
2428                  */
2429                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2430                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2431                                  current->thread.fpu.vxrs);
2432         } else
2433                 save_fpu_to(&vcpu->arch.guest_fpregs);
2434         save_access_regs(vcpu->run->s.regs.acrs);
2435
2436         return kvm_s390_store_status_unloaded(vcpu, addr);
2437 }
2438
2439 /*
2440  * store additional status at address
2441  */
2442 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2443                                         unsigned long gpa)
2444 {
2445         /* Only bits 0-53 are used for address formation */
2446         if (!(gpa & ~0x3ff))
2447                 return 0;
2448
2449         return write_guest_abs(vcpu, gpa & ~0x3ff,
2450                                (void *)&vcpu->run->s.regs.vrs, 512);
2451 }
2452
2453 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2454 {
2455         if (!test_kvm_facility(vcpu->kvm, 129))
2456                 return 0;
2457
2458         /*
2459          * The guest VXRS are in the host VXRs due to the lazy
2460          * copying in vcpu load/put. We can simply call save_fpu_regs()
2461          * to save the current register state because we are in the
2462          * middle of a load/put cycle.
2463          *
2464          * Let's update our copies before we save it into the save area.
2465          */
2466         save_fpu_regs();
2467
2468         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2469 }
2470
2471 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2472 {
2473         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2474         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2475 }
2476
2477 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2478 {
2479         unsigned int i;
2480         struct kvm_vcpu *vcpu;
2481
2482         kvm_for_each_vcpu(i, vcpu, kvm) {
2483                 __disable_ibs_on_vcpu(vcpu);
2484         }
2485 }
2486
2487 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2488 {
2489         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2490         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2491 }
2492
2493 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2494 {
2495         int i, online_vcpus, started_vcpus = 0;
2496
2497         if (!is_vcpu_stopped(vcpu))
2498                 return;
2499
2500         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2501         /* Only one cpu at a time may enter/leave the STOPPED state. */
2502         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2503         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2504
2505         for (i = 0; i < online_vcpus; i++) {
2506                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2507                         started_vcpus++;
2508         }
2509
2510         if (started_vcpus == 0) {
2511                 /* we're the only active VCPU -> speed it up */
2512                 __enable_ibs_on_vcpu(vcpu);
2513         } else if (started_vcpus == 1) {
2514                 /*
2515                  * As we are starting a second VCPU, we have to disable
2516                  * the IBS facility on all VCPUs to remove potentially
2517                  * oustanding ENABLE requests.
2518                  */
2519                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2520         }
2521
2522         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2523         /*
2524          * Another VCPU might have used IBS while we were offline.
2525          * Let's play safe and flush the VCPU at startup.
2526          */
2527         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2528         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2529         return;
2530 }
2531
2532 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2533 {
2534         int i, online_vcpus, started_vcpus = 0;
2535         struct kvm_vcpu *started_vcpu = NULL;
2536
2537         if (is_vcpu_stopped(vcpu))
2538                 return;
2539
2540         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2541         /* Only one cpu at a time may enter/leave the STOPPED state. */
2542         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2543         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2544
2545         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2546         kvm_s390_clear_stop_irq(vcpu);
2547
2548         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2549         __disable_ibs_on_vcpu(vcpu);
2550
2551         for (i = 0; i < online_vcpus; i++) {
2552                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2553                         started_vcpus++;
2554                         started_vcpu = vcpu->kvm->vcpus[i];
2555                 }
2556         }
2557
2558         if (started_vcpus == 1) {
2559                 /*
2560                  * As we only have one VCPU left, we want to enable the
2561                  * IBS facility for that VCPU to speed it up.
2562                  */
2563                 __enable_ibs_on_vcpu(started_vcpu);
2564         }
2565
2566         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2567         return;
2568 }
2569
2570 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2571                                      struct kvm_enable_cap *cap)
2572 {
2573         int r;
2574
2575         if (cap->flags)
2576                 return -EINVAL;
2577
2578         switch (cap->cap) {
2579         case KVM_CAP_S390_CSS_SUPPORT:
2580                 if (!vcpu->kvm->arch.css_support) {
2581                         vcpu->kvm->arch.css_support = 1;
2582                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2583                         trace_kvm_s390_enable_css(vcpu->kvm);
2584                 }
2585                 r = 0;
2586                 break;
2587         default:
2588                 r = -EINVAL;
2589                 break;
2590         }
2591         return r;
2592 }
2593
2594 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2595                                   struct kvm_s390_mem_op *mop)
2596 {
2597         void __user *uaddr = (void __user *)mop->buf;
2598         void *tmpbuf = NULL;
2599         int r, srcu_idx;
2600         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2601                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2602
2603         if (mop->flags & ~supported_flags)
2604                 return -EINVAL;
2605
2606         if (mop->size > MEM_OP_MAX_SIZE)
2607                 return -E2BIG;
2608
2609         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2610                 tmpbuf = vmalloc(mop->size);
2611                 if (!tmpbuf)
2612                         return -ENOMEM;
2613         }
2614
2615         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2616
2617         switch (mop->op) {
2618         case KVM_S390_MEMOP_LOGICAL_READ:
2619                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2620                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2621                         break;
2622                 }
2623                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2624                 if (r == 0) {
2625                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2626                                 r = -EFAULT;
2627                 }
2628                 break;
2629         case KVM_S390_MEMOP_LOGICAL_WRITE:
2630                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2631                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2632                         break;
2633                 }
2634                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2635                         r = -EFAULT;
2636                         break;
2637                 }
2638                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2639                 break;
2640         default:
2641                 r = -EINVAL;
2642         }
2643
2644         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2645
2646         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2647                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2648
2649         vfree(tmpbuf);
2650         return r;
2651 }
2652
2653 long kvm_arch_vcpu_ioctl(struct file *filp,
2654                          unsigned int ioctl, unsigned long arg)
2655 {
2656         struct kvm_vcpu *vcpu = filp->private_data;
2657         void __user *argp = (void __user *)arg;
2658         int idx;
2659         long r;
2660
2661         switch (ioctl) {
2662         case KVM_S390_IRQ: {
2663                 struct kvm_s390_irq s390irq;
2664
2665                 r = -EFAULT;
2666                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2667                         break;
2668                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2669                 break;
2670         }
2671         case KVM_S390_INTERRUPT: {
2672                 struct kvm_s390_interrupt s390int;
2673                 struct kvm_s390_irq s390irq;
2674
2675                 r = -EFAULT;
2676                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2677                         break;
2678                 if (s390int_to_s390irq(&s390int, &s390irq))
2679                         return -EINVAL;
2680                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2681                 break;
2682         }
2683         case KVM_S390_STORE_STATUS:
2684                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2685                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2686                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2687                 break;
2688         case KVM_S390_SET_INITIAL_PSW: {
2689                 psw_t psw;
2690
2691                 r = -EFAULT;
2692                 if (copy_from_user(&psw, argp, sizeof(psw)))
2693                         break;
2694                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2695                 break;
2696         }
2697         case KVM_S390_INITIAL_RESET:
2698                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2699                 break;
2700         case KVM_SET_ONE_REG:
2701         case KVM_GET_ONE_REG: {
2702                 struct kvm_one_reg reg;
2703                 r = -EFAULT;
2704                 if (copy_from_user(&reg, argp, sizeof(reg)))
2705                         break;
2706                 if (ioctl == KVM_SET_ONE_REG)
2707                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2708                 else
2709                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2710                 break;
2711         }
2712 #ifdef CONFIG_KVM_S390_UCONTROL
2713         case KVM_S390_UCAS_MAP: {
2714                 struct kvm_s390_ucas_mapping ucasmap;
2715
2716                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2717                         r = -EFAULT;
2718                         break;
2719                 }
2720
2721                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2722                         r = -EINVAL;
2723                         break;
2724                 }
2725
2726                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2727                                      ucasmap.vcpu_addr, ucasmap.length);
2728                 break;
2729         }
2730         case KVM_S390_UCAS_UNMAP: {
2731                 struct kvm_s390_ucas_mapping ucasmap;
2732
2733                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2734                         r = -EFAULT;
2735                         break;
2736                 }
2737
2738                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2739                         r = -EINVAL;
2740                         break;
2741                 }
2742
2743                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2744                         ucasmap.length);
2745                 break;
2746         }
2747 #endif
2748         case KVM_S390_VCPU_FAULT: {
2749                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2750                 break;
2751         }
2752         case KVM_ENABLE_CAP:
2753         {
2754                 struct kvm_enable_cap cap;
2755                 r = -EFAULT;
2756                 if (copy_from_user(&cap, argp, sizeof(cap)))
2757                         break;
2758                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2759                 break;
2760         }
2761         case KVM_S390_MEM_OP: {
2762                 struct kvm_s390_mem_op mem_op;
2763
2764                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2765                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2766                 else
2767                         r = -EFAULT;
2768                 break;
2769         }
2770         case KVM_S390_SET_IRQ_STATE: {
2771                 struct kvm_s390_irq_state irq_state;
2772
2773                 r = -EFAULT;
2774                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2775                         break;
2776                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2777                     irq_state.len == 0 ||
2778                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2779                         r = -EINVAL;
2780                         break;
2781                 }
2782                 r = kvm_s390_set_irq_state(vcpu,
2783                                            (void __user *) irq_state.buf,
2784                                            irq_state.len);
2785                 break;
2786         }
2787         case KVM_S390_GET_IRQ_STATE: {
2788                 struct kvm_s390_irq_state irq_state;
2789
2790                 r = -EFAULT;
2791                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2792                         break;
2793                 if (irq_state.len == 0) {
2794                         r = -EINVAL;
2795                         break;
2796                 }
2797                 r = kvm_s390_get_irq_state(vcpu,
2798                                            (__u8 __user *)  irq_state.buf,
2799                                            irq_state.len);
2800                 break;
2801         }
2802         default:
2803                 r = -ENOTTY;
2804         }
2805         return r;
2806 }
2807
2808 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2809 {
2810 #ifdef CONFIG_KVM_S390_UCONTROL
2811         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2812                  && (kvm_is_ucontrol(vcpu->kvm))) {
2813                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2814                 get_page(vmf->page);
2815                 return 0;
2816         }
2817 #endif
2818         return VM_FAULT_SIGBUS;
2819 }
2820
/*
 * Memslot creation callback. This implementation ignores all of its
 * arguments, performs no setup and always returns 0.
 */
2821 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2822                             unsigned long npages)
2823 {
2824         return 0;
2825 }
2826
2827 /* Section: memory related */
2828 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2829                                    struct kvm_memory_slot *memslot,
2830                                    const struct kvm_userspace_memory_region *mem,
2831                                    enum kvm_mr_change change)
2832 {
2833         /* A few sanity checks. We can have memory slots which have to be
2834            located/ended at a segment boundary (1MB). The memory in userland is
2835            ok to be fragmented into various different vmas. It is okay to mmap()
2836            and munmap() stuff in this slot after doing this call at any time */
2837
2838         if (mem->userspace_addr & 0xffffful)
2839                 return -EINVAL;
2840
2841         if (mem->memory_size & 0xffffful)
2842                 return -EINVAL;
2843
2844         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2845                 return -EINVAL;
2846
2847         return 0;
2848 }
2849
2850 void kvm_arch_commit_memory_region(struct kvm *kvm,
2851                                 const struct kvm_userspace_memory_region *mem,
2852                                 const struct kvm_memory_slot *old,
2853                                 const struct kvm_memory_slot *new,
2854                                 enum kvm_mr_change change)
2855 {
2856         int rc;
2857
2858         /* If the basics of the memslot do not change, we do not want
2859          * to update the gmap. Every update causes several unnecessary
2860          * segment translation exceptions. This is usually handled just
2861          * fine by the normal fault handler + gmap, but it will also
2862          * cause faults on the prefix page of running guest CPUs.
2863          */
2864         if (old->userspace_addr == mem->userspace_addr &&
2865             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2866             old->npages * PAGE_SIZE == mem->memory_size)
2867                 return;
2868
2869         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2870                 mem->guest_phys_addr, mem->memory_size);
2871         if (rc)
2872                 pr_warn("failed to commit memory region\n");
2873         return;
2874 }
2875
2876 static int __init kvm_s390_init(void)
2877 {
2878         if (!sclp.has_sief2) {
2879                 pr_info("SIE not available\n");
2880                 return -ENODEV;
2881         }
2882
2883         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2884 }
2885
/* Module exit handler: only calls kvm_exit(). */
2886 static void __exit kvm_s390_exit(void)
2887 {
2888         kvm_exit();
2889 }
2890
/* Register kvm_s390_init()/kvm_s390_exit() as module init and exit handlers. */
2891 module_init(kvm_s390_init);
2892 module_exit(kvm_s390_exit);
2893
2894 /*
2895  * Enable autoloading of the kvm module.
2896  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2897  * since x86 takes a different approach.
2898  */
2899 #include <linux/miscdevice.h>
2900 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2901 MODULE_ALIAS("devname:kvm");