/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
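/*
 * Illustrative userspace sketch (not part of this file): the extensions
 * handled above are queried with KVM_CHECK_EXTENSION against /dev/kvm.
 * Error handling is minimal and the capability choice is an example:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int query_max_vcpus(void)
 *	{
 *		int kvm_fd = open("/dev/kvm", O_RDWR);
 *		int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 *		close(kvm_fd);
 *		return n; // BSCA or ESCA CPU slots, depending on sclp bits
 *	}
 */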
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
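/*
 * Illustrative userspace sketch (not part of this file): enabling one of
 * the VM capabilities handled above, e.g. KVM_CAP_S390_USER_SIGP, on an
 * already created VM file descriptor (vm_fd is assumed):
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int enable_user_sigp(int vm_fd)
 *	{
 *		struct kvm_enable_cap cap;
 *
 *		memset(&cap, 0, sizeof(cap));
 *		cap.cap = KVM_CAP_S390_USER_SIGP;
 *		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *	}
 */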
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
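/*
 * Illustrative userspace sketch (not part of this file): setting the
 * memory limit handled above via the KVM_SET_DEVICE_ATTR vm ioctl
 * (vm_fd and the 1 GiB value are assumptions for the example):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_mem_limit(int vm_fd)
 *	{
 *		__u64 limit = 1ULL << 30;
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_MEM_CTRL,
 *			.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *			.addr  = (__u64)(unsigned long) &limit,
 *		};
 *
 *		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 */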
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
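/*
 * Illustrative userspace sketch (not part of this file): programming the
 * guest TOD base handled above (vm_fd and the tod value are assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_guest_tod(int vm_fd, __u64 tod)
 *	{
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_TOD,
 *			.attr  = KVM_S390_VM_TOD_LOW,
 *			.addr  = (__u64)(unsigned long) &tod,
 *		};
 *
 *		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 */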
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
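/*
 * Illustrative userspace sketch (not part of this file): reading the first
 * 16 storage keys with the KVM_S390_GET_SKEYS vm ioctl implemented above
 * (vm_fd is assumed; a return of KVM_S390_GET_SKEYS_NONE means the guest
 * does not use storage keys):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int read_skeys(int vm_fd, __u8 *keys16)
 *	{
 *		struct kvm_s390_skeys args = {
 *			.start_gfn = 0,
 *			.count = 16,
 *			.skeydata_addr = (__u64)(unsigned long) keys16,
 *		};
 *
 *		return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	}
 */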
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
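/*
 * Illustrative userspace sketch (not part of this file): setting the CPU
 * timer through the ONE_REG interface handled above (vcpu_fd is assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int set_cpu_timer(int vcpu_fd, __u64 cputm)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id   = KVM_REG_S390_CPU_TIMER,
 *			.addr = (__u64)(unsigned long) &cputm,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *	}
 */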
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
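/*
 * Illustrative userspace sketch (not part of this file): stopping a VCPU
 * via the mp_state interface above; note that, as the comment in the
 * handler says, the first KVM_SET_MP_STATE call also hands CPU state
 * control to user space (vcpu_fd is assumed):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int stop_vcpu(int vcpu_fd)
 *	{
 *		struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *		return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 *	}
 */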
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

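/*
 * The requests handled above are raised from other contexts with the vcpu
 * kicked out of SIE, e.g. (sketch, mirroring __enable_ibs_on_vcpu() below):
 *
 *	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
 *
 * so this request loop is guaranteed to run before the next SIE entry.
 */
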
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

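/*
 * The epoch is the offset that SIE adds to the host TOD clock to form the
 * guest TOD clock. Worked example with hypothetical values: for a host TOD
 * of 0x1000 and a requested guest TOD of 0x1800, the code above stores
 * epoch = 0x1800 - 0x1000 = 0x800 into every vcpu's SIE block.
 */
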
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);
	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

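/*
 * Illustrative userspace run loop (sketch; fd and variable names are
 * hypothetical, error handling omitted). "run" is the mmap'ed kvm_run page
 * of this vcpu that sync_regs()/store_regs() above shuttle state through:
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			// intercept data filled in by vcpu_post_run()
 *			break;
 *		case KVM_EXIT_INTR:
 *			break;
 *		}
 *	}
 */
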
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

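/*
 * Worked example: for gpa == KVM_S390_STORE_STATUS_NOADDR, gpa is set to 0
 * above, so the floating point registers land at absolute address
 * __LC_FPREGS_SAVE_AREA (0x1200), matching the function comment; the other
 * save areas follow at their architected lowcore offsets.
 */
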
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

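/*
 * IBS bookkeeping scenario (follows directly from the code above): with
 * only vcpu A running, A has IBS enabled; starting a second vcpu B first
 * disables IBS on all vcpus; stopping B again leaves A as the single
 * started vcpu, so A gets IBS back via __enable_ibs_on_vcpu(started_vcpu).
 */
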
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

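/*
 * Illustrative userspace sketch (fd and buffer names hypothetical): read
 * 256 bytes of guest logical memory through access register 0:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */
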
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

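/*
 * Worked example with a hypothetical hmfai value: for i == 0, the two
 * leftmost bits of sclp.hmfai select nonhyp_fai in 0..3. With
 * nonhyp_fai == 1 the function returns
 * 0x0000ffffffffffffUL >> 16 == 0x00000000ffffffffUL, masking the facility
 * bits reserved for the hypervisor out of the facility list that
 * kvm_s390_init() below hands to guests.
 */
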
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");