driver will print ACPI tables for AMD IOMMU during
IOMMU initialization.
+ amd_iommu_intr= [HW,X86-64]
+ Specifies one of the following AMD IOMMU interrupt
+ remapping modes:
+ legacy - Use legacy interrupt remapping mode.
+ vapic - Use virtual APIC mode, which allows IOMMU
+ to inject interrupts directly into guest.
+ This mode requires kvm-amd.avic=1.
+ (Default when IOMMU HW support is present.)
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
loops can be debugged more effectively on production
systems.
+ clocksource.arm_arch_timer.fsl-a008585=
+ [ARM64]
+ Format: <bool>
+ Enable/disable the workaround of Freescale/NXP
+ erratum A-008585. This can be useful for KVM
+ guests, if the guest device tree doesn't show the
+ erratum. If unspecified, the workaround is
+ enabled based on the device tree.
+
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
determined by the stdout-path property in device
tree's chosen node.
- cdns,<addr>
- Start an early, polled-mode console on a cadence serial
- port at the specified address. The cadence serial port
- must already be setup and configured. Options are not
- yet supported.
+ cdns,<addr>[,options]
+ Start an early, polled-mode console on a Cadence
+ (xuartps) serial port at the specified address. Only
+ supported option is baud rate. If baud rate is not
+ specified, the serial port must already be setup and
+ configured.
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
Default: 1024
+ gpio-mockup.gpio_mockup_ranges
+ [HW] Sets the ranges of gpiochip of for this device.
+ Format: <start1>,<end1>,<start2>,<end2>...
+
hardlockup_all_cpu_backtrace=
[KNL] Should the hard-lockup detector generate
backtraces on all cpus.
initrd= [BOOT] Specify the location of the initial ramdisk
+ init_pkru= [x86] Specify the default memory protection keys rights
+ register contents for all processes. 0x55555554 by
+ default (disallow access to all but pkey 0). Can
+ override in debugfs after boot.
+
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
Format: <irq>
intel_idle.max_cstate= [KNL,HW,ACPI,X86]
0 disables intel_idle and fall back on acpi_idle.
- 1 to 6 specify maximum depth of C-state.
+ 1 to 9 specify maximum depth of C-state.
intel_pstate= [X86]
disable
than or equal to this physical address is ignored.
maxcpus= [SMP] Maximum number of processors that an SMP kernel
- should make use of. maxcpus=n : n >= 0 limits the
- kernel to using 'n' processors. n=0 is a special case,
- it is equivalent to "nosmp", which also disables
- the IO APIC.
+ will bring up during bootup. maxcpus=n : n >= 0 limits
+ the kernel to bring up 'n' processors. Surely after
+ bootup you can bring up the other plugged cpu by executing
+ "echo 1 > /sys/devices/system/cpu/cpuX/online". So maxcpus
+ only takes effect during system bootup.
+ While n=0 is a special case, it is equivalent to "nosmp",
+ which also disables the IO APIC.
max_loop= [LOOP] The number of loop block devices that get
(loop.max_loop) unconditionally pre-created at init time. The default
nodelayacct [KNL] Disable per-task delay accounting
- nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
-
nodsp [SH] Disable hardware DSP at boot time.
noefi Disable EFI runtime services support.
nr_cpus= [SMP] Maximum number of processors that an SMP kernel
could support. nr_cpus=n : n >= 1 limits the kernel to
- supporting 'n' processors. Later in runtime you can not
- use hotplug cpu feature to put more cpu back to online.
- just like you compile the kernel NR_CPUS=n
+ support 'n' processors. It could be larger than the
+ number of already plugged CPU during bootup, later in
+ runtime you can physically add extra cpu until it reaches
+ n. So during boot up some boot time memory for per-cpu
+ variables need be pre-allocated for later physical cpu
+ hot plugging.
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
u = IGNORE_UAS (don't bind to the uas driver);
w = NO_WP_DETECT (don't test whether the
medium is write-protected).
+ y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE
+ even if the device claims no cache)
Example: quirks=0419:aaf5:rl,0421:0433:rc
user_debug= [KNL,ARM]
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
-
-asmlinkage extern void ret_from_fork(void);
+#include <asm/vdso.h>
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
get_debugreg(d7, 7);
/* Only print out debug registers if they are in their non-default state. */
- if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
- (d6 == DR6_RESERVED) && (d7 == 0x400))
- return;
-
- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
- printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+ (d6 == DR6_RESERVED) && (d7 == 0x400))) {
+ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
+ d0, d1, d2);
+ printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
+ d3, d6, d7);
+ }
if (boot_cpu_has(X86_FEATURE_OSPKE))
printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
{
int err;
struct pt_regs *childregs;
+ struct fork_frame *fork_frame;
+ struct inactive_task_frame *frame;
struct task_struct *me = current;
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
- p->thread.sp = (unsigned long) childregs;
- set_tsk_thread_flag(p, TIF_FORK);
+ fork_frame = container_of(childregs, struct fork_frame, regs);
+ frame = &fork_frame->frame;
+ frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->sp = (unsigned long)childregs;
- childregs->ss = __KERNEL_DS;
- childregs->bx = sp; /* function */
- childregs->bp = arg;
- childregs->orig_ax = -1;
- childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+ frame->bx = sp; /* function */
+ frame->r12 = arg;
return 0;
}
+ frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
current->personality &= ~READ_IMPLIES_EXEC;
/* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
- current_thread_info()->status &= ~TS_COMPAT;
+ current->thread.status &= ~TS_COMPAT;
} else {
set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32);
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
- current_thread_info()->status |= TS_COMPAT;
+ current->thread.status |= TS_COMPAT;
}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
+{
+ int ret;
+
+ ret = map_vdso_once(image, addr);
+ if (ret)
+ return ret;
+
+ return (long)image->size;
+}
+#endif
+
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
break;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# ifdef CONFIG_X86_X32_ABI
+ case ARCH_MAP_VDSO_X32:
+ return prctl_map_vdso(&vdso_image_x32, addr);
+# endif
+# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ case ARCH_MAP_VDSO_32:
+ return prctl_map_vdso(&vdso_image_32, addr);
+# endif
+ case ARCH_MAP_VDSO_64:
+ return prctl_map_vdso(&vdso_image_64, addr);
+#endif
+
default:
ret = -EINVAL;
break;
*/
#include <linux/sched.h> /* test_thread_flag(), ... */
#include <linux/kdebug.h> /* oops_begin/end, ... */
-#include <linux/module.h> /* search_exception_table */
+#include <linux/extable.h> /* search_exception_table */
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
return;
}
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * Stack overflow? During boot, we can fault near the initial
+ * stack in the direct map, but that's not an overflow -- check
+ * that we're in vmalloc space to avoid this.
+ */
+ if (is_vmalloc_addr((void *)address) &&
+ (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
+ address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
+ register void *__sp asm("rsp");
+ unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+ /*
+ * We're likely to be running with very little stack space
+ * left. It's plausible that we'd hit this condition but
+ * double-fault even before we get this far, in which case
+ * we're fine: the double-fault handler will deal with it.
+ *
+ * We don't want to make it all the way into the oops code
+ * and then double-fault, though, because we're likely to
+ * break the console driver and lose most of the stack dump.
+ */
+ asm volatile ("movq %[stack], %%rsp\n\t"
+ "call handle_stack_overflow\n\t"
+ "1: jmp 1b"
+ : "+r" (__sp)
+ : "D" ("kernel stack overflow (page fault)"),
+ "S" (regs), "d" (address),
+ [stack] "rm" (stack));
+ unreachable();
+ }
+#endif
+
/*
* 32-bit:
*
{
/* This is only called for the current mm, so: */
bool foreign = false;
+
+ /*
+ * Read or write was blocked by protection keys. This is
+ * always an unconditional error and can never result in
+ * a follow-up action to resolve the fault, like a COW.
+ */
+ if (error_code & PF_PK)
+ return 1;
+
/*
* Make sure to check the VMA so that we do not perform
* faults just to hit a PF_PK as soon as we fill in a
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
+ #include <linux/pkeys.h>
#include <linux/ksm.h>
#include <linux/pkeys.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
+ #include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include "internal.h"
vma->vm_userfaultfd_ctx);
if (*pprev) {
vma = *pprev;
+ VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
goto success;
}
* held in write mode.
*/
vma->vm_flags = newflags;
- dirty_accountable = vma_wants_writenotify(vma);
+ dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
vma_set_page_prot(vma);
change_protection(vma, start, end, vma->vm_page_prot,
return error;
}
- SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
- unsigned long, prot)
+ /*
+ * pkey==-1 when doing a legacy mprotect()
+ */
+ static int do_mprotect_pkey(unsigned long start, size_t len,
+ unsigned long prot, int pkey)
{
unsigned long nstart, end, tmp, reqprot;
struct vm_area_struct *vma, *prev;
if (down_write_killable(¤t->mm->mmap_sem))
return -EINTR;
+ /*
+ * If userspace did not allocate the pkey, do not let
+ * them use it here.
+ */
+ error = -EINVAL;
+ if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
+ goto out;
+
vma = find_vma(current->mm, start);
error = -ENOMEM;
if (!vma)
prev = vma;
for (nstart = start ; ; ) {
+ unsigned long mask_off_old_flags;
unsigned long newflags;
- int pkey = arch_override_mprotect_pkey(vma, prot, -1);
+ int new_vma_pkey;
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
if (rier && (vma->vm_flags & VM_MAYEXEC))
prot |= PROT_EXEC;
- newflags = calc_vm_prot_bits(prot, pkey);
- newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
+ /*
+ * Each mprotect() call explicitly passes r/w/x permissions.
+ * If a permission is not passed to mprotect(), it must be
+ * cleared from the VMA.
+ */
+ mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC |
+ ARCH_VM_PKEY_FLAGS;
+
+ new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
+ newflags = calc_vm_prot_bits(prot, new_vma_pkey);
+ newflags |= (vma->vm_flags & ~mask_off_old_flags);
/* newflags >> 4 shift VM_MAY% in place of VM_% */
if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
up_write(¤t->mm->mmap_sem);
return error;
}
+
+ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
+ unsigned long, prot)
+ {
+ return do_mprotect_pkey(start, len, prot, -1);
+ }
+
+ SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
+ unsigned long, prot, int, pkey)
+ {
+ return do_mprotect_pkey(start, len, prot, pkey);
+ }
+
+ SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
+ {
+ int pkey;
+ int ret;
+
+ /* No flags supported yet. */
+ if (flags)
+ return -EINVAL;
+ /* check for unsupported init values */
+ if (init_val & ~PKEY_ACCESS_MASK)
+ return -EINVAL;
+
+ down_write(¤t->mm->mmap_sem);
+ pkey = mm_pkey_alloc(current->mm);
+
+ ret = -ENOSPC;
+ if (pkey == -1)
+ goto out;
+
+ ret = arch_set_user_pkey_access(current, pkey, init_val);
+ if (ret) {
+ mm_pkey_free(current->mm, pkey);
+ goto out;
+ }
+ ret = pkey;
+ out:
+ up_write(¤t->mm->mmap_sem);
+ return ret;
+ }
+
+ SYSCALL_DEFINE1(pkey_free, int, pkey)
+ {
+ int ret;
+
+ down_write(¤t->mm->mmap_sem);
+ ret = mm_pkey_free(current->mm, pkey);
+ up_write(¤t->mm->mmap_sem);
+
+ /*
+ * We could provie warnings or errors if any VMA still
+ * has the pkey set here.
+ */
+ return ret;
+ }