Merge branch 'perf/urgent' into perf/core, to pick up dependencies
author     Ingo Molnar <mingo@kernel.org>
           Thu, 18 Aug 2016 08:36:21 +0000 (10:36 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 18 Aug 2016 08:36:21 +0000 (10:36 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/events/perf_event.h
include/linux/perf_event.h
kernel/events/core.c
kernel/events/uprobes.c

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d0efb5c..18a1acf 100644
@@ -1201,6 +1201,9 @@ static int x86_pmu_add(struct perf_event *event, int flags)
         * If group events scheduling transaction was started,
         * skip the schedulability test here, it will be performed
         * at commit time (->commit_txn) as a whole.
+        *
+        * If commit fails, we'll call ->del() on all events
+        * for which ->add() was called.
         */
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
                goto done_collect;
@@ -1223,6 +1226,14 @@ done_collect:
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
 
+       if (x86_pmu.add) {
+               /*
+                * This is before x86_pmu_enable() will call x86_pmu_start(),
+                * so we enable LBRs before an event needs them, etc.
+                */
+               x86_pmu.add(event);
+       }
+
        ret = 0;
 out:
        return ret;
@@ -1346,7 +1357,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
        /*
-        * If we're called during a txn, we don't need to do anything.
+        * If we're called during a txn, we only need to undo x86_pmu.add.
         * The events never got scheduled and ->cancel_txn will truncate
         * the event_list.
         *
@@ -1354,7 +1365,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
         * an event added during that same TXN.
         */
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-               return;
+               goto do_del;
 
        /*
         * Not a TXN, therefore cleanup properly.
@@ -1384,6 +1395,15 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        --cpuc->n_events;
 
        perf_event_update_userpage(event);
+
+do_del:
+       if (x86_pmu.del) {
+               /*
+                * This is after x86_pmu_stop(), so we disable LBRs after any
+                * event that could need them has been stopped.
+                */
+               x86_pmu.del(event);
+       }
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
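
The new hooks only make sense because of where they sit in the event lifecycle: x86_pmu.add() runs from pmu::add(), before x86_pmu_enable() starts the counters, and x86_pmu.del() runs from pmu::del(), after the event has already been stopped. Below is a minimal user-space sketch of that ordering; all names are illustrative stand-ins, not the kernel's types.

#include <stdio.h>

struct model_pmu {
	void (*add)(void);	/* stand-in for x86_pmu.add, e.g. intel_pmu_add_event() */
	void (*del)(void);	/* stand-in for x86_pmu.del, e.g. intel_pmu_del_event() */
};

static void setup_lbr_pebs(void)    { puts("add: set up LBR/PEBS bookkeeping"); }
static void teardown_lbr_pebs(void) { puts("del: tear down LBR/PEBS bookkeeping"); }

/* models x86_pmu_add(): the optional hook runs first, the counter starts afterwards */
static void event_add(struct model_pmu *pmu)
{
	if (pmu->add)
		pmu->add();
	puts("start: counter running");
}

/* models x86_pmu_del(): the counter is stopped first, the optional hook runs afterwards */
static void event_del(struct model_pmu *pmu)
{
	puts("stop: counter stopped");
	if (pmu->del)
		pmu->del();
}

int main(void)
{
	struct model_pmu pmu = { .add = setup_lbr_pebs, .del = teardown_lbr_pebs };

	event_add(&pmu);
	event_del(&pmu);
	return 0;
}
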
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2cbde2f..88792f8 100644
@@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(struct perf_event *event)
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
        cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-       /*
-        * must disable before any actual event
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_disable(event);
-
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_disable_fixed(hwc);
                return;
@@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(struct perf_event *event)
                intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_del_event(struct perf_event *event)
+{
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_del(event);
+       if (event->attr.precise_ip)
+               intel_pmu_pebs_del(event);
+}
+
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struct perf_event *event)
                intel_pmu_enable_bts(hwc->config);
                return;
        }
-       /*
-        * must enabled before any actual event
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_enable(event);
 
        if (event->attr.exclude_host)
                cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
        __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static void intel_pmu_add_event(struct perf_event *event)
+{
+       if (event->attr.precise_ip)
+               intel_pmu_pebs_add(event);
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_add(event);
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu intel_pmu = {
        .enable_all             = intel_pmu_enable_all,
        .enable                 = intel_pmu_enable_event,
        .disable                = intel_pmu_disable_event,
+       .add                    = intel_pmu_add_event,
+       .del                    = intel_pmu_del_event,
        .hw_config              = intel_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f..248023f 100644
@@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
        return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+{
+       return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+       struct debug_store *ds = cpuc->ds;
+       u64 threshold;
+
+       if (cpuc->n_pebs == cpuc->n_large_pebs) {
+               threshold = ds->pebs_absolute_maximum -
+                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+       } else {
+               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+       }
+
+       ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+       if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+               if (!needed_cb)
+                       perf_sched_cb_inc(pmu);
+               else
+                       perf_sched_cb_dec(pmu);
+
+               pebs_update_threshold(cpuc);
+       }
+}
+
+void intel_pmu_pebs_add(struct perf_event *event)
 {
-       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+       cpuc->n_pebs++;
+       if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+               cpuc->n_large_pebs++;
+
+       pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +862,9 @@ void intel_pmu_pebs_enable(struct perf_event *event)
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;
-       bool first_pebs;
-       u64 threshold;
 
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-       first_pebs = !pebs_is_enabled(cpuc);
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +873,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
                cpuc->pebs_enabled |= 1ULL << 63;
 
        /*
-        * When the event is constrained enough we can use a larger
-        * threshold and run the event with less frequent PMI.
+        * Use auto-reload if possible to save a MSR write in the PMI.
+        * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
         */
-       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-               threshold = ds->pebs_absolute_maximum -
-                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-               if (first_pebs)
-                       perf_sched_cb_inc(event->ctx->pmu);
-       } else {
-               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-               /*
-                * If not all events can use larger buffer,
-                * roll back to threshold = 1
-                */
-               if (!first_pebs &&
-                   (ds->pebs_interrupt_threshold > threshold))
-                       perf_sched_cb_dec(event->ctx->pmu);
-       }
-
-       /* Use auto-reload if possible to save a MSR write in the PMI */
        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                ds->pebs_event_reset[hwc->idx] =
                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
        }
+}
 
-       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-               ds->pebs_interrupt_threshold = threshold;
+void intel_pmu_pebs_del(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+       cpuc->n_pebs--;
+       if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+               cpuc->n_large_pebs--;
+
+       pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       struct debug_store *ds = cpuc->ds;
-       bool large_pebs = ds->pebs_interrupt_threshold >
-               ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-       if (large_pebs)
+       if (cpuc->n_pebs == cpuc->n_large_pebs)
                intel_pmu_drain_pebs_buffer();
 
        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,9 +910,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);
 
-       if (large_pebs && !pebs_is_enabled(cpuc))
-               perf_sched_cb_dec(event->ctx->pmu);
-
        if (cpuc->enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 707d358..fc6cf21 100644
@@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx;
 
        /*
@@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
         */
        task_ctx = ctx ? ctx->task_ctx_data : NULL;
        if (task_ctx) {
-               if (sched_in) {
+               if (sched_in)
                        __intel_pmu_lbr_restore(task_ctx);
-                       cpuc->lbr_context = ctx;
-               } else {
+               else
                        __intel_pmu_lbr_save(task_ctx);
-               }
                return;
        }
 
        /*
-        * When sampling the branck stack in system-wide, it may be
-        * necessary to flush the stack on context switch. This happens
-        * when the branch stack does not tag its entries with the pid
-        * of the current task. Otherwise it becomes impossible to
-        * associate a branch entry with a task. This ambiguity is more
-        * likely to appear when the branch stack supports priv level
-        * filtering and the user sets it to monitor only at the user
-        * level (which could be a useful measurement in system-wide
-        * mode). In that case, the risk is high of having a branch
-        * stack with branch from multiple tasks.
-        */
-       if (sched_in) {
+        * Since a context switch can flip the address space and LBR entries
+        * are not tagged with an identifier, we need to wipe the LBR, even for
+        * per-cpu events. You simply cannot resolve the branches from the old
+        * address space.
+        */
+       if (sched_in)
                intel_pmu_lbr_reset();
-               cpuc->lbr_context = ctx;
-       }
 }
 
 static inline bool branch_user_callstack(unsigned br_sel)
@@ -422,7 +411,7 @@ static inline bool branch_user_callstack(unsigned br_sel)
        return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx;
@@ -430,27 +419,38 @@ void intel_pmu_lbr_enable(struct perf_event *event)
        if (!x86_pmu.lbr_nr)
                return;
 
-       /*
-        * Reset the LBR stack if we changed task context to
-        * avoid data leaks.
-        */
-       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-               intel_pmu_lbr_reset();
-               cpuc->lbr_context = event->ctx;
-       }
        cpuc->br_sel = event->hw.branch_reg.reg;
 
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
                task_ctx = event->ctx->task_ctx_data;
                task_ctx->lbr_callstack_users++;
        }
 
-       cpuc->lbr_users++;
+       /*
+        * Request pmu::sched_task() callback, which will fire inside the
+        * regular perf event scheduling, so that call will:
+        *
+        *  - restore or wipe; when LBR-callstack,
+        *  - wipe; otherwise,
+        *
+        * when this is from __perf_event_task_sched_in().
+        *
+        * However, if this is from perf_install_in_context(), no such callback
+        * will follow and we'll need to reset the LBR here if this is the
+        * first LBR event.
+        *
+        * The problem is, we cannot tell these cases apart... but we can
+        * exclude the biggest chunk of cases by looking at
+        * event->total_time_running. An event that has accrued runtime cannot
+        * be 'new'. Conversely, a new event can get installed through the
+        * context switch path for the first time.
+        */
        perf_sched_cb_inc(event->ctx->pmu);
+       if (!cpuc->lbr_users++ && !event->total_time_running)
+               intel_pmu_lbr_reset();
 }
 
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx;
@@ -467,12 +467,6 @@ void intel_pmu_lbr_disable(struct perf_event *event)
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->ctx->pmu);
-
-       if (cpuc->enabled && !cpuc->lbr_users) {
-               __intel_pmu_lbr_disable();
-               /* avoid stale pointer */
-               cpuc->lbr_context = NULL;
-       }
 }
 
 void intel_pmu_lbr_enable_all(bool pmi)
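
The reset logic at the end of intel_pmu_lbr_add() is subtle: the LBR stack should be wiped for the first LBR user, but only when the event is not arriving via the context-switch path, which is approximated by event->total_time_running still being zero. A toy user-space model of just that heuristic follows; the structures are hypothetical stand-ins, not kernel code.

#include <stdio.h>

struct model_event {
	unsigned long long total_time_running;	/* runtime accrued so far */
};

struct model_cpuc {
	int lbr_users;
	int resets;	/* counts would-be intel_pmu_lbr_reset() calls */
};

/* models the tail of intel_pmu_lbr_add(); perf_sched_cb_inc() is elided */
static void lbr_add(struct model_cpuc *c, struct model_event *ev)
{
	if (!c->lbr_users++ && !ev->total_time_running)
		c->resets++;	/* first user and a genuinely new event */
}

int main(void)
{
	struct model_cpuc c = { 0 };
	struct model_event fresh = { .total_time_running = 0 };
	struct model_event old   = { .total_time_running = 12345 };

	lbr_add(&c, &fresh);	/* first user, never ran -> reset */
	lbr_add(&c, &old);	/* second user           -> no reset */
	printf("users=%d resets=%d\n", c.lbr_users, c.resets);	/* users=2 resets=1 */
	return 0;
}
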
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c4a477..5874d8d 100644
@@ -194,12 +194,13 @@ struct cpu_hw_events {
         */
        struct debug_store      *ds;
        u64                     pebs_enabled;
+       int                     n_pebs;
+       int                     n_large_pebs;
 
        /*
         * Intel LBR bits
         */
        int                             lbr_users;
-       void                            *lbr_context;
        struct perf_branch_stack        lbr_stack;
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
        struct er_account               *lbr_sel;
@@ -508,6 +509,8 @@ struct x86_pmu {
        void            (*enable_all)(int added);
        void            (*enable)(struct perf_event *);
        void            (*disable)(struct perf_event *);
+       void            (*add)(struct perf_event *);
+       void            (*del)(struct perf_event *);
        int             (*hw_config)(struct perf_event *event);
        int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
        unsigned        eventsel;
@@ -888,6 +891,10 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
 void intel_pmu_pebs_enable(struct perf_event *event);
 
 void intel_pmu_pebs_disable(struct perf_event *event);
@@ -906,9 +913,9 @@ u64 lbr_from_signext_quirk_wr(u64 val);
 
 void intel_pmu_lbr_reset(void);
 
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
 
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
 
 void intel_pmu_lbr_enable_all(bool pmi);
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43c..529c41f 100644
@@ -774,6 +774,9 @@ struct perf_cpu_context {
 #ifdef CONFIG_CGROUP_PERF
        struct perf_cgroup              *cgrp;
 #endif
+
+       struct list_head                sched_cb_entry;
+       int                             sched_cb_usage;
 };
 
 struct perf_output_handle {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5650f53..ca4fde5 100644
@@ -1832,6 +1832,8 @@ group_sched_out(struct perf_event *group_event,
        struct perf_event *event;
        int state = group_event->state;
 
+       perf_pmu_disable(ctx->pmu);
+
        event_sched_out(group_event, cpuctx, ctx);
 
        /*
@@ -1840,6 +1842,8 @@ group_sched_out(struct perf_event *group_event,
        list_for_each_entry(event, &group_event->sibling_list, group_entry)
                event_sched_out(event, cpuctx, ctx);
 
+       perf_pmu_enable(ctx->pmu);
+
        if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
                cpuctx->exclusive = 0;
 }
@@ -2837,19 +2841,36 @@ unlock:
        }
 }
 
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
 void perf_sched_cb_dec(struct pmu *pmu)
 {
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
        this_cpu_dec(perf_sched_cb_usages);
+
+       if (!--cpuctx->sched_cb_usage)
+               list_del(&cpuctx->sched_cb_entry);
 }
 
+
 void perf_sched_cb_inc(struct pmu *pmu)
 {
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+       if (!cpuctx->sched_cb_usage++)
+               list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
        this_cpu_inc(perf_sched_cb_usages);
 }
 
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
                                struct task_struct *next,
@@ -2857,34 +2878,24 @@ static void perf_pmu_sched_task(struct task_struct *prev,
 {
        struct perf_cpu_context *cpuctx;
        struct pmu *pmu;
-       unsigned long flags;
 
        if (prev == next)
                return;
 
-       local_irq_save(flags);
+       list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+               pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */
 
-       rcu_read_lock();
-
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               if (pmu->sched_task) {
-                       cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-                       perf_pmu_disable(pmu);
+               if (WARN_ON_ONCE(!pmu->sched_task))
+                       continue;
 
-                       pmu->sched_task(cpuctx->task_ctx, sched_in);
+               perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+               perf_pmu_disable(pmu);
 
-                       perf_pmu_enable(pmu);
+               pmu->sched_task(cpuctx->task_ctx, sched_in);
 
-                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-               }
+               perf_pmu_enable(pmu);
+               perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
        }
-
-       rcu_read_unlock();
-
-       local_irq_restore(flags);
 }
 
 static void perf_event_switch(struct task_struct *task,
@@ -10426,6 +10437,8 @@ static void __init perf_event_init_all_cpus(void)
 
                INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
                raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
+
+               INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
        }
 }
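
The core change keeps a per-CPU list of contexts that actually asked for pmu::sched_task(): perf_sched_cb_inc() adds the cpu context on its first user, perf_sched_cb_dec() removes it on the last, and perf_pmu_sched_task() now walks only that list instead of every registered PMU. Here is a self-contained user-space sketch of that registration scheme, with a plain singly linked list and ints standing in for the kernel's per-CPU machinery.

#include <stdio.h>

struct cpu_ctx {
	struct cpu_ctx *next;
	int sched_cb_usage;	/* models perf_cpu_context::sched_cb_usage */
	const char *name;
};

static struct cpu_ctx *sched_cb_list;	/* models the per-cpu sched_cb_list head */

/* models perf_sched_cb_inc(): register the context on its first user */
static void sched_cb_inc(struct cpu_ctx *ctx)
{
	if (!ctx->sched_cb_usage++) {
		ctx->next = sched_cb_list;
		sched_cb_list = ctx;
	}
}

/* models perf_sched_cb_dec(): unregister on the last user */
static void sched_cb_dec(struct cpu_ctx *ctx)
{
	if (!--ctx->sched_cb_usage) {
		struct cpu_ctx **p = &sched_cb_list;

		while (*p && *p != ctx)
			p = &(*p)->next;
		if (*p)
			*p = ctx->next;
	}
}

/* models perf_pmu_sched_task(): visit only contexts that requested the callback */
static void sched_task(void)
{
	struct cpu_ctx *ctx;

	for (ctx = sched_cb_list; ctx; ctx = ctx->next)
		printf("sched_task() -> %s\n", ctx->name);
}

int main(void)
{
	struct cpu_ctx intel = { .name = "intel_pmu" };

	sched_cb_inc(&intel);	/* first LBR/PEBS user registers the context */
	sched_task();		/* walks only the registered context */
	sched_cb_dec(&intel);	/* last user removes it again */
	sched_task();		/* nothing left to call */
	return 0;
}
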
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c50276..d4129bb 100644
@@ -150,7 +150,7 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  * Returns 0 on success, -EFAULT on failure.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
-                               struct page *page, struct page *kpage)
+                               struct page *old_page, struct page *new_page)
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
@@ -161,49 +161,49 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        const unsigned long mmun_end   = addr + PAGE_SIZE;
        struct mem_cgroup *memcg;
 
-       err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+       err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
                        false);
        if (err)
                return err;
 
        /* For try_to_free_swap() and munlock_vma_page() below */
-       lock_page(page);
+       lock_page(old_page);
 
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        err = -EAGAIN;
-       ptep = page_check_address(page, mm, addr, &ptl, 0);
+       ptep = page_check_address(old_page, mm, addr, &ptl, 0);
        if (!ptep) {
-               mem_cgroup_cancel_charge(kpage, memcg, false);
+               mem_cgroup_cancel_charge(new_page, memcg, false);
                goto unlock;
        }
 
-       get_page(kpage);
-       page_add_new_anon_rmap(kpage, vma, addr, false);
-       mem_cgroup_commit_charge(kpage, memcg, false, false);
-       lru_cache_add_active_or_unevictable(kpage, vma);
+       get_page(new_page);
+       page_add_new_anon_rmap(new_page, vma, addr, false);
+       mem_cgroup_commit_charge(new_page, memcg, false, false);
+       lru_cache_add_active_or_unevictable(new_page, vma);
 
-       if (!PageAnon(page)) {
-               dec_mm_counter(mm, mm_counter_file(page));
+       if (!PageAnon(old_page)) {
+               dec_mm_counter(mm, mm_counter_file(old_page));
                inc_mm_counter(mm, MM_ANONPAGES);
        }
 
        flush_cache_page(vma, addr, pte_pfn(*ptep));
        ptep_clear_flush_notify(vma, addr, ptep);
-       set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+       set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
 
-       page_remove_rmap(page, false);
-       if (!page_mapped(page))
-               try_to_free_swap(page);
+       page_remove_rmap(old_page, false);
+       if (!page_mapped(old_page))
+               try_to_free_swap(old_page);
        pte_unmap_unlock(ptep, ptl);
 
        if (vma->vm_flags & VM_LOCKED)
-               munlock_vma_page(page);
-       put_page(page);
+               munlock_vma_page(old_page);
+       put_page(old_page);
 
        err = 0;
  unlock:
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-       unlock_page(page);
+       unlock_page(old_page);
        return err;
 }