#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/delay.h>
+ #include <linux/reboot.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/uaccess.h>
/* systune related variables for /proc/sys directories */
- static int xpc_hb_min = 1;
- static int xpc_hb_max = 10;
+ static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+ static int xpc_hb_min_interval = 1;
+ static int xpc_hb_max_interval = 10;
- static int xpc_hb_check_min = 10;
- static int xpc_hb_check_max = 120;
+ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+ static int xpc_hb_check_min_interval = 10;
+ static int xpc_hb_check_max_interval = 120;
+
+ int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
+ static int xpc_disengage_request_min_timelimit = 0;
+ static int xpc_disengage_request_max_timelimit = 120;
static ctl_table xpc_sys_xpc_hb_dir[] = {
{
&proc_dointvec_minmax,
&sysctl_intvec,
NULL,
- &xpc_hb_min, &xpc_hb_max
+ &xpc_hb_min_interval,
+ &xpc_hb_max_interval
},
{
2,
&proc_dointvec_minmax,
&sysctl_intvec,
NULL,
- &xpc_hb_check_min, &xpc_hb_check_max
+ &xpc_hb_check_min_interval,
+ &xpc_hb_check_max_interval
},
{0}
};
0555,
xpc_sys_xpc_hb_dir
},
+ {
+ 2,
+ "disengage_request_timelimit",
+ &xpc_disengage_request_timelimit,
+ sizeof(int),
+ 0644,
+ NULL,
+ &proc_dointvec_minmax,
+ &sysctl_intvec,
+ NULL,
+ &xpc_disengage_request_min_timelimit,
+ &xpc_disengage_request_max_timelimit
+ },
{0}
};
static ctl_table xpc_sys_dir[] = {
static unsigned long xpc_hb_check_timeout;
- /* xpc_hb_checker thread exited notification */
+ /* notification that the xpc_hb_checker thread has exited */
static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
- /* xpc_discovery thread exited notification */
+ /* notification that the xpc_discovery thread has exited */
static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
+ static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+ static struct notifier_block xpc_reboot_notifier = {	/* passed to register_reboot_notifier()/unregister_reboot_notifier() elsewhere in this file */
+ .notifier_call = xpc_system_reboot,	/* callback that maps the reboot event to a reason and calls xpc_do_exit() */
+ };
+
+
+ /*
+  * Timer function to enforce the timelimit on the partition disengage request.
+  *
+  * 'data' is the struct xpc_partition pointer that was stored (cast to
+  * unsigned long) in the partition's disengage_request_timer. The real work
+  * is done by xpc_partition_disengaged(); because the timelimit has expired
+  * by the time this runs, that call is expected to leave the partition marked
+  * as disengaged and the timeout reset to zero, which the trailing DBUG_ONs
+  * verify.
+  */
+ static void
+ xpc_timeout_partition_disengage_request(unsigned long data)
+ {
+ 	struct xpc_partition *part = (struct xpc_partition *) data;
+
+
+ 	/*
+ 	 * The timer must not fire early. Use time_before() rather than a
+ 	 * plain '<' so the check stays valid across a jiffies wraparound.
+ 	 */
+ 	DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+
+ 	(void) xpc_partition_disengaged(part);
+
+ 	DBUG_ON(part->disengage_request_timeout != 0);
+ 	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+ }
+
+
/*
* Notify the heartbeat check thread that an IRQ has been received.
*/
while (!(volatile int) xpc_exiting) {
- /* wait for IRQ or timeout */
- (void) wait_event_interruptible(xpc_act_IRQ_wq,
- (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
- jiffies >= xpc_hb_check_timeout ||
- (volatile int) xpc_exiting));
-
dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
"been received\n",
(int) (xpc_hb_check_timeout - jiffies),
}
+ /* check for outstanding IRQs */
new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
force_IRQ = 0;
xpc_hb_check_timeout = jiffies +
(xpc_hb_check_interval * HZ);
}
+
+ /* wait for IRQ or timeout */
+ (void) wait_event_interruptible(xpc_act_IRQ_wq,
+ (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
+ jiffies >= xpc_hb_check_timeout ||
+ (volatile int) xpc_exiting));
}
dev_dbg(xpc_part, "heartbeat checker is exiting\n");
- /* mark this thread as inactive */
+ /* mark this thread as having exited */
up(&xpc_hb_checker_exited);
return 0;
}
dev_dbg(xpc_part, "discovery thread is exiting\n");
- /* mark this thread as inactive */
+ /* mark this thread as having exited */
up(&xpc_discovery_exited);
return 0;
}
"partition %d\n", XPC_PARTID(part));
/* wait a 1/4 of a second or so */
- msleep_interruptible(250);
+ (void) msleep_interruptible(250);
if (part->act_state == XPC_P_DEACTIVATING) {
return part->reason;
xpc_channel_mgr(struct xpc_partition *part)
{
while (part->act_state != XPC_P_DEACTIVATING ||
- atomic_read(&part->nchannels_active) > 0) {
+ atomic_read(&part->nchannels_active) > 0 ||
+ !xpc_partition_disengaged(part)) {
xpc_process_channel_activity(part);
(volatile u64) part->local_IPI_amo != 0 ||
((volatile u8) part->act_state ==
XPC_P_DEACTIVATING &&
- atomic_read(&part->nchannels_active) == 0)));
+ atomic_read(&part->nchannels_active) == 0 &&
+ xpc_partition_disengaged(part))));
atomic_set(&part->channel_mgr_requests, 1);
// >>> Does it need to wakeup periodically as well? In case we
return 0;
}
- XPC_ALLOW_HB(partid, xpc_vars);
+ xpc_allow_hb(partid, xpc_vars);
xpc_IPI_send_activated(part);
*/
(void) xpc_partition_up(part);
+ xpc_disallow_hb(partid, xpc_vars);
xpc_mark_partition_inactive(part);
if (part->reason == xpcReactivating) {
struct xpc_partition *part = &xpc_partitions[partid];
struct xpc_channel *ch;
int n_needed;
+ unsigned long irq_flags;
daemonize("xpc%02dc%d", partid, ch_number);
ch = &part->channels[ch_number];
if (!(ch->flags & XPC_C_DISCONNECTING)) {
- DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
/* let registerer know that connection has been established */
- if (atomic_read(&ch->kthreads_assigned) == 1) {
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
+ ch->flags |= XPC_C_CONNECTCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
xpc_connected_callout(ch);
/*
!(ch->flags & XPC_C_DISCONNECTING)) {
xpc_activate_kthreads(ch, n_needed);
}
+ } else {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
}
xpc_kthread_waitmsgs(part, ch);
}
- if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
- ((ch->flags & XPC_C_CONNECTCALLOUT) ||
- (ch->reason != xpcUnregistering &&
- ch->reason != xpcOtherUnregistering))) {
- xpc_disconnected_callout(ch);
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
+ !(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
+ ch->flags |= XPC_C_DISCONNECTCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_disconnecting_callout(ch);
+ } else {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+ if (atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_mark_partition_disengaged(part);
+ xpc_IPI_send_disengage(part);
+ }
}
unsigned long irq_flags;
pid_t pid;
u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
while (needed-- > 0) {
+
+ /*
+ * The following is done on behalf of the newly created
+ * kthread. That kthread is responsible for doing the
+ * counterpart to the following before it exits.
+ */
+ (void) xpc_part_ref(part);
+ xpc_msgqueue_ref(ch);
+ if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+ atomic_inc_return(&part->nchannels_engaged) == 1) {
+ xpc_mark_partition_engaged(part);
+ }
+
pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
if (pid < 0) {
/* the fork failed */
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+ atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_mark_partition_disengaged(part);
+ xpc_IPI_send_disengage(part);
+ }
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
if (atomic_read(&ch->kthreads_assigned) <
ch->kthreads_idle_limit) {
break;
}
- /*
- * The following is done on behalf of the newly created
- * kthread. That kthread is responsible for doing the
- * counterpart to the following before it exits.
- */
- (void) xpc_part_ref(&xpc_partitions[ch->partid]);
- xpc_msgqueue_ref(ch);
- atomic_inc(&ch->kthreads_assigned);
ch->kthreads_created++; // >>> temporary debug only!!!
}
}
/*
 * Wait for the disconnect of channel 'ch_number' to run its course on
 * every partition (partids 1 .. XP_MAX_PARTITIONS - 1).
 *
 * In the updated ('+') version a partition is skipped when no reference
 * to it can be taken or when its channel does not have XPC_C_WDISCONNECT
 * set. Otherwise the caller sleeps on the channel's wdisconnect_sema and
 * then, under ch->lock, hands any IPI flags parked in delayed_IPI_flags
 * back to the partition's local_IPI_amo (unless the partition is
 * deactivating) and wakes the channel manager to process them.
 */
void
xpc_disconnect_wait(int ch_number)
{
+ unsigned long irq_flags;
partid_t partid;
struct xpc_partition *part;
struct xpc_channel *ch;
+ int wakeup_channel_mgr;
/* now wait for all callouts to the caller's function to cease */
for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
part = &xpc_partitions[partid];
- if (xpc_part_ref(part)) {
- ch = &part->channels[ch_number];
+ if (!xpc_part_ref(part)) {
+ continue;
+ }
- // >>> how do we keep from falling into the window between our check and going
- // >>> down and coming back up where sema is re-inited?
- if (ch->flags & XPC_C_SETUP) {
- (void) down(&ch->teardown_sema);
- }
+ ch = &part->channels[ch_number];
+ if (!(ch->flags & XPC_C_WDISCONNECT)) {
xpc_part_deref(part);
+ continue;
+ }
+
+ (void) down(&ch->wdisconnect_sema);	/* presumably upped once the disconnect has completed -- confirm against the channel code */
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ wakeup_channel_mgr = 0;
+
+ if (ch->delayed_IPI_flags) {	/* IPI flags were parked for this channel */
+ if (part->act_state != XPC_P_DEACTIVATING) {
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch->number, ch->delayed_IPI_flags);
+ spin_unlock(&part->IPI_lock);
+ wakeup_channel_mgr = 1;	/* wake the manager only after ch->lock is dropped */
+ }
+ ch->delayed_IPI_flags = 0;	/* cleared either way; simply discarded if deactivating */
}
+
+ ch->flags &= ~XPC_C_WDISCONNECT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (wakeup_channel_mgr) {
+ xpc_wakeup_channel_mgr(part);
+ }
+
+ xpc_part_deref(part);	/* balances the xpc_part_ref() at the top of the loop */
}
}
static void
- xpc_do_exit(void)
+ xpc_do_exit(enum xpc_retval reason)
{
partid_t partid;
int active_part_count;
struct xpc_partition *part;
+ unsigned long printmsg_time;
- /* now it's time to eliminate our heartbeat */
- del_timer_sync(&xpc_hb_timer);
- xpc_vars->heartbeating_to_mask = 0;
-
- /* indicate to others that our reserved page is uninitialized */
- xpc_rsvd_page->vars_pa = 0;
-
- /*
- * Ignore all incoming interrupts. Without interupts the heartbeat
- * checker won't activate any new partitions that may come up.
- */
- free_irq(SGI_XPC_ACTIVATE, NULL);
+ /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+ DBUG_ON(xpc_exiting == 1);
/*
- * Cause the heartbeat checker and the discovery threads to exit.
- * We don't want them attempting to activate new partitions as we
- * try to deactivate the existing ones.
+ * Let the heartbeat checker thread and the discovery thread
+ * (if one is running) know that they should exit. Also wake up
+ * the heartbeat checker thread in case it's sleeping.
*/
xpc_exiting = 1;
wake_up_interruptible(&xpc_act_IRQ_wq);
- /* wait for the heartbeat checker thread to mark itself inactive */
- down(&xpc_hb_checker_exited);
+ /* ignore all incoming interrupts */
+ free_irq(SGI_XPC_ACTIVATE, NULL);
- /* wait for the discovery thread to mark itself inactive */
+ /* wait for the discovery thread to exit */
down(&xpc_discovery_exited);
+ /* wait for the heartbeat checker thread to exit */
+ down(&xpc_hb_checker_exited);
- msleep_interruptible(300);
+
+ /* sleep for a 1/3 of a second or so */
+ (void) msleep_interruptible(300);
/* wait for all partitions to become inactive */
+ printmsg_time = jiffies;
+
do {
active_part_count = 0;
for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
part = &xpc_partitions[partid];
- if (part->act_state != XPC_P_INACTIVE) {
- active_part_count++;
- XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_INACTIVE) {
+ continue;
}
+
+ active_part_count++;
+
+ XPC_DEACTIVATE_PARTITION(part, reason);
}
- if (active_part_count)
- msleep_interruptible(300);
- } while (active_part_count > 0);
+ if (active_part_count == 0) {
+ break;
+ }
+ if (jiffies >= printmsg_time) {
+ dev_info(xpc_part, "waiting for partitions to "
+ "deactivate/disengage, active count=%d, remote "
+ "engaged=0x%lx\n", active_part_count,
+ xpc_partition_engaged(1UL << partid));
+
+ printmsg_time = jiffies +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ }
+
+ /* sleep for a 1/3 of a second or so */
+ (void) msleep_interruptible(300);
+
+ } while (1);
+
+ DBUG_ON(xpc_partition_engaged(-1UL));
+
+
+ /* indicate to others that our reserved page is uninitialized */
+ xpc_rsvd_page->vars_pa = 0;
+
+ /* now it's time to eliminate our heartbeat */
+ del_timer_sync(&xpc_hb_timer);
+ DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
+
+ /* take ourselves off of the reboot_notifier_list */
+ (void) unregister_reboot_notifier(&xpc_reboot_notifier);
/* close down protections for IPI operations */
xpc_restrict_IPI_ops();
}
+ /*
+  * Reboot notifier callback, invoked when the system is being rebooted,
+  * halted or powered off.
+  *
+  * Translates the notifier 'event' into the matching xpc_retval reason
+  * (falling back to xpcSystemGoingDown for any other event), shuts XPC down
+  * with that reason and always returns NOTIFY_DONE. 'nb' and 'unused' are
+  * not examined.
+  */
+ static int
+ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+ {
+ 	/* start from the catch-all reason and refine it for known events */
+ 	enum xpc_retval reason = xpcSystemGoingDown;
+
+
+ 	if (event == SYS_RESTART) {
+ 		reason = xpcSystemReboot;
+ 	} else if (event == SYS_HALT) {
+ 		reason = xpcSystemHalt;
+ 	} else if (event == SYS_POWER_OFF) {
+ 		reason = xpcSystemPoweroff;
+ 	}
+
+ 	xpc_do_exit(reason);
+ 	return NOTIFY_DONE;
+ }
+
+
int __init
xpc_init(void)
{
pid_t pid;
+ if (!ia64_platform_is("sn2")) {
+ return -ENODEV;
+ }
+
/*
* xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
- * both a partition's reserved page and its XPC variables. Its size was
- * based on the size of a reserved page. So we need to ensure that the
- * XPC variables will fit as well.
+ * various portions of a partition's reserved page. Its size is based
+ * on the size of the reserved page header and part_nasids mask. So we
+ * need to ensure that the other items will fit as well.
*/
- if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) {
+ if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
return -EPERM;
}
spin_lock_init(&part->act_lock);
part->act_state = XPC_P_INACTIVE;
XPC_SET_REASON(part, 0, 0);
+
+ init_timer(&part->disengage_request_timer);
+ part->disengage_request_timer.function =
+ xpc_timeout_partition_disengage_request;
+ part->disengage_request_timer.data = (unsigned long) part;
+
part->setup_state = XPC_P_UNSET;
init_waitqueue_head(&part->teardown_wq);
atomic_set(&part->references, 0);
}
+ /* add ourselves to the reboot_notifier_list */
+ ret = register_reboot_notifier(&xpc_reboot_notifier);
+ if (ret != 0) {
+ dev_warn(xpc_part, "can't register reboot notifier\n");
+ }
+
+
/*
* Set the beating to other partitions into motion. This is
* the last requirement for other partitions' discovery to
/* indicate to others that our reserved page is uninitialized */
xpc_rsvd_page->vars_pa = 0;
+ /* take ourselves off of the reboot_notifier_list */
+ (void) unregister_reboot_notifier(&xpc_reboot_notifier);
+
del_timer_sync(&xpc_hb_timer);
free_irq(SGI_XPC_ACTIVATE, NULL);
xpc_restrict_IPI_ops();
/* mark this new thread as a non-starter */
up(&xpc_discovery_exited);
- xpc_do_exit();
+ xpc_do_exit(xpcUnloading);
return -EBUSY;
}
/*
 * Module exit hook (handed to module_exit() right below): shuts XPC down
 * through xpc_do_exit(). The updated ('+') call supplies xpcUnloading as
 * the reason.
 */
void __exit
xpc_exit(void)
{
- xpc_do_exit();
+ xpc_do_exit(xpcUnloading);
}
module_exit(xpc_exit);
MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
"heartbeat checks.");
+ module_param(xpc_disengage_request_timelimit, int, 0);
+ MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
+ "for disengage request to complete.");
+
/* original protection values for each node */
-u64 xpc_prot_vec[MAX_COMPACT_NODES];
+u64 xpc_prot_vec[MAX_NUMNODES];
- /* this partition's reserved page */
+ /* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
-
- /* this partition's XPC variables (within the reserved page) */
+ static u64 *xpc_part_nasids;
+ static u64 *xpc_mach_nasids;
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;
+ static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
+ static int xp_nasid_mask_words; /* actual size in words of nasid mask */
+
/*
* For performance reasons, each entry of xpc_partitions[] is cacheline
/*
- * Generic buffer used to store a local copy of the remote partitions
- * reserved page or XPC variables.
+ * Generic buffer used to store a local copy of portions of a remote
+ * partition's reserved page (either its header and part_nasids mask,
+ * or its vars).
*
* xpc_discovery runs only once and is a separate thread that is
* very likely going to be processing in parallel with receiving
* interrupts.
*/
- char ____cacheline_aligned
- xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
-
-
- /* systune related variables */
- int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
- int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
+ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
+ XP_NASID_MASK_BYTES];
/*
* for that nasid. This function returns 0 on any error.
*/
static u64
- xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
+ xpc_get_rsvd_page_pa(int nasid)
{
bte_result_t bte_res;
s64 status;
u64 cookie = 0;
u64 rp_pa = nasid; /* seed with nasid */
u64 len = 0;
+ u64 buf = buf;
+ u64 buf_len = 0;
+ void *buf_base = NULL;
while (1) {
break;
}
- if (len > buf_size) {
- dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
- status = SALRET_ERROR;
- break;
+ if (L1_CACHE_ALIGN(len) > buf_len) {
+ if (buf_base != NULL) {
+ kfree(buf_base);
+ }
+ buf_len = L1_CACHE_ALIGN(len);
+ buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
+ GFP_KERNEL, &buf_base);
+ if (buf_base == NULL) {
+ dev_err(xpc_part, "unable to kmalloc "
+ "len=0x%016lx\n", buf_len);
+ status = SALRET_ERROR;
+ break;
+ }
}
- bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
+ bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bte_res != BTE_SUCCESS) {
dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
}
}
+ if (buf_base != NULL) {
+ kfree(buf_base);
+ }
+
if (status != SALRET_OK) {
rp_pa = 0;
}
{
struct xpc_rsvd_page *rp;
AMO_t *amos_page;
- u64 rp_pa, next_cl, nasid_array = 0;
+ u64 rp_pa, nasid_array = 0;
int i, ret;
/* get the local reserved page's address */
- rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
- (u64) xpc_remote_copy_buffer,
- XPC_RSVD_PAGE_ALIGNED_SIZE);
+ preempt_disable();
+ rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+ preempt_enable();
if (rp_pa == 0) {
dev_err(xpc_part, "SAL failed to locate the reserved page\n");
return NULL;
rp->version = XPC_RP_VERSION;
- /*
- * Place the XPC variables on the cache line following the
- * reserved page structure.
- */
- next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
- xpc_vars = (struct xpc_vars *) next_cl;
+ /* establish the actual sizes of the nasid masks */
+ if (rp->SAL_version == 1) {
+ /* SAL_version 1 didn't set the nasids_size field */
+ rp->nasids_size = 128;
+ }
+ xp_nasid_mask_bytes = rp->nasids_size;
+ xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+
+ /* setup the pointers to the various items in the reserved page */
+ xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+ xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+ xpc_vars = XPC_RP_VARS(rp);
+ xpc_vars_part = XPC_RP_VARS_PART(rp);
/*
* Before clearing xpc_vars, see if a page of AMOs had been previously
amos_page = (AMO_t *) TO_AMO((u64) amos_page);
}
+ /* clear xpc_vars */
memset(xpc_vars, 0, sizeof(struct xpc_vars));
- /*
- * Place the XPC per partition specific variables on the cache line
- * following the XPC variables structure.
- */
- next_cl += XPC_VARS_ALIGNED_SIZE;
- memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
- XP_MAX_PARTITIONS);
- xpc_vars_part = (struct xpc_vars_part *) next_cl;
- xpc_vars->vars_part_pa = __pa(next_cl);
-
xpc_vars->version = XPC_V_VERSION;
xpc_vars->act_nasid = cpuid_to_nasid(0);
xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+ xpc_vars->vars_part_pa = __pa(xpc_vars_part);
+ xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
xpc_vars->amos_page = amos_page; /* save for next load of XPC */
- /*
- * Initialize the activation related AMO variables.
- */
- xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
- for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
- xpc_IPI_init(i + XP_MAX_PARTITIONS);
+ /* clear xpc_vars_part */
+ memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+ XP_MAX_PARTITIONS);
+
+ /* initialize the activate IRQ related AMO variables */
+ for (i = 0; i < xp_nasid_mask_words; i++) {
+ (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
}
- /* export AMO page's physical address to other partitions */
- xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
+
+ /* initialize the engaged remote partitions related AMO variables */
+ (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+ (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+
+ /* timestamp of when reserved page was setup by XPC */
+ rp->stamp = CURRENT_TIME;
/*
* This signifies to the remote partition that our reserved
remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+
+ if (xpc_exiting) {
+ break;
+ }
+
if (partid == sn_partition_id) {
continue;
}
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
ia64_tpa((u64) remote_vars),
- XPC_VARS_ALIGNED_SIZE,
+ XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
XPC_DEACTIVATE_PARTITION(part,
if (((remote_vars->heartbeat == part->last_heartbeat) &&
(remote_vars->kdb_status == 0)) ||
- !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+ !xpc_hb_allowed(sn_partition_id, remote_vars)) {
XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
continue;
/*
- * Get a copy of the remote partition's rsvd page.
+ * Get a copy of a portion of the remote partition's rsvd page.
*
* remote_rp points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
*/
static enum xpc_retval
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
- struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
+ struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
{
int bres, i;
/* get the reserved page's physical address */
- *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
- XPC_RSVD_PAGE_ALIGNED_SIZE);
- if (*remote_rsvd_page_pa == 0) {
+ *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+ if (*remote_rp_pa == 0) {
return xpcNoRsvdPageAddr;
}
- /* pull over the reserved page structure */
+ /* pull over the reserved page header and part_nasids mask */
- bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
- XPC_RSVD_PAGE_ALIGNED_SIZE,
+ bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
+ XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
return xpc_map_bte_errors(bres);
if (discovered_nasids != NULL) {
- for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
- discovered_nasids[i] |= remote_rp->part_nasids[i];
+ u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
+
+
+ for (i = 0; i < xp_nasid_mask_words; i++) {
+ discovered_nasids[i] |= remote_part_nasids[i];
}
}
/*
- * Get a copy of the remote partition's XPC variables.
+ * Get a copy of the remote partition's XPC variables from the reserved page.
*
* remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_VARS_ALIGNED_SIZE.
+ * assumed to be of size XPC_RP_VARS_SIZE.
*/
static enum xpc_retval
xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
/* pull over the cross partition variables */
bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
- XPC_VARS_ALIGNED_SIZE,
+ XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
return xpc_map_bte_errors(bres);
/*
- * Prior code has determine the nasid which generated an IPI. Inspect
+ * Update the remote partition's info.
+ */
+ static void
+ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
+ 				struct timespec *remote_rp_stamp, u64 remote_rp_pa,
+ 				u64 remote_vars_pa, struct xpc_vars *remote_vars)
+ {
+ 	/*
+ 	 * Record in 'part' what has just been learned about the remote
+ 	 * partition: the reserved page version, stamp and physical address
+ 	 * and the vars physical address supplied by the caller, followed by
+ 	 * the fields copied out of 'remote_vars'. Each value is echoed with
+ 	 * dev_dbg() right after it is stored.
+ 	 */
+ 	part->remote_rp_version = remote_rp_version;
+ 	/*
+ 	 * NOTE(review): the version arrives as a u8; the cast keeps the
+ 	 * argument in step with the %lx conversion whatever width the
+ 	 * struct field is declared with.
+ 	 */
+ 	dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n",
+ 		(unsigned long) part->remote_rp_version);
+
+ 	part->remote_rp_stamp = *remote_rp_stamp;
+ 	dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx)\n",
+ 		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+
+ 	part->remote_rp_pa = remote_rp_pa;
+ 	dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+ 	part->remote_vars_pa = remote_vars_pa;
+ 	dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
+ 		part->remote_vars_pa);
+
+ 	part->last_heartbeat = remote_vars->heartbeat;
+ 	dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
+ 		part->last_heartbeat);
+
+ 	part->remote_vars_part_pa = remote_vars->vars_part_pa;
+ 	dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
+ 		part->remote_vars_part_pa);
+
+ 	part->remote_act_nasid = remote_vars->act_nasid;
+ 	dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
+ 		part->remote_act_nasid);
+
+ 	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+ 	dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
+ 		part->remote_act_phys_cpuid);
+
+ 	part->remote_amos_page_pa = remote_vars->amos_page_pa;
+ 	dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
+ 		part->remote_amos_page_pa);
+
+ 	part->remote_vars_version = remote_vars->version;
+ 	dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
+ 		part->remote_vars_version);
+ }
+
+
+ /*
+ * Prior code has determined the nasid which generated an IPI. Inspect
* that nasid to determine if its partition needs to be activated or
* deactivated.
*
{
struct xpc_rsvd_page *remote_rp;
struct xpc_vars *remote_vars;
- u64 remote_rsvd_page_pa;
+ u64 remote_rp_pa;
u64 remote_vars_pa;
+ int remote_rp_version;
+ int reactivate = 0;
+ int stamp_diff;
+ struct timespec remote_rp_stamp = { 0, 0 };
partid_t partid;
struct xpc_partition *part;
enum xpc_retval ret;
remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
- ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
+ ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
if (ret != xpcSuccess) {
dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
"which sent interrupt, reason=%d\n", nasid, ret);
}
remote_vars_pa = remote_rp->vars_pa;
+ remote_rp_version = remote_rp->version;
+ if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+ remote_rp_stamp = remote_rp->stamp;
+ }
partid = remote_rp->partid;
part = &xpc_partitions[partid];
"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_INACTIVE) {
- if (part->act_state == XPC_P_INACTIVE) {
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
- part->remote_rp_pa = remote_rsvd_page_pa;
- dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n",
- part->remote_rp_pa);
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+ if (xpc_partition_disengage_requested(1UL << partid)) {
+ /*
+ * Other side is waiting on us to disengage,
+ * even though we already have.
+ */
+ return;
+ }
+ } else {
+ /* other side doesn't support disengage requests */
+ xpc_clear_partition_disengage_request(1UL << partid);
+ }
- part->remote_vars_pa = remote_vars_pa;
- dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
- part->remote_vars_pa);
+ xpc_activate_partition(part);
+ return;
+ }
- part->last_heartbeat = remote_vars->heartbeat;
- dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
- part->last_heartbeat);
+ DBUG_ON(part->remote_rp_version == 0);
+ DBUG_ON(part->remote_vars_version == 0);
+
+ if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+ DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+ remote_vars_version));
+
+ if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+ DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+ version));
+ /* see if the other side rebooted */
+ if (part->remote_amos_page_pa ==
+ remote_vars->amos_page_pa &&
+ xpc_hb_allowed(sn_partition_id,
+ remote_vars)) {
+ /* doesn't look that way, so ignore the IPI */
+ return;
+ }
+ }
- part->remote_vars_part_pa = remote_vars->vars_part_pa;
- dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
- part->remote_vars_part_pa);
+ /*
+ * Other side rebooted and previous XPC didn't support the
+ * disengage request, so we don't need to do anything special.
+ */
- part->remote_act_nasid = remote_vars->act_nasid;
- dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
- part->remote_act_nasid);
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+ part->reactivate_nasid = nasid;
+ XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+ return;
+ }
- part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
- dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
- part->remote_act_phys_cpuid);
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
- part->remote_amos_page_pa = remote_vars->amos_page_pa;
- dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
- part->remote_amos_page_pa);
+ if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
- xpc_activate_partition(part);
+ /*
+ * Other side rebooted and previous XPC did support the
+ * disengage request, but the new one doesn't.
+ */
+
+ xpc_clear_partition_engaged(1UL << partid);
+ xpc_clear_partition_disengage_request(1UL << partid);
- } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
- !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+ reactivate = 1;
+
+ } else {
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+ stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+ &remote_rp_stamp);
+ if (stamp_diff != 0) {
+ DBUG_ON(stamp_diff >= 0);
+
+ /*
+ * Other side rebooted and the previous XPC did support
+ * the disengage request, as does the new one.
+ */
+
+ DBUG_ON(xpc_partition_engaged(1UL << partid));
+ DBUG_ON(xpc_partition_disengage_requested(1UL <<
+ partid));
+
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+ reactivate = 1;
+ }
+ }
+
+ if (!xpc_partition_disengaged(part)) {
+ /* still waiting on other side to disengage from us */
+ return;
+ }
+
+ if (reactivate) {
part->reactivate_nasid = nasid;
XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+
+ } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+ xpc_partition_disengage_requested(1UL << partid)) {
+ XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
}
}
u64 nasid; /* remote nasid */
int n_IRQs_detected = 0;
AMO_t *act_amos;
- struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
- act_amos = xpc_vars->act_amos;
+ act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
/* scan through act AMO variable looking for non-zero entries */
- for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
+ for (word = 0; word < xp_nasid_mask_words; word++) {
+
+ if (xpc_exiting) {
+ break;
+ }
nasid_mask = xpc_IPI_receive(&act_amos[word]);
if (nasid_mask == 0) {
* remote nasid in our reserved pages machine mask.
* This is used in the event of module reload.
*/
- rp->mach_nasids[word] |= nasid_mask;
+ xpc_mach_nasids[word] |= nasid_mask;
/* locate the nasid(s) which sent interrupts */
}
+ /*
+  * See if the other side has responded to a partition disengage request
+  * from us.
+  *
+  * Returns non-zero when the partition's engaged bit is clear, or when a
+  * disengage request's timelimit has expired (in which case the bit is
+  * cleared here on the assumption that the other side is dead). Returns 0
+  * while the partition is still engaged and either no timelimit is set or
+  * it has not yet been reached.
+  *
+  * When a pending request is resolved, the timeout is reset to zero, the
+  * request timer is cancelled (unless we are running in interrupt context,
+  * i.e. possibly from that very timer), the channel manager is woken if the
+  * partition is not yet inactive, and the disengage request is cancelled
+  * for remotes that support it.
+  */
+ int
+ xpc_partition_disengaged(struct xpc_partition *part)
+ {
+ 	partid_t partid = XPC_PARTID(part);
+ 	int disengaged;
+
+
+ 	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
+ 	if (part->disengage_request_timeout) {
+ 		if (!disengaged) {
+ 			/*
+ 			 * time_before() rather than '<' so the deadline test
+ 			 * remains correct across a jiffies wraparound.
+ 			 */
+ 			if (time_before(jiffies,
+ 					part->disengage_request_timeout)) {
+ 				/* timelimit hasn't been reached yet */
+ 				return 0;
+ 			}
+
+ 			/*
+ 			 * Other side hasn't responded to our disengage
+ 			 * request in a timely fashion, so assume it's dead.
+ 			 */
+
+ 			xpc_clear_partition_engaged(1UL << partid);
+ 			disengaged = 1;
+ 		}
+ 		part->disengage_request_timeout = 0;
+
+ 		/* cancel the timer function, provided it's not us */
+ 		if (!in_interrupt()) {
+ 			del_singleshot_timer_sync(&part->
+ 						disengage_request_timer);
+ 		}
+
+ 		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
+ 					part->act_state != XPC_P_INACTIVE);
+ 		if (part->act_state != XPC_P_INACTIVE) {
+ 			xpc_wakeup_channel_mgr(part);
+ 		}
+
+ 		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+ 			xpc_cancel_partition_disengage_request(part);
+ 		}
+ 	}
+ 	return disengaged;
+ }
+
+
/*
* Mark specified partition as active.
*/
enum xpc_retval reason)
{
unsigned long irq_flags;
- partid_t partid = XPC_PARTID(part);
spin_lock_irqsave(&part->act_lock, irq_flags);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
- XPC_DISALLOW_HB(partid, xpc_vars);
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+ xpc_request_partition_disengage(part);
+ xpc_IPI_send_disengage(part);
- dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
- reason);
+ /* set a timelimit on the disengage request */
+ part->disengage_request_timeout = jiffies +
+ (xpc_disengage_request_timelimit * HZ);
+ part->disengage_request_timer.expires =
+ part->disengage_request_timeout;
+ add_timer(&part->disengage_request_timer);
+ }
+
+ dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+ XPC_PARTID(part), reason);
- xpc_partition_down(part, reason);
+ xpc_partition_going_down(part, reason);
}
/*
- * Mark specified partition as active.
+ * Mark specified partition as inactive.
*/
void
xpc_mark_partition_inactive(struct xpc_partition *part)
void *remote_rp_base;
struct xpc_rsvd_page *remote_rp;
struct xpc_vars *remote_vars;
- u64 remote_rsvd_page_pa;
+ u64 remote_rp_pa;
u64 remote_vars_pa;
int region;
+ int region_size;
int max_regions;
int nasid;
struct xpc_rsvd_page *rp;
enum xpc_retval ret;
- remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
+ remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+ xp_nasid_mask_bytes,
GFP_KERNEL, &remote_rp_base);
if (remote_rp == NULL) {
return;
remote_vars = (struct xpc_vars *) remote_rp;
- discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
+ discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
- memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
+ memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
* nodes that can comprise an access protection grouping. The access
* protection is in regards to memory, IOI and IPI.
*/
- //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
- //>>> include/asm-ia64/sn/addrs.h
- #define SH1_MAX_REGIONS 64
- #define SH2_MAX_REGIONS 256
- max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
+ max_regions = 64;
+ region_size = sn_region_size;
+
+ switch (region_size) {
+ case 128:
+ max_regions *= 2;
+ case 64:
+ max_regions *= 2;
+ case 32:
+ max_regions *= 2;
+ region_size = 16;
+ DBUG_ON(!is_shub2());
+ }
for (region = 0; region < max_regions; region++) {
dev_dbg(xpc_part, "searching region %d\n", region);
- for (nasid = (region * sn_region_size * 2);
- nasid < ((region + 1) * sn_region_size * 2);
+ for (nasid = (region * region_size * 2);
+ nasid < ((region + 1) * region_size * 2);
nasid += 2) {
if ((volatile int) xpc_exiting) {
dev_dbg(xpc_part, "checking nasid %d\n", nasid);
- if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
+ if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
dev_dbg(xpc_part, "PROM indicates Nasid %d is "
"part of the local partition; skipping "
"region\n", nasid);
break;
}
- if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
+ if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
dev_dbg(xpc_part, "PROM indicates Nasid %d was "
"not on Numa-Link network at reset\n",
nasid);
/* pull over the reserved page structure */
ret = xpc_get_remote_rp(nasid, discovered_nasids,
- remote_rp, &remote_rsvd_page_pa);
+ remote_rp, &remote_rp_pa);
if (ret != xpcSuccess) {
dev_dbg(xpc_part, "unable to get reserved page "
"from nasid %d, reason=%d\n", nasid,
remote_vars->act_nasid,
remote_vars->act_phys_cpuid);
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+ version)) {
+ part->remote_amos_page_pa =
+ remote_vars->amos_page_pa;
+ xpc_mark_partition_disengaged(part);
+ xpc_cancel_partition_disengage_request(part);
+ }
xpc_IPI_send_activate(remote_vars);
}
}
return xpcPartitionDown;
}
- part_nasid_pa = part->remote_rp_pa +
- (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
+ memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
+
+ part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
- L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
- (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
return xpc_map_bte_errors(bte_res);
}