Diffstat (limited to 'arch/ia64/sn/kernel')
-rw-r--r--  arch/ia64/sn/kernel/Makefile               |    2
-rw-r--r--  arch/ia64/sn/kernel/bte.c                  |   17
-rw-r--r--  arch/ia64/sn/kernel/bte_error.c            |   58
-rw-r--r--  arch/ia64/sn/kernel/huberror.c             |    9
-rw-r--r--  arch/ia64/sn/kernel/io_init.c              |  223
-rw-r--r--  arch/ia64/sn/kernel/irq.c                  |   35
-rw-r--r--  arch/ia64/sn/kernel/klconflib.c            |   29
-rw-r--r--  arch/ia64/sn/kernel/mca.c                  |   12
-rw-r--r--  arch/ia64/sn/kernel/setup.c                |   77
-rw-r--r--  arch/ia64/sn/kernel/sn2/Makefile           |    2
-rw-r--r--  arch/ia64/sn/kernel/sn2/prominfo_proc.c    |   25
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn2_smp.c          |  213
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_hwperf.c        |    5
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_proc_fs.c       |   22
-rw-r--r--  arch/ia64/sn/kernel/sn2/timer.c            |   19
-rw-r--r--  arch/ia64/sn/kernel/sn2/timer_interrupt.c  |    7
-rw-r--r--  arch/ia64/sn/kernel/tiocx.c                |   41
-rw-r--r--  arch/ia64/sn/kernel/xp_main.c              |   17
-rw-r--r--  arch/ia64/sn/kernel/xpc.h                  | 1273
-rw-r--r--  arch/ia64/sn/kernel/xpc_channel.c          |   70
-rw-r--r--  arch/ia64/sn/kernel/xpc_main.c             |  245
-rw-r--r--  arch/ia64/sn/kernel/xpc_partition.c        |   10
22 files changed, 643 insertions, 1768 deletions
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 4351c4f..3e9b4ee 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -7,6 +7,8 @@
 # Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved.
 #
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
 	 huberror.o io_init.o iomv.o klconflib.o sn2/
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index dd73c0c..1f11db4 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved.
  */
 
 #include <linux/config.h>
@@ -186,18 +186,13 @@ retry_bteop:
 
 	/* Initialize the notification to a known value. */
 	*bte->most_rcnt_na = BTE_WORD_BUSY;
-	notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na));
+	notif_phys_addr = (u64)bte->most_rcnt_na;
 
-	if (is_shub2()) {
-		src = SH2_TIO_PHYS_TO_DMA(src);
-		dest = SH2_TIO_PHYS_TO_DMA(dest);
-		notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr);
-	}
 	/* Set the source and destination registers */
-	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
-	BTE_SRC_STORE(bte, TO_PHYS(src));
-	BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
-	BTE_DEST_STORE(bte, TO_PHYS(dest));
+	BTE_PRINTKV(("IBSA = 0x%lx)\n", src));
+	BTE_SRC_STORE(bte, src);
+	BTE_PRINTKV(("IBDA = 0x%lx)\n", dest));
+	BTE_DEST_STORE(bte, dest);
 
 	/* Set the notification register */
 	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fcbc748..f1ec137 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
  * Wait until all BTE related CRBs are completed
  * and then reset the interfaces.
  */
-void shub1_bte_error_handler(unsigned long _nodepda)
+int shub1_bte_error_handler(unsigned long _nodepda)
 {
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
 		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 
 	/* Determine information about our hub */
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 		mod_timer(recovery_timer, HZ * 5);
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
-				return;
+				return 1;
 			}
 		}
 	}
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
 
 	del_timer(recovery_timer);
+	return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub2_bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	struct bteinfo_s *bte;
+	nasid_t nasid;
+	u64 status;
+	int i;
+
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * Verify that all the BTEs are complete
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bte = &err_nodepda->bte_if[i];
+		status = BTE_LNSTAT_LOAD(bte);
+		if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+			continue;
+		mod_timer(recovery_timer, HZ * 5);
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+	if (ia64_sn_bte_recovery(nasid))
+		panic("bte_error_handler(): Fatal BTE Error");
+
+	del_timer(recovery_timer);
+	return 0;
 }
 
 /*
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
 	int i;
-	nasid_t nasid;
 	unsigned long irq_flags;
 	volatile u64 *notify;
 	bte_result_t bh_error;
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
 	}
 
 	if (is_shub1()) {
-		shub1_bte_error_handler(_nodepda);
+		if (shub1_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	} else {
-		nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
-
-		if (ia64_sn_bte_recovery(nasid))
-			panic("bte_error_handler(): Fatal BTE Error");
+		if (shub2_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	}
 
 	for (i = 0; i < BTES_PER_NODE; i++) {
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 5c5eb01..56ab6ba 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
 	ret_stuff.v0 = 0;
 	hubdev_info = (struct hubdev_info *)arg;
 	nasid = hubdev_info->hdi_nasid;
-	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+
+	if (is_shub1()) {
+		SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
 			(u64) nasid, 0, 0, 0, 0, 0, 0);
-	if ((int)ret_stuff.v0)
-		panic("hubii_eint_handler(): Fatal TIO Error");
+		if ((int)ret_stuff.v0)
+			panic("hubii_eint_handler(): Fatal TIO Error");
 
-	if (is_shub1()) {
 		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
 			(void)hubiio_crb_error_handler(hubdev_info);
 	} else
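The bte_error.c change above converts both per-chipset handlers to a common contract: return nonzero after re-arming the recovery timer while CRBs are still draining (so bte_error_handler() drops its lock and retries later), and zero once the interface has been reset. A minimal sketch of that poll-or-reschedule shape, with hypothetical stand-ins (struct recovery_state, hw_busy(), reset_interfaces()) for the SN-specific pieces:

    #include <linux/timer.h>

    /* Hypothetical stand-ins for the SN-specific state and hardware tests. */
    struct recovery_state {
            struct timer_list recovery_timer;
    };
    extern int hw_busy(struct recovery_state *s);
    extern void reset_interfaces(struct recovery_state *s);

    /* Returns 1 while work is still in flight (timer re-armed, caller must
     * back out); returns 0 once the hardware has been reset. */
    static int recovery_handler(unsigned long data)
    {
            struct recovery_state *s = (struct recovery_state *)data;

            if (hw_busy(s)) {
                    /* Not done yet: re-arm the timer and report "retry". */
                    mod_timer(&s->recovery_timer, jiffies + 5 * HZ);
                    return 1;
            }
            reset_interfaces(s);            /* everything drained; safe now */
            del_timer(&s->recovery_timer);
            return 0;                       /* recovered; caller may proceed */
    }

In the hunk, bte_error_handler() uses exactly this return value to decide whether to release the recovery spinlock and bail out or to continue clearing the per-BTE error state.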
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 318087e..dfb3f29 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -10,6 +10,7 @@
 #include <linux/nodemask.h>
 #include <asm/sn/types.h>
 #include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/io.h>
 #include <asm/sn/pcibr_provider.h>
@@ -22,6 +23,10 @@
 #include "xtalk/hubdev.h"
 #include "xtalk/xwidgetdev.h"
 
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+
 static struct list_head sn_sysdata_list;
 
 /* sysdata list struct */
@@ -39,12 +44,12 @@ struct brick {
 	struct slab_info slab_info[MAX_SLABS + 1];
 };
 
-int sn_ioif_inited = 0;		/* SN I/O infrastructure initialized? */
+int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
 
 struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
 
-static int max_segment_number = 0; /* Default highest segment number */
-static int max_pcibus_number = 255; /* Default highest pci bus number */
+static int max_segment_number;		/* Default highest segment number */
+static int max_pcibus_number = 255;	/* Default highest pci bus number */
 
 /*
  * Hooks and struct for unsupported pci providers
 */
@@ -76,31 +81,29 @@ static struct sn_pcibus_provider sn_pci_default_provider = {
 };
 
 /*
- * Retrieve the DMA Flush List given nasid.  This list is needed
- * to implement the WAR - Flush DMA data on PIO Reads.
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
  */
-static inline uint64_t
-sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+			     u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
 
 	SAL_CALL_NOLOCK(ret_stuff,
-			(u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
-			(u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0,
-			0);
-	return ret_stuff.v0;
-
+			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+			(u64) nasid, (u64) widget_num,
+			(u64) device_num, (u64) address, 0, 0, 0);
+	return ret_stuff.status;
 }
 
 /*
  * Retrieve the hub device info structure for the given nasid.
  */
-static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
 
@@ -114,9 +117,8 @@ static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
 /*
  * Retrieve the pci bus information given the bus number.
  */
-static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
@@ -130,9 +132,9 @@ static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 /*
  * Retrieve the pci device information given the bus and device|function number.
  */
-static inline uint64_t
-sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
-		    u64 sn_irq_info)
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+		    u64 sn_irq_info)
 {
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
@@ -140,7 +142,7 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
 
 	SAL_CALL_NOLOCK(ret_stuff,
 			(u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
-			(u64) segment, (u64) bus_number, (u64) devfn,
+			(u64) segment, (u64) bus_number, (u64) devfn,
 			(u64) pci_dev,
 			sn_irq_info, 0, 0);
 	return ret_stuff.v0;
@@ -164,18 +166,56 @@ sn_pcidev_info_get(struct pci_dev *dev)
 	return NULL;
 }
 
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+static u8 war_implemented = 0;
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+			       struct sn_flush_device_common *common)
+{
+	struct sn_flush_device_war *war_list;
+	struct sn_flush_device_war *dev_entry;
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	if (!war_implemented) {
+		printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
+		       "PROM flush WAR\n");
+		war_implemented = 1;
+	}
+
+	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+	if (!war_list)
+		BUG();
+
+	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+	if (isrv.status)
+		panic("sn_device_fixup_war failed: %s\n",
+		      ia64_sal_strerror(isrv.status));
+
+	dev_entry = war_list + device;
+	memcpy(common,dev_entry, sizeof(*common));
+	kfree(war_list);
+
+	return isrv.status;
+}
+
 /*
- * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
+ * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
  *	each node in the system.
  */
-static void sn_fixup_ionodes(void)
+static void __init sn_fixup_ionodes(void)
 {
-
-	struct sn_flush_device_list *sn_flush_device_list;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_kernel *dev_entry;
 	struct hubdev_info *hubdev;
-	uint64_t status;
-	uint64_t nasid;
-	int i, widget;
+	u64 status;
+	u64 nasid;
+	int i, widget, device, size;
 
 	/*
 	 * Get SGI Specific HUB chipset information.
@@ -186,7 +226,7 @@ static void sn_fixup_ionodes(void)
 		nasid = cnodeid_to_nasid(i);
 		hubdev->max_segment_number = 0xffffffff;
 		hubdev->max_pcibus_number = 0xff;
-		status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
+		status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
 		if (status)
 			continue;
 
@@ -211,40 +251,49 @@ static void sn_fixup_ionodes(void)
 		if (!hubdev->hdi_flush_nasid_list.widget_p)
 			continue;
 
+		size = (HUB_WIDGET_ID_MAX + 1) *
+			sizeof(struct sn_flush_device_kernel *);
 		hubdev->hdi_flush_nasid_list.widget_p =
-		    kmalloc((HUB_WIDGET_ID_MAX + 1) *
-			    sizeof(struct sn_flush_device_list *), GFP_KERNEL);
-
-		memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
-		       (HUB_WIDGET_ID_MAX + 1) *
-		       sizeof(struct sn_flush_device_list *));
+			kzalloc(size, GFP_KERNEL);
+		if (!hubdev->hdi_flush_nasid_list.widget_p)
+			BUG();
 
 		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
-			sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
-						       sizeof(struct
-							      sn_flush_device_list),
-						       GFP_KERNEL);
-			memset(sn_flush_device_list, 0x0,
-			       DEV_PER_WIDGET *
-			       sizeof(struct sn_flush_device_list));
-
-			status =
-			    sal_get_widget_dmaflush_list(nasid, widget,
-							 (uint64_t)
-							 __pa
-							 (sn_flush_device_list));
-			if (status) {
-				kfree(sn_flush_device_list);
-				continue;
+			size = DEV_PER_WIDGET *
+				sizeof(struct sn_flush_device_kernel);
+			sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+			if (!sn_flush_device_kernel)
+				BUG();
+
+			dev_entry = sn_flush_device_kernel;
+			for (device = 0; device < DEV_PER_WIDGET;
+			     device++,dev_entry++) {
+				size = sizeof(struct sn_flush_device_common);
+				dev_entry->common = kzalloc(size, GFP_KERNEL);
+				if (!dev_entry->common)
+					BUG();
+
+				if (sn_prom_feature_available(
+							PRF_DEVICE_FLUSH_LIST))
+					status = sal_get_device_dmaflush_list(
+						     nasid, widget, device,
+						     (u64)(dev_entry->common));
+				else
+					status = sn_device_fixup_war(nasid,
+								     widget,
+								     device,
+							     dev_entry->common);
+				if (status != SALRET_OK)
+					panic("SAL call failed: %s\n",
+					      ia64_sal_strerror(status));
+
+				spin_lock_init(&dev_entry->sfdl_flush_lock);
 			}
 
-			spin_lock_init(&sn_flush_device_list->sfdl_flush_lock);
-			hubdev->hdi_flush_nasid_list.widget_p[widget] =
-			    sn_flush_device_list;
-		}
-
+			if (sn_flush_device_kernel)
+				hubdev->hdi_flush_nasid_list.widget_p[widget] =
+						sn_flush_device_kernel;
+		}
 	}
-
 }
 
 /*
@@ -256,7 +305,7 @@ static void sn_fixup_ionodes(void)
  */
 static void
 sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
-		    int64_t * pci_addrs)
+		    s64 * pci_addrs)
 {
 	struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
 	unsigned int i;
@@ -316,20 +365,19 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 	struct pci_bus *host_pci_bus;
 	struct pci_dev *host_pci_dev;
 	struct pcidev_info *pcidev_info;
-	int64_t pci_addrs[PCI_ROM_RESOURCE + 1];
+	s64 pci_addrs[PCI_ROM_RESOURCE + 1];
 	struct sn_irq_info *sn_irq_info;
 	unsigned long size;
 	unsigned int bus_no, devfn;
 
 	pci_dev_get(dev); /* for the sysdata pointer */
 	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
-	if (pcidev_info <= 0)
+	if (!pcidev_info)
 		BUG();		/* Cannot afford to run out of memory */
 
-	sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
-	if (sn_irq_info <= 0)
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
 		BUG();		/* Cannot afford to run out of memory */
-	memset(sn_irq_info, 0, sizeof(struct sn_irq_info));
 
 	/* Call to retrieve pci device information needed by kernel. */
 	status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number,
@@ -407,6 +455,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 		pcidev_info->pdi_sn_irq_info = NULL;
 		kfree(sn_irq_info);
 	}
+
+	/*
+	 * MSI currently not supported on altix.  Remove this when
+	 * the MSI abstraction patches are integrated into the kernel
+	 * (sometime after 2.6.16 releases)
+	 */
+	dev->no_msi = 1;
 }
 
 /*
@@ -415,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
  */
 void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 {
-	int status = 0;
+	int status;
 	int nasid, cnode;
 	struct pci_controller *controller;
 	struct sn_pci_controller *sn_controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
-	void *provider_soft = NULL;
+	void *provider_soft;
 	struct sn_pcibus_provider *provider;
 
 	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -468,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	bus->sysdata = controller;
 	if (provider->bus_fixup)
 		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
+	else
+		provider_soft = NULL;
 
 	if (provider_soft == NULL) {
 		/* fixup failed or not applicable */
@@ -550,15 +607,15 @@ void sn_bus_store_sysdata(struct pci_dev *dev)
 void sn_bus_free_sysdata(void)
 {
 	struct sysdata_el *element;
-	struct list_head *list;
+	struct list_head *list, *safe;
 
-sn_sysdata_free_start:
-	list_for_each(list, &sn_sysdata_list) {
+	list_for_each_safe(list, safe, &sn_sysdata_list) {
 		element = list_entry(list, struct sysdata_el, entry);
 		list_del(&element->entry);
+		list_del(&(((struct pcidev_info *)
+			     (element->sysdata))->pdi_list));
 		kfree(element->sysdata);
 		kfree(element);
-		goto sn_sysdata_free_start;
 	}
 	return;
 }
@@ -571,13 +628,8 @@ sn_sysdata_free_start:
  */
 static int __init sn_pci_init(void)
 {
-	int i = 0;
-	int j = 0;
+	int i, j;
 	struct pci_dev *pci_dev = NULL;
-	extern void sn_init_cpei_timer(void);
-#ifdef CONFIG_PROC_FS
-	extern void register_sn_procfs(void);
-#endif
 
 	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
 		return 0;
@@ -633,32 +685,29 @@ static int __init sn_pci_init(void)
 */
 void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
 {
-
 	struct hubdev_info *hubdev_info;
+	int size;
+	pg_data_t *pg;
+
+	size = sizeof(struct hubdev_info);
 
 	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0),
-							     sizeof(struct
								    hubdev_info));
+		pg = NODE_DATA(0);
 	else
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node),
-							     sizeof(struct
								    hubdev_info));
-	npda->pdinfo = (void *)hubdev_info;
+		pg = NODE_DATA(node);
+
+	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+	npda->pdinfo = (void *)hubdev_info;
 }
 
 geoid_t
 cnodeid_get_geoid(cnodeid_t cnode)
 {
-
 	struct hubdev_info *hubdev;
 
 	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
 	return hubdev->hdi_geoid;
-
 }
 
 subsys_initcall(sn_pci_init);
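sn_fixup_ionodes() above now asks the PROM whether it supports the per-device flush list (PRF_DEVICE_FLUSH_LIST, from sn_feature_sets.h) and only falls back to the per-widget WAR on old firmware. A condensed sketch of that feature-gated fallback, assuming hypothetical helpers new_sal_device_list()/old_war_widget_list() in place of the two SAL paths shown in the hunk:

    /* Hypothetical stand-ins for the two firmware paths in the hunk above. */
    extern int sn_prom_feature_available(int feature);
    extern long new_sal_device_list(unsigned long nasid, unsigned long widget,
                                    int device, void *common);
    extern long old_war_widget_list(unsigned long nasid, unsigned long widget,
                                    int device, void *common);

    static long get_flush_entry(unsigned long nasid, unsigned long widget,
                                int device, void *common)
    {
            /* New PROMs fill a single per-device entry directly... */
            if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
                    return new_sal_device_list(nasid, widget, device, common);
            /* ...old PROMs only export the whole widget list, so the WAR
             * fetches it and copies out the one entry. */
            return old_war_widget_list(nasid, widget, device, common);
    }

Note that both real paths hand the firmware a physical address (__pa()) because SAL writes the result into memory directly.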
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 01d18b7..c373113 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -5,11 +5,12 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved.
  */
 
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
@@ -28,7 +29,7 @@ extern int sn_ioif_inited;
 static struct list_head **sn_irq_lh;
 static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
 
-static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
+static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
				     u64 sn_irq_info,
				     int req_irq, nasid_t req_nasid,
				     int req_slice)
@@ -76,17 +77,15 @@ static void sn_enable_irq(unsigned int irq)
 
 static void sn_ack_irq(unsigned int irq)
 {
-	u64 event_occurred, mask = 0;
+	u64 event_occurred, mask;
 
 	irq = irq & 0xff;
-	event_occurred =
-	    HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
 	mask = event_occurred & SH_ALL_INT_MASK;
-	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS),
-	      mask);
+	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
-	move_irq(irq);
+	move_native_irq(irq);
 }
 
 static void sn_end_irq(unsigned int irq)
@@ -123,7 +122,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
 
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
				 sn_irq_lh[irq], list) {
-		uint64_t bridge;
+		u64 bridge;
 		int local_widget, status;
 		nasid_t local_nasid;
 		struct sn_irq_info *new_irq_info;
@@ -134,7 +133,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
			break;
		memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
 
-		bridge = (uint64_t) new_irq_info->irq_bridge;
+		bridge = (u64) new_irq_info->irq_bridge;
		if (!bridge) {
			kfree(new_irq_info);
			break; /* irq is not a device interrupt */
@@ -219,9 +218,8 @@ static void register_intr_pda(struct sn_irq_info *sn_irq_info)
		pdacpu(cpu)->sn_last_irq = irq;
	}
 
-	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) {
+	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
		pdacpu(cpu)->sn_first_irq = irq;
-	}
 }
 
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
@@ -289,7 +287,7 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
	spin_unlock(&sn_irq_info_lock);
 
-	(void)register_intr_pda(sn_irq_info);
+	register_intr_pda(sn_irq_info);
 }
 
 void sn_irq_unfixup(struct pci_dev *pci_dev)
@@ -301,7 +299,9 @@ void sn_irq_unfixup(struct pci_dev *pci_dev)
		return;
 
	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
-	if (!sn_irq_info || !sn_irq_info->irq_irq) {
+	if (!sn_irq_info)
+		return;
+	if (!sn_irq_info->irq_irq) {
		kfree(sn_irq_info);
		return;
	}
@@ -349,10 +349,10 @@ static void force_interrupt(int irq)
 */
 static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 {
-	uint64_t regval;
+	u64 regval;
	int irr_reg_num;
	int irr_bit;
-	uint64_t irr_reg;
+	u64 irr_reg;
	struct pcidev_info *pcidev_info;
	struct pcibus_info *pcibus_info;
 
@@ -419,7 +419,7 @@ void sn_lb_int_war_check(void)
	rcu_read_unlock();
 }
 
-void sn_irq_lh_init(void)
+void __init sn_irq_lh_init(void)
 {
	int i;
 
@@ -434,5 +434,4 @@ void sn_irq_lh_init(void)
 
		INIT_LIST_HEAD(sn_irq_lh[i]);
	}
-
 }
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
index 0f11a32..87682b4 100644
--- a/arch/ia64/sn/kernel/klconflib.c
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -78,31 +78,30 @@ format_module_id(char *buffer, moduleid_t m, int fmt)
	position = MODULE_GET_BPOS(m);
 
	if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
-	    /* Brief module number format, eg. 002c15 */
+		/* Brief module number format, eg. 002c15 */
 
-	    /* Decompress the rack number */
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		/* Decompress the rack number */
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    /* Add the brick type */
-	    *buffer++ = brickchar;
+		/* Add the brick type */
+		*buffer++ = brickchar;
	}
	else if (fmt == MODULE_FORMAT_LONG) {
-	    /* Fuller hwgraph format, eg. rack/002/bay/15 */
+		/* Fuller hwgraph format, eg. rack/002/bay/15 */
 
-	    strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
 
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
	}
 
	/* Add the bay position, using at least two digits */
	if (position < 10)
-	    *buffer++ = '0';
+		*buffer++ = '0';
	sprintf(buffer, "%d", position);
-
 }
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
index 6546db6..3db62f2 100644
--- a/arch/ia64/sn/kernel/mca.c
+++ b/arch/ia64/sn/kernel/mca.c
@@ -3,13 +3,14 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/mutex.h>
 #include <asm/mca.h>
 #include <asm/sal.h>
 #include <asm/sn/sn_sal.h>
@@ -27,7 +28,7 @@ void sn_init_cpei_timer(void);
 
 /* Printing oemdata from mca uses data that is not passed through SAL, it is
  * global.  Only one user at a time.
 */
-static DECLARE_MUTEX(sn_oemdata_mutex);
+static DEFINE_MUTEX(sn_oemdata_mutex);
 static u8 **sn_oemdata;
 static u64 *sn_oemdata_size, sn_oemdata_bufsize;
 
@@ -89,7 +90,7 @@ static int
 sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
				    u64 * oemdata_size)
 {
-	down(&sn_oemdata_mutex);
+	mutex_lock(&sn_oemdata_mutex);
	sn_oemdata = oemdata;
	sn_oemdata_size = oemdata_size;
	sn_oemdata_bufsize = 0;
@@ -107,7 +108,7 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
		*sn_oemdata_size = 0;
		ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
	}
-	up(&sn_oemdata_mutex);
+	mutex_unlock(&sn_oemdata_mutex);
	return 0;
 }
 
@@ -136,7 +137,8 @@ int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdat
 
 static int __init sn_salinfo_init(void)
 {
-	salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
+	if (ia64_platform_is("sn2"))
+		salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
	return 0;
 }
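The mca.c hunk is part of the 2.6.16-era semaphore-to-mutex conversion: DECLARE_MUTEX() actually declared a counting semaphore used as a mutex, and down()/up() become mutex_lock()/mutex_unlock() from the new <linux/mutex.h> API. A minimal before/after sketch of the pattern:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(example_mutex);  /* was: static DECLARE_MUTEX(example_sem); */

    static int serialized_op(void)
    {
            mutex_lock(&example_mutex);      /* was: down(&example_sem); */
            /* ...critical section: only one printer of oemdata at a time... */
            mutex_unlock(&example_mutex);    /* was: up(&example_sem); */
            return 0;
    }

The same conversion is applied to the XPC registration semaphores in xp_main.c further down.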
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index e510dce..5b84836 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -67,6 +67,7 @@ extern unsigned long last_time_offset;
 extern void (*ia64_mark_idle) (int);
 extern void snidle(int);
 extern unsigned char acpi_kbd_controller_present;
+extern unsigned long long (*ia64_printk_clock)(void);
 
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
@@ -74,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
 EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
 
-DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
 
 DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
@@ -125,20 +126,6 @@ struct screen_info sn_screen_info = {
 };
 
 /*
- * This is here so we can use the CMOS detection in ide-probe.c to
- * determine what drives are present.  In theory, we don't need this
- * as the auto-detection could be done via ide-probe.c:do_probe() but
- * in practice that would be much slower, which is painful when
- * running in the simulator.  Note that passing zeroes in DRIVE_INFO
- * is sufficient (the IDE driver will autodetect the drive geometry).
- */
-#ifdef CONFIG_IA64_GENERIC
-extern char drive_info[4 * 16];
-#else
-char drive_info[4 * 16];
-#endif
-
-/*
  * This routine can only be used during init, since
  * smp_boot_data is an init data structure.
  * We have to use smp_boot_data.cpu_phys_id to find
@@ -209,7 +196,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __initdata shub_1_1_found = 0;
+static int __initdata shub_1_1_found;
 
 /*
  * sn_check_for_wars
@@ -330,6 +317,7 @@ struct pcdp_vga_device {
 #define PCDP_PCI_TRANS_IOPORT	0x02
 #define PCDP_PCI_TRANS_MMIO	0x01
 
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
 static void
 sn_scan_pcdp(void)
 {
@@ -371,6 +359,17 @@ sn_scan_pcdp(void)
			break; /* once we find the primary, we're done */
	}
 }
+#endif
+
+static unsigned long sn2_rtc_initial;
+
+static unsigned long long ia64_sn2_printk_clock(void)
+{
+	unsigned long rtc_now = rtc_time();
+
+	return (rtc_now - sn2_rtc_initial) *
+		(1000000000 / sn_rtc_cycles_per_second);
+}
 
 /**
  * sn_setup - SN platform setup routine
@@ -386,6 +385,7 @@ void __init sn_setup(char **cmdline_p)
	u32 version = sn_sal_rev();
	extern void sn_cpu_init(void);
 
+	sn2_rtc_initial = rtc_time();
	ia64_sn_plat_set_error_handling_features();	// obsolete
	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
@@ -437,19 +437,6 @@ void __init sn_setup(char **cmdline_p)
	 */
	build_cnode_tables();
 
-	/*
-	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
-	 * support here so we don't have to listen to failed keyboard probe
-	 * messages.
-	 */
-	if (version <= 0x0209 && acpi_kbd_controller_present) {
-		printk(KERN_INFO "Disabling legacy keyboard support as prom "
-		       "is too old and doesn't provide FADT\n");
-		acpi_kbd_controller_present = 0;
-	}
-
-	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
-
	status =
	    ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
			       &drift);
@@ -463,6 +450,21 @@ void __init sn_setup(char **cmdline_p)
 
	platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
 
+	ia64_printk_clock = ia64_sn2_printk_clock;
+
+	/*
+	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
+	 * support here so we don't have to listen to failed keyboard probe
+	 * messages.
+	 */
+	if (version <= 0x0209 && acpi_kbd_controller_present) {
+		printk(KERN_INFO "Disabling legacy keyboard support as prom "
+		       "is too old and doesn't provide FADT\n");
+		acpi_kbd_controller_present = 0;
+	}
+
+	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
+
	/*
	 * we set the default root device to /dev/hda
	 * to make simulation easy
@@ -578,13 +580,17 @@ void __init sn_cpu_init(void)
			sn_prom_type = 2;
		else
			sn_prom_type = 1;
-		printk("Running on medusa with %s PROM\n", (sn_prom_type == 1) ? "real" : "fake");
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+		       (sn_prom_type == 1) ? "real" : "fake");
	}
 
	memset(pda, 0, sizeof(pda));
-	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
-				&sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
-				&sn_coherency_id, &sn_region_size))
+	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+				&sn_hub_info->nasid_bitmask,
+				&sn_hub_info->nasid_shift,
+				&sn_system_size, &sn_sharing_domain_size,
+				&sn_partition_id, &sn_coherency_id,
+				&sn_region_size))
		BUG();
	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
 
@@ -716,7 +722,8 @@ void __init build_cnode_tables(void)
	for_each_online_node(node) {
		kl_config_hdr_t *klgraph_header;
		nasid = cnodeid_to_nasid(node);
-		if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
+		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+		if (klgraph_header == NULL)
			BUG();
		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
		while (brd) {
@@ -734,7 +741,7 @@ nasid_slice_to_cpuid(int nasid, int slice)
 {
	long cpu;
 
-	for (cpu=0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpuid_to_nasid(cpu) == nasid &&
		    cpuid_to_slice(cpu) == slice)
			return cpu;
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
index 170bde4..99e1776 100644
--- a/arch/ia64/sn/kernel/sn2/Makefile
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -9,5 +9,7 @@
 # sn2 specific kernel files
 #
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
	 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index 81c63b2..6ae276d 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * Module to export the system's Firmware Interface Tables, including
  * PROM revision numbers and banners, in /proc
@@ -190,7 +190,7 @@ static int
 read_version_entry(char *page, char **start, off_t off, int count, int *eof,
		   void *data)
 {
-	int len = 0;
+	int len;
 
	/* data holds the NASID of the node */
	len = dump_version(page, (unsigned long)data);
@@ -202,7 +202,7 @@ static int
 read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
	       void *data)
 {
-	int len = 0;
+	int len;
 
	/* data holds the NASID of the node */
	len = dump_fit(page, (unsigned long)data);
@@ -229,13 +229,16 @@ int __init prominfo_init(void)
	struct proc_dir_entry *p;
	cnodeid_t cnodeid;
	unsigned long nasid;
+	int size;
	char name[NODE_NAME_LEN];
 
	if (!ia64_platform_is("sn2"))
		return 0;
 
-	proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *),
-			       GFP_KERNEL);
+	size = num_online_nodes() * sizeof(struct proc_dir_entry *);
+	proc_entries = kzalloc(size, GFP_KERNEL);
+	if (!proc_entries)
+		return -ENOMEM;
 
	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
 
@@ -244,14 +247,12 @@ int __init prominfo_init(void)
		sprintf(name, "node%d", cnodeid);
		*entp = proc_mkdir(name, sgi_prominfo_entry);
		nasid = cnodeid_to_nasid(cnodeid);
-		p = create_proc_read_entry(
-			"fit", 0, *entp, read_fit_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("fit", 0, *entp, read_fit_entry,
					   (void *)nasid);
		if (p)
			p->owner = THIS_MODULE;
-		p = create_proc_read_entry(
-			"version", 0, *entp, read_version_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("version", 0, *entp,
					   read_version_entry, (void *)nasid);
		if (p)
			p->owner = THIS_MODULE;
		entp++;
@@ -263,7 +264,7 @@ int __init prominfo_init(void)
 void __exit prominfo_exit(void)
 {
	struct proc_dir_entry **entp;
-	unsigned cnodeid;
+	unsigned int cnodeid;
	char name[NODE_NAME_LEN];
 
	entp = proc_entries;
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 471bbaa..b2e1e74 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -46,104 +46,24 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
-void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0,
-	volatile unsigned long *, unsigned long data1);
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+			  volatile unsigned long *, unsigned long,
+			  volatile unsigned long *, unsigned long);
 
-#ifdef DEBUG_PTC
 /*
- * ptctest:
- *
- * 	xyz - 3 digit hex number:
- * 		x - Force PTC purges to use shub:
- * 			0 - no force
- * 			1 - force
- * 		y - interupt enable
- * 			0 - disable interrupts
- * 			1 - leave interuupts enabled
- * 		z - type of lock:
- * 			0 - global lock
- * 			1 - node local lock
- * 			2 - no lock
- *
- * 	Note: on shub1, only ptctest == 0 is supported.  Don't try other values!
+ * Note: some is the following is captured here to make degugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
  */
-
-static unsigned int sn2_ptctest = 0;
-
-static int __init ptc_test(char *str)
-{
-	get_option(&str, &sn2_ptctest);
-	return 1;
-}
-__setup("ptctest=", ptc_test);
-
-static inline int ptc_lock(unsigned long *flagp)
-{
-	unsigned long opt = sn2_ptctest & 255;
-
-	switch (opt) {
-	case 0x00:
-		spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-		break;
-	case 0x01:
-		spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
-		break;
-	case 0x02:
-		local_irq_save(*flagp);
-		break;
-	case 0x10:
-		spin_lock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_lock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-	return opt;
-}
-
-static inline void ptc_unlock(unsigned long flags, int opt)
-{
-	switch (opt) {
-	case 0x00:
-		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
-		break;
-	case 0x01:
-		spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
-		break;
-	case 0x02:
-		local_irq_restore(flags);
-		break;
-	case 0x10:
-		spin_unlock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_unlock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-}
-#else
-
-#define sn2_ptctest	0
-
-static inline int ptc_lock(unsigned long *flagp)
-{
-	spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-	return 0;
-}
-
-static inline void ptc_unlock(unsigned long flags, int opt)
-{
-	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
-}
-#endif
+#define local_node_uses_ptc_ga(sh1)	((sh1) ? 1 : 0)
+#define max_active_pio(sh1)		((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock()	1
+#define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
 
 struct ptc_stats {
	unsigned long ptc_l;
@@ -151,27 +71,32 @@ struct ptc_stats {
	unsigned long shub_ptc_flushes;
	unsigned long nodes_flushed;
	unsigned long deadlocks;
+	unsigned long deadlocks2;
	unsigned long lock_itc_clocks;
	unsigned long shub_itc_clocks;
	unsigned long shub_itc_clocks_max;
+	unsigned long shub_ptc_flushes_not_my_mm;
 };
 
+#define sn2_ptctest	0
+
 static inline unsigned long wait_piowc(void)
 {
-	volatile unsigned long *piows, zeroval;
-	unsigned long ws;
+	volatile unsigned long *piows;
+	unsigned long zeroval, ws;
 
	piows = pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
	do {
		cpu_relax();
	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
-	return ws;
+	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
 }
 
 void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
-	if (mm == current->mm)
+	/* flush_tlb_mm is inefficient if more than 1 users of mm */
+	if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
		flush_tlb_mm(mm);
 }
 
@@ -201,12 +126,14 @@ void
 sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
		     unsigned long end, unsigned long nbits)
 {
-	int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
-	int mymm = (mm == current->active_mm && current->mm);
+	int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+	int mymm = (mm == current->active_mm && mm == current->mm);
+	int use_cpu_ptcga;
	volatile unsigned long *ptc0, *ptc1;
-	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
	short nasids[MAX_NUMNODES], nix;
	nodemask_t nodes_flushed;
+	int active, max_active, deadlock;
 
	nodes_clear(nodes_flushed);
	i = 0;
@@ -267,41 +194,56 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 
	mynasid = get_nasid();
+	use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+	max_active = max_active_pio(shub1);
 
	itc = ia64_get_itc();
-	opt = ptc_lock(&flags);
+	spin_lock_irqsave(PTC_LOCK(shub1), flags);
	itc2 = ia64_get_itc();
+
	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
	__get_cpu_var(ptcstats).shub_ptc_flushes++;
	__get_cpu_var(ptcstats).nodes_flushed += nix;
+	if (!mymm)
+		__get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+	if (use_cpu_ptcga && !mymm) {
+		old_rr = ia64_get_rr(start);
+		ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+		ia64_srlz_d();
+	}
+
+	wait_piowc();
	do {
		if (shub1)
			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
		else
			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
-		for (i = 0; i < nix; i++) {
+		deadlock = 0;
+		active = 0;
+		for (ibegin = 0, i = 0; i < nix; i++) {
			nasid = nasids[i];
-			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
+			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
				ia64_ptcga(start, nbits << 2);
				ia64_srlz_i();
			} else {
				ptc0 = CHANGE_NASID(nasid, ptc0);
				if (ptc1)
					ptc1 = CHANGE_NASID(nasid, ptc1);
-				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
-							   data1);
-				flushed = 1;
+				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+				active++;
+			}
+			if (active >= max_active || i == (nix - 1)) {
+				if ((deadlock = wait_piowc())) {
+					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+					if (reset_max_active_on_deadlock())
+						max_active = 1;
+				}
+				active = 0;
+				ibegin = i + 1;
			}
		}
-		if (flushed
-		    && (wait_piowc() &
-				(SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
-			sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
-		}
-
		start += (1UL << nbits);
-
	} while (start < end);
 
	itc2 = ia64_get_itc() - itc2;
@@ -309,7 +251,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
		__get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
 
-	ptc_unlock(flags, opt);
+	if (old_rr) {
+		ia64_set_rr(start, old_rr);
+		ia64_srlz_d();
+	}
+
+	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 
	preempt_enable();
 }
@@ -321,27 +268,31 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 * TLB flush transaction.  The recovery sequence is somewhat tricky & is
 * coded in assembly language.
 */
-void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
-	volatile unsigned long *ptc1, unsigned long data1)
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+			  volatile unsigned long *ptc0, unsigned long data0,
+			  volatile unsigned long *ptc1, unsigned long data1)
 {
-	extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
-	        volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
	short nasid, i;
-	unsigned long *piows, zeroval;
+	unsigned long *piows, zeroval, n;
 
	__get_cpu_var(ptcstats).deadlocks++;
 
	piows = (unsigned long *) pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
 
-	for (i=0; i < nix; i++) {
+
+	for (i=ib; i <= ie; i++) {
		nasid = nasids[i];
-		if (!(sn2_ptctest & 3) && nasid == mynasid)
+		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
			continue;
		ptc0 = CHANGE_NASID(nasid, ptc0);
		if (ptc1)
			ptc1 = CHANGE_NASID(nasid, ptc1);
-		sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+
+		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+		__get_cpu_var(ptcstats).deadlocks2 += n;
	}
 }
@@ -452,20 +403,22 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
	cpu = *(loff_t *) data;
 
	if (!cpu) {
-		seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
+		seq_printf(file,
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
	}
 
	if (cpu < NR_CPUS && cpu_online(cpu)) {
		stat = &per_cpu(ptcstats, cpu);
-		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
			   stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
			   stat->deadlocks,
			   1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
			   1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-			   1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec);
+			   1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+			   stat->shub_ptc_flushes_not_my_mm,
+			   stat->deadlocks2);
	}
-
	return 0;
 }
 
@@ -476,7 +429,7 @@ static struct seq_operations sn2_ptc_seq_ops = {
	.show = sn2_ptc_seq_show
 };
 
-int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 {
	return seq_open(file, &sn2_ptc_seq_ops);
 }
@@ -493,7 +446,7 @@ static struct proc_dir_entry *proc_sn2_ptc;
 static int __init sn2_ptc_init(void)
 {
	if (!ia64_platform_is("sn2"))
-		return -ENOSYS;
+		return 0;
 
	if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 19b54fb..70db21f 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
  *
  * SGI Altix topology and hardware performance monitoring API.
  * Mark Goodwin <markgw@sgi.com>.
@@ -973,6 +973,9 @@ static int __devinit sn_hwperf_misc_register_init(void)
 {
	int e;
 
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
	sn_hwperf_init();
 
	/*
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index a06719d..c686d9c 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -6,11 +6,11 @@
  * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #include <linux/config.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <asm/uaccess.h>
 #include <asm/sn/sn_sal.h>
 
 static int partition_id_show(struct seq_file *s, void *p)
@@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file)
	return single_open(file, coherence_id_show, NULL);
 }
 
-static struct proc_dir_entry *sn_procfs_create_entry(
-	const char *name, struct proc_dir_entry *parent,
-	int (*openfunc)(struct inode *, struct file *),
-	int (*releasefunc)(struct inode *, struct file *))
+static struct proc_dir_entry
+*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
+			int (*openfunc)(struct inode *, struct file *),
+			int (*releasefunc)(struct inode *, struct file *))
 {
	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
 
@@ -126,24 +126,24 @@ void register_sn_procfs(void)
		return;
 
	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-		partition_id_open, single_release);
+			       partition_id_open, single_release);
 
	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-		system_serial_number_open, single_release);
+			       system_serial_number_open, single_release);
 
	sn_procfs_create_entry("licenseID", sgi_proc_dir,
-		licenseID_open, single_release);
+			       licenseID_open, single_release);
 
	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
-		sn_force_interrupt_open, single_release);
+				   sn_force_interrupt_open, single_release);
	if (e)
		e->proc_fops->write = sn_force_interrupt_write_proc;
 
	sn_procfs_create_entry("coherence_id", sgi_proc_dir,
-		coherence_id_open, single_release);
+			       coherence_id_open, single_release);
 
	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-		sn_topology_open, sn_topology_release);
+			       sn_topology_open, sn_topology_release);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index deb9baf..56a88b6 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -14,6 +14,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/system.h>
+#include <asm/timex.h>
 #include <asm/sn/leds.h>
 #include <asm/sn/shub_mmr.h>
@@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = {
	.source = TIME_SOURCE_MMIO64
 };
 
+/*
+ * sn udelay uses the RTC instead of the ITC because the ITC is not
+ * synchronized across all CPUs, and the thread may migrate to another CPU
+ * if preemption is enabled.
+ */
+static void
+ia64_sn_udelay (unsigned long usecs)
+{
+	unsigned long start = rtc_time();
+	unsigned long end = start +
+			usecs * sn_rtc_cycles_per_second / 1000000;
+
+	while (time_before((unsigned long)rtc_time(), end))
+		cpu_relax();
+}
+
 void __init sn_timer_init(void)
 {
	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
	sn2_interpolator.addr = RTC_COUNTER_ADDR;
	register_time_interpolator(&sn2_interpolator);
+
+	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
index adf5db2..fa7f699 100644
--- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -1,7 +1,7 @@
 /*
 *
 *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2005, 2006 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
@@ -22,11 +22,6 @@
 * License along with this program; if not, write the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
- * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
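The new ia64_sn_udelay() in timer.c compares RTC readings with time_before() rather than a plain '<', so the loop still terminates correctly if the free-running counter wraps mid-delay; the trick is comparing the signed difference. A small userspace demonstration of the same idiom (the macro is reimplemented here for illustration, matching the kernel's definition):

    #include <stdio.h>

    /* Compare via signed difference so a counter wrap between 'a' and 'b'
     * still orders them correctly (same idea as the kernel's time_before). */
    #define time_before(a, b)   ((long)(a) - (long)(b) < 0)

    int main(void)
    {
            unsigned long near_wrap = ~0UL - 5;  /* counter about to wrap */
            unsigned long after_wrap = 10;       /* counter after wrapping */

            /* A naive '<' says near_wrap is NOT before after_wrap... */
            printf("naive:       %d\n", near_wrap < after_wrap);              /* 0 */
            /* ...while the signed-difference form orders them correctly. */
            printf("time_before: %d\n", time_before(near_wrap, after_wrap));  /* 1 */
            return 0;
    }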
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 768c21d..99cb28e 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
+#include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <asm/system.h>
@@ -76,12 +77,6 @@ static void tiocx_bus_release(struct device *dev)
	kfree(to_cx_dev(dev));
 }
 
-struct bus_type tiocx_bus_type = {
-	.name = "tiocx",
-	.match = tiocx_match,
-	.uevent = tiocx_uevent,
-};
-
 /**
  * cx_device_match - Find cx_device in the id table.
  * @ids: id table from driver
@@ -148,6 +143,14 @@ static int cx_driver_remove(struct device *dev)
	return 0;
 }
 
+struct bus_type tiocx_bus_type = {
+	.name = "tiocx",
+	.match = tiocx_match,
+	.uevent = tiocx_uevent,
+	.probe = cx_device_probe,
+	.remove = cx_driver_remove,
+};
+
 /**
  * cx_driver_register - Register the driver.
  * @cx_driver: driver table (cx_drv struct) from driver
@@ -161,8 +164,6 @@ int cx_driver_register(struct cx_drv *cx_driver)
 {
	cx_driver->driver.name = cx_driver->name;
	cx_driver->driver.bus = &tiocx_bus_type;
-	cx_driver->driver.probe = cx_device_probe;
-	cx_driver->driver.remove = cx_driver_remove;
 
	return driver_register(&cx_driver->driver);
 }
@@ -244,7 +245,7 @@ static int cx_device_reload(struct cx_dev *cx_dev)
				  cx_dev->bt);
 }
 
-static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
+static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget,
					u64 sn_irq_info,
					int req_irq, nasid_t req_nasid,
					int req_slice)
@@ -283,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
	if ((nasid & 1) == 0)
		return NULL;
 
-	sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL);
+	sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL);
	if (sn_irq_info == NULL)
		return NULL;
 
-	memset(sn_irq_info, 0x0, sn_irq_size);
-
	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
				  req_nasid, slice);
	if (status) {
@@ -301,7 +300,7 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
 
 void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
 {
-	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+	u64 bridge = (u64) sn_irq_info->irq_bridge;
	nasid_t nasid = NASID_GET(bridge);
	int widget;
 
@@ -312,12 +311,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
	}
 }
 
-uint64_t tiocx_dma_addr(uint64_t addr)
+u64 tiocx_dma_addr(u64 addr)
 {
	return PHYS_TO_TIODMA(addr);
 }
 
-uint64_t tiocx_swin_base(int nasid)
+u64 tiocx_swin_base(int nasid)
 {
	return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
 }
@@ -334,8 +333,8 @@ EXPORT_SYMBOL(tiocx_swin_base);
 
 static void tio_conveyor_set(nasid_t nasid, int enable_flag)
 {
-	uint64_t ice_frz;
-	uint64_t disable_cb = (1ull << 61);
+	u64 ice_frz;
+	u64 disable_cb = (1ull << 61);
 
	if (!(nasid & 1))
		return;
@@ -387,7 +386,7 @@ static int is_fpga_tio(int nasid, int *bt)
 
 static int bitstream_loaded(nasid_t nasid)
 {
-	uint64_t cx_credits;
+	u64 cx_credits;
 
	cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
	cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
@@ -403,14 +402,14 @@ static int tiocx_reload(struct cx_dev *cx_dev)
	nasid_t nasid = cx_dev->cx_id.nasid;
 
	if (bitstream_loaded(nasid)) {
-		uint64_t cx_id;
+		u64 cx_id;
		int rv;
 
		rv = ia64_sn_sysctl_tio_clock_reset(nasid);
		if (rv) {
			printk(KERN_ALERT "CX port JTAG reset failed.\n");
		} else {
-			cx_id = *(volatile uint64_t *)
+			cx_id = *(volatile u64 *)
				(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
				 WIDGET_ID);
			part_num = XWIDGET_PART_NUM(cx_id);
@@ -485,7 +484,7 @@ static int __init tiocx_init(void)
	int found_tiocx_device = 0;
 
	if (!ia64_platform_is("sn2"))
-		return -ENODEV;
+		return 0;
 
	bus_register(&tiocx_bus_type);
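The tiocx.c hunks track the driver-core change of this era that moved probe()/remove() dispatch from struct device_driver into struct bus_type: the bus supplies them once, so individual cx drivers stop setting those fields. The resulting registration shape, sketched with the names taken from the hunks above (not a complete driver):

    /* Bus-level dispatch: the bus names probe/remove once... */
    static struct bus_type example_bus_type = {
            .name   = "tiocx",
            .match  = tiocx_match,          /* pairs devices with drivers */
            .probe  = cx_device_probe,      /* bus-level probe dispatch   */
            .remove = cx_driver_remove,     /* bus-level remove dispatch  */
    };

    /* ...so an individual driver only points at its bus. */
    int example_driver_register(struct cx_drv *cx_driver)
    {
            cx_driver->driver.name = cx_driver->name;
            cx_driver->driver.bus = &example_bus_type;
            /* no per-driver .probe/.remove assignments any more */
            return driver_register(&cx_driver->driver);
    }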
mutex_unlock(®istration->mutex); return xpcAlreadyRegistered; } @@ -154,7 +155,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, registration->key = key; registration->func = func; - up(®istration->sema); + mutex_unlock(®istration->mutex); xpc_interface.connect(ch_number); @@ -190,11 +191,11 @@ xpc_disconnect(int ch_number) * figured XPC's users will just turn around and call xpc_disconnect() * again anyways, so we might as well wait, if need be. */ - down(®istration->sema); + mutex_lock(®istration->mutex); /* if !XPC_CHANNEL_REGISTERED(ch_number) */ if (registration->func == NULL) { - up(®istration->sema); + mutex_unlock(®istration->mutex); return; } @@ -208,7 +209,7 @@ xpc_disconnect(int ch_number) xpc_interface.disconnect(ch_number); - up(®istration->sema); + mutex_unlock(®istration->mutex); return; } @@ -250,9 +251,9 @@ xp_init(void) xp_nofault_PIOR_target = SH1_IPI_ACCESS; } - /* initialize the connection registration semaphores */ + /* initialize the connection registration mutex */ for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { - sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */ + mutex_init(&xpc_registrations[ch_number].mutex); } return 0; diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h deleted file mode 100644 index 5483a9f..0000000 --- a/arch/ia64/sn/kernel/xpc.h +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) structures and macros. - */ - -#ifndef _IA64_SN_KERNEL_XPC_H -#define _IA64_SN_KERNEL_XPC_H - - -#include <linux/config.h> -#include <linux/interrupt.h> -#include <linux/sysctl.h> -#include <linux/device.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/sn/bte.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/addrs.h> -#include <asm/sn/mspec.h> -#include <asm/sn/shub_mmr.h> -#include <asm/sn/xp.h> - - -/* - * XPC Version numbers consist of a major and minor number. XPC can always - * talk to versions with same major #, and never talk to versions with a - * different major #. - */ -#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) -#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) -#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) - - -/* - * The next macros define word or bit representations for given - * C-brick nasid in either the SAL provided bit array representing - * nasids in the partition/machine or the AMO_t array used for - * inter-partition initiation communications. - * - * For SN2 machines, C-Bricks are alway even numbered NASIDs. As - * such, some space will be saved by insisting that nasid information - * passed from SAL always be packed for C-Bricks and the - * cross-partition interrupts use the same packing scheme. 
- */ -#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2) -#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1)) -#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \ - (1UL << XPC_NASID_B_INDEX(_n))) -#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) - -#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ -#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ - -/* define the process name of HB checker and the CPU it is pinned to */ -#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" -#define XPC_HB_CHECK_CPU 0 - -/* define the process name of the discovery thread */ -#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" - - -/* - * the reserved page - * - * SAL reserves one page of memory per partition for XPC. Though a full page - * in length (16384 bytes), its starting address is not page aligned, but it - * is cacheline aligned. The reserved page consists of the following: - * - * reserved page header - * - * The first cacheline of the reserved page contains the header - * (struct xpc_rsvd_page). Before SAL initialization has completed, - * SAL has set up the following fields of the reserved page header: - * SAL_signature, SAL_version, partid, and nasids_size. The other - * fields are set up by XPC. (xpc_rsvd_page points to the local - * partition's reserved page.) - * - * part_nasids mask - * mach_nasids mask - * - * SAL also sets up two bitmaps (or masks), one that reflects the actual - * nasids in this partition (part_nasids), and the other that reflects - * the actual nasids in the entire machine (mach_nasids). We're only - * interested in the even numbered nasids (which contain the processors - * and/or memory), so we only need half as many bits to represent the - * nasids. The part_nasids mask is located starting at the first cacheline - * following the reserved page header. The mach_nasids mask follows right - * after the part_nasids mask. The size in bytes of each mask is reflected - * by the reserved page header field 'nasids_size'. (Local partition's - * mask pointers are xpc_part_nasids and xpc_mach_nasids.) - * - * vars - * vars part - * - * Immediately following the mach_nasids mask are the XPC variables - * required by other partitions. First are those that are generic to all - * partitions (vars), followed on the next available cacheline by those - * which are partition specific (vars part). These are setup by XPC. - * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) - * - * Note: Until vars_pa is set, the partition XPC code has not been initialized. 
- */
-struct xpc_rsvd_page {
-	u64 SAL_signature;	/* SAL: unique signature */
-	u64 SAL_version;	/* SAL: version */
-	u8 partid;		/* SAL: partition ID */
-	u8 version;
-	u8 pad1[6];		/* align to next u64 in cacheline */
-	volatile u64 vars_pa;
-	struct timespec stamp;	/* time when reserved page was setup by XPC */
-	u64 pad2[9];		/* align to last u64 in cacheline */
-	u64 nasids_size;	/* SAL: size of each nasid mask in bytes */
-};
-
-#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
-
-#define XPC_SUPPORTS_RP_STAMP(_version) \
-		(_version >= _XPC_VERSION(1,1))
-
-/*
- * compare stamps - the return value is:
- *
- *	< 0,	if stamp1 < stamp2
- *	= 0,	if stamp1 == stamp2
- *	> 0,	if stamp1 > stamp2
- */
-static inline int
-xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
-{
-	int ret;
-
-
-	if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
-		ret = stamp1->tv_nsec - stamp2->tv_nsec;
-	}
-	return ret;
-}
-
-
-/*
- * Define the structures by which XPC variables can be exported to other
- * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
- */
-
-/*
- * The following structure describes the partition generic variables
- * needed by other partitions in order to properly initialize.
- *
- * struct xpc_vars version number also applies to struct xpc_vars_part.
- * Changes to either structure and/or related functionality should be
- * reflected by incrementing either the major or minor version numbers
- * of struct xpc_vars.
- */
-struct xpc_vars {
-	u8 version;
-	u64 heartbeat;
-	u64 heartbeating_to_mask;
-	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
-	int act_nasid;
-	int act_phys_cpuid;
-	u64 vars_part_pa;
-	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
-	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
-};
-
-#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
-
-#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
-		(_version >= _XPC_VERSION(3,1))
-
-
-static inline int
-xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
-{
-	return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
-}
-
-static inline void
-xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask | (1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-							old_mask);
-}
-
-static inline void
-xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask & ~(1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-							old_mask);
-}
-
-
-/*
- * The AMOs page consists of a number of AMO variables which are divided into
- * four groups. The first two groups are used to identify an IRQ's sender.
- * These two groups consist of 64 and 128 AMO variables respectively. The last
- * two groups, consisting of just one AMO variable each, are used to identify
- * the remote partitions that are currently engaged (from the viewpoint of
- * the XPC running on the remote partition).
- */
-#define XPC_NOTIFY_IRQ_AMOS	   0
-#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
-#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
-
-
-/*
- * The following structure describes the per partition specific variables.
- *
- * An array of these structures, one per partition, will be defined. As a
- * partition becomes active XPC will copy the array entry corresponding to
- * itself from that partition. It is desirable that the size of this
- * structure evenly divide into a cacheline, such that none of the entries
- * in this array crosses a cacheline boundary. As it is now, each entry
- * occupies half a cacheline.
- */
-struct xpc_vars_part {
-	volatile u64 magic;
-
-	u64 openclose_args_pa;	/* physical address of open and close args */
-	u64 GPs_pa;		/* physical address of Get/Put values */
-
-	u64 IPI_amo_pa;		/* physical address of IPI AMO_t structure */
-	int IPI_nasid;		/* nasid of where to send IPIs */
-	int IPI_phys_cpuid;	/* physical CPU ID of where to send IPIs */
-
-	u8 nchannels;		/* #of defined channels supported */
-
-	u8 reserved[23];	/* pad to a full 64 bytes */
-};
-
-/*
- * The vars_part MAGIC numbers play a part in the first contact protocol.
- *
- * MAGIC1 indicates that the per partition specific variables for a remote
- * partition have been initialized by this partition.
- *
- * MAGIC2 indicates that this partition has pulled the remote partition's
- * per partition variables that pertain to this partition.
- */
-#define XPC_VP_MAGIC1	0x0053524156435058L  /* 'XPCVARS\0'L (little endian) */
-#define XPC_VP_MAGIC2	0x0073726176435058L  /* 'XPCvars\0'L (little endian) */
-
-
-/* the reserved page sizes and offsets */
-
-#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
-#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars))
-
-#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp)	((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS_PART(_rp)	(struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE)
-
-
-/*
- * Functions registered by add_timer() or called by kernel_thread() only
- * allow for a single 64-bit argument. The following macros can be used to
- * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
- * the passed argument.
- */
-#define XPC_PACK_ARGS(_arg1, _arg2) \
-			((((u64) _arg1) & 0xffffffff) | \
-			((((u64) _arg2) & 0xffffffff) << 32))
-
-#define XPC_UNPACK_ARG1(_args)	(((u64) _args) & 0xffffffff)
-#define XPC_UNPACK_ARG2(_args)	((((u64) _args) >> 32) & 0xffffffff)
-
-
-
-/*
- * Define a Get/Put value pair (pointers) used with a message queue.
- */
-struct xpc_gp {
-	volatile s64 get;	/* Get value */
-	volatile s64 put;	/* Put value */
-};
-
-#define XPC_GP_SIZE \
-		L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
-
-
-
-/*
- * Define a structure that contains arguments associated with opening and
- * closing a channel.
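/*
 * A runnable sketch of the XPC_PACK_ARGS/XPC_UNPACK_ARG* idea above: two
 * 32-bit values squeezed into the single u64 that add_timer() and
 * kernel_thread() callbacks receive.  The sample values are made up.
 */
#include <stdio.h>
#include <stdint.h>

#define PACK_ARGS(_a1, _a2) \
	((((uint64_t)(_a1)) & 0xffffffff) | ((((uint64_t)(_a2)) & 0xffffffff) << 32))
#define UNPACK_ARG1(_args)	(((uint64_t)(_args)) & 0xffffffff)
#define UNPACK_ARG2(_args)	((((uint64_t)(_args)) >> 32) & 0xffffffff)

int main(void)
{
	uint64_t args = PACK_ARGS(7 /* e.g. a partid */, 3 /* e.g. a channel # */);

	printf("arg1=%llu arg2=%llu\n",
	       (unsigned long long)UNPACK_ARG1(args),
	       (unsigned long long)UNPACK_ARG2(args));
	return 0;
}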
- */ -struct xpc_openclose_args { - u16 reason; /* reason why channel is closing */ - u16 msg_size; /* sizeof each message entry */ - u16 remote_nentries; /* #of message entries in remote msg queue */ - u16 local_nentries; /* #of message entries in local msg queue */ - u64 local_msgqueue_pa; /* physical address of local message queue */ -}; - -#define XPC_OPENCLOSE_ARGS_SIZE \ - L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) - - - -/* struct xpc_msg flags */ - -#define XPC_M_DONE 0x01 /* msg has been received/consumed */ -#define XPC_M_READY 0x02 /* msg is ready to be sent */ -#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ - - -#define XPC_MSG_ADDRESS(_payload) \ - ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) - - - -/* - * Defines notify entry. - * - * This is used to notify a message's sender that their message was received - * and consumed by the intended recipient. - */ -struct xpc_notify { - struct semaphore sema; /* notify semaphore */ - volatile u8 type; /* type of notification */ - - /* the following two fields are only used if type == XPC_N_CALL */ - xpc_notify_func func; /* user's notify function */ - void *key; /* pointer to user's key */ -}; - -/* struct xpc_notify type of notification */ - -#define XPC_N_CALL 0x01 /* notify function provided by user */ - - - -/* - * Define the structure that manages all the stuff required by a channel. In - * particular, they are used to manage the messages sent across the channel. - * - * This structure is private to a partition, and is NOT shared across the - * partition boundary. - * - * There is an array of these structures for each remote partition. It is - * allocated at the time a partition becomes active. The array contains one - * of these structures for each potential channel connection to that partition. - * - * Each of these structures manages two message queues (circular buffers). - * They are allocated at the time a channel connection is made. One of - * these message queues (local_msgqueue) holds the locally created messages - * that are destined for the remote partition. The other of these message - * queues (remote_msgqueue) is a locally cached copy of the remote partition's - * own local_msgqueue. - * - * The following is a description of the Get/Put pointers used to manage these - * two message queues. Consider the local_msgqueue to be on one partition - * and the remote_msgqueue to be its cached copy on another partition. A - * description of what each of the lettered areas contains is included. - * - * - * local_msgqueue remote_msgqueue - * - * |/////////| |/////////| - * w_remote_GP.get --> +---------+ |/////////| - * | F | |/////////| - * remote_GP.get --> +---------+ +---------+ <-- local_GP->get - * | | | | - * | | | E | - * | | | | - * | | +---------+ <-- w_local_GP.get - * | B | |/////////| - * | | |////D////| - * | | |/////////| - * | | +---------+ <-- w_remote_GP.put - * | | |////C////| - * local_GP->put --> +---------+ +---------+ <-- remote_GP.put - * | | |/////////| - * | A | |/////////| - * | | |/////////| - * w_local_GP.put --> +---------+ |/////////| - * |/////////| |/////////| - * - * - * ( remote_GP.[get|put] are cached copies of the remote - * partition's local_GP->[get|put], and thus their values can - * lag behind their counterparts on the remote partition. ) - * - * - * A - Messages that have been allocated, but have not yet been sent to the - * remote partition. 
- * - * B - Messages that have been sent, but have not yet been acknowledged by the - * remote partition as having been received. - * - * C - Area that needs to be prepared for the copying of sent messages, by - * the clearing of the message flags of any previously received messages. - * - * D - Area into which sent messages are to be copied from the remote - * partition's local_msgqueue and then delivered to their intended - * recipients. [ To allow for a multi-message copy, another pointer - * (next_msg_to_pull) has been added to keep track of the next message - * number needing to be copied (pulled). It chases after w_remote_GP.put. - * Any messages lying between w_local_GP.get and next_msg_to_pull have - * been copied and are ready to be delivered. ] - * - * E - Messages that have been copied and delivered, but have not yet been - * acknowledged by the recipient as having been received. - * - * F - Messages that have been acknowledged, but XPC has not yet notified the - * sender that the message was received by its intended recipient. - * This is also an area that needs to be prepared for the allocating of - * new messages, by the clearing of the message flags of the acknowledged - * messages. - */ -struct xpc_channel { - partid_t partid; /* ID of remote partition connected */ - spinlock_t lock; /* lock for updating this structure */ - u32 flags; /* general flags */ - - enum xpc_retval reason; /* reason why channel is disconnect'g */ - int reason_line; /* line# disconnect initiated from */ - - u16 number; /* channel # */ - - u16 msg_size; /* sizeof each msg entry */ - u16 local_nentries; /* #of msg entries in local msg queue */ - u16 remote_nentries; /* #of msg entries in remote msg queue*/ - - void *local_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *local_msgqueue; /* local message queue */ - void *remote_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */ - /* local message queue */ - u64 remote_msgqueue_pa; /* phys addr of remote partition's */ - /* local message queue */ - - atomic_t references; /* #of external references to queues */ - - atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ - wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ - - u8 delayed_IPI_flags; /* IPI flags received, but delayed */ - /* action until channel disconnected */ - - /* queue of msg senders who want to be notified when msg received */ - - atomic_t n_to_notify; /* #of msg senders to notify */ - struct xpc_notify *notify_queue;/* notify queue for messages sent */ - - xpc_channel_func func; /* user's channel function */ - void *key; /* pointer to user's key */ - - struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ - struct semaphore wdisconnect_sema; /* wait for channel disconnect */ - - struct xpc_openclose_args *local_openclose_args; /* args passed on */ - /* opening or closing of channel */ - - /* various flavors of local and remote Get/Put values */ - - struct xpc_gp *local_GP; /* local Get/Put values */ - struct xpc_gp remote_GP; /* remote Get/Put values */ - struct xpc_gp w_local_GP; /* working local Get/Put values */ - struct xpc_gp w_remote_GP; /* working remote Get/Put values */ - s64 next_msg_to_pull; /* Put value of next msg to pull */ - - /* kthread management related fields */ - -// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps -// >>> allow the assigned limit be unbounded and let the idle limit be dynamic -// 
>>> dependent on activity over the last interval of time - atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ - u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ - atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ - u32 kthreads_idle_limit; /* limit on #of kthreads idle */ - atomic_t kthreads_active; /* #of kthreads actively working */ - // >>> following field is temporary - u32 kthreads_created; /* total #of kthreads created */ - - wait_queue_head_t idle_wq; /* idle kthread wait queue */ - -} ____cacheline_aligned; - - -/* struct xpc_channel flags */ - -#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ - -#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ -#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ -#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ -#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ - -#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ -#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ - -#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ - -#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ -#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ - - - -/* - * Manages channels on a partition basis. There is one of these structures - * for each partition (a partition will never utilize the structure that - * represents itself). 
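/*
 * Hedged sketch of how the XPC_C_* flag bits above combine; in the driver
 * proper such tests are made while holding ch->lock (see, for instance, the
 * XPC_C_DISCONNECTING check in xpc_connect_channel() later in this patch).
 * The helper below is illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define C_CONNECTED	0x00000080
#define C_DISCONNECTING	0x00004000

static int channel_usable(uint32_t flags)
{
	/* usable only once connected and not already tearing down */
	return (flags & C_CONNECTED) && !(flags & C_DISCONNECTING);
}

int main(void)
{
	printf("%d\n", channel_usable(C_CONNECTED));			/* 1 */
	printf("%d\n", channel_usable(C_CONNECTED | C_DISCONNECTING));	/* 0 */
	return 0;
}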
- */ -struct xpc_partition { - - /* XPC HB infrastructure */ - - u8 remote_rp_version; /* version# of partition's rsvd pg */ - struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */ - u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ - u64 remote_vars_pa; /* phys addr of partition's vars */ - u64 remote_vars_part_pa; /* phys addr of partition's vars part */ - u64 last_heartbeat; /* HB at last read */ - u64 remote_amos_page_pa; /* phys addr of partition's amos page */ - int remote_act_nasid; /* active part's act/deact nasid */ - int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */ - u32 act_IRQ_rcvd; /* IRQs since activation */ - spinlock_t act_lock; /* protect updating of act_state */ - u8 act_state; /* from XPC HB viewpoint */ - u8 remote_vars_version; /* version# of partition's vars */ - enum xpc_retval reason; /* reason partition is deactivating */ - int reason_line; /* line# deactivation initiated from */ - int reactivate_nasid; /* nasid in partition to reactivate */ - - unsigned long disengage_request_timeout; /* timeout in jiffies */ - struct timer_list disengage_request_timer; - - - /* XPC infrastructure referencing and teardown control */ - - volatile u8 setup_state; /* infrastructure setup state */ - wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ - atomic_t references; /* #of references to infrastructure */ - - - /* - * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN - * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION - * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE - * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.) - */ - - - u8 nchannels; /* #of defined channels supported */ - atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ - atomic_t nchannels_engaged;/* #of channels engaged with remote part */ - struct xpc_channel *channels;/* array of channel structures */ - - void *local_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *local_GPs; /* local Get/Put values */ - void *remote_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */ - /* values */ - u64 remote_GPs_pa; /* phys address of remote partition's local */ - /* Get/Put values */ - - - /* fields used to pass args when opening or closing a channel */ - - void *local_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *local_openclose_args; /* local's args */ - void *remote_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ - /* args */ - u64 remote_openclose_args_pa; /* phys addr of remote's args */ - - - /* IPI sending, receiving and handling related fields */ - - int remote_IPI_nasid; /* nasid of where to send IPIs */ - int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */ - AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */ - - AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */ - u64 local_IPI_amo; /* IPI amo flags yet to be handled */ - char IPI_owner[8]; /* IPI owner's name */ - struct timer_list dropped_IPI_timer; /* dropped IPI timer */ - - spinlock_t IPI_lock; /* IPI handler lock */ - - - /* channel manager related fields */ - - atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ - wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ - -} ____cacheline_aligned; - - -/* struct 
xpc_partition act_state values (for XPC HB) */ - -#define XPC_P_INACTIVE 0x00 /* partition is not active */ -#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ -#define XPC_P_ACTIVATING 0x02 /* activation thread started */ -#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ -#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ - - -#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ - xpc_deactivate_partition(__LINE__, (_p), (_reason)) - - -/* struct xpc_partition setup_state values */ - -#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ -#define XPC_P_SETUP 0x01 /* infrastructure is setup */ -#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ -#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ - - - -/* - * struct xpc_partition IPI_timer #of seconds to wait before checking for - * dropped IPIs. These occur whenever an IPI amo write doesn't complete until - * after the IPI was received. - */ -#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) - - -/* number of seconds to wait for other partitions to disengage */ -#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 - -/* interval in seconds to print 'waiting disengagement' messages */ -#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 - - -#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) - - - -/* found in xp_main.c */ -extern struct xpc_registration xpc_registrations[]; - - -/* found in xpc_main.c */ -extern struct device *xpc_part; -extern struct device *xpc_chan; -extern int xpc_disengage_request_timelimit; -extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); -extern void xpc_dropped_IPI_check(struct xpc_partition *); -extern void xpc_activate_partition(struct xpc_partition *); -extern void xpc_activate_kthreads(struct xpc_channel *, int); -extern void xpc_create_kthreads(struct xpc_channel *, int); -extern void xpc_disconnect_wait(int); - - -/* found in xpc_partition.c */ -extern int xpc_exiting; -extern struct xpc_vars *xpc_vars; -extern struct xpc_rsvd_page *xpc_rsvd_page; -extern struct xpc_vars_part *xpc_vars_part; -extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; -extern char xpc_remote_copy_buffer[]; -extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); -extern void xpc_allow_IPI_ops(void); -extern void xpc_restrict_IPI_ops(void); -extern int xpc_identify_act_IRQ_sender(void); -extern int xpc_partition_disengaged(struct xpc_partition *); -extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); -extern void xpc_mark_partition_inactive(struct xpc_partition *); -extern void xpc_discovery(void); -extern void xpc_check_remote_hb(void); -extern void xpc_deactivate_partition(const int, struct xpc_partition *, - enum xpc_retval); -extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *); - - -/* found in xpc_channel.c */ -extern void xpc_initiate_connect(int); -extern void xpc_initiate_disconnect(int); -extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **); -extern enum xpc_retval xpc_initiate_send(partid_t, int, void *); -extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *, - xpc_notify_func, void *); -extern void xpc_initiate_received(partid_t, int, void *); -extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *); -extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *); -extern void xpc_process_channel_activity(struct xpc_partition *); -extern void xpc_connected_callout(struct xpc_channel *); -extern void 
xpc_deliver_msg(struct xpc_channel *); -extern void xpc_disconnect_channel(const int, struct xpc_channel *, - enum xpc_retval, unsigned long *); -extern void xpc_disconnecting_callout(struct xpc_channel *); -extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval); -extern void xpc_teardown_infrastructure(struct xpc_partition *); - - - -static inline void -xpc_wakeup_channel_mgr(struct xpc_partition *part) -{ - if (atomic_inc_return(&part->channel_mgr_requests) == 1) { - wake_up(&part->channel_mgr_wq); - } -} - - - -/* - * These next two inlines are used to keep us from tearing down a channel's - * msg queues while a thread may be referencing them. - */ -static inline void -xpc_msgqueue_ref(struct xpc_channel *ch) -{ - atomic_inc(&ch->references); -} - -static inline void -xpc_msgqueue_deref(struct xpc_channel *ch) -{ - s32 refs = atomic_dec_return(&ch->references); - - DBUG_ON(refs < 0); - if (refs == 0) { - xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); - } -} - - - -#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ - xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) - - -/* - * These two inlines are used to keep us from tearing down a partition's - * setup infrastructure while a thread may be referencing it. - */ -static inline void -xpc_part_deref(struct xpc_partition *part) -{ - s32 refs = atomic_dec_return(&part->references); - - - DBUG_ON(refs < 0); - if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) { - wake_up(&part->teardown_wq); - } -} - -static inline int -xpc_part_ref(struct xpc_partition *part) -{ - int setup; - - - atomic_inc(&part->references); - setup = (part->setup_state == XPC_P_SETUP); - if (!setup) { - xpc_part_deref(part); - } - return setup; -} - - - -/* - * The following macro is to be used for the setting of the reason and - * reason_line fields in both the struct xpc_channel and struct xpc_partition - * structures. - */ -#define XPC_SET_REASON(_p, _reason, _line) \ - { \ - (_p)->reason = _reason; \ - (_p)->reason_line = _line; \ - } - - - -/* - * This next set of inlines are used to keep track of when a partition is - * potentially engaged in accessing memory belonging to another partition. - */ - -static inline void -xpc_mark_partition_engaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. 
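/*
 * A minimal userspace rendering (illustration, not kernel code) of the
 * xpc_part_ref()/xpc_part_deref() pattern above: bump the count, then
 * re-check setup_state so a racing teardown is never pinned open.
 */
#include <stdio.h>

#define P_SETUP		0x01
#define P_WTEARDOWN	0x02

struct part { int references; int setup_state; };	/* atomics in the real code */

static int part_ref(struct part *p)
{
	p->references++;			/* atomic_inc() */
	if (p->setup_state != P_SETUP) {
		p->references--;		/* xpc_part_deref() */
		return 0;			/* lost the race with teardown */
	}
	return 1;
}

int main(void)
{
	struct part p = { 0, P_SETUP };

	if (part_ref(&p)) {
		puts("infrastructure pinned; safe to touch channels/GPs");
		p.references--;			/* deref when done */
	}
	return 0;
}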
- */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_mark_partition_disengaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_request_partition_disengage(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_cancel_partition_disengage_request(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. 
- */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline u64 -xpc_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline u64 -xpc_partition_disengage_requested(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline void -xpc_clear_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~partid_mask); -} - -static inline void -xpc_clear_partition_disengage_request(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~partid_mask); -} - - - -/* - * The following set of macros and inlines are used for the sending and - * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, - * one that is associated with partition activity (SGI_XPC_ACTIVATE) and - * the other that is associated with channel activity (SGI_XPC_NOTIFY). - */ - -static inline u64 -xpc_IPI_receive(AMO_t *amo) -{ - return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR); -} - - -static inline enum xpc_retval -xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector) -{ - int ret = 0; - unsigned long irq_flags; - - - local_irq_save(irq_flags); - - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag); - sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - return ((ret == 0) ? xpcSuccess : xpcPioReadError); -} - - -/* - * IPIs associated with SGI_XPC_ACTIVATE IRQ. - */ - -/* - * Flag the appropriate AMO variable and send an IPI to the specified node. 
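/*
 * Sketch of consuming the bitmask that xpc_partition_engaged() returns:
 * pass -1UL to ask about every partition, then walk the set bits by partid,
 * as the xpc_do_exit()/xpc_die_disengage() loops do later in this patch.
 * The sample mask is made up.
 */
#include <stdio.h>
#include <stdint.h>

#define XP_MAX_PARTITIONS 64

int main(void)
{
	uint64_t engaged = 0x14;	/* pretend partitions 2 and 4 are engaged */
	int partid;

	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
		if (engaged & (1ULL << partid))
			printf("partition %d still engaged\n", partid);
	}
	return 0;
}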
- */ -static inline void -xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid, - int to_phys_cpuid) -{ - int w_index = XPC_NASID_W_INDEX(from_nasid); - int b_index = XPC_NASID_B_INDEX(from_nasid); - AMO_t *amos = (AMO_t *) __va(amos_page_pa + - (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t))); - - - (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, - to_phys_cpuid, SGI_XPC_ACTIVATE); -} - -static inline void -xpc_IPI_send_activate(struct xpc_vars *vars) -{ - xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0), - vars->act_nasid, vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_activated(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, part->remote_act_phys_cpuid); -} - -static inline void -xpc_IPI_send_reactivate(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid, - xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_disengage(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, part->remote_act_phys_cpuid); -} - - -/* - * IPIs associated with SGI_XPC_NOTIFY IRQ. - */ - -/* - * Send an IPI to the remote partition that is associated with the - * specified channel. - */ -#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \ - xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f) - -static inline void -xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string, - unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - enum xpc_retval ret; - - - if (likely(part->act_state != XPC_P_DEACTIVATING)) { - ret = xpc_IPI_send(part->remote_IPI_amo_va, - (u64) ipi_flag << (ch->number * 8), - part->remote_IPI_nasid, - part->remote_IPI_phys_cpuid, - SGI_XPC_NOTIFY); - dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", - ipi_flag_string, ch->partid, ch->number, ret); - if (unlikely(ret != xpcSuccess)) { - if (irq_flags != NULL) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - } - XPC_DEACTIVATE_PARTITION(part, ret); - if (irq_flags != NULL) { - spin_lock_irqsave(&ch->lock, *irq_flags); - } - } - } -} - - -/* - * Make it look like the remote partition, which is associated with the - * specified channel, sent us an IPI. This faked IPI will be handled - * by xpc_dropped_IPI_check(). - */ -#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \ - xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f) - -static inline void -xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, - char *ipi_flag_string) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - - - FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable), - FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8))); - dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", - ipi_flag_string, ch->partid, ch->number); -} - - -/* - * The sending and receiving of IPIs includes the setting of an AMO variable - * to indicate the reason the IPI was sent. The 64-bit variable is divided - * up into eight bytes, ordered from right to left. Byte zero pertains to - * channel 0, byte one to channel 1, and so on. Each byte is described by - * the following IPI flags. 
- */ - -#define XPC_IPI_CLOSEREQUEST 0x01 -#define XPC_IPI_CLOSEREPLY 0x02 -#define XPC_IPI_OPENREQUEST 0x04 -#define XPC_IPI_OPENREPLY 0x08 -#define XPC_IPI_MSGREQUEST 0x10 - - -/* given an AMO variable and a channel#, get its associated IPI flags */ -#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) -#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8)) - -#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f) -#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) - - -static inline void -xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->reason = ch->reason; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->msg_size = ch->msg_size; - args->local_nentries = ch->local_nentries; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->remote_nentries = ch->remote_nentries; - args->local_nentries = ch->local_nentries; - args->local_msgqueue_pa = __pa(ch->local_msgqueue); - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL); -} - -static inline void -xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST); -} - - -/* - * Memory for XPC's AMO variables is allocated by the MSPEC driver. These - * pages are located in the lowest granule. The lowest granule uses 4k pages - * for cached references and an alternate TLB handler to never provide a - * cacheable mapping for the entire region. This will prevent speculative - * reading of cached copies of our lines from being issued which will cause - * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 - * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an - * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition - * activation and 2 AMO variables for partition deactivation. 
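/*
 * Runnable sketch of the byte-per-channel AMO encoding described above:
 * channel N's flags live in byte N of the 64-bit AMO value, which is why
 * the 0x0f0f... and 0x1010... masks can test all eight channels at once.
 */
#include <stdio.h>
#include <stdint.h>

#define IPI_OPENREQUEST		0x04
#define GET_IPI_FLAGS(_amo, _c)		((uint8_t)(((_amo) >> ((_c) * 8)) & 0xff))
#define SET_IPI_FLAGS(_amo, _c, _f)	((_amo) |= ((uint64_t)(_f) << ((_c) * 8)))

int main(void)
{
	uint64_t amo = 0;

	SET_IPI_FLAGS(amo, 3, IPI_OPENREQUEST);		/* flag channel 3 */
	printf("amo=0x%016llx, channel 3 flags=0x%02x\n",
	       (unsigned long long)amo, GET_IPI_FLAGS(amo, 3));
	printf("any open/close flag set? %s\n",
	       (amo & 0x0f0f0f0f0f0f0f0fULL) ? "yes" : "no");
	return 0;
}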
- */
-static inline AMO_t *
-xpc_IPI_init(int index)
-{
-	AMO_t *amo = xpc_vars->amos_page + index;
-
-
-	(void) xpc_IPI_receive(amo);	/* clear AMO variable */
-	return amo;
-}
-
-
-
-static inline enum xpc_retval
-xpc_map_bte_errors(bte_result_t error)
-{
-	switch (error) {
-	case BTE_SUCCESS:	return xpcSuccess;
-	case BTEFAIL_DIR:	return xpcBteDirectoryError;
-	case BTEFAIL_POISON:	return xpcBtePoisonError;
-	case BTEFAIL_WERR:	return xpcBteWriteError;
-	case BTEFAIL_ACCESS:	return xpcBteAccessError;
-	case BTEFAIL_PWERR:	return xpcBtePWriteError;
-	case BTEFAIL_PRERR:	return xpcBtePReadError;
-	case BTEFAIL_TOUT:	return xpcBteTimeOutError;
-	case BTEFAIL_XTERR:	return xpcBteXtalkError;
-	case BTEFAIL_NOTAVAIL:	return xpcBteNotAvailable;
-	default:		return xpcBteUnmappedError;
-	}
-}
-
-
-
-static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kmalloc will give us cacheline aligned memory by default */
-	*base = kmalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kmalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static inline void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
-	u64 IPI_amo;
-	unsigned long irq_flags;
-
-
-	IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
-	if (IPI_amo == 0) {
-		return;
-	}
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	part->local_IPI_amo |= IPI_amo;
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
-		XPC_PARTID(part), IPI_amo);
-
-	xpc_wakeup_channel_mgr(part);
-}
-
-
-#endif /* _IA64_SN_KERNEL_XPC_H */
-
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index abf4fc2..cdf6856 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -3,7 +3,7 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
@@ -22,9 +22,11 @@
 #include <linux/cache.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
@@ -56,8 +58,8 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
		atomic_set(&ch->n_to_notify, 0);
		spin_lock_init(&ch->lock);
-		sema_init(&ch->msg_to_pull_sema, 1);	/* mutex */
-		sema_init(&ch->wdisconnect_sema, 0);	/* event wait */
+		mutex_init(&ch->msg_to_pull_mutex);
+		init_completion(&ch->wdisconnect_wait);
		atomic_set(&ch->n_on_msg_allocate_wq, 0);
		init_waitqueue_head(&ch->msg_allocate_wq);
@@ -445,7 +447,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
		nbytes = nentries * ch->msg_size;
		ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-					(GFP_KERNEL | GFP_DMA),
+					GFP_KERNEL,
					&ch->local_msgqueue_base);
		if (ch->local_msgqueue == NULL) {
			continue;
@@ -453,7 +455,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
		memset(ch->local_msgqueue, 0, nbytes);
		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA));
+		ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
		if (ch->notify_queue == NULL) {
			kfree(ch->local_msgqueue_base);
			ch->local_msgqueue = NULL;
@@ -500,7 +502,7 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
		nbytes = nentries * ch->msg_size;
		ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-					(GFP_KERNEL | GFP_DMA),
+					GFP_KERNEL,
					&ch->remote_msgqueue_base);
		if (ch->remote_msgqueue == NULL) {
			continue;
@@ -534,7 +536,6 @@ static enum xpc_retval
 xpc_allocate_msgqueues(struct xpc_channel *ch)
 {
	unsigned long irq_flags;
-	int i;
	enum xpc_retval ret;
@@ -552,11 +553,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
		return ret;
	}
-	for (i = 0; i < ch->local_nentries; i++) {
-		/* use a semaphore as an event wait queue */
-		sema_init(&ch->notify_queue[i].sema, 0);
-	}
-
	spin_lock_irqsave(&ch->lock, irq_flags);
	ch->flags |= XPC_C_SETUP;
	spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -742,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	/* make sure all activity has settled down first */
-	if (atomic_read(&ch->references) > 0) {
+	if (atomic_read(&ch->references) > 0 ||
+	    ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+	    !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
		return;
	}
	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
@@ -779,6 +777,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	/* both sides are disconnected now */
+	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		xpc_disconnect_callout(ch, xpcDisconnected);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+
	/* it's now safe to free the channel's message queues */
	xpc_free_msgqueues(ch);
@@ -793,10 +797,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	}
	if (ch->flags & XPC_C_WDISCONNECT) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		up(&ch->wdisconnect_sema);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-
+		/* we won't lose the CPU since we're holding ch->lock */
+		complete(&ch->wdisconnect_wait);
	} else if (ch->delayed_IPI_flags) {
		if (part->act_state != XPC_P_DEACTIVATING) {
			/* time to take action on any delayed IPI flags */
@@ -1086,12 +1088,12 @@ xpc_connect_channel(struct xpc_channel *ch)
	struct xpc_registration *registration =
					&xpc_registrations[ch->number];
-	if (down_trylock(&registration->sema) != 0) {
+	if (mutex_trylock(&registration->mutex) == 0) {
		return xpcRetry;
	}
	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		return xpcUnregistered;
	}
@@ -1102,7 +1104,7 @@ xpc_connect_channel(struct xpc_channel *ch)
	if (ch->flags & XPC_C_DISCONNECTING) {
		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		return ch->reason;
	}
@@ -1134,7 +1136,7 @@ xpc_connect_channel(struct xpc_channel *ch)
		 * channel lock be locked and will unlock and relock
		 * the channel lock as needed.
		 */
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, &irq_flags);
		spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1149,7 +1151,7 @@ xpc_connect_channel(struct xpc_channel *ch)
		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
	}
-	up(&registration->sema);
+	mutex_unlock(&registration->mutex);
	/* initiate the connection */
@@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
				"delivered=%d, partid=%d, channel=%d\n",
				nmsgs_sent, ch->partid, ch->number);
-			if (ch->flags & XPC_C_CONNECTCALLOUT) {
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
				xpc_activate_kthreads(ch, nmsgs_sent);
			}
		}
@@ -1645,7 +1647,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 void
-xpc_disconnecting_callout(struct xpc_channel *ch)
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
 {
	/*
	 * Let the channel's registerer know that the channel is being
	 * disconnected.
	 */
	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
-			" partid=%d, channel=%d\n", ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
-		ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
-								ch->key);
+		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
-		dev_dbg(xpc_chan, "ch->func() returned, reason="
-			"xpcDisconnecting, partid=%d, channel=%d\n",
-			ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
	}
 }
@@ -2085,7 +2085,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
	enum xpc_retval ret;
-	if (down_interruptible(&ch->msg_to_pull_sema) != 0) {
+	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
		/* we were interrupted by a signal */
		return NULL;
	}
@@ -2121,7 +2121,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
		XPC_DEACTIVATE_PARTITION(part, ret);
-		up(&ch->msg_to_pull_sema);
+		mutex_unlock(&ch->msg_to_pull_mutex);
		return NULL;
	}
@@ -2130,7 +2130,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
		ch->next_msg_to_pull += nmsgs;
	}
-	up(&ch->msg_to_pull_sema);
+	mutex_unlock(&ch->msg_to_pull_mutex);
	/* return the message we were looking for */
	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index b617236..8cbf164 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -3,7 +3,7 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/ @@ -55,11 +55,12 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/completion.h> #include <asm/sn/intr.h> #include <asm/sn/sn_sal.h> #include <asm/kdebug.h> #include <asm/uaccess.h> -#include "xpc.h" +#include <asm/sn/xpc.h> /* define two XPC debug device structures to be used with dev_dbg() et al */ @@ -82,6 +83,9 @@ struct device *xpc_part = &xpc_part_dbg_subname; struct device *xpc_chan = &xpc_chan_dbg_subname; +static int xpc_kdebug_ignore; + + /* systune related variables for /proc/sys directories */ static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; @@ -162,6 +166,8 @@ static ctl_table xpc_sys_dir[] = { }; static struct ctl_table_header *xpc_sysctl; +/* non-zero if any remote partition disengage request was timed out */ +int xpc_disengage_request_timedout; /* #of IRQs received */ static atomic_t xpc_act_IRQ_rcvd; @@ -172,10 +178,10 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); static unsigned long xpc_hb_check_timeout; /* notification that the xpc_hb_checker thread has exited */ -static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); +static DECLARE_COMPLETION(xpc_hb_checker_exited); /* notification that the xpc_discovery thread has exited */ -static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); +static DECLARE_COMPLETION(xpc_discovery_exited); static struct timer_list xpc_hb_timer; @@ -316,7 +322,7 @@ xpc_hb_checker(void *ignore) /* mark this thread as having exited */ - up(&xpc_hb_checker_exited); + complete(&xpc_hb_checker_exited); return 0; } @@ -336,7 +342,7 @@ xpc_initiate_discovery(void *ignore) dev_dbg(xpc_part, "discovery thread is exiting\n"); /* mark this thread as having exited */ - up(&xpc_discovery_exited); + complete(&xpc_discovery_exited); return 0; } @@ -569,18 +575,21 @@ xpc_activate_partition(struct xpc_partition *part) spin_lock_irqsave(&part->act_lock, irq_flags); - pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); - DBUG_ON(part->act_state != XPC_P_INACTIVE); - if (pid > 0) { - part->act_state = XPC_P_ACTIVATION_REQ; - XPC_SET_REASON(part, xpcCloneKThread, __LINE__); - } else { - XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); - } + part->act_state = XPC_P_ACTIVATION_REQ; + XPC_SET_REASON(part, xpcCloneKThread, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); + + pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); + + if (unlikely(pid <= 0)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } } @@ -741,12 +750,16 @@ xpc_daemonize_kthread(void *args) /* let registerer know that connection has been established */ spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { - ch->flags |= XPC_C_CONNECTCALLOUT; + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_connected_callout(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + /* * It is possible that while the callout was being * made that the remote partition sent some messages. 
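/*
 * Worth noting for the semaphore-to-mutex conversions throughout this
 * patch: the trylock success convention inverts.  down_trylock() returns
 * 0 when the semaphore WAS taken, while mutex_trylock() returns 1 on
 * success -- hence "!= 0" became "== 0" in xpc_connect_channel().  The
 * stubs below only mimic those return conventions; they are not the
 * kernel primitives.
 */
#include <stdio.h>

static int down_trylock_stub(int *locked)	/* 0 == acquired */
{
	if (*locked) return 1;
	*locked = 1;
	return 0;
}

static int mutex_trylock_stub(int *locked)	/* 1 == acquired */
{
	if (*locked) return 0;
	*locked = 1;
	return 1;
}

int main(void)
{
	int sem = 0, mtx = 0;

	if (down_trylock_stub(&sem) != 0)	/* old-style test */
		puts("semaphore contended");
	if (mutex_trylock_stub(&mtx) == 0)	/* new-style test */
		puts("mutex contended");
	puts("both acquired");
	return 0;
}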
@@ -768,15 +781,17 @@ xpc_daemonize_kthread(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0) { spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTCALLOUT) && - !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTCALLOUT; + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); - xpc_disconnecting_callout(ch); - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_disconnect_callout(ch, xpcDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; } + spin_unlock_irqrestore(&ch->lock, irq_flags); if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); @@ -888,7 +903,7 @@ xpc_disconnect_wait(int ch_number) continue; } - (void) down(&ch->wdisconnect_sema); + wait_for_completion(&ch->wdisconnect_wait); spin_lock_irqsave(&ch->lock, irq_flags); DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); @@ -921,9 +936,9 @@ static void xpc_do_exit(enum xpc_retval reason) { partid_t partid; - int active_part_count; + int active_part_count, printed_waiting_msg = 0; struct xpc_partition *part; - unsigned long printmsg_time; + unsigned long printmsg_time, disengage_request_timeout = 0; /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ @@ -941,10 +956,10 @@ xpc_do_exit(enum xpc_retval reason) free_irq(SGI_XPC_ACTIVATE, NULL); /* wait for the discovery thread to exit */ - down(&xpc_discovery_exited); + wait_for_completion(&xpc_discovery_exited); /* wait for the heartbeat checker thread to exit */ - down(&xpc_hb_checker_exited); + wait_for_completion(&xpc_hb_checker_exited); /* sleep for a 1/3 of a second or so */ @@ -953,7 +968,8 @@ xpc_do_exit(enum xpc_retval reason) /* wait for all partitions to become inactive */ - printmsg_time = jiffies; + printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_request_timedout = 0; do { active_part_count = 0; @@ -969,20 +985,39 @@ xpc_do_exit(enum xpc_retval reason) active_part_count++; XPC_DEACTIVATE_PARTITION(part, reason); - } - if (active_part_count == 0) { - break; + if (part->disengage_request_timeout > + disengage_request_timeout) { + disengage_request_timeout = + part->disengage_request_timeout; + } } - if (jiffies >= printmsg_time) { - dev_info(xpc_part, "waiting for partitions to " - "deactivate/disengage, active count=%d, remote " - "engaged=0x%lx\n", active_part_count, - xpc_partition_engaged(1UL << partid)); - - printmsg_time = jiffies + + if (xpc_partition_engaged(-1UL)) { + if (time_after(jiffies, printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to disengage, timeout in " + "%ld seconds\n", + (disengage_request_timeout - jiffies) + / HZ); + printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to disengage\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_request_timedout) { + dev_info(xpc_part, "all partitions have " + "disengaged\n"); + } + break; } /* sleep for a 1/3 of a second or so */ @@ -1000,11 +1035,13 @@ xpc_do_exit(enum xpc_retval reason) del_timer_sync(&xpc_hb_timer); DBUG_ON(xpc_vars->heartbeating_to_mask != 0); - /* take ourselves off of the reboot_notifier_list */ - (void) 
unregister_reboot_notifier(&xpc_reboot_notifier); + if (reason == xpcUnloading) { + /* take ourselves off of the reboot_notifier_list */ + (void) unregister_reboot_notifier(&xpc_reboot_notifier); - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); + /* take ourselves off of the die_notifier list */ + (void) unregister_die_notifier(&xpc_die_notifier); + } /* close down protections for IPI operations */ xpc_restrict_IPI_ops(); @@ -1020,7 +1057,35 @@ xpc_do_exit(enum xpc_retval reason) /* - * Called when the system is about to be either restarted or halted. + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xpc_retval reason; + + + switch (event) { + case SYS_RESTART: + reason = xpcSystemReboot; + break; + case SYS_HALT: + reason = xpcSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpcSystemPoweroff; + break; + default: + reason = xpcSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + + +/* + * Notify other partitions to disengage from all references to our memory. */ static void xpc_die_disengage(void) @@ -1028,7 +1093,7 @@ xpc_die_disengage(void) struct xpc_partition *part; partid_t partid; unsigned long engaged; - long time, print_time, disengage_request_timeout; + long time, printmsg_time, disengage_request_timeout; /* keep xpc_hb_checker thread from doing anything (just in case) */ @@ -1055,57 +1120,53 @@ xpc_die_disengage(void) } } - print_time = rtc_time(); - disengage_request_timeout = print_time + + time = rtc_time(); + printmsg_time = time + + (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); + disengage_request_timeout = time + (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); /* wait for all other partitions to disengage from us */ - while ((engaged = xpc_partition_engaged(-1UL)) && - (time = rtc_time()) < disengage_request_timeout) { + while (1) { + engaged = xpc_partition_engaged(-1UL); + if (!engaged) { + dev_info(xpc_part, "all partitions have disengaged\n"); + break; + } - if (time >= print_time) { + time = rtc_time(); + if (time >= disengage_request_timeout) { + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + if (engaged & (1UL << partid)) { + dev_info(xpc_part, "disengage from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (time >= printmsg_time) { dev_info(xpc_part, "waiting for remote partitions to " - "disengage, engaged=0x%lx\n", engaged); - print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL * + "disengage, timeout in %ld seconds\n", + (disengage_request_timeout - time) / + sn_rtc_cycles_per_second); + printmsg_time = time + + (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); } } - dev_info(xpc_part, "finished waiting for remote partitions to " - "disengage, engaged=0x%lx\n", engaged); } /* - * This function is called when the system is being rebooted. - */ -static int -xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) -{ - enum xpc_retval reason; - - - switch (event) { - case SYS_RESTART: - reason = xpcSystemReboot; - break; - case SYS_HALT: - reason = xpcSystemHalt; - break; - case SYS_POWER_OFF: - reason = xpcSystemPoweroff; - break; - default: - reason = xpcSystemGoingDown; - } - - xpc_do_exit(reason); - return NOTIFY_DONE; -} - - -/* - * This function is called when the system is being rebooted. 
+ * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. */ static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) @@ -1115,11 +1176,25 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) case DIE_MACHINE_HALT: xpc_die_disengage(); break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) { + break; + } + /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: xpc_vars->heartbeat++; xpc_vars->heartbeat_offline = 1; break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? */ + if (!xpc_kdebug_ignore) { + break; + } + /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: xpc_vars->heartbeat++; @@ -1302,7 +1377,7 @@ xpc_init(void) dev_err(xpc_part, "failed while forking discovery thread\n"); /* mark this new thread as a non-starter */ - up(&xpc_discovery_exited); + complete(&xpc_discovery_exited); xpc_do_exit(xpcUnloading); return -EBUSY; @@ -1344,3 +1419,7 @@ module_param(xpc_disengage_request_timelimit, int, 0); MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " "for disengage request to complete."); +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); + diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index cdd6431..88a730e 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. */ @@ -28,7 +28,7 @@ #include <asm/sn/sn_sal.h> #include <asm/sn/nodepda.h> #include <asm/sn/addrs.h> -#include "xpc.h" +#include <asm/sn/xpc.h> /* XPC is exiting flag */ @@ -771,7 +771,8 @@ xpc_identify_act_IRQ_req(int nasid) } } - if (!xpc_partition_disengaged(part)) { + if (part->disengage_request_timeout > 0 && + !xpc_partition_disengaged(part)) { /* still waiting on other side to disengage from us */ return; } @@ -873,6 +874,9 @@ xpc_partition_disengaged(struct xpc_partition *part) * request in a timely fashion, so assume it's dead. */ + dev_info(xpc_part, "disengage from remote partition %d " + "timed out\n", partid); + xpc_disengage_request_timedout = 1; xpc_clear_partition_engaged(1UL << partid); disengaged = 1; } |
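/*
 * A final aside on the exit-path change earlier in this patch: the wait
 * loop switched from a raw "jiffies >= printmsg_time" comparison to
 * time_after(), which stays correct when the jiffies counter wraps because
 * it compares via signed subtraction.  A userspace rendering of the kernel
 * macro, with made-up counter values:
 */
#include <stdio.h>

#define time_after(_a, _b)	((long)((_b) - (_a)) < 0)

int main(void)
{
	unsigned long jiffies  = 5;			/* counter just wrapped */
	unsigned long deadline = (unsigned long)-10;	/* set shortly before the wrap */

	/* a plain ">=" would say the deadline is far away; time_after() is right */
	printf("expired=%d\n", time_after(jiffies, deadline));	/* prints 1 */
	return 0;
}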