Diffstat (limited to 'arch/ia64/sn/kernel')
-rw-r--r--  arch/ia64/sn/kernel/Makefile               |    2
-rw-r--r--  arch/ia64/sn/kernel/bte.c                  |   17
-rw-r--r--  arch/ia64/sn/kernel/bte_error.c            |   58
-rw-r--r--  arch/ia64/sn/kernel/huberror.c             |    9
-rw-r--r--  arch/ia64/sn/kernel/io_init.c              |  223
-rw-r--r--  arch/ia64/sn/kernel/irq.c                  |   35
-rw-r--r--  arch/ia64/sn/kernel/klconflib.c            |   29
-rw-r--r--  arch/ia64/sn/kernel/mca.c                  |   12
-rw-r--r--  arch/ia64/sn/kernel/setup.c                |   77
-rw-r--r--  arch/ia64/sn/kernel/sn2/Makefile           |    2
-rw-r--r--  arch/ia64/sn/kernel/sn2/prominfo_proc.c    |   25
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn2_smp.c          |  213
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_hwperf.c        |    5
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_proc_fs.c       |   22
-rw-r--r--  arch/ia64/sn/kernel/sn2/timer.c            |   19
-rw-r--r--  arch/ia64/sn/kernel/sn2/timer_interrupt.c  |    7
-rw-r--r--  arch/ia64/sn/kernel/tiocx.c                |   41
-rw-r--r--  arch/ia64/sn/kernel/xp_main.c              |   17
-rw-r--r--  arch/ia64/sn/kernel/xpc.h                  | 1273
-rw-r--r--  arch/ia64/sn/kernel/xpc_channel.c          |   70
-rw-r--r--  arch/ia64/sn/kernel/xpc_main.c             |  245
-rw-r--r--  arch/ia64/sn/kernel/xpc_partition.c        |   10
22 files changed, 643 insertions, 1768 deletions
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 4351c4f..3e9b4ee 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -7,6 +7,8 @@
 # Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved.
 #
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
 	 huberror.o io_init.o iomv.o klconflib.o sn2/
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index dd73c0c..1f11db4 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved.
  */
 
 #include <linux/config.h>
@@ -186,18 +186,13 @@ retry_bteop:
 
 	/* Initialize the notification to a known value. */
 	*bte->most_rcnt_na = BTE_WORD_BUSY;
-	notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na));
+	notif_phys_addr = (u64)bte->most_rcnt_na;
 
-	if (is_shub2()) {
-		src = SH2_TIO_PHYS_TO_DMA(src);
-		dest = SH2_TIO_PHYS_TO_DMA(dest);
-		notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr);
-	}
 	/* Set the source and destination registers */
-	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
-	BTE_SRC_STORE(bte, TO_PHYS(src));
-	BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
-	BTE_DEST_STORE(bte, TO_PHYS(dest));
+	BTE_PRINTKV(("IBSA = 0x%lx)\n", src));
+	BTE_SRC_STORE(bte, src);
+	BTE_PRINTKV(("IBDA = 0x%lx)\n", dest));
+	BTE_DEST_STORE(bte, dest);
 
 	/* Set the notification register */
 	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fcbc748..f1ec137 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
  * Wait until all BTE related CRBs are completed
  * and then reset the interfaces.
  */
-void shub1_bte_error_handler(unsigned long _nodepda)
+int shub1_bte_error_handler(unsigned long _nodepda)
 {
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
 		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 
 	/* Determine information about our hub */
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 		mod_timer(recovery_timer, HZ * 5);
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
-				return;
+				return 1;
 			}
 		}
 	}
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
 
 	del_timer(recovery_timer);
+	return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub2_bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	struct bteinfo_s *bte;
+	nasid_t nasid;
+	u64 status;
+	int i;
+
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * Verify that all the BTEs are complete
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bte = &err_nodepda->bte_if[i];
+		status = BTE_LNSTAT_LOAD(bte);
+		if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+			continue;
+		mod_timer(recovery_timer, HZ * 5);
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+	if (ia64_sn_bte_recovery(nasid))
+		panic("bte_error_handler(): Fatal BTE Error");
+
+	del_timer(recovery_timer);
+	return 0;
 }
 
 /*
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
 	int i;
-	nasid_t nasid;
 	unsigned long irq_flags;
 	volatile u64 *notify;
 	bte_result_t bh_error;
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
 	}
 
 	if (is_shub1()) {
-		shub1_bte_error_handler(_nodepda);
+		if (shub1_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	} else {
-		nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
-
-		if (ia64_sn_bte_recovery(nasid))
-			panic("bte_error_handler(): Fatal BTE Error");
+		if (shub2_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	}
 
 	for (i = 0; i < BTES_PER_NODE; i++) {
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 5c5eb01..56ab6ba 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
 	ret_stuff.v0 = 0;
 	hubdev_info = (struct hubdev_info *)arg;
 	nasid = hubdev_info->hdi_nasid;
-	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+
+	if (is_shub1()) {
+		SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
 			(u64) nasid, 0, 0, 0, 0, 0, 0);
-	if ((int)ret_stuff.v0)
-		panic("hubii_eint_handler(): Fatal TIO Error");
+		if ((int)ret_stuff.v0)
+			panic("hubii_eint_handler(): Fatal TIO Error");
 
-	if (is_shub1()) {
 		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
 			(void)hubiio_crb_error_handler(hubdev_info);
 	} else
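The bte_error.c change above converts both per-chipset handlers to a common contract: return nonzero after re-arming the recovery timer while CRBs are still draining (so bte_error_handler() drops its lock and retries later), and zero once the interface has been reset. A minimal sketch of that poll-or-reschedule shape, with hypothetical stand-ins (struct recovery_state, hw_busy(), reset_interfaces()) for the SN-specific pieces:

    #include <linux/timer.h>

    /* Hypothetical stand-ins for the SN-specific state and hardware tests. */
    struct recovery_state {
            struct timer_list recovery_timer;
    };
    extern int hw_busy(struct recovery_state *s);
    extern void reset_interfaces(struct recovery_state *s);

    /* Returns 1 while work is still in flight (timer re-armed, caller must
     * back out); returns 0 once the hardware has been reset. */
    static int recovery_handler(unsigned long data)
    {
            struct recovery_state *s = (struct recovery_state *)data;

            if (hw_busy(s)) {
                    /* Not done yet: re-arm the timer and report "retry". */
                    mod_timer(&s->recovery_timer, jiffies + 5 * HZ);
                    return 1;
            }
            reset_interfaces(s);            /* everything drained; safe now */
            del_timer(&s->recovery_timer);
            return 0;                       /* recovered; caller may proceed */
    }

In the hunk, bte_error_handler() uses exactly this return value to decide whether to release the recovery spinlock and bail out or to continue clearing the per-BTE error state.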
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 318087e..dfb3f29 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -10,6 +10,7 @@
 #include <linux/nodemask.h>
 #include <asm/sn/types.h>
 #include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/io.h>
 #include <asm/sn/pcibr_provider.h>
@@ -22,6 +23,10 @@
 #include "xtalk/hubdev.h"
 #include "xtalk/xwidgetdev.h"
 
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+
 static struct list_head sn_sysdata_list;
 
 /* sysdata list struct */
@@ -39,12 +44,12 @@ struct brick {
 	struct slab_info slab_info[MAX_SLABS + 1];
 };
 
-int sn_ioif_inited = 0;		/* SN I/O infrastructure initialized? */
+int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
 
 struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
 
-static int max_segment_number = 0; /* Default highest segment number */
-static int max_pcibus_number = 255; /* Default highest pci bus number */
+static int max_segment_number;		/* Default highest segment number */
+static int max_pcibus_number = 255;	/* Default highest pci bus number */
 
 /*
  * Hooks and struct for unsupported pci providers
 */
@@ -76,31 +81,29 @@ static struct sn_pcibus_provider sn_pci_default_provider = {
 };
 
 /*
- * Retrieve the DMA Flush List given nasid.  This list is needed
- * to implement the WAR - Flush DMA data on PIO Reads.
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
  */
-static inline uint64_t
-sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+			     u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
 
 	SAL_CALL_NOLOCK(ret_stuff,
-			(u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
-			(u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0,
-			0);
-	return ret_stuff.v0;
-
+			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+			(u64) nasid, (u64) widget_num,
+			(u64) device_num, (u64) address, 0, 0, 0);
+	return ret_stuff.status;
 }
 
 /*
  * Retrieve the hub device info structure for the given nasid.
  */
-static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
 
@@ -114,9 +117,8 @@ static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
 /*
  * Retrieve the pci bus information given the bus number.
  */
-static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
@@ -130,9 +132,9 @@ static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 /*
  * Retrieve the pci device information given the bus and device|function number.
  */
-static inline uint64_t
-sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
-		    u64 sn_irq_info)
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+		    u64 sn_irq_info)
 {
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
@@ -140,7 +142,7 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
 
 	SAL_CALL_NOLOCK(ret_stuff,
 			(u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
-			(u64) segment, (u64) bus_number, (u64) devfn,
+			(u64) segment, (u64) bus_number, (u64) devfn,
 			(u64) pci_dev,
 			sn_irq_info, 0, 0);
 	return ret_stuff.v0;
@@ -164,18 +166,56 @@ sn_pcidev_info_get(struct pci_dev *dev)
 	return NULL;
 }
 
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+static u8 war_implemented = 0;
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+			       struct sn_flush_device_common *common)
+{
+	struct sn_flush_device_war *war_list;
+	struct sn_flush_device_war *dev_entry;
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	if (!war_implemented) {
+		printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
+		       "PROM flush WAR\n");
+		war_implemented = 1;
+	}
+
+	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+	if (!war_list)
+		BUG();
+
+	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+	if (isrv.status)
+		panic("sn_device_fixup_war failed: %s\n",
+		      ia64_sal_strerror(isrv.status));
+
+	dev_entry = war_list + device;
+	memcpy(common,dev_entry, sizeof(*common));
+	kfree(war_list);
+
+	return isrv.status;
+}
+
 /*
- * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
+ * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
  *	each node in the system.
  */
-static void sn_fixup_ionodes(void)
+static void __init sn_fixup_ionodes(void)
 {
-
-	struct sn_flush_device_list *sn_flush_device_list;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_kernel *dev_entry;
 	struct hubdev_info *hubdev;
-	uint64_t status;
-	uint64_t nasid;
-	int i, widget;
+	u64 status;
+	u64 nasid;
+	int i, widget, device, size;
 
 	/*
 	 * Get SGI Specific HUB chipset information.
@@ -186,7 +226,7 @@ static void sn_fixup_ionodes(void)
 		nasid = cnodeid_to_nasid(i);
 		hubdev->max_segment_number = 0xffffffff;
 		hubdev->max_pcibus_number = 0xff;
-		status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
+		status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
 		if (status)
 			continue;
 
@@ -211,40 +251,49 @@ static void sn_fixup_ionodes(void)
 		if (!hubdev->hdi_flush_nasid_list.widget_p)
 			continue;
 
+		size = (HUB_WIDGET_ID_MAX + 1) *
+			sizeof(struct sn_flush_device_kernel *);
 		hubdev->hdi_flush_nasid_list.widget_p =
-		    kmalloc((HUB_WIDGET_ID_MAX + 1) *
-			    sizeof(struct sn_flush_device_list *), GFP_KERNEL);
-
-		memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
-		       (HUB_WIDGET_ID_MAX + 1) *
-		       sizeof(struct sn_flush_device_list *));
+			kzalloc(size, GFP_KERNEL);
+		if (!hubdev->hdi_flush_nasid_list.widget_p)
+			BUG();
 
 		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
-			sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
-						       sizeof(struct
-							      sn_flush_device_list),
-						       GFP_KERNEL);
-			memset(sn_flush_device_list, 0x0,
-			       DEV_PER_WIDGET *
-			       sizeof(struct sn_flush_device_list));
-
-			status =
-			    sal_get_widget_dmaflush_list(nasid, widget,
-							 (uint64_t)
-							 __pa
-							 (sn_flush_device_list));
-			if (status) {
-				kfree(sn_flush_device_list);
-				continue;
+			size = DEV_PER_WIDGET *
+				sizeof(struct sn_flush_device_kernel);
+			sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+			if (!sn_flush_device_kernel)
+				BUG();
+
+			dev_entry = sn_flush_device_kernel;
+			for (device = 0; device < DEV_PER_WIDGET;
+			     device++,dev_entry++) {
+				size = sizeof(struct sn_flush_device_common);
+				dev_entry->common = kzalloc(size, GFP_KERNEL);
+				if (!dev_entry->common)
+					BUG();
+
+				if (sn_prom_feature_available(
+							PRF_DEVICE_FLUSH_LIST))
+					status = sal_get_device_dmaflush_list(
+						     nasid, widget, device,
+						     (u64)(dev_entry->common));
+				else
+					status = sn_device_fixup_war(nasid,
+								     widget,
+								     device,
+							     dev_entry->common);
+				if (status != SALRET_OK)
+					panic("SAL call failed: %s\n",
+					      ia64_sal_strerror(status));
+
+				spin_lock_init(&dev_entry->sfdl_flush_lock);
 			}
 
-			spin_lock_init(&sn_flush_device_list->sfdl_flush_lock);
-			hubdev->hdi_flush_nasid_list.widget_p[widget] =
-			    sn_flush_device_list;
-		}
-
+			if (sn_flush_device_kernel)
+				hubdev->hdi_flush_nasid_list.widget_p[widget] =
+						sn_flush_device_kernel;
+		}
 	}
-
 }
 
 /*
@@ -256,7 +305,7 @@ static void sn_fixup_ionodes(void)
  */
 static void
 sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
-		    int64_t * pci_addrs)
+		    s64 * pci_addrs)
 {
 	struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
 	unsigned int i;
@@ -316,20 +365,19 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 	struct pci_bus *host_pci_bus;
 	struct pci_dev *host_pci_dev;
 	struct pcidev_info *pcidev_info;
-	int64_t pci_addrs[PCI_ROM_RESOURCE + 1];
+	s64 pci_addrs[PCI_ROM_RESOURCE + 1];
 	struct sn_irq_info *sn_irq_info;
 	unsigned long size;
 	unsigned int bus_no, devfn;
 
 	pci_dev_get(dev); /* for the sysdata pointer */
 	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
-	if (pcidev_info <= 0)
+	if (!pcidev_info)
 		BUG();		/* Cannot afford to run out of memory */
 
-	sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
-	if (sn_irq_info <= 0)
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
 		BUG();		/* Cannot afford to run out of memory */
-	memset(sn_irq_info, 0, sizeof(struct sn_irq_info));
 
 	/* Call to retrieve pci device information needed by kernel. */
 	status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number,
@@ -407,6 +455,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 		pcidev_info->pdi_sn_irq_info = NULL;
 		kfree(sn_irq_info);
 	}
+
+	/*
+	 * MSI currently not supported on altix.  Remove this when
+	 * the MSI abstraction patches are integrated into the kernel
+	 * (sometime after 2.6.16 releases)
+	 */
+	dev->no_msi = 1;
 }
 
 /*
@@ -415,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
  */
 void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 {
-	int status = 0;
+	int status;
 	int nasid, cnode;
 	struct pci_controller *controller;
 	struct sn_pci_controller *sn_controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
-	void *provider_soft = NULL;
+	void *provider_soft;
 	struct sn_pcibus_provider *provider;
 
 	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -468,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	bus->sysdata = controller;
 	if (provider->bus_fixup)
 		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
+	else
+		provider_soft = NULL;
 
 	if (provider_soft == NULL) {
 		/* fixup failed or not applicable */
@@ -550,15 +607,15 @@ void sn_bus_store_sysdata(struct pci_dev *dev)
 void sn_bus_free_sysdata(void)
 {
 	struct sysdata_el *element;
-	struct list_head *list;
+	struct list_head *list, *safe;
 
-sn_sysdata_free_start:
-	list_for_each(list, &sn_sysdata_list) {
+	list_for_each_safe(list, safe, &sn_sysdata_list) {
 		element = list_entry(list, struct sysdata_el, entry);
 		list_del(&element->entry);
+		list_del(&(((struct pcidev_info *)
+			     (element->sysdata))->pdi_list));
 		kfree(element->sysdata);
 		kfree(element);
-		goto sn_sysdata_free_start;
 	}
 	return;
 }
@@ -571,13 +628,8 @@ sn_sysdata_free_start:
  */
 static int __init sn_pci_init(void)
 {
-	int i = 0;
-	int j = 0;
+	int i, j;
 	struct pci_dev *pci_dev = NULL;
-	extern void sn_init_cpei_timer(void);
-#ifdef CONFIG_PROC_FS
-	extern void register_sn_procfs(void);
-#endif
 
 	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
 		return 0;
@@ -633,32 +685,29 @@ static int __init sn_pci_init(void)
 */
 void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
 {
-
 	struct hubdev_info *hubdev_info;
+	int size;
+	pg_data_t *pg;
+
+	size = sizeof(struct hubdev_info);
 
 	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0),
-							     sizeof(struct
								    hubdev_info));
+		pg = NODE_DATA(0);
 	else
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node),
-							     sizeof(struct
								    hubdev_info));
-	npda->pdinfo = (void *)hubdev_info;
+		pg = NODE_DATA(node);
+
+	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+	npda->pdinfo = (void *)hubdev_info;
 }
 
 geoid_t
 cnodeid_get_geoid(cnodeid_t cnode)
 {
-
 	struct hubdev_info *hubdev;
 
 	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
 	return hubdev->hdi_geoid;
-
 }
 
 subsys_initcall(sn_pci_init);
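sn_fixup_ionodes() above now asks the PROM whether it supports the per-device flush list (PRF_DEVICE_FLUSH_LIST, from sn_feature_sets.h) and only falls back to the per-widget WAR on old firmware. A condensed sketch of that feature-gated fallback, assuming hypothetical helpers new_sal_device_list()/old_war_widget_list() in place of the two SAL paths shown in the hunk:

    /* Hypothetical stand-ins for the two firmware paths in the hunk above. */
    extern int sn_prom_feature_available(int feature);
    extern long new_sal_device_list(unsigned long nasid, unsigned long widget,
                                    int device, void *common);
    extern long old_war_widget_list(unsigned long nasid, unsigned long widget,
                                    int device, void *common);

    static long get_flush_entry(unsigned long nasid, unsigned long widget,
                                int device, void *common)
    {
            /* New PROMs fill a single per-device entry directly... */
            if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
                    return new_sal_device_list(nasid, widget, device, common);
            /* ...old PROMs only export the whole widget list, so the WAR
             * fetches it and copies out the one entry. */
            return old_war_widget_list(nasid, widget, device, common);
    }

Note that both real paths hand the firmware a physical address (__pa()) because SAL writes the result into memory directly.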
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 01d18b7..c373113 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -5,11 +5,12 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved.
  */
 
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
@@ -28,7 +29,7 @@ extern int sn_ioif_inited;
 static struct list_head **sn_irq_lh;
 static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
 
-static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
+static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
				     u64 sn_irq_info,
				     int req_irq, nasid_t req_nasid,
				     int req_slice)
@@ -76,17 +77,15 @@ static void sn_enable_irq(unsigned int irq)
 
 static void sn_ack_irq(unsigned int irq)
 {
-	u64 event_occurred, mask = 0;
+	u64 event_occurred, mask;
 
 	irq = irq & 0xff;
-	event_occurred =
-	    HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
 	mask = event_occurred & SH_ALL_INT_MASK;
-	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS),
-	      mask);
+	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
-	move_irq(irq);
+	move_native_irq(irq);
 }
 
 static void sn_end_irq(unsigned int irq)
@@ -123,7 +122,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
 
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
				 sn_irq_lh[irq], list) {
-		uint64_t bridge;
+		u64 bridge;
 		int local_widget, status;
 		nasid_t local_nasid;
 		struct sn_irq_info *new_irq_info;
@@ -134,7 +133,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
			break;
		memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
 
-		bridge = (uint64_t) new_irq_info->irq_bridge;
+		bridge = (u64) new_irq_info->irq_bridge;
		if (!bridge) {
			kfree(new_irq_info);
			break; /* irq is not a device interrupt */
@@ -219,9 +218,8 @@ static void register_intr_pda(struct sn_irq_info *sn_irq_info)
		pdacpu(cpu)->sn_last_irq = irq;
	}
 
-	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) {
+	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
		pdacpu(cpu)->sn_first_irq = irq;
-	}
 }
 
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
@@ -289,7 +287,7 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
	spin_unlock(&sn_irq_info_lock);
 
-	(void)register_intr_pda(sn_irq_info);
+	register_intr_pda(sn_irq_info);
 }
 
 void sn_irq_unfixup(struct pci_dev *pci_dev)
@@ -301,7 +299,9 @@ void sn_irq_unfixup(struct pci_dev *pci_dev)
		return;
 
	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
-	if (!sn_irq_info || !sn_irq_info->irq_irq) {
+	if (!sn_irq_info)
+		return;
+	if (!sn_irq_info->irq_irq) {
		kfree(sn_irq_info);
		return;
	}
@@ -349,10 +349,10 @@ static void force_interrupt(int irq)
 */
 static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 {
-	uint64_t regval;
+	u64 regval;
	int irr_reg_num;
	int irr_bit;
-	uint64_t irr_reg;
+	u64 irr_reg;
	struct pcidev_info *pcidev_info;
	struct pcibus_info *pcibus_info;
 
@@ -419,7 +419,7 @@ void sn_lb_int_war_check(void)
	rcu_read_unlock();
 }
 
-void sn_irq_lh_init(void)
+void __init sn_irq_lh_init(void)
 {
	int i;
 
@@ -434,5 +434,4 @@ void sn_irq_lh_init(void)
 
		INIT_LIST_HEAD(sn_irq_lh[i]);
	}
-
 }
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
index 0f11a32..87682b4 100644
--- a/arch/ia64/sn/kernel/klconflib.c
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -78,31 +78,30 @@ format_module_id(char *buffer, moduleid_t m, int fmt)
	position = MODULE_GET_BPOS(m);
 
	if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
-	    /* Brief module number format, eg. 002c15 */
+		/* Brief module number format, eg. 002c15 */
 
-	    /* Decompress the rack number */
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		/* Decompress the rack number */
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    /* Add the brick type */
-	    *buffer++ = brickchar;
+		/* Add the brick type */
+		*buffer++ = brickchar;
	}
	else if (fmt == MODULE_FORMAT_LONG) {
-	    /* Fuller hwgraph format, eg. rack/002/bay/15 */
+		/* Fuller hwgraph format, eg. rack/002/bay/15 */
 
-	    strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
 
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
	}
 
	/* Add the bay position, using at least two digits */
	if (position < 10)
-	    *buffer++ = '0';
+		*buffer++ = '0';
	sprintf(buffer, "%d", position);
-
 }
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
index 6546db6..3db62f2 100644
--- a/arch/ia64/sn/kernel/mca.c
+++ b/arch/ia64/sn/kernel/mca.c
@@ -3,13 +3,14 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/mutex.h>
 #include <asm/mca.h>
 #include <asm/sal.h>
 #include <asm/sn/sn_sal.h>
@@ -27,7 +28,7 @@ void sn_init_cpei_timer(void);
 
 /* Printing oemdata from mca uses data that is not passed through SAL, it is
  * global.  Only one user at a time.
 */
-static DECLARE_MUTEX(sn_oemdata_mutex);
+static DEFINE_MUTEX(sn_oemdata_mutex);
 static u8 **sn_oemdata;
 static u64 *sn_oemdata_size, sn_oemdata_bufsize;
 
@@ -89,7 +90,7 @@ static int
 sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
				    u64 * oemdata_size)
 {
-	down(&sn_oemdata_mutex);
+	mutex_lock(&sn_oemdata_mutex);
	sn_oemdata = oemdata;
	sn_oemdata_size = oemdata_size;
	sn_oemdata_bufsize = 0;
@@ -107,7 +108,7 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
		*sn_oemdata_size = 0;
		ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
	}
-	up(&sn_oemdata_mutex);
+	mutex_unlock(&sn_oemdata_mutex);
	return 0;
 }
 
@@ -136,7 +137,8 @@ int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdat
 
 static int __init sn_salinfo_init(void)
 {
-	salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
+	if (ia64_platform_is("sn2"))
+		salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
	return 0;
 }
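The mca.c hunk is part of the 2.6.16-era semaphore-to-mutex conversion: DECLARE_MUTEX() actually declared a counting semaphore used as a mutex, and down()/up() become mutex_lock()/mutex_unlock() from the new <linux/mutex.h> API. A minimal before/after sketch of the pattern:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(example_mutex);  /* was: static DECLARE_MUTEX(example_sem); */

    static int serialized_op(void)
    {
            mutex_lock(&example_mutex);      /* was: down(&example_sem); */
            /* ...critical section: only one printer of oemdata at a time... */
            mutex_unlock(&example_mutex);    /* was: up(&example_sem); */
            return 0;
    }

The same conversion is applied to the XPC registration semaphores in xp_main.c further down.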
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index e510dce..5b84836 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -67,6 +67,7 @@ extern unsigned long last_time_offset;
 extern void (*ia64_mark_idle) (int);
 extern void snidle(int);
 extern unsigned char acpi_kbd_controller_present;
+extern unsigned long long (*ia64_printk_clock)(void);
 
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
@@ -74,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
 EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
 
-DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
 
 DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
@@ -125,20 +126,6 @@ struct screen_info sn_screen_info = {
 };
 
 /*
- * This is here so we can use the CMOS detection in ide-probe.c to
- * determine what drives are present.  In theory, we don't need this
- * as the auto-detection could be done via ide-probe.c:do_probe() but
- * in practice that would be much slower, which is painful when
- * running in the simulator.  Note that passing zeroes in DRIVE_INFO
- * is sufficient (the IDE driver will autodetect the drive geometry).
- */
-#ifdef CONFIG_IA64_GENERIC
-extern char drive_info[4 * 16];
-#else
-char drive_info[4 * 16];
-#endif
-
-/*
  * This routine can only be used during init, since
  * smp_boot_data is an init data structure.
  * We have to use smp_boot_data.cpu_phys_id to find
@@ -209,7 +196,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __initdata shub_1_1_found = 0;
+static int __initdata shub_1_1_found;
 
 /*
  * sn_check_for_wars
@@ -330,6 +317,7 @@ struct pcdp_vga_device {
 #define PCDP_PCI_TRANS_IOPORT	0x02
 #define PCDP_PCI_TRANS_MMIO	0x01
 
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
 static void
 sn_scan_pcdp(void)
 {
@@ -371,6 +359,17 @@ sn_scan_pcdp(void)
			break; /* once we find the primary, we're done */
	}
 }
+#endif
+
+static unsigned long sn2_rtc_initial;
+
+static unsigned long long ia64_sn2_printk_clock(void)
+{
+	unsigned long rtc_now = rtc_time();
+
+	return (rtc_now - sn2_rtc_initial) *
+		(1000000000 / sn_rtc_cycles_per_second);
+}
 
 /**
  * sn_setup - SN platform setup routine
@@ -386,6 +385,7 @@ void __init sn_setup(char **cmdline_p)
	u32 version = sn_sal_rev();
	extern void sn_cpu_init(void);
 
+	sn2_rtc_initial = rtc_time();
	ia64_sn_plat_set_error_handling_features();	// obsolete
	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
@@ -437,19 +437,6 @@ void __init sn_setup(char **cmdline_p)
	 */
	build_cnode_tables();
 
-	/*
-	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
-	 * support here so we don't have to listen to failed keyboard probe
-	 * messages.
-	 */
-	if (version <= 0x0209 && acpi_kbd_controller_present) {
-		printk(KERN_INFO "Disabling legacy keyboard support as prom "
-		       "is too old and doesn't provide FADT\n");
-		acpi_kbd_controller_present = 0;
-	}
-
-	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
-
	status =
	    ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
			       &drift);
@@ -463,6 +450,21 @@ void __init sn_setup(char **cmdline_p)
 
	platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
 
+	ia64_printk_clock = ia64_sn2_printk_clock;
+
+	/*
+	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
+	 * support here so we don't have to listen to failed keyboard probe
+	 * messages.
+	 */
+	if (version <= 0x0209 && acpi_kbd_controller_present) {
+		printk(KERN_INFO "Disabling legacy keyboard support as prom "
+		       "is too old and doesn't provide FADT\n");
+		acpi_kbd_controller_present = 0;
+	}
+
+	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
+
	/*
	 * we set the default root device to /dev/hda
	 * to make simulation easy
@@ -578,13 +580,17 @@ void __init sn_cpu_init(void)
			sn_prom_type = 2;
		else
			sn_prom_type = 1;
-		printk("Running on medusa with %s PROM\n", (sn_prom_type == 1) ? "real" : "fake");
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+		       (sn_prom_type == 1) ? "real" : "fake");
	}
 
	memset(pda, 0, sizeof(pda));
-	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
-				&sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
-				&sn_coherency_id, &sn_region_size))
+	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+				&sn_hub_info->nasid_bitmask,
+				&sn_hub_info->nasid_shift,
+				&sn_system_size, &sn_sharing_domain_size,
+				&sn_partition_id, &sn_coherency_id,
+				&sn_region_size))
		BUG();
	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
 
@@ -716,7 +722,8 @@ void __init build_cnode_tables(void)
	for_each_online_node(node) {
		kl_config_hdr_t *klgraph_header;
		nasid = cnodeid_to_nasid(node);
-		if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
+		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+		if (klgraph_header == NULL)
			BUG();
		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
		while (brd) {
@@ -734,7 +741,7 @@ nasid_slice_to_cpuid(int nasid, int slice)
 {
	long cpu;
 
-	for (cpu=0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpuid_to_nasid(cpu) == nasid &&
		    cpuid_to_slice(cpu) == slice)
			return cpu;
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
index 170bde4..99e1776 100644
--- a/arch/ia64/sn/kernel/sn2/Makefile
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -9,5 +9,7 @@
 # sn2 specific kernel files
 #
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
	 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index 81c63b2..6ae276d 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * Module to export the system's Firmware Interface Tables, including
  * PROM revision numbers and banners, in /proc
@@ -190,7 +190,7 @@ static int
 read_version_entry(char *page, char **start, off_t off, int count, int *eof,
		   void *data)
 {
-	int len = 0;
+	int len;
 
	/* data holds the NASID of the node */
	len = dump_version(page, (unsigned long)data);
@@ -202,7 +202,7 @@ static int
 read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
	       void *data)
 {
-	int len = 0;
+	int len;
 
	/* data holds the NASID of the node */
	len = dump_fit(page, (unsigned long)data);
@@ -229,13 +229,16 @@ int __init prominfo_init(void)
	struct proc_dir_entry *p;
	cnodeid_t cnodeid;
	unsigned long nasid;
+	int size;
	char name[NODE_NAME_LEN];
 
	if (!ia64_platform_is("sn2"))
		return 0;
 
-	proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *),
-			       GFP_KERNEL);
+	size = num_online_nodes() * sizeof(struct proc_dir_entry *);
+	proc_entries = kzalloc(size, GFP_KERNEL);
+	if (!proc_entries)
+		return -ENOMEM;
 
	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
 
@@ -244,14 +247,12 @@ int __init prominfo_init(void)
		sprintf(name, "node%d", cnodeid);
		*entp = proc_mkdir(name, sgi_prominfo_entry);
		nasid = cnodeid_to_nasid(cnodeid);
-		p = create_proc_read_entry(
-			"fit", 0, *entp, read_fit_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("fit", 0, *entp, read_fit_entry,
					   (void *)nasid);
		if (p)
			p->owner = THIS_MODULE;
-		p = create_proc_read_entry(
-			"version", 0, *entp, read_version_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("version", 0, *entp,
					   read_version_entry, (void *)nasid);
		if (p)
			p->owner = THIS_MODULE;
		entp++;
@@ -263,7 +264,7 @@ int __init prominfo_init(void)
 void __exit prominfo_exit(void)
 {
	struct proc_dir_entry **entp;
-	unsigned cnodeid;
+	unsigned int cnodeid;
	char name[NODE_NAME_LEN];
 
	entp = proc_entries;
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 471bbaa..b2e1e74 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -46,104 +46,24 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
-void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0,
-	volatile unsigned long *, unsigned long data1);
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+			  volatile unsigned long *, unsigned long,
+			  volatile unsigned long *, unsigned long);
 
-#ifdef DEBUG_PTC
 /*
- * ptctest:
- *
- * 	xyz - 3 digit hex number:
- * 		x - Force PTC purges to use shub:
- * 			0 - no force
- * 			1 - force
- * 		y - interupt enable
- * 			0 - disable interrupts
- * 			1 - leave interuupts enabled
- * 		z - type of lock:
- * 			0 - global lock
- * 			1 - node local lock
- * 			2 - no lock
- *
- * 	Note: on shub1, only ptctest == 0 is supported.  Don't try other values!
+ * Note: some is the following is captured here to make degugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
  */
-
-static unsigned int sn2_ptctest = 0;
-
-static int __init ptc_test(char *str)
-{
-	get_option(&str, &sn2_ptctest);
-	return 1;
-}
-__setup("ptctest=", ptc_test);
-
-static inline int ptc_lock(unsigned long *flagp)
-{
-	unsigned long opt = sn2_ptctest & 255;
-
-	switch (opt) {
-	case 0x00:
-		spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-		break;
-	case 0x01:
-		spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
-		break;
-	case 0x02:
-		local_irq_save(*flagp);
-		break;
-	case 0x10:
-		spin_lock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_lock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-	return opt;
-}
-
-static inline void ptc_unlock(unsigned long flags, int opt)
-{
-	switch (opt) {
-	case 0x00:
-		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
-		break;
-	case 0x01:
-		spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
-		break;
-	case 0x02:
-		local_irq_restore(flags);
-		break;
-	case 0x10:
-		spin_unlock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_unlock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-}
-#else
-
-#define sn2_ptctest	0
-
-static inline int ptc_lock(unsigned long *flagp)
-{
-	spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-	return 0;
-}
-
-static inline void ptc_unlock(unsigned long flags, int opt)
-{
-	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
-}
-#endif
+#define local_node_uses_ptc_ga(sh1)	((sh1) ? 1 : 0)
+#define max_active_pio(sh1)		((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock()	1
+#define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
 
 struct ptc_stats {
	unsigned long ptc_l;
@@ -151,27 +71,32 @@ struct ptc_stats {
	unsigned long shub_ptc_flushes;
	unsigned long nodes_flushed;
	unsigned long deadlocks;
+	unsigned long deadlocks2;
	unsigned long lock_itc_clocks;
	unsigned long shub_itc_clocks;
	unsigned long shub_itc_clocks_max;
+	unsigned long shub_ptc_flushes_not_my_mm;
 };
 
+#define sn2_ptctest	0
+
 static inline unsigned long wait_piowc(void)
 {
-	volatile unsigned long *piows, zeroval;
-	unsigned long ws;
+	volatile unsigned long *piows;
+	unsigned long zeroval, ws;
 
	piows = pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
	do {
		cpu_relax();
	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
-	return ws;
+	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
 }
 
 void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
-	if (mm == current->mm)
+	/* flush_tlb_mm is inefficient if more than 1 users of mm */
+	if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
		flush_tlb_mm(mm);
 }
 
@@ -201,12 +126,14 @@ void
 sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
		     unsigned long end, unsigned long nbits)
 {
-	int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
-	int mymm = (mm == current->active_mm && current->mm);
+	int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+	int mymm = (mm == current->active_mm && mm == current->mm);
+	int use_cpu_ptcga;
	volatile unsigned long *ptc0, *ptc1;
-	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
	short nasids[MAX_NUMNODES], nix;
	nodemask_t nodes_flushed;
+	int active, max_active, deadlock;
 
	nodes_clear(nodes_flushed);
	i = 0;
@@ -267,41 +194,56 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 
	mynasid = get_nasid();
+	use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+	max_active = max_active_pio(shub1);
 
	itc = ia64_get_itc();
-	opt = ptc_lock(&flags);
+	spin_lock_irqsave(PTC_LOCK(shub1), flags);
	itc2 = ia64_get_itc();
+
	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
	__get_cpu_var(ptcstats).shub_ptc_flushes++;
	__get_cpu_var(ptcstats).nodes_flushed += nix;
+	if (!mymm)
+		__get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+	if (use_cpu_ptcga && !mymm) {
+		old_rr = ia64_get_rr(start);
+		ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+		ia64_srlz_d();
+	}
+
+	wait_piowc();
	do {
		if (shub1)
			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
		else
			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
-		for (i = 0; i < nix; i++) {
+		deadlock = 0;
+		active = 0;
+		for (ibegin = 0, i = 0; i < nix; i++) {
			nasid = nasids[i];
-			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
+			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
				ia64_ptcga(start, nbits << 2);
				ia64_srlz_i();
			} else {
				ptc0 = CHANGE_NASID(nasid, ptc0);
				if (ptc1)
					ptc1 = CHANGE_NASID(nasid, ptc1);
-				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
-							   data1);
-				flushed = 1;
+				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+				active++;
+			}
+			if (active >= max_active || i == (nix - 1)) {
+				if ((deadlock = wait_piowc())) {
+					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+					if (reset_max_active_on_deadlock())
+						max_active = 1;
+				}
+				active = 0;
+				ibegin = i + 1;
			}
		}
-		if (flushed
-		    && (wait_piowc() &
-				(SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
-			sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
-		}
-
		start += (1UL << nbits);
-
	} while (start < end);
 
	itc2 = ia64_get_itc() - itc2;
@@ -309,7 +251,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
		__get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
 
-	ptc_unlock(flags, opt);
+	if (old_rr) {
+		ia64_set_rr(start, old_rr);
+		ia64_srlz_d();
+	}
+
+	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 
	preempt_enable();
 }
@@ -321,27 +268,31 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 * TLB flush transaction.  The recovery sequence is somewhat tricky & is
 * coded in assembly language.
 */
-void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
-	volatile unsigned long *ptc1, unsigned long data1)
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+			  volatile unsigned long *ptc0, unsigned long data0,
+			  volatile unsigned long *ptc1, unsigned long data1)
 {
-	extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
-	        volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
	short nasid, i;
-	unsigned long *piows, zeroval;
+	unsigned long *piows, zeroval, n;
 
	__get_cpu_var(ptcstats).deadlocks++;
 
	piows = (unsigned long *) pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
 
-	for (i=0; i < nix; i++) {
+
+	for (i=ib; i <= ie; i++) {
		nasid = nasids[i];
-		if (!(sn2_ptctest & 3) && nasid == mynasid)
+		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
			continue;
		ptc0 = CHANGE_NASID(nasid, ptc0);
		if (ptc1)
			ptc1 = CHANGE_NASID(nasid, ptc1);
-		sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+
+		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+		__get_cpu_var(ptcstats).deadlocks2 += n;
	}
 }
@@ -452,20 +403,22 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
	cpu = *(loff_t *) data;
 
	if (!cpu) {
-		seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
+		seq_printf(file,
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
	}
 
	if (cpu < NR_CPUS && cpu_online(cpu)) {
		stat = &per_cpu(ptcstats, cpu);
-		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
			   stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
			   stat->deadlocks,
			   1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
			   1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-			   1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec);
+			   1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+			   stat->shub_ptc_flushes_not_my_mm,
+			   stat->deadlocks2);
	}
-
	return 0;
 }
 
@@ -476,7 +429,7 @@ static struct seq_operations sn2_ptc_seq_ops = {
	.show = sn2_ptc_seq_show
 };
 
-int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 {
	return seq_open(file, &sn2_ptc_seq_ops);
 }
@@ -493,7 +446,7 @@ static struct proc_dir_entry *proc_sn2_ptc;
 static int __init sn2_ptc_init(void)
 {
	if (!ia64_platform_is("sn2"))
-		return -ENOSYS;
+		return 0;
 
	if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 19b54fb..70db21f 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
  *
  * SGI Altix topology and hardware performance monitoring API.
  * Mark Goodwin <markgw@sgi.com>.
@@ -973,6 +973,9 @@ static int __devinit sn_hwperf_misc_register_init(void)
 {
	int e;
 
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
	sn_hwperf_init();
 
	/*
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index a06719d..c686d9c 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -6,11 +6,11 @@
  * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #include <linux/config.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <asm/uaccess.h>
 #include <asm/sn/sn_sal.h>
 
 static int partition_id_show(struct seq_file *s, void *p)
@@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file)
	return single_open(file, coherence_id_show, NULL);
 }
 
-static struct proc_dir_entry *sn_procfs_create_entry(
-	const char *name, struct proc_dir_entry *parent,
-	int (*openfunc)(struct inode *, struct file *),
-	int (*releasefunc)(struct inode *, struct file *))
+static struct proc_dir_entry
+*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
+			int (*openfunc)(struct inode *, struct file *),
+			int (*releasefunc)(struct inode *, struct file *))
 {
	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
 
@@ -126,24 +126,24 @@ void register_sn_procfs(void)
		return;
 
	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-		partition_id_open, single_release);
+			       partition_id_open, single_release);
 
	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-		system_serial_number_open, single_release);
+			       system_serial_number_open, single_release);
 
	sn_procfs_create_entry("licenseID", sgi_proc_dir,
-		licenseID_open, single_release);
+			       licenseID_open, single_release);
 
	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
-		sn_force_interrupt_open, single_release);
+				   sn_force_interrupt_open, single_release);
	if (e)
		e->proc_fops->write = sn_force_interrupt_write_proc;
 
	sn_procfs_create_entry("coherence_id", sgi_proc_dir,
-		coherence_id_open, single_release);
+			       coherence_id_open, single_release);
 
	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-		sn_topology_open, sn_topology_release);
+			       sn_topology_open, sn_topology_release);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index deb9baf..56a88b6 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -14,6 +14,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/system.h>
+#include <asm/timex.h>
 #include <asm/sn/leds.h>
 #include <asm/sn/shub_mmr.h>
@@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = {
	.source = TIME_SOURCE_MMIO64
 };
 
+/*
+ * sn udelay uses the RTC instead of the ITC because the ITC is not
+ * synchronized across all CPUs, and the thread may migrate to another CPU
+ * if preemption is enabled.
+ */
+static void
+ia64_sn_udelay (unsigned long usecs)
+{
+	unsigned long start = rtc_time();
+	unsigned long end = start +
+			usecs * sn_rtc_cycles_per_second / 1000000;
+
+	while (time_before((unsigned long)rtc_time(), end))
+		cpu_relax();
+}
+
 void __init sn_timer_init(void)
 {
	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
	sn2_interpolator.addr = RTC_COUNTER_ADDR;
	register_time_interpolator(&sn2_interpolator);
+
+	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
index adf5db2..fa7f699 100644
--- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -1,7 +1,7 @@
 /*
 *
 *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2005, 2006 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
@@ -22,11 +22,6 @@
 * License along with this program; if not, write the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
- * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
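The new ia64_sn_udelay() in timer.c compares RTC readings with time_before() rather than a plain '<', so the loop still terminates correctly if the free-running counter wraps mid-delay; the trick is comparing the signed difference. A small userspace demonstration of the same idiom (the macro is reimplemented here for illustration, matching the kernel's definition):

    #include <stdio.h>

    /* Compare via signed difference so a counter wrap between 'a' and 'b'
     * still orders them correctly (same idea as the kernel's time_before). */
    #define time_before(a, b)   ((long)(a) - (long)(b) < 0)

    int main(void)
    {
            unsigned long near_wrap = ~0UL - 5;  /* counter about to wrap */
            unsigned long after_wrap = 10;       /* counter after wrapping */

            /* A naive '<' says near_wrap is NOT before after_wrap... */
            printf("naive:       %d\n", near_wrap < after_wrap);              /* 0 */
            /* ...while the signed-difference form orders them correctly. */
            printf("time_before: %d\n", time_before(near_wrap, after_wrap));  /* 1 */
            return 0;
    }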
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 768c21d..99cb28e 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
+#include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <asm/system.h>
@@ -76,12 +77,6 @@ static void tiocx_bus_release(struct device *dev)
	kfree(to_cx_dev(dev));
 }
 
-struct bus_type tiocx_bus_type = {
-	.name = "tiocx",
-	.match = tiocx_match,
-	.uevent = tiocx_uevent,
-};
-
 /**
  * cx_device_match - Find cx_device in the id table.
  * @ids: id table from driver
@@ -148,6 +143,14 @@ static int cx_driver_remove(struct device *dev)
	return 0;
 }
 
+struct bus_type tiocx_bus_type = {
+	.name = "tiocx",
+	.match = tiocx_match,
+	.uevent = tiocx_uevent,
+	.probe = cx_device_probe,
+	.remove = cx_driver_remove,
+};
+
 /**
  * cx_driver_register - Register the driver.
  * @cx_driver: driver table (cx_drv struct) from driver
@@ -161,8 +164,6 @@ int cx_driver_register(struct cx_drv *cx_driver)
 {
	cx_driver->driver.name = cx_driver->name;
	cx_driver->driver.bus = &tiocx_bus_type;
-	cx_driver->driver.probe = cx_device_probe;
-	cx_driver->driver.remove = cx_driver_remove;
 
	return driver_register(&cx_driver->driver);
 }
@@ -244,7 +245,7 @@ static int cx_device_reload(struct cx_dev *cx_dev)
				  cx_dev->bt);
 }
 
-static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
+static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget,
					u64 sn_irq_info,
					int req_irq, nasid_t req_nasid,
					int req_slice)
@@ -283,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
	if ((nasid & 1) == 0)
		return NULL;
 
-	sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL);
+	sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL);
	if (sn_irq_info == NULL)
		return NULL;
 
-	memset(sn_irq_info, 0x0, sn_irq_size);
-
	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
				  req_nasid, slice);
	if (status) {
@@ -301,7 +300,7 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
 
 void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
 {
-	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+	u64 bridge = (u64) sn_irq_info->irq_bridge;
	nasid_t nasid = NASID_GET(bridge);
	int widget;
 
@@ -312,12 +311,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
	}
 }
 
-uint64_t tiocx_dma_addr(uint64_t addr)
+u64 tiocx_dma_addr(u64 addr)
 {
	return PHYS_TO_TIODMA(addr);
 }
 
-uint64_t tiocx_swin_base(int nasid)
+u64 tiocx_swin_base(int nasid)
 {
	return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
 }
@@ -334,8 +333,8 @@ EXPORT_SYMBOL(tiocx_swin_base);
 
 static void tio_conveyor_set(nasid_t nasid, int enable_flag)
 {
-	uint64_t ice_frz;
-	uint64_t disable_cb = (1ull << 61);
+	u64 ice_frz;
+	u64 disable_cb = (1ull << 61);
 
	if (!(nasid & 1))
		return;
@@ -387,7 +386,7 @@ static int is_fpga_tio(int nasid, int *bt)
 
 static int bitstream_loaded(nasid_t nasid)
 {
-	uint64_t cx_credits;
+	u64 cx_credits;
 
	cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
	cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
@@ -403,14 +402,14 @@ static int tiocx_reload(struct cx_dev *cx_dev)
	nasid_t nasid = cx_dev->cx_id.nasid;
 
	if (bitstream_loaded(nasid)) {
-		uint64_t cx_id;
+		u64 cx_id;
		int rv;
 
		rv = ia64_sn_sysctl_tio_clock_reset(nasid);
		if (rv) {
			printk(KERN_ALERT "CX port JTAG reset failed.\n");
		} else {
-			cx_id = *(volatile uint64_t *)
+			cx_id = *(volatile u64 *)
				(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
				 WIDGET_ID);
			part_num = XWIDGET_PART_NUM(cx_id);
@@ -485,7 +484,7 @@ static int __init tiocx_init(void)
	int found_tiocx_device = 0;
 
	if (!ia64_platform_is("sn2"))
-		return -ENODEV;
+		return 0;
 
	bus_register(&tiocx_bus_type);
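The tiocx.c hunks track the driver-core change of this era that moved probe()/remove() dispatch from struct device_driver into struct bus_type: the bus supplies them once, so individual cx drivers stop setting those fields. The resulting registration shape, sketched with the names taken from the hunks above (not a complete driver):

    /* Bus-level dispatch: the bus names probe/remove once... */
    static struct bus_type example_bus_type = {
            .name   = "tiocx",
            .match  = tiocx_match,          /* pairs devices with drivers */
            .probe  = cx_device_probe,      /* bus-level probe dispatch   */
            .remove = cx_driver_remove,     /* bus-level remove dispatch  */
    };

    /* ...so an individual driver only points at its bus. */
    int example_driver_register(struct cx_drv *cx_driver)
    {
            cx_driver->driver.name = cx_driver->name;
            cx_driver->driver.bus = &example_bus_type;
            /* no per-driver .probe/.remove assignments any more */
            return driver_register(&cx_driver->driver);
    }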
mutex_unlock(®istration->mutex); return xpcAlreadyRegistered; } @@ -154,7 +155,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, registration->key = key; registration->func = func; - up(®istration->sema); + mutex_unlock(®istration->mutex); xpc_interface.connect(ch_number); @@ -190,11 +191,11 @@ xpc_disconnect(int ch_number) * figured XPC's users will just turn around and call xpc_disconnect() * again anyways, so we might as well wait, if need be. */ - down(®istration->sema); + mutex_lock(®istration->mutex); /* if !XPC_CHANNEL_REGISTERED(ch_number) */ if (registration->func == NULL) { - up(®istration->sema); + mutex_unlock(®istration->mutex); return; } @@ -208,7 +209,7 @@ xpc_disconnect(int ch_number) xpc_interface.disconnect(ch_number); - up(®istration->sema); + mutex_unlock(®istration->mutex); return; } @@ -250,9 +251,9 @@ xp_init(void) xp_nofault_PIOR_target = SH1_IPI_ACCESS; } - /* initialize the connection registration semaphores */ + /* initialize the connection registration mutex */ for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { - sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */ + mutex_init(&xpc_registrations[ch_number].mutex); } return 0; diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h deleted file mode 100644 index 5483a9f..0000000 --- a/arch/ia64/sn/kernel/xpc.h +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) structures and macros. - */ - -#ifndef _IA64_SN_KERNEL_XPC_H -#define _IA64_SN_KERNEL_XPC_H - - -#include <linux/config.h> -#include <linux/interrupt.h> -#include <linux/sysctl.h> -#include <linux/device.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/sn/bte.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/addrs.h> -#include <asm/sn/mspec.h> -#include <asm/sn/shub_mmr.h> -#include <asm/sn/xp.h> - - -/* - * XPC Version numbers consist of a major and minor number. XPC can always - * talk to versions with same major #, and never talk to versions with a - * different major #. - */ -#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) -#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) -#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) - - -/* - * The next macros define word or bit representations for given - * C-brick nasid in either the SAL provided bit array representing - * nasids in the partition/machine or the AMO_t array used for - * inter-partition initiation communications. - * - * For SN2 machines, C-Bricks are alway even numbered NASIDs. As - * such, some space will be saved by insisting that nasid information - * passed from SAL always be packed for C-Bricks and the - * cross-partition interrupts use the same packing scheme. 
- */ -#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2) -#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1)) -#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \ - (1UL << XPC_NASID_B_INDEX(_n))) -#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) - -#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ -#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ - -/* define the process name of HB checker and the CPU it is pinned to */ -#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" -#define XPC_HB_CHECK_CPU 0 - -/* define the process name of the discovery thread */ -#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" - - -/* - * the reserved page - * - * SAL reserves one page of memory per partition for XPC. Though a full page - * in length (16384 bytes), its starting address is not page aligned, but it - * is cacheline aligned. The reserved page consists of the following: - * - * reserved page header - * - * The first cacheline of the reserved page contains the header - * (struct xpc_rsvd_page). Before SAL initialization has completed, - * SAL has set up the following fields of the reserved page header: - * SAL_signature, SAL_version, partid, and nasids_size. The other - * fields are set up by XPC. (xpc_rsvd_page points to the local - * partition's reserved page.) - * - * part_nasids mask - * mach_nasids mask - * - * SAL also sets up two bitmaps (or masks), one that reflects the actual - * nasids in this partition (part_nasids), and the other that reflects - * the actual nasids in the entire machine (mach_nasids). We're only - * interested in the even numbered nasids (which contain the processors - * and/or memory), so we only need half as many bits to represent the - * nasids. The part_nasids mask is located starting at the first cacheline - * following the reserved page header. The mach_nasids mask follows right - * after the part_nasids mask. The size in bytes of each mask is reflected - * by the reserved page header field 'nasids_size'. (Local partition's - * mask pointers are xpc_part_nasids and xpc_mach_nasids.) - * - * vars - * vars part - * - * Immediately following the mach_nasids mask are the XPC variables - * required by other partitions. First are those that are generic to all - * partitions (vars), followed on the next available cacheline by those - * which are partition specific (vars part). These are setup by XPC. - * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) - * - * Note: Until vars_pa is set, the partition XPC code has not been initialized. 
- */
-struct xpc_rsvd_page {
-	u64 SAL_signature;	/* SAL: unique signature */
-	u64 SAL_version;	/* SAL: version */
-	u8 partid;		/* SAL: partition ID */
-	u8 version;
-	u8 pad1[6];		/* align to next u64 in cacheline */
-	volatile u64 vars_pa;
-	struct timespec stamp;	/* time when reserved page was setup by XPC */
-	u64 pad2[9];		/* align to last u64 in cacheline */
-	u64 nasids_size;	/* SAL: size of each nasid mask in bytes */
-};
-
-#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
-
-#define XPC_SUPPORTS_RP_STAMP(_version) \
-		(_version >= _XPC_VERSION(1,1))
-
-/*
- * compare stamps - the return value is:
- *
- *	< 0,	if stamp1 < stamp2
- *	= 0,	if stamp1 == stamp2
- *	> 0,	if stamp1 > stamp2
- */
-static inline int
-xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
-{
-	int ret;
-
-
-	if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
-		ret = stamp1->tv_nsec - stamp2->tv_nsec;
-	}
-	return ret;
-}
-
-
-/*
- * Define the structures by which XPC variables can be exported to other
- * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
- */
-
-/*
- * The following structure describes the partition generic variables
- * needed by other partitions in order to properly initialize.
- *
- * struct xpc_vars version number also applies to struct xpc_vars_part.
- * Changes to either structure and/or related functionality should be
- * reflected by incrementing either the major or minor version numbers
- * of struct xpc_vars.
- */
-struct xpc_vars {
-	u8 version;
-	u64 heartbeat;
-	u64 heartbeating_to_mask;
-	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
-	int act_nasid;
-	int act_phys_cpuid;
-	u64 vars_part_pa;
-	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
-	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
-};
-
-#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
-
-#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
-		(_version >= _XPC_VERSION(3,1))
-
-
-static inline int
-xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
-{
-	return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
-}
-
-static inline void
-xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask | (1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-							old_mask);
-}
-
-static inline void
-xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask & ~(1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-							old_mask);
-}
-
-
-/*
- * The AMOs page consists of a number of AMO variables which are divided into
- * four groups. The first two groups are used to identify an IRQ's sender.
- * These two groups consist of 64 and 128 AMO variables respectively. The last
- * two groups, consisting of just one AMO variable each, are used to identify
- * the remote partitions that are currently engaged (from the viewpoint of
- * the XPC running on the remote partition).
- */
-#define XPC_NOTIFY_IRQ_AMOS	   0
-#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
-#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
-
-
-/*
- * The following structure describes the per partition specific variables.
- *
- * An array of these structures, one per partition, will be defined. As a
- * partition becomes active XPC will copy the array entry corresponding to
- * itself from that partition. It is desirable that the size of this
- * structure evenly divide into a cacheline, such that none of the entries
- * in this array crosses a cacheline boundary. As it is now, each entry
- * occupies half a cacheline.
- */
-struct xpc_vars_part {
-	volatile u64 magic;
-
-	u64 openclose_args_pa;	/* physical address of open and close args */
-	u64 GPs_pa;		/* physical address of Get/Put values */
-
-	u64 IPI_amo_pa;		/* physical address of IPI AMO_t structure */
-	int IPI_nasid;		/* nasid of where to send IPIs */
-	int IPI_phys_cpuid;	/* physical CPU ID of where to send IPIs */
-
-	u8 nchannels;		/* #of defined channels supported */
-
-	u8 reserved[23];	/* pad to a full 64 bytes */
-};
-
-/*
- * The vars_part MAGIC numbers play a part in the first contact protocol.
- *
- * MAGIC1 indicates that the per partition specific variables for a remote
- * partition have been initialized by this partition.
- *
- * MAGIC2 indicates that this partition has pulled the remote partition's
- * per partition variables that pertain to this partition.
- */
-#define XPC_VP_MAGIC1	0x0053524156435058L  /* 'XPCVARS\0'L (little endian) */
-#define XPC_VP_MAGIC2	0x0073726176435058L  /* 'XPCvars\0'L (little endian) */
-
-
-/* the reserved page sizes and offsets */
-
-#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
-#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars))
-
-#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp)	((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS_PART(_rp)	(struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE)
-
-
-/*
- * Functions registered by add_timer() or called by kernel_thread() only
- * allow for a single 64-bit argument. The following macros can be used to
- * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
- * the passed argument.
- */
-#define XPC_PACK_ARGS(_arg1, _arg2) \
-			((((u64) _arg1) & 0xffffffff) | \
-			((((u64) _arg2) & 0xffffffff) << 32))
-
-#define XPC_UNPACK_ARG1(_args)	(((u64) _args) & 0xffffffff)
-#define XPC_UNPACK_ARG2(_args)	((((u64) _args) >> 32) & 0xffffffff)
-
-
-
-/*
- * Define a Get/Put value pair (pointers) used with a message queue.
- */
-struct xpc_gp {
-	volatile s64 get;	/* Get value */
-	volatile s64 put;	/* Put value */
-};
-
-#define XPC_GP_SIZE \
-		L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
-
-
-
-/*
- * Define a structure that contains arguments associated with opening and
- * closing a channel.
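/*
 * A runnable sketch of the XPC_PACK_ARGS/XPC_UNPACK_ARG* idea above: two
 * 32-bit values squeezed into the single u64 that add_timer() and
 * kernel_thread() callbacks receive.  The sample values are made up.
 */
#include <stdio.h>
#include <stdint.h>

#define PACK_ARGS(_a1, _a2) \
	((((uint64_t)(_a1)) & 0xffffffff) | ((((uint64_t)(_a2)) & 0xffffffff) << 32))
#define UNPACK_ARG1(_args)	(((uint64_t)(_args)) & 0xffffffff)
#define UNPACK_ARG2(_args)	((((uint64_t)(_args)) >> 32) & 0xffffffff)

int main(void)
{
	uint64_t args = PACK_ARGS(7 /* e.g. a partid */, 3 /* e.g. a channel # */);

	printf("arg1=%llu arg2=%llu\n",
	       (unsigned long long)UNPACK_ARG1(args),
	       (unsigned long long)UNPACK_ARG2(args));
	return 0;
}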
- */ -struct xpc_openclose_args { - u16 reason; /* reason why channel is closing */ - u16 msg_size; /* sizeof each message entry */ - u16 remote_nentries; /* #of message entries in remote msg queue */ - u16 local_nentries; /* #of message entries in local msg queue */ - u64 local_msgqueue_pa; /* physical address of local message queue */ -}; - -#define XPC_OPENCLOSE_ARGS_SIZE \ - L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) - - - -/* struct xpc_msg flags */ - -#define XPC_M_DONE 0x01 /* msg has been received/consumed */ -#define XPC_M_READY 0x02 /* msg is ready to be sent */ -#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ - - -#define XPC_MSG_ADDRESS(_payload) \ - ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) - - - -/* - * Defines notify entry. - * - * This is used to notify a message's sender that their message was received - * and consumed by the intended recipient. - */ -struct xpc_notify { - struct semaphore sema; /* notify semaphore */ - volatile u8 type; /* type of notification */ - - /* the following two fields are only used if type == XPC_N_CALL */ - xpc_notify_func func; /* user's notify function */ - void *key; /* pointer to user's key */ -}; - -/* struct xpc_notify type of notification */ - -#define XPC_N_CALL 0x01 /* notify function provided by user */ - - - -/* - * Define the structure that manages all the stuff required by a channel. In - * particular, they are used to manage the messages sent across the channel. - * - * This structure is private to a partition, and is NOT shared across the - * partition boundary. - * - * There is an array of these structures for each remote partition. It is - * allocated at the time a partition becomes active. The array contains one - * of these structures for each potential channel connection to that partition. - * - * Each of these structures manages two message queues (circular buffers). - * They are allocated at the time a channel connection is made. One of - * these message queues (local_msgqueue) holds the locally created messages - * that are destined for the remote partition. The other of these message - * queues (remote_msgqueue) is a locally cached copy of the remote partition's - * own local_msgqueue. - * - * The following is a description of the Get/Put pointers used to manage these - * two message queues. Consider the local_msgqueue to be on one partition - * and the remote_msgqueue to be its cached copy on another partition. A - * description of what each of the lettered areas contains is included. - * - * - * local_msgqueue remote_msgqueue - * - * |/////////| |/////////| - * w_remote_GP.get --> +---------+ |/////////| - * | F | |/////////| - * remote_GP.get --> +---------+ +---------+ <-- local_GP->get - * | | | | - * | | | E | - * | | | | - * | | +---------+ <-- w_local_GP.get - * | B | |/////////| - * | | |////D////| - * | | |/////////| - * | | +---------+ <-- w_remote_GP.put - * | | |////C////| - * local_GP->put --> +---------+ +---------+ <-- remote_GP.put - * | | |/////////| - * | A | |/////////| - * | | |/////////| - * w_local_GP.put --> +---------+ |/////////| - * |/////////| |/////////| - * - * - * ( remote_GP.[get|put] are cached copies of the remote - * partition's local_GP->[get|put], and thus their values can - * lag behind their counterparts on the remote partition. ) - * - * - * A - Messages that have been allocated, but have not yet been sent to the - * remote partition. 
- * - * B - Messages that have been sent, but have not yet been acknowledged by the - * remote partition as having been received. - * - * C - Area that needs to be prepared for the copying of sent messages, by - * the clearing of the message flags of any previously received messages. - * - * D - Area into which sent messages are to be copied from the remote - * partition's local_msgqueue and then delivered to their intended - * recipients. [ To allow for a multi-message copy, another pointer - * (next_msg_to_pull) has been added to keep track of the next message - * number needing to be copied (pulled). It chases after w_remote_GP.put. - * Any messages lying between w_local_GP.get and next_msg_to_pull have - * been copied and are ready to be delivered. ] - * - * E - Messages that have been copied and delivered, but have not yet been - * acknowledged by the recipient as having been received. - * - * F - Messages that have been acknowledged, but XPC has not yet notified the - * sender that the message was received by its intended recipient. - * This is also an area that needs to be prepared for the allocating of - * new messages, by the clearing of the message flags of the acknowledged - * messages. - */ -struct xpc_channel { - partid_t partid; /* ID of remote partition connected */ - spinlock_t lock; /* lock for updating this structure */ - u32 flags; /* general flags */ - - enum xpc_retval reason; /* reason why channel is disconnect'g */ - int reason_line; /* line# disconnect initiated from */ - - u16 number; /* channel # */ - - u16 msg_size; /* sizeof each msg entry */ - u16 local_nentries; /* #of msg entries in local msg queue */ - u16 remote_nentries; /* #of msg entries in remote msg queue*/ - - void *local_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *local_msgqueue; /* local message queue */ - void *remote_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */ - /* local message queue */ - u64 remote_msgqueue_pa; /* phys addr of remote partition's */ - /* local message queue */ - - atomic_t references; /* #of external references to queues */ - - atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ - wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ - - u8 delayed_IPI_flags; /* IPI flags received, but delayed */ - /* action until channel disconnected */ - - /* queue of msg senders who want to be notified when msg received */ - - atomic_t n_to_notify; /* #of msg senders to notify */ - struct xpc_notify *notify_queue;/* notify queue for messages sent */ - - xpc_channel_func func; /* user's channel function */ - void *key; /* pointer to user's key */ - - struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ - struct semaphore wdisconnect_sema; /* wait for channel disconnect */ - - struct xpc_openclose_args *local_openclose_args; /* args passed on */ - /* opening or closing of channel */ - - /* various flavors of local and remote Get/Put values */ - - struct xpc_gp *local_GP; /* local Get/Put values */ - struct xpc_gp remote_GP; /* remote Get/Put values */ - struct xpc_gp w_local_GP; /* working local Get/Put values */ - struct xpc_gp w_remote_GP; /* working remote Get/Put values */ - s64 next_msg_to_pull; /* Put value of next msg to pull */ - - /* kthread management related fields */ - -// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps -// >>> allow the assigned limit be unbounded and let the idle limit be dynamic -// 
>>> dependent on activity over the last interval of time - atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ - u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ - atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ - u32 kthreads_idle_limit; /* limit on #of kthreads idle */ - atomic_t kthreads_active; /* #of kthreads actively working */ - // >>> following field is temporary - u32 kthreads_created; /* total #of kthreads created */ - - wait_queue_head_t idle_wq; /* idle kthread wait queue */ - -} ____cacheline_aligned; - - -/* struct xpc_channel flags */ - -#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ - -#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ -#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ -#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ -#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ - -#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ -#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ - -#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ - -#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ -#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ - - - -/* - * Manages channels on a partition basis. There is one of these structures - * for each partition (a partition will never utilize the structure that - * represents itself). 
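/*
 * Hedged sketch of how the XPC_C_* flag bits above combine; in the driver
 * proper such tests are made while holding ch->lock (see, for instance, the
 * XPC_C_DISCONNECTING check in xpc_connect_channel() later in this patch).
 * The helper below is illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define C_CONNECTED	0x00000080
#define C_DISCONNECTING	0x00004000

static int channel_usable(uint32_t flags)
{
	/* usable only once connected and not already tearing down */
	return (flags & C_CONNECTED) && !(flags & C_DISCONNECTING);
}

int main(void)
{
	printf("%d\n", channel_usable(C_CONNECTED));			/* 1 */
	printf("%d\n", channel_usable(C_CONNECTED | C_DISCONNECTING));	/* 0 */
	return 0;
}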
- */ -struct xpc_partition { - - /* XPC HB infrastructure */ - - u8 remote_rp_version; /* version# of partition's rsvd pg */ - struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */ - u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ - u64 remote_vars_pa; /* phys addr of partition's vars */ - u64 remote_vars_part_pa; /* phys addr of partition's vars part */ - u64 last_heartbeat; /* HB at last read */ - u64 remote_amos_page_pa; /* phys addr of partition's amos page */ - int remote_act_nasid; /* active part's act/deact nasid */ - int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */ - u32 act_IRQ_rcvd; /* IRQs since activation */ - spinlock_t act_lock; /* protect updating of act_state */ - u8 act_state; /* from XPC HB viewpoint */ - u8 remote_vars_version; /* version# of partition's vars */ - enum xpc_retval reason; /* reason partition is deactivating */ - int reason_line; /* line# deactivation initiated from */ - int reactivate_nasid; /* nasid in partition to reactivate */ - - unsigned long disengage_request_timeout; /* timeout in jiffies */ - struct timer_list disengage_request_timer; - - - /* XPC infrastructure referencing and teardown control */ - - volatile u8 setup_state; /* infrastructure setup state */ - wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ - atomic_t references; /* #of references to infrastructure */ - - - /* - * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN - * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION - * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE - * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.) - */ - - - u8 nchannels; /* #of defined channels supported */ - atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ - atomic_t nchannels_engaged;/* #of channels engaged with remote part */ - struct xpc_channel *channels;/* array of channel structures */ - - void *local_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *local_GPs; /* local Get/Put values */ - void *remote_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */ - /* values */ - u64 remote_GPs_pa; /* phys address of remote partition's local */ - /* Get/Put values */ - - - /* fields used to pass args when opening or closing a channel */ - - void *local_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *local_openclose_args; /* local's args */ - void *remote_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ - /* args */ - u64 remote_openclose_args_pa; /* phys addr of remote's args */ - - - /* IPI sending, receiving and handling related fields */ - - int remote_IPI_nasid; /* nasid of where to send IPIs */ - int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */ - AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */ - - AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */ - u64 local_IPI_amo; /* IPI amo flags yet to be handled */ - char IPI_owner[8]; /* IPI owner's name */ - struct timer_list dropped_IPI_timer; /* dropped IPI timer */ - - spinlock_t IPI_lock; /* IPI handler lock */ - - - /* channel manager related fields */ - - atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ - wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ - -} ____cacheline_aligned; - - -/* struct 
xpc_partition act_state values (for XPC HB) */ - -#define XPC_P_INACTIVE 0x00 /* partition is not active */ -#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ -#define XPC_P_ACTIVATING 0x02 /* activation thread started */ -#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ -#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ - - -#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ - xpc_deactivate_partition(__LINE__, (_p), (_reason)) - - -/* struct xpc_partition setup_state values */ - -#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ -#define XPC_P_SETUP 0x01 /* infrastructure is setup */ -#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ -#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ - - - -/* - * struct xpc_partition IPI_timer #of seconds to wait before checking for - * dropped IPIs. These occur whenever an IPI amo write doesn't complete until - * after the IPI was received. - */ -#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) - - -/* number of seconds to wait for other partitions to disengage */ -#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 - -/* interval in seconds to print 'waiting disengagement' messages */ -#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 - - -#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) - - - -/* found in xp_main.c */ -extern struct xpc_registration xpc_registrations[]; - - -/* found in xpc_main.c */ -extern struct device *xpc_part; -extern struct device *xpc_chan; -extern int xpc_disengage_request_timelimit; -extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); -extern void xpc_dropped_IPI_check(struct xpc_partition *); -extern void xpc_activate_partition(struct xpc_partition *); -extern void xpc_activate_kthreads(struct xpc_channel *, int); -extern void xpc_create_kthreads(struct xpc_channel *, int); -extern void xpc_disconnect_wait(int); - - -/* found in xpc_partition.c */ -extern int xpc_exiting; -extern struct xpc_vars *xpc_vars; -extern struct xpc_rsvd_page *xpc_rsvd_page; -extern struct xpc_vars_part *xpc_vars_part; -extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; -extern char xpc_remote_copy_buffer[]; -extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); -extern void xpc_allow_IPI_ops(void); -extern void xpc_restrict_IPI_ops(void); -extern int xpc_identify_act_IRQ_sender(void); -extern int xpc_partition_disengaged(struct xpc_partition *); -extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); -extern void xpc_mark_partition_inactive(struct xpc_partition *); -extern void xpc_discovery(void); -extern void xpc_check_remote_hb(void); -extern void xpc_deactivate_partition(const int, struct xpc_partition *, - enum xpc_retval); -extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *); - - -/* found in xpc_channel.c */ -extern void xpc_initiate_connect(int); -extern void xpc_initiate_disconnect(int); -extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **); -extern enum xpc_retval xpc_initiate_send(partid_t, int, void *); -extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *, - xpc_notify_func, void *); -extern void xpc_initiate_received(partid_t, int, void *); -extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *); -extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *); -extern void xpc_process_channel_activity(struct xpc_partition *); -extern void xpc_connected_callout(struct xpc_channel *); -extern void 
xpc_deliver_msg(struct xpc_channel *); -extern void xpc_disconnect_channel(const int, struct xpc_channel *, - enum xpc_retval, unsigned long *); -extern void xpc_disconnecting_callout(struct xpc_channel *); -extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval); -extern void xpc_teardown_infrastructure(struct xpc_partition *); - - - -static inline void -xpc_wakeup_channel_mgr(struct xpc_partition *part) -{ - if (atomic_inc_return(&part->channel_mgr_requests) == 1) { - wake_up(&part->channel_mgr_wq); - } -} - - - -/* - * These next two inlines are used to keep us from tearing down a channel's - * msg queues while a thread may be referencing them. - */ -static inline void -xpc_msgqueue_ref(struct xpc_channel *ch) -{ - atomic_inc(&ch->references); -} - -static inline void -xpc_msgqueue_deref(struct xpc_channel *ch) -{ - s32 refs = atomic_dec_return(&ch->references); - - DBUG_ON(refs < 0); - if (refs == 0) { - xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); - } -} - - - -#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ - xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) - - -/* - * These two inlines are used to keep us from tearing down a partition's - * setup infrastructure while a thread may be referencing it. - */ -static inline void -xpc_part_deref(struct xpc_partition *part) -{ - s32 refs = atomic_dec_return(&part->references); - - - DBUG_ON(refs < 0); - if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) { - wake_up(&part->teardown_wq); - } -} - -static inline int -xpc_part_ref(struct xpc_partition *part) -{ - int setup; - - - atomic_inc(&part->references); - setup = (part->setup_state == XPC_P_SETUP); - if (!setup) { - xpc_part_deref(part); - } - return setup; -} - - - -/* - * The following macro is to be used for the setting of the reason and - * reason_line fields in both the struct xpc_channel and struct xpc_partition - * structures. - */ -#define XPC_SET_REASON(_p, _reason, _line) \ - { \ - (_p)->reason = _reason; \ - (_p)->reason_line = _line; \ - } - - - -/* - * This next set of inlines are used to keep track of when a partition is - * potentially engaged in accessing memory belonging to another partition. - */ - -static inline void -xpc_mark_partition_engaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. 
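/*
 * A minimal userspace rendering (illustration, not kernel code) of the
 * xpc_part_ref()/xpc_part_deref() pattern above: bump the count, then
 * re-check setup_state so a racing teardown is never pinned open.
 */
#include <stdio.h>

#define P_SETUP		0x01
#define P_WTEARDOWN	0x02

struct part { int references; int setup_state; };	/* atomics in the real code */

static int part_ref(struct part *p)
{
	p->references++;			/* atomic_inc() */
	if (p->setup_state != P_SETUP) {
		p->references--;		/* xpc_part_deref() */
		return 0;			/* lost the race with teardown */
	}
	return 1;
}

int main(void)
{
	struct part p = { 0, P_SETUP };

	if (part_ref(&p)) {
		puts("infrastructure pinned; safe to touch channels/GPs");
		p.references--;			/* deref when done */
	}
	return 0;
}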
- */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_mark_partition_disengaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_request_partition_disengage(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_cancel_partition_disengage_request(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. 
- */ - (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline u64 -xpc_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline u64 -xpc_partition_disengage_requested(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline void -xpc_clear_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~partid_mask); -} - -static inline void -xpc_clear_partition_disengage_request(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND, - ~partid_mask); -} - - - -/* - * The following set of macros and inlines are used for the sending and - * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, - * one that is associated with partition activity (SGI_XPC_ACTIVATE) and - * the other that is associated with channel activity (SGI_XPC_NOTIFY). - */ - -static inline u64 -xpc_IPI_receive(AMO_t *amo) -{ - return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR); -} - - -static inline enum xpc_retval -xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector) -{ - int ret = 0; - unsigned long irq_flags; - - - local_irq_save(irq_flags); - - FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag); - sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - return ((ret == 0) ? xpcSuccess : xpcPioReadError); -} - - -/* - * IPIs associated with SGI_XPC_ACTIVATE IRQ. - */ - -/* - * Flag the appropriate AMO variable and send an IPI to the specified node. 
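/*
 * Sketch of consuming the bitmask that xpc_partition_engaged() returns:
 * pass -1UL to ask about every partition, then walk the set bits by partid,
 * as the xpc_do_exit()/xpc_die_disengage() loops do later in this patch.
 * The sample mask is made up.
 */
#include <stdio.h>
#include <stdint.h>

#define XP_MAX_PARTITIONS 64

int main(void)
{
	uint64_t engaged = 0x14;	/* pretend partitions 2 and 4 are engaged */
	int partid;

	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
		if (engaged & (1ULL << partid))
			printf("partition %d still engaged\n", partid);
	}
	return 0;
}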
- */ -static inline void -xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid, - int to_phys_cpuid) -{ - int w_index = XPC_NASID_W_INDEX(from_nasid); - int b_index = XPC_NASID_B_INDEX(from_nasid); - AMO_t *amos = (AMO_t *) __va(amos_page_pa + - (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t))); - - - (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, - to_phys_cpuid, SGI_XPC_ACTIVATE); -} - -static inline void -xpc_IPI_send_activate(struct xpc_vars *vars) -{ - xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0), - vars->act_nasid, vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_activated(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, part->remote_act_phys_cpuid); -} - -static inline void -xpc_IPI_send_reactivate(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid, - xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_disengage(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, part->remote_act_phys_cpuid); -} - - -/* - * IPIs associated with SGI_XPC_NOTIFY IRQ. - */ - -/* - * Send an IPI to the remote partition that is associated with the - * specified channel. - */ -#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \ - xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f) - -static inline void -xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string, - unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - enum xpc_retval ret; - - - if (likely(part->act_state != XPC_P_DEACTIVATING)) { - ret = xpc_IPI_send(part->remote_IPI_amo_va, - (u64) ipi_flag << (ch->number * 8), - part->remote_IPI_nasid, - part->remote_IPI_phys_cpuid, - SGI_XPC_NOTIFY); - dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", - ipi_flag_string, ch->partid, ch->number, ret); - if (unlikely(ret != xpcSuccess)) { - if (irq_flags != NULL) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - } - XPC_DEACTIVATE_PARTITION(part, ret); - if (irq_flags != NULL) { - spin_lock_irqsave(&ch->lock, *irq_flags); - } - } - } -} - - -/* - * Make it look like the remote partition, which is associated with the - * specified channel, sent us an IPI. This faked IPI will be handled - * by xpc_dropped_IPI_check(). - */ -#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \ - xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f) - -static inline void -xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, - char *ipi_flag_string) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - - - FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable), - FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8))); - dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", - ipi_flag_string, ch->partid, ch->number); -} - - -/* - * The sending and receiving of IPIs includes the setting of an AMO variable - * to indicate the reason the IPI was sent. The 64-bit variable is divided - * up into eight bytes, ordered from right to left. Byte zero pertains to - * channel 0, byte one to channel 1, and so on. Each byte is described by - * the following IPI flags. 
- */ - -#define XPC_IPI_CLOSEREQUEST 0x01 -#define XPC_IPI_CLOSEREPLY 0x02 -#define XPC_IPI_OPENREQUEST 0x04 -#define XPC_IPI_OPENREPLY 0x08 -#define XPC_IPI_MSGREQUEST 0x10 - - -/* given an AMO variable and a channel#, get its associated IPI flags */ -#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) -#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8)) - -#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f) -#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) - - -static inline void -xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->reason = ch->reason; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->msg_size = ch->msg_size; - args->local_nentries = ch->local_nentries; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - - args->remote_nentries = ch->remote_nentries; - args->local_nentries = ch->local_nentries; - args->local_msgqueue_pa = __pa(ch->local_msgqueue); - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL); -} - -static inline void -xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST); -} - - -/* - * Memory for XPC's AMO variables is allocated by the MSPEC driver. These - * pages are located in the lowest granule. The lowest granule uses 4k pages - * for cached references and an alternate TLB handler to never provide a - * cacheable mapping for the entire region. This will prevent speculative - * reading of cached copies of our lines from being issued which will cause - * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 - * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an - * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition - * activation and 2 AMO variables for partition deactivation. 
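/*
 * Runnable sketch of the byte-per-channel AMO encoding described above:
 * channel N's flags live in byte N of the 64-bit AMO value, which is why
 * the 0x0f0f... and 0x1010... masks can test all eight channels at once.
 */
#include <stdio.h>
#include <stdint.h>

#define IPI_OPENREQUEST		0x04
#define GET_IPI_FLAGS(_amo, _c)		((uint8_t)(((_amo) >> ((_c) * 8)) & 0xff))
#define SET_IPI_FLAGS(_amo, _c, _f)	((_amo) |= ((uint64_t)(_f) << ((_c) * 8)))

int main(void)
{
	uint64_t amo = 0;

	SET_IPI_FLAGS(amo, 3, IPI_OPENREQUEST);		/* flag channel 3 */
	printf("amo=0x%016llx, channel 3 flags=0x%02x\n",
	       (unsigned long long)amo, GET_IPI_FLAGS(amo, 3));
	printf("any open/close flag set? %s\n",
	       (amo & 0x0f0f0f0f0f0f0f0fULL) ? "yes" : "no");
	return 0;
}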
- */
-static inline AMO_t *
-xpc_IPI_init(int index)
-{
-	AMO_t *amo = xpc_vars->amos_page + index;
-
-
-	(void) xpc_IPI_receive(amo);	/* clear AMO variable */
-	return amo;
-}
-
-
-
-static inline enum xpc_retval
-xpc_map_bte_errors(bte_result_t error)
-{
-	switch (error) {
-	case BTE_SUCCESS:	return xpcSuccess;
-	case BTEFAIL_DIR:	return xpcBteDirectoryError;
-	case BTEFAIL_POISON:	return xpcBtePoisonError;
-	case BTEFAIL_WERR:	return xpcBteWriteError;
-	case BTEFAIL_ACCESS:	return xpcBteAccessError;
-	case BTEFAIL_PWERR:	return xpcBtePWriteError;
-	case BTEFAIL_PRERR:	return xpcBtePReadError;
-	case BTEFAIL_TOUT:	return xpcBteTimeOutError;
-	case BTEFAIL_XTERR:	return xpcBteXtalkError;
-	case BTEFAIL_NOTAVAIL:	return xpcBteNotAvailable;
-	default:		return xpcBteUnmappedError;
-	}
-}
-
-
-
-static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kmalloc will give us cacheline aligned memory by default */
-	*base = kmalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kmalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static inline void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
-	u64 IPI_amo;
-	unsigned long irq_flags;
-
-
-	IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
-	if (IPI_amo == 0) {
-		return;
-	}
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	part->local_IPI_amo |= IPI_amo;
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
-		XPC_PARTID(part), IPI_amo);
-
-	xpc_wakeup_channel_mgr(part);
-}
-
-
-#endif /* _IA64_SN_KERNEL_XPC_H */
-
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index abf4fc2..cdf6856 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -3,7 +3,7 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
@@ -22,9 +22,11 @@
 #include <linux/cache.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
@@ -56,8 +58,8 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
		atomic_set(&ch->n_to_notify, 0);
		spin_lock_init(&ch->lock);
-		sema_init(&ch->msg_to_pull_sema, 1);	/* mutex */
-		sema_init(&ch->wdisconnect_sema, 0);	/* event wait */
+		mutex_init(&ch->msg_to_pull_mutex);
+		init_completion(&ch->wdisconnect_wait);
		atomic_set(&ch->n_on_msg_allocate_wq, 0);
		init_waitqueue_head(&ch->msg_allocate_wq);
@@ -445,7 +447,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
		nbytes = nentries * ch->msg_size;
		ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-					(GFP_KERNEL | GFP_DMA),
+					GFP_KERNEL,
					&ch->local_msgqueue_base);
		if (ch->local_msgqueue == NULL) {
			continue;
@@ -453,7 +455,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
		memset(ch->local_msgqueue, 0, nbytes);
		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA));
+		ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
		if (ch->notify_queue == NULL) {
			kfree(ch->local_msgqueue_base);
			ch->local_msgqueue = NULL;
@@ -500,7 +502,7 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
		nbytes = nentries * ch->msg_size;
		ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-					(GFP_KERNEL | GFP_DMA),
+					GFP_KERNEL,
					&ch->remote_msgqueue_base);
		if (ch->remote_msgqueue == NULL) {
			continue;
@@ -534,7 +536,6 @@ static enum xpc_retval
 xpc_allocate_msgqueues(struct xpc_channel *ch)
 {
	unsigned long irq_flags;
-	int i;
	enum xpc_retval ret;
@@ -552,11 +553,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
		return ret;
	}
-	for (i = 0; i < ch->local_nentries; i++) {
-		/* use a semaphore as an event wait queue */
-		sema_init(&ch->notify_queue[i].sema, 0);
-	}
-
	spin_lock_irqsave(&ch->lock, irq_flags);
	ch->flags |= XPC_C_SETUP;
	spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -742,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	/* make sure all activity has settled down first */
-	if (atomic_read(&ch->references) > 0) {
+	if (atomic_read(&ch->references) > 0 ||
+	    ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+	    !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
		return;
	}
	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
@@ -779,6 +777,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	/* both sides are disconnected now */
+	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		xpc_disconnect_callout(ch, xpcDisconnected);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+
	/* it's now safe to free the channel's message queues */
	xpc_free_msgqueues(ch);
@@ -793,10 +797,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
	}
	if (ch->flags & XPC_C_WDISCONNECT) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		up(&ch->wdisconnect_sema);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-
+		/* we won't lose the CPU since we're holding ch->lock */
+		complete(&ch->wdisconnect_wait);
	} else if (ch->delayed_IPI_flags) {
		if (part->act_state != XPC_P_DEACTIVATING) {
			/* time to take action on any delayed IPI flags */
@@ -1086,12 +1088,12 @@ xpc_connect_channel(struct xpc_channel *ch)
	struct xpc_registration *registration =
					&xpc_registrations[ch->number];
-	if (down_trylock(&registration->sema) != 0) {
+	if (mutex_trylock(&registration->mutex) == 0) {
		return xpcRetry;
	}
	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		return xpcUnregistered;
	}
@@ -1102,7 +1104,7 @@ xpc_connect_channel(struct xpc_channel *ch)
	if (ch->flags & XPC_C_DISCONNECTING) {
		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		return ch->reason;
	}
@@ -1134,7 +1136,7 @@ xpc_connect_channel(struct xpc_channel *ch)
		 * channel lock be locked and will unlock and relock
		 * the channel lock as needed.
		 */
-		up(&registration->sema);
+		mutex_unlock(&registration->mutex);
		XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, &irq_flags);
		spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1149,7 +1151,7 @@ xpc_connect_channel(struct xpc_channel *ch)
		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
	}
-	up(&registration->sema);
+	mutex_unlock(&registration->mutex);
	/* initiate the connection */
@@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
				"delivered=%d, partid=%d, channel=%d\n",
				nmsgs_sent, ch->partid, ch->number);
-			if (ch->flags & XPC_C_CONNECTCALLOUT) {
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
				xpc_activate_kthreads(ch, nmsgs_sent);
			}
		}
@@ -1645,7 +1647,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 void
-xpc_disconnecting_callout(struct xpc_channel *ch)
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
 {
	/*
	 * Let the channel's registerer know that the channel is being
	 * disconnected.
	 */
	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
-			" partid=%d, channel=%d\n", ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
-		ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
-								ch->key);
+		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
-		dev_dbg(xpc_chan, "ch->func() returned, reason="
-			"xpcDisconnecting, partid=%d, channel=%d\n",
-			ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
	}
 }
@@ -2085,7 +2085,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
	enum xpc_retval ret;
-	if (down_interruptible(&ch->msg_to_pull_sema) != 0) {
+	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
		/* we were interrupted by a signal */
		return NULL;
	}
@@ -2121,7 +2121,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
		XPC_DEACTIVATE_PARTITION(part, ret);
-		up(&ch->msg_to_pull_sema);
+		mutex_unlock(&ch->msg_to_pull_mutex);
		return NULL;
	}
@@ -2130,7 +2130,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
		ch->next_msg_to_pull += nmsgs;
	}
-	up(&ch->msg_to_pull_sema);
+	mutex_unlock(&ch->msg_to_pull_mutex);
	/* return the message we were looking for */
	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index b617236..8cbf164 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -3,7 +3,7 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/ @@ -55,11 +55,12 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/completion.h> #include <asm/sn/intr.h> #include <asm/sn/sn_sal.h> #include <asm/kdebug.h> #include <asm/uaccess.h> -#include "xpc.h" +#include <asm/sn/xpc.h> /* define two XPC debug device structures to be used with dev_dbg() et al */ @@ -82,6 +83,9 @@ struct device *xpc_part = &xpc_part_dbg_subname; struct device *xpc_chan = &xpc_chan_dbg_subname; +static int xpc_kdebug_ignore; + + /* systune related variables for /proc/sys directories */ static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; @@ -162,6 +166,8 @@ static ctl_table xpc_sys_dir[] = { }; static struct ctl_table_header *xpc_sysctl; +/* non-zero if any remote partition disengage request was timed out */ +int xpc_disengage_request_timedout; /* #of IRQs received */ static atomic_t xpc_act_IRQ_rcvd; @@ -172,10 +178,10 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); static unsigned long xpc_hb_check_timeout; /* notification that the xpc_hb_checker thread has exited */ -static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); +static DECLARE_COMPLETION(xpc_hb_checker_exited); /* notification that the xpc_discovery thread has exited */ -static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); +static DECLARE_COMPLETION(xpc_discovery_exited); static struct timer_list xpc_hb_timer; @@ -316,7 +322,7 @@ xpc_hb_checker(void *ignore) /* mark this thread as having exited */ - up(&xpc_hb_checker_exited); + complete(&xpc_hb_checker_exited); return 0; } @@ -336,7 +342,7 @@ xpc_initiate_discovery(void *ignore) dev_dbg(xpc_part, "discovery thread is exiting\n"); /* mark this thread as having exited */ - up(&xpc_discovery_exited); + complete(&xpc_discovery_exited); return 0; } @@ -569,18 +575,21 @@ xpc_activate_partition(struct xpc_partition *part) spin_lock_irqsave(&part->act_lock, irq_flags); - pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); - DBUG_ON(part->act_state != XPC_P_INACTIVE); - if (pid > 0) { - part->act_state = XPC_P_ACTIVATION_REQ; - XPC_SET_REASON(part, xpcCloneKThread, __LINE__); - } else { - XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); - } + part->act_state = XPC_P_ACTIVATION_REQ; + XPC_SET_REASON(part, xpcCloneKThread, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); + + pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); + + if (unlikely(pid <= 0)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } } @@ -741,12 +750,16 @@ xpc_daemonize_kthread(void *args) /* let registerer know that connection has been established */ spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { - ch->flags |= XPC_C_CONNECTCALLOUT; + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_connected_callout(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + /* * It is possible that while the callout was being * made that the remote partition sent some messages. 
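/*
 * Worth noting for the semaphore-to-mutex conversions throughout this
 * patch: the trylock success convention inverts.  down_trylock() returns
 * 0 when the semaphore WAS taken, while mutex_trylock() returns 1 on
 * success -- hence "!= 0" became "== 0" in xpc_connect_channel().  The
 * stubs below only mimic those return conventions; they are not the
 * kernel primitives.
 */
#include <stdio.h>

static int down_trylock_stub(int *locked)	/* 0 == acquired */
{
	if (*locked) return 1;
	*locked = 1;
	return 0;
}

static int mutex_trylock_stub(int *locked)	/* 1 == acquired */
{
	if (*locked) return 0;
	*locked = 1;
	return 1;
}

int main(void)
{
	int sem = 0, mtx = 0;

	if (down_trylock_stub(&sem) != 0)	/* old-style test */
		puts("semaphore contended");
	if (mutex_trylock_stub(&mtx) == 0)	/* new-style test */
		puts("mutex contended");
	puts("both acquired");
	return 0;
}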
@@ -768,15 +781,17 @@ xpc_daemonize_kthread(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0) { spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTCALLOUT) && - !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTCALLOUT; + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); - xpc_disconnecting_callout(ch); - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_disconnect_callout(ch, xpcDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; } + spin_unlock_irqrestore(&ch->lock, irq_flags); if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); @@ -888,7 +903,7 @@ xpc_disconnect_wait(int ch_number) continue; } - (void) down(&ch->wdisconnect_sema); + wait_for_completion(&ch->wdisconnect_wait); spin_lock_irqsave(&ch->lock, irq_flags); DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); @@ -921,9 +936,9 @@ static void xpc_do_exit(enum xpc_retval reason) { partid_t partid; - int active_part_count; + int active_part_count, printed_waiting_msg = 0; struct xpc_partition *part; - unsigned long printmsg_time; + unsigned long printmsg_time, disengage_request_timeout = 0; /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ @@ -941,10 +956,10 @@ xpc_do_exit(enum xpc_retval reason) free_irq(SGI_XPC_ACTIVATE, NULL); /* wait for the discovery thread to exit */ - down(&xpc_discovery_exited); + wait_for_completion(&xpc_discovery_exited); /* wait for the heartbeat checker thread to exit */ - down(&xpc_hb_checker_exited); + wait_for_completion(&xpc_hb_checker_exited); /* sleep for a 1/3 of a second or so */ @@ -953,7 +968,8 @@ xpc_do_exit(enum xpc_retval reason) /* wait for all partitions to become inactive */ - printmsg_time = jiffies; + printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_request_timedout = 0; do { active_part_count = 0; @@ -969,20 +985,39 @@ xpc_do_exit(enum xpc_retval reason) active_part_count++; XPC_DEACTIVATE_PARTITION(part, reason); - } - if (active_part_count == 0) { - break; + if (part->disengage_request_timeout > + disengage_request_timeout) { + disengage_request_timeout = + part->disengage_request_timeout; + } } - if (jiffies >= printmsg_time) { - dev_info(xpc_part, "waiting for partitions to " - "deactivate/disengage, active count=%d, remote " - "engaged=0x%lx\n", active_part_count, - xpc_partition_engaged(1UL << partid)); - - printmsg_time = jiffies + + if (xpc_partition_engaged(-1UL)) { + if (time_after(jiffies, printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to disengage, timeout in " + "%ld seconds\n", + (disengage_request_timeout - jiffies) + / HZ); + printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to disengage\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_request_timedout) { + dev_info(xpc_part, "all partitions have " + "disengaged\n"); + } + break; } /* sleep for a 1/3 of a second or so */ @@ -1000,11 +1035,13 @@ xpc_do_exit(enum xpc_retval reason) del_timer_sync(&xpc_hb_timer); DBUG_ON(xpc_vars->heartbeating_to_mask != 0); - /* take ourselves off of the reboot_notifier_list */ - (void) 
unregister_reboot_notifier(&xpc_reboot_notifier); + if (reason == xpcUnloading) { + /* take ourselves off of the reboot_notifier_list */ + (void) unregister_reboot_notifier(&xpc_reboot_notifier); - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); + /* take ourselves off of the die_notifier list */ + (void) unregister_die_notifier(&xpc_die_notifier); + } /* close down protections for IPI operations */ xpc_restrict_IPI_ops(); @@ -1020,7 +1057,35 @@ xpc_do_exit(enum xpc_retval reason) /* - * Called when the system is about to be either restarted or halted. + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xpc_retval reason; + + + switch (event) { + case SYS_RESTART: + reason = xpcSystemReboot; + break; + case SYS_HALT: + reason = xpcSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpcSystemPoweroff; + break; + default: + reason = xpcSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + + +/* + * Notify other partitions to disengage from all references to our memory. */ static void xpc_die_disengage(void) @@ -1028,7 +1093,7 @@ xpc_die_disengage(void) struct xpc_partition *part; partid_t partid; unsigned long engaged; - long time, print_time, disengage_request_timeout; + long time, printmsg_time, disengage_request_timeout; /* keep xpc_hb_checker thread from doing anything (just in case) */ @@ -1055,57 +1120,53 @@ xpc_die_disengage(void) } } - print_time = rtc_time(); - disengage_request_timeout = print_time + + time = rtc_time(); + printmsg_time = time + + (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); + disengage_request_timeout = time + (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); /* wait for all other partitions to disengage from us */ - while ((engaged = xpc_partition_engaged(-1UL)) && - (time = rtc_time()) < disengage_request_timeout) { + while (1) { + engaged = xpc_partition_engaged(-1UL); + if (!engaged) { + dev_info(xpc_part, "all partitions have disengaged\n"); + break; + } - if (time >= print_time) { + time = rtc_time(); + if (time >= disengage_request_timeout) { + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + if (engaged & (1UL << partid)) { + dev_info(xpc_part, "disengage from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (time >= printmsg_time) { dev_info(xpc_part, "waiting for remote partitions to " - "disengage, engaged=0x%lx\n", engaged); - print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL * + "disengage, timeout in %ld seconds\n", + (disengage_request_timeout - time) / + sn_rtc_cycles_per_second); + printmsg_time = time + + (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); } } - dev_info(xpc_part, "finished waiting for remote partitions to " - "disengage, engaged=0x%lx\n", engaged); } /* - * This function is called when the system is being rebooted. - */ -static int -xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) -{ - enum xpc_retval reason; - - - switch (event) { - case SYS_RESTART: - reason = xpcSystemReboot; - break; - case SYS_HALT: - reason = xpcSystemHalt; - break; - case SYS_POWER_OFF: - reason = xpcSystemPoweroff; - break; - default: - reason = xpcSystemGoingDown; - } - - xpc_do_exit(reason); - return NOTIFY_DONE; -} - - -/* - * This function is called when the system is being rebooted. 
+ * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. */ static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) @@ -1115,11 +1176,25 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) case DIE_MACHINE_HALT: xpc_die_disengage(); break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) { + break; + } + /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: xpc_vars->heartbeat++; xpc_vars->heartbeat_offline = 1; break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? */ + if (!xpc_kdebug_ignore) { + break; + } + /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: xpc_vars->heartbeat++; @@ -1302,7 +1377,7 @@ xpc_init(void) dev_err(xpc_part, "failed while forking discovery thread\n"); /* mark this new thread as a non-starter */ - up(&xpc_discovery_exited); + complete(&xpc_discovery_exited); xpc_do_exit(xpcUnloading); return -EBUSY; @@ -1344,3 +1419,7 @@ module_param(xpc_disengage_request_timelimit, int, 0); MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " "for disengage request to complete."); +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); + diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index cdd6431..88a730e 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. */ @@ -28,7 +28,7 @@ #include <asm/sn/sn_sal.h> #include <asm/sn/nodepda.h> #include <asm/sn/addrs.h> -#include "xpc.h" +#include <asm/sn/xpc.h> /* XPC is exiting flag */ @@ -771,7 +771,8 @@ xpc_identify_act_IRQ_req(int nasid) } } - if (!xpc_partition_disengaged(part)) { + if (part->disengage_request_timeout > 0 && + !xpc_partition_disengaged(part)) { /* still waiting on other side to disengage from us */ return; } @@ -873,6 +874,9 @@ xpc_partition_disengaged(struct xpc_partition *part) * request in a timely fashion, so assume it's dead. */ + dev_info(xpc_part, "disengage from remote partition %d " + "timed out\n", partid); + xpc_disengage_request_timedout = 1; xpc_clear_partition_engaged(1UL << partid); disengaged = 1; } |
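/*
 * A final aside on the exit-path change earlier in this patch: the wait
 * loop switched from a raw "jiffies >= printmsg_time" comparison to
 * time_after(), which stays correct when the jiffies counter wraps because
 * it compares via signed subtraction.  A userspace rendering of the kernel
 * macro, with made-up counter values:
 */
#include <stdio.h>

#define time_after(_a, _b)	((long)((_b) - (_a)) < 0)

int main(void)
{
	unsigned long jiffies  = 5;			/* counter just wrapped */
	unsigned long deadline = (unsigned long)-10;	/* set shortly before the wrap */

	/* a plain ">=" would say the deadline is far away; time_after() is right */
	printf("expired=%d\n", time_after(jiffies, deadline));	/* prints 1 */
	return 0;
}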