diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/eeh.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 204 |
1 files changed, 118 insertions, 86 deletions
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 6cedbc0..48fbd44 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -74,7 +74,10 @@ * is broken and panic. This sets the threshold for how many read * attempts we allow before panicking. */ -#define EEH_MAX_FAILS 100000 +#define EEH_MAX_FAILS 2100000 + +/* Time to wait for a PCI slot to retport status, in milliseconds */ +#define PCI_BUS_RESET_WAIT_MSEC (60*1000) /* RTAS tokens */ static int ibm_set_eeh_option; @@ -83,6 +86,7 @@ static int ibm_read_slot_reset_state; static int ibm_read_slot_reset_state2; static int ibm_slot_error_detail; static int ibm_get_config_addr_info; +static int ibm_get_config_addr_info2; static int ibm_configure_bridge; int eeh_subsystem_enabled; @@ -168,6 +172,55 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) } /** + * eeh_wait_for_slot_status - returns error status of slot + * @pdn pci device node + * @max_wait_msecs maximum number to millisecs to wait + * + * Return negative value if a permanent error, else return + * Partition Endpoint (PE) status value. + * + * If @max_wait_msecs is positive, then this routine will + * sleep until a valid status can be obtained, or until + * the max allowed wait time is exceeded, in which case + * a -2 is returned. + */ +int +eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) +{ + int rc; + int rets[3]; + int mwait; + + while (1) { + rc = read_slot_reset_state(pdn, rets); + if (rc) return rc; + if (rets[1] == 0) return -1; /* EEH is not supported */ + + if (rets[0] != 5) return rets[0]; /* return actual status */ + + if (rets[2] == 0) return -1; /* permanently unavailable */ + + if (max_wait_msecs <= 0) return -1; + + mwait = rets[2]; + if (mwait <= 0) { + printk (KERN_WARNING + "EEH: Firmware returned bad wait value=%d\n", mwait); + mwait = 1000; + } else if (mwait > 300*1000) { + printk (KERN_WARNING + "EEH: Firmware is taking too long, time=%d\n", mwait); + mwait = 300*1000; + } + max_wait_msecs -= mwait; + msleep (mwait); + } + + printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); + return -2; +} + +/** * eeh_token_to_phys - convert EEH address token to phys address * @token i/o token, should be address in the form 0xA.... */ @@ -229,7 +282,7 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag) dn = find_device_pe (dn); /* Back up one, since config addrs might be shared */ - if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) dn = dn->parent; PCI_DN(dn)->eeh_mode |= mode_flag; @@ -263,7 +316,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) dn = find_device_pe (dn); /* Back up one, since config addrs might be shared */ - if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) dn = dn->parent; PCI_DN(dn)->eeh_mode &= ~mode_flag; @@ -293,7 +346,6 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) int rets[3]; unsigned long flags; struct pci_dn *pdn; - enum pci_channel_state state; int rc = 0; total_mmio_ffs++; @@ -367,25 +419,25 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) goto dn_unlock; } - /* If EEH is not supported on this device, punt. */ - if (rets[1] != 1) { - printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", - ret, dn->full_name); + /* Note that config-io to empty slots may fail; + * they are empty when they don't have children. */ + if ((rets[0] == 5) && (dn->child == NULL)) { false_positives++; rc = 0; goto dn_unlock; } - /* If not the kind of error we know about, punt. */ - if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { + /* If EEH is not supported on this device, punt. */ + if (rets[1] != 1) { + printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", + ret, dn->full_name); false_positives++; rc = 0; goto dn_unlock; } - /* Note that config-io to empty slots may fail; - * we recognize empty because they don't have children. */ - if ((rets[0] == 5) && (dn->child == NULL)) { + /* If not the kind of error we know about, punt. */ + if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { false_positives++; rc = 0; goto dn_unlock; @@ -399,17 +451,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) eeh_mark_slot (dn, EEH_MODE_ISOLATED); spin_unlock_irqrestore(&confirm_error_lock, flags); - state = pci_channel_io_normal; - if ((rets[0] == 2) || (rets[0] == 4)) - state = pci_channel_io_frozen; - if (rets[0] == 5) - state = pci_channel_io_perm_failure; - eeh_send_failure_event (dn, dev, state, rets[2]); + eeh_send_failure_event (dn, dev); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - if (rets[0] != 5) dump_stack(); + dump_stack(); return 1; dn_unlock: @@ -458,38 +505,6 @@ EXPORT_SYMBOL(eeh_check_failure); /* The code below deals with error recovery */ /** - * eeh_slot_availability - returns error status of slot - * @pdn pci device node - * - * Return negative value if a permanent error, else return - * a number of milliseconds to wait until the PCI slot is - * ready to be used. - */ -static int -eeh_slot_availability(struct pci_dn *pdn) -{ - int rc; - int rets[3]; - - rc = read_slot_reset_state(pdn, rets); - - if (rc) return rc; - - if (rets[1] == 0) return -1; /* EEH is not supported */ - if (rets[0] == 0) return 0; /* Oll Korrect */ - if (rets[0] == 5) { - if (rets[2] == 0) return -1; /* permanently unavailable */ - return rets[2]; /* number of millisecs to wait */ - } - if (rets[0] == 1) - return 250; - - printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n", - rc, rets[0], rets[1], rets[2]); - return -2; -} - -/** * rtas_pci_enable - enable MMIO or DMA transfers for this slot * @pdn pci device node */ @@ -512,9 +527,13 @@ rtas_pci_enable(struct pci_dn *pdn, int function) function); if (rc) - printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n", + printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", function, rc, pdn->node->full_name); + rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC); + if ((rc == 4) && (function == EEH_THAW_MMIO)) + return 0; + return rc; } @@ -595,36 +614,24 @@ int rtas_set_slot_reset(struct pci_dn *pdn) { int i, rc; - __rtas_set_slot_reset(pdn); + /* Take three shots at resetting the bus */ + for (i=0; i<3; i++) { + __rtas_set_slot_reset(pdn); - /* Now double check with the firmware to make sure the device is - * ready to be used; if not, wait for recovery. */ - for (i=0; i<10; i++) { - rc = eeh_slot_availability (pdn); + rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); if (rc == 0) return 0; - if (rc == -2) { - printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", - i, pdn->node->full_name); - __rtas_set_slot_reset(pdn); - continue; - } - if (rc < 0) { printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", pdn->node->full_name); return -1; } - - msleep (rc+100); + printk (KERN_ERR "EEH: bus reset %d failed on slot %s\n", + i+1, pdn->node->full_name); } - rc = eeh_slot_availability (pdn); - if (rc) - printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name); - - return rc; + return -1; } /* ------------------------------------------------------- */ @@ -744,16 +751,48 @@ struct eeh_early_enable_info { unsigned int buid_lo; }; +static int get_pe_addr (int config_addr, + struct eeh_early_enable_info *info) +{ + unsigned int rets[3]; + int ret; + + /* Use latest config-addr token on power6 */ + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* Make sure we have a PE in hand */ + ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + config_addr, info->buid_hi, info->buid_lo, 1); + if (ret || (rets[0]==0)) + return 0; + + ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + config_addr, info->buid_hi, info->buid_lo, 0); + if (ret) + return 0; + return rets[0]; + } + + /* Use older config-addr token on power5 */ + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, + config_addr, info->buid_hi, info->buid_lo, 0); + if (ret) + return 0; + return rets[0]; + } + return 0; +} + /* Enable eeh for the given device node. */ static void *early_enable_eeh(struct device_node *dn, void *data) { unsigned int rets[3]; struct eeh_early_enable_info *info = data; int ret; - const char *status = get_property(dn, "status", NULL); - const u32 *class_code = get_property(dn, "class-code", NULL); - const u32 *vendor_id = get_property(dn, "vendor-id", NULL); - const u32 *device_id = get_property(dn, "device-id", NULL); + const char *status = of_get_property(dn, "status", NULL); + const u32 *class_code = of_get_property(dn, "class-code", NULL); + const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); + const u32 *device_id = of_get_property(dn, "device-id", NULL); const u32 *regs; int enable; struct pci_dn *pdn = PCI_DN(dn); @@ -796,7 +835,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) /* Ok... see if this device supports EEH. Some do, some don't, * and the only way to find out is to check each and every one. */ - regs = get_property(dn, "reg", NULL); + regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ /* Try to enable eeh */ @@ -810,15 +849,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) /* If the newer, better, ibm,get-config-addr-info is supported, * then use that instead. */ - pdn->eeh_pe_config_addr = 0; - if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, - pdn->eeh_config_addr, - info->buid_hi, info->buid_lo, - 0); - if (ret == 0) - pdn->eeh_pe_config_addr = rets[0]; - } + pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes @@ -889,6 +920,7 @@ void __init eeh_init(void) ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) |