aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/edac/amd64_edac.c200
-rw-r--r--drivers/edac/amd76x_edac.c42
-rw-r--r--drivers/edac/cell_edac.c42
-rw-r--r--drivers/edac/cpc925_edac.c91
-rw-r--r--drivers/edac/e752x_edac.c116
-rw-r--r--drivers/edac/e7xxx_edac.c86
-rw-r--r--drivers/edac/edac_core.h47
-rw-r--r--drivers/edac/edac_device.c27
-rw-r--r--drivers/edac/edac_mc.c716
-rw-r--r--drivers/edac/edac_mc_sysfs.c70
-rw-r--r--drivers/edac/edac_module.h2
-rw-r--r--drivers/edac/edac_pci.c6
-rw-r--r--drivers/edac/i3000_edac.c49
-rw-r--r--drivers/edac/i3200_edac.c56
-rw-r--r--drivers/edac/i5000_edac.c236
-rw-r--r--drivers/edac/i5100_edac.c106
-rw-r--r--drivers/edac/i5400_edac.c265
-rw-r--r--drivers/edac/i7300_edac.c115
-rw-r--r--drivers/edac/i7core_edac.c270
-rw-r--r--drivers/edac/i82443bxgx_edac.c41
-rw-r--r--drivers/edac/i82860_edac.c55
-rw-r--r--drivers/edac/i82875p_edac.c51
-rw-r--r--drivers/edac/i82975x_edac.c58
-rw-r--r--drivers/edac/mpc85xx_edac.c37
-rw-r--r--drivers/edac/mv64x60_edac.c47
-rw-r--r--drivers/edac/pasemi_edac.c49
-rw-r--r--drivers/edac/ppc4xx_edac.c50
-rw-r--r--drivers/edac/r82600_edac.c40
-rw-r--r--drivers/edac/sb_edac.c212
-rw-r--r--drivers/edac/tile_edac.c33
-rw-r--r--drivers/edac/x38_edac.c52
-rw-r--r--include/linux/edac.h182
32 files changed, 1981 insertions, 1468 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 7ef73c9..7be9b72 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -715,25 +715,6 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
input_addr_to_dram_addr(mci, input_addr));
}
-/*
- * Find the minimum and maximum InputAddr values that map to the given @csrow.
- * Pass back these values in *input_addr_min and *input_addr_max.
- */
-static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
- u64 *input_addr_min, u64 *input_addr_max)
-{
- struct amd64_pvt *pvt;
- u64 base, mask;
-
- pvt = mci->pvt_info;
- BUG_ON((csrow < 0) || (csrow >= pvt->csels[0].b_cnt));
-
- get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
-
- *input_addr_min = base & ~mask;
- *input_addr_max = base | mask;
-}
-
/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
u32 *page, u32 *offset)
@@ -1058,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
int channel, csrow;
u32 page, offset;
+ error_address_to_page_and_offset(sys_addr, &page, &offset);
+
+ /*
+ * Find out which node the error address belongs to. This may be
+ * different from the node that detected the error.
+ */
+ src_mci = find_mc_by_sys_addr(mci, sys_addr);
+ if (!src_mci) {
+ amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
+ (unsigned long)sys_addr);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset, syndrome,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "failed to map error addr to a node",
+ NULL);
+ return;
+ }
+
+ /* Now map the sys_addr to a CSROW */
+ csrow = sys_addr_to_csrow(src_mci, sys_addr);
+ if (csrow < 0) {
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset, syndrome,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "failed to map error addr to a csrow",
+ NULL);
+ return;
+ }
+
/* CHIPKILL enabled */
if (pvt->nbcfg & NBCFG_CHIPKILL) {
channel = get_channel_from_ecc_syndrome(mci, syndrome);
@@ -1067,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect.
*/
- amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible "
- "error reporting race\n", syndrome);
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
+ "possible error reporting race\n",
+ syndrome);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset, syndrome,
+ csrow, -1, -1,
+ EDAC_MOD_STR,
+ "unknown syndrome - possible error reporting race",
+ NULL);
return;
}
} else {
@@ -1084,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
channel = ((sys_addr & BIT(3)) != 0);
}
- /*
- * Find out which node the error address belongs to. This may be
- * different from the node that detected the error.
- */
- src_mci = find_mc_by_sys_addr(mci, sys_addr);
- if (!src_mci) {
- amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
- (unsigned long)sys_addr);
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
- return;
- }
-
- /* Now map the sys_addr to a CSROW */
- csrow = sys_addr_to_csrow(src_mci, sys_addr);
- if (csrow < 0) {
- edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
- } else {
- error_address_to_page_and_offset(sys_addr, &page, &offset);
-
- edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
- channel, EDAC_MOD_STR);
- }
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
+ page, offset, syndrome,
+ csrow, channel, -1,
+ EDAC_MOD_STR, "", NULL);
}
static int ddr2_cs_size(unsigned i, bool dct_width)
@@ -1611,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
u32 page, offset;
int nid, csrow, chan = 0;
+ error_address_to_page_and_offset(sys_addr, &page, &offset);
+
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
if (csrow < 0) {
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset, syndrome,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "failed to map error addr to a csrow",
+ NULL);
return;
}
- error_address_to_page_and_offset(sys_addr, &page, &offset);
-
/*
* We need the syndromes for channel detection only when we're
* ganged. Otherwise @chan should already contain the channel at
@@ -1628,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
if (dct_ganging_enabled(pvt))
chan = get_channel_from_ecc_syndrome(mci, syndrome);
- if (chan >= 0)
- edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
- EDAC_MOD_STR);
- else
- /*
- * Channel unknown, report all channels on this CSROW as failed.
- */
- for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
- edac_mc_handle_ce(mci, page, offset, syndrome,
- csrow, chan, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset, syndrome,
+ csrow, chan, -1,
+ EDAC_MOD_STR, "", NULL);
}
/*
@@ -1918,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
/* Ensure that the Error Address is VALID */
if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ 0, 0, 0,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "HW has no ERROR_ADDRESS available",
+ NULL);
return;
}
@@ -1942,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ 0, 0, 0,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "HW has no ERROR_ADDRESS available",
+ NULL);
return;
}
sys_addr = get_error_address(m);
+ error_address_to_page_and_offset(sys_addr, &page, &offset);
/*
* Find out which node the error address belongs to. This may be
@@ -1956,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!src_mci) {
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
(unsigned long)sys_addr);
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, offset, 0,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "ERROR ADDRESS NOT mapped to a MC", NULL);
return;
}
@@ -1966,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (csrow < 0) {
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
(unsigned long)sys_addr);
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, offset, 0,
+ -1, -1, -1,
+ EDAC_MOD_STR,
+ "ERROR ADDRESS NOT mapped to CS",
+ NULL);
} else {
- error_address_to_page_and_offset(sys_addr, &page, &offset);
- edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, offset, 0,
+ csrow, -1, -1,
+ EDAC_MOD_STR, "", NULL);
}
}
@@ -2171,7 +2192,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
- debugf0(" nr_pages= %u channel-count = %d\n",
+ debugf0(" nr_pages/channel= %u channel-count = %d\n",
nr_pages, pvt->channel_count);
return nr_pages;
@@ -2185,9 +2206,12 @@ static int init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow;
struct amd64_pvt *pvt = mci->pvt_info;
- u64 input_addr_min, input_addr_max, sys_addr, base, mask;
+ u64 base, mask;
u32 val;
- int i, empty = 1;
+ int i, j, empty = 1;
+ enum mem_type mtype;
+ enum edac_type edac_mode;
+ int nr_pages = 0;
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
@@ -2211,41 +2235,32 @@ static int init_csrows(struct mem_ctl_info *mci)
empty = 0;
if (csrow_enabled(i, 0, pvt))
- csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
+ nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
if (csrow_enabled(i, 1, pvt))
- csrow->nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
- find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
- sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
- csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
- sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
- csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
+ nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
get_cs_base_and_mask(pvt, i, 0, &base, &mask);
- csrow->page_mask = ~mask;
/* 8 bytes of resolution */
- csrow->mtype = amd64_determine_memory_type(pvt, i);
+ mtype = amd64_determine_memory_type(pvt, i);
debugf1(" for MC node %d csrow %d:\n", pvt->mc_node_id, i);
- debugf1(" input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
- (unsigned long)input_addr_min,
- (unsigned long)input_addr_max);
- debugf1(" sys_addr: 0x%lx page_mask: 0x%lx\n",
- (unsigned long)sys_addr, csrow->page_mask);
- debugf1(" nr_pages: %u first_page: 0x%lx "
- "last_page: 0x%lx\n",
- (unsigned)csrow->nr_pages,
- csrow->first_page, csrow->last_page);
+ debugf1(" nr_pages: %u\n", nr_pages * pvt->channel_count);
/*
* determine whether CHIPKILL or JUST ECC or NO ECC is operating
*/
if (pvt->nbcfg & NBCFG_ECC_ENABLE)
- csrow->edac_mode =
- (pvt->nbcfg & NBCFG_CHIPKILL) ?
- EDAC_S4ECD4ED : EDAC_SECDED;
+ edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
+ EDAC_S4ECD4ED : EDAC_SECDED;
else
- csrow->edac_mode = EDAC_NONE;
+ edac_mode = EDAC_NONE;
+
+ for (j = 0; j < pvt->channel_count; j++) {
+ csrow->channels[j].dimm->mtype = mtype;
+ csrow->channels[j].dimm->edac_mode = edac_mode;
+ csrow->channels[j].dimm->nr_pages = nr_pages;
+ }
}
return empty;
@@ -2540,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct amd64_pvt *pvt = NULL;
struct amd64_family_type *fam_type = NULL;
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
int err = 0, ret;
u8 nid = get_node_id(F2);
@@ -2574,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2)
goto err_siblings;
ret = -ENOMEM;
- mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = pvt->channel_count;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
if (!mci)
goto err_siblings;
diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c
index f8fd3c8..9774d44 100644
--- a/drivers/edac/amd76x_edac.c
+++ b/drivers/edac/amd76x_edac.c
@@ -29,7 +29,6 @@
edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##arg)
#define AMD76X_NR_CSROWS 8
-#define AMD76X_NR_CHANS 1
#define AMD76X_NR_DIMMS 4
/* AMD 76x register addresses - device 0 function 0 - PCI bridge */
@@ -146,8 +145,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
if (handle_errors) {
row = (info->ecc_mode_status >> 4) & 0xf;
- edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0,
- row, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ mci->csrows[row].first_page, 0, 0,
+ row, 0, -1,
+ mci->ctl_name, "", NULL);
}
}
@@ -159,8 +160,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
if (handle_errors) {
row = info->ecc_mode_status & 0xf;
- edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0,
- 0, row, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ mci->csrows[row].first_page, 0, 0,
+ row, 0, -1,
+ mci->ctl_name, "", NULL);
}
}
@@ -186,11 +189,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 mba, mba_base, mba_mask, dms;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_dword(pdev,
@@ -203,13 +208,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
csrow->first_page = mba_base >> PAGE_SHIFT;
- csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+ dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
csrow->page_mask = mba_mask >> PAGE_SHIFT;
- csrow->grain = csrow->nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_RDDR;
- csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
- csrow->edac_mode = edac_mode;
+ dimm->grain = dimm->nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_RDDR;
+ dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
+ dimm->edac_mode = edac_mode;
}
}
@@ -230,7 +235,8 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
EDAC_SECDED,
EDAC_SECDED
};
- struct mem_ctl_info *mci = NULL;
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u32 ems;
u32 ems_mode;
struct amd76x_error_info discard;
@@ -238,11 +244,17 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
debugf0("%s()\n", __func__);
pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems);
ems_mode = (ems >> 10) & 0x3;
- mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0);
- if (mci == NULL) {
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = AMD76X_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
+
+ if (mci == NULL)
return -ENOMEM;
- }
debugf0("%s(): mci = %p\n", __func__, mci);
mci->dev = &pdev->dev;
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 9a6a274..69ee6aa 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -48,8 +48,9 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
syndrome = (ar & 0x000000001fe00000ul) >> 21;
/* TODO: Decoding of the error address */
- edac_mc_handle_ce(mci, csrow->first_page + pfn, offset,
- syndrome, 0, chan, "");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ csrow->first_page + pfn, offset, syndrome,
+ 0, chan, -1, "", "", NULL);
}
static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
@@ -69,7 +70,9 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
offset = address & ~PAGE_MASK;
/* TODO: Decoding of the error address */
- edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, "");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ csrow->first_page + pfn, offset, 0,
+ 0, chan, -1, "", "", NULL);
}
static void cell_edac_check(struct mem_ctl_info *mci)
@@ -124,8 +127,11 @@ static void cell_edac_check(struct mem_ctl_info *mci)
static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow = &mci->csrows[0];
+ struct dimm_info *dimm;
struct cell_edac_priv *priv = mci->pvt_info;
struct device_node *np;
+ int j;
+ u32 nr_pages;
for (np = NULL;
(np = of_find_node_by_name(np, "memory")) != NULL;) {
@@ -140,15 +146,20 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
if (of_node_to_nid(np) != priv->node)
continue;
csrow->first_page = r.start >> PAGE_SHIFT;
- csrow->nr_pages = resource_size(&r) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- csrow->mtype = MEM_XDR;
- csrow->edac_mode = EDAC_SECDED;
+ nr_pages = resource_size(&r) >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + nr_pages - 1;
+
+ for (j = 0; j < csrow->nr_channels; j++) {
+ dimm = csrow->channels[j].dimm;
+ dimm->mtype = MEM_XDR;
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ }
dev_dbg(mci->dev,
"Initialized on node %d, chanmask=0x%x,"
" first_page=0x%lx, nr_pages=0x%x\n",
priv->node, priv->chanmask,
- csrow->first_page, csrow->nr_pages);
+ csrow->first_page, nr_pages);
break;
}
}
@@ -157,9 +168,10 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
{
struct cbe_mic_tm_regs __iomem *regs;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct cell_edac_priv *priv;
u64 reg;
- int rc, chanmask;
+ int rc, chanmask, num_chans;
regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
if (regs == NULL)
@@ -184,8 +196,16 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
in_be64(&regs->mic_fir));
/* Allocate & init EDAC MC data structure */
- mci = edac_mc_alloc(sizeof(struct cell_edac_priv), 1,
- chanmask == 3 ? 2 : 1, pdev->id);
+ num_chans = chanmask == 3 ? 2 : 1;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = num_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
+ sizeof(struct cell_edac_priv));
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index a774c0d..e22030a 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -329,9 +329,10 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
{
struct cpc925_mc_pdata *pdata = mci->pvt_info;
struct csrow_info *csrow;
- int index;
+ struct dimm_info *dimm;
+ int index, j;
u32 mbmr, mbbar, bba;
- unsigned long row_size, last_nr_pages = 0;
+ unsigned long row_size, nr_pages, last_nr_pages = 0;
get_total_mem(pdata);
@@ -350,36 +351,41 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
row_size = bba * (1UL << 28); /* 256M */
csrow->first_page = last_nr_pages;
- csrow->nr_pages = row_size >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+ nr_pages = row_size >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + nr_pages - 1;
last_nr_pages = csrow->last_page + 1;
- csrow->mtype = MEM_RDDR;
- csrow->edac_mode = EDAC_SECDED;
-
- switch (csrow->nr_channels) {
- case 1: /* Single channel */
- csrow->grain = 32; /* four-beat burst of 32 bytes */
- break;
- case 2: /* Dual channel */
- default:
- csrow->grain = 64; /* four-beat burst of 64 bytes */
- break;
- }
-
- switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
- case 6: /* 0110, no way to differentiate X8 VS X16 */
- case 5: /* 0101 */
- case 8: /* 1000 */
- csrow->dtype = DEV_X16;
- break;
- case 7: /* 0111 */
- case 9: /* 1001 */
- csrow->dtype = DEV_X8;
- break;
- default:
- csrow->dtype = DEV_UNKNOWN;
- break;
+ for (j = 0; j < csrow->nr_channels; j++) {
+ dimm = csrow->channels[j].dimm;
+
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ dimm->mtype = MEM_RDDR;
+ dimm->edac_mode = EDAC_SECDED;
+
+ switch (csrow->nr_channels) {
+ case 1: /* Single channel */
+ dimm->grain = 32; /* four-beat burst of 32 bytes */
+ break;
+ case 2: /* Dual channel */
+ default:
+ dimm->grain = 64; /* four-beat burst of 64 bytes */
+ break;
+ }
+
+ switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
+ case 6: /* 0110, no way to differentiate X8 VS X16 */
+ case 5: /* 0101 */
+ case 8: /* 1000 */
+ dimm->dtype = DEV_X16;
+ break;
+ case 7: /* 0111 */
+ case 9: /* 1001 */
+ dimm->dtype = DEV_X8;
+ break;
+ default:
+ dimm->dtype = DEV_UNKNOWN;
+ break;
+ }
}
}
}
@@ -549,13 +555,18 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
if (apiexcp & CECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
channel = cpc925_mc_find_channel(mci, syndrome);
- edac_mc_handle_ce(mci, pfn, offset, syndrome,
- csrow, channel, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ pfn, offset, syndrome,
+ csrow, channel, -1,
+ mci->ctl_name, "", NULL);
}
if (apiexcp & UECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
- edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ pfn, offset, 0,
+ csrow, -1, -1,
+ mci->ctl_name, "", NULL);
}
cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n");
@@ -927,6 +938,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
{
static int edac_mc_idx;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
void __iomem *vbase;
struct cpc925_mc_pdata *pdata;
struct resource *r;
@@ -962,9 +974,16 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
goto err2;
}
- nr_channels = cpc925_mc_get_channels(vbase);
- mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata),
- CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx);
+ nr_channels = cpc925_mc_get_channels(vbase) + 1;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = CPC925_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+ sizeof(struct cpc925_mc_pdata));
if (!mci) {
cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n");
res = -ENOMEM;
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 4122326..3186512 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -4,7 +4,11 @@
* This file may be distributed under the terms of the
* GNU General Public License.
*
- * See "enum e752x_chips" below for supported chipsets
+ * Implement support for the e7520, E7525, e7320 and i3100 memory controllers.
+ *
+ * Datasheets:
+ * http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
+ * ftp://download.intel.com/design/intarch/datashts/31345803.pdf
*
* Written by Tom Zimmerman
*
@@ -13,8 +17,6 @@
* Wang Zhenyu at intel.com
* Dave Jiang at mvista.com
*
- * $Id: edac_e752x.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
- *
*/
#include <linux/module.h>
@@ -187,6 +189,25 @@ enum e752x_chips {
I3100 = 3
};
+/*
+ * Those chips Support single-rank and dual-rank memories only.
+ *
+ * On e752x chips, the odd rows are present only on dual-rank memories.
+ * Dividing the rank by two will provide the dimm#
+ *
+ * i3100 MC has a different mapping: it supports only 4 ranks.
+ *
+ * The mapping is (from 1 to n):
+ * slot single-ranked double-ranked
+ * dimm #1 -> rank #4 NA
+ * dimm #2 -> rank #3 NA
+ * dimm #3 -> rank #2 Ranks 2 and 3
+ * dimm #4 -> rank $1 Ranks 1 and 4
+ *
+ * FIXME: The current mapping for i3100 considers that it supports up to 8
+ * ranks/chanel, but datasheet says that the MC supports only 4 ranks.
+ */
+
struct e752x_pvt {
struct pci_dev *bridge_ck;
struct pci_dev *dev_d0f0;
@@ -350,8 +371,10 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
channel = !(error_one & 1);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4),
- sec1_syndrome, row, channel, "e752x CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset_in_page(sec1_add << 4), sec1_syndrome,
+ row, channel, -1,
+ "e752x CE", "", NULL);
}
static inline void process_ce(struct mem_ctl_info *mci, u16 error_one,
@@ -385,9 +408,12 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Read");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Read", "", NULL);
+
}
if (error_one & 0x0404) {
error_2b = scrb_add;
@@ -401,9 +427,11 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Scruber");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Scruber", "", NULL);
}
}
@@ -426,7 +454,9 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
return;
debugf3("%s()\n", __func__);
- edac_mc_handle_ue_no_info(mci, "e752x UE log memory write");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1,
+ "e752x UE log memory write", "", NULL);
}
static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
@@ -1044,7 +1074,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */
int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */
u8 value;
- u32 dra, drc, cumul_size;
+ u32 dra, drc, cumul_size, i, nr_pages;
dra = 0;
for (index = 0; index < 4; index++) {
@@ -1053,7 +1083,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
dra |= dra_reg << (index * 8);
}
pci_read_config_dword(pdev, E752X_DRC, &drc);
- drc_chan = dual_channel_active(ddrcsr);
+ drc_chan = dual_channel_active(ddrcsr) ? 1 : 0;
drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */
drc_ddim = (drc >> 20) & 0x3;
@@ -1078,26 +1108,33 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */
- csrow->mtype = MEM_RDDR; /* only one type supported */
- csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
-
- /*
- * if single channel or x8 devices then SECDED
- * if dual channel and x4 then S4ECD4ED
- */
- if (drc_ddim) {
- if (drc_chan && mem_dev) {
- csrow->edac_mode = EDAC_S4ECD4ED;
- mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
- } else {
- csrow->edac_mode = EDAC_SECDED;
- mci->edac_cap |= EDAC_FLAG_SECDED;
- }
- } else
- csrow->edac_mode = EDAC_NONE;
+
+ for (i = 0; i < csrow->nr_channels; i++) {
+ struct dimm_info *dimm = csrow->channels[i].dimm;
+
+ debugf3("Initializing rank at (%i,%i)\n", index, i);
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
+ dimm->mtype = MEM_RDDR; /* only one type supported */
+ dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+
+ /*
+ * if single channel or x8 devices then SECDED
+ * if dual channel and x4 then S4ECD4ED
+ */
+ if (drc_ddim) {
+ if (drc_chan && mem_dev) {
+ dimm->edac_mode = EDAC_S4ECD4ED;
+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
+ } else {
+ dimm->edac_mode = EDAC_SECDED;
+ mci->edac_cap |= EDAC_FLAG_SECDED;
+ }
+ } else
+ dimm->edac_mode = EDAC_NONE;
+ }
}
}
@@ -1226,6 +1263,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
u16 pci_data;
u8 stat8;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct e752x_pvt *pvt;
u16 ddrcsr;
int drc_chan; /* Number of channels 0=1chan,1=2chan */
@@ -1252,11 +1290,15 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
/* Dual channel = 1, Single channel = 0 */
drc_chan = dual_channel_active(ddrcsr);
- mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0);
-
- if (mci == NULL) {
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = E752X_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = drc_chan + 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+ if (mci == NULL)
return -ENOMEM;
- }
debugf3("%s(): init mci\n", __func__);
mci->mtype_cap = MEM_FLAG_RDDR;
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 68dea87..9a9c1a5 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -10,6 +10,9 @@
* Based on work by Dan Hollis <goemon at anime dot net> and others.
* http://www.anime.net/~goemon/linux-ecc/
*
+ * Datasheet:
+ * http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html
+ *
* Contributors:
* Eric Biederman (Linux Networx)
* Tom Zimmerman (Linux Networx)
@@ -71,7 +74,7 @@
#endif /* PCI_DEVICE_ID_INTEL_7505_1_ERR */
#define E7XXX_NR_CSROWS 8 /* number of csrows */
-#define E7XXX_NR_DIMMS 8 /* FIXME - is this correct? */
+#define E7XXX_NR_DIMMS 8 /* 2 channels, 4 dimms/channel */
/* E7XXX register addresses - device 0 function 0 */
#define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */
@@ -216,13 +219,15 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
row = edac_mc_find_csrow_by_page(mci, page);
/* convert syndrome to channel */
channel = e7xxx_find_channel(syndrome);
- edac_mc_handle_ce(mci, page, 0, syndrome, row, channel, "e7xxx CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, page, 0, syndrome,
+ row, channel, -1, "e7xxx CE", "", NULL);
}
static void process_ce_no_info(struct mem_ctl_info *mci)
{
debugf3("%s()\n", __func__);
- edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
+ "e7xxx CE log register overflow", "", NULL);
}
static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
@@ -236,13 +241,17 @@ static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
/* FIXME - should use PAGE_SHIFT */
block_page = error_2b >> 6; /* convert to 4k address */
row = edac_mc_find_csrow_by_page(mci, block_page);
- edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE");
+
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, block_page, 0, 0,
+ row, -1, -1, "e7xxx UE", "", NULL);
}
static void process_ue_no_info(struct mem_ctl_info *mci)
{
debugf3("%s()\n", __func__);
- edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow");
+
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
+ "e7xxx UE log register overflow", "", NULL);
}
static void e7xxx_get_error_info(struct mem_ctl_info *mci,
@@ -347,11 +356,12 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
int dev_idx, u32 drc)
{
unsigned long last_cumul_size;
- int index;
+ int index, j;
u8 value;
- u32 dra, cumul_size;
+ u32 dra, cumul_size, nr_pages;
int drc_chan, drc_drbg, drc_ddim, mem_dev;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
pci_read_config_dword(pdev, E7XXX_DRA, &dra);
drc_chan = dual_channel_active(drc, dev_idx);
@@ -379,26 +389,32 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */
- csrow->mtype = MEM_RDDR; /* only one type supported */
- csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
-
- /*
- * if single channel or x8 devices then SECDED
- * if dual channel and x4 then S4ECD4ED
- */
- if (drc_ddim) {
- if (drc_chan && mem_dev) {
- csrow->edac_mode = EDAC_S4ECD4ED;
- mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
- } else {
- csrow->edac_mode = EDAC_SECDED;
- mci->edac_cap |= EDAC_FLAG_SECDED;
- }
- } else
- csrow->edac_mode = EDAC_NONE;
+
+ for (j = 0; j < drc_chan + 1; j++) {
+ dimm = csrow->channels[j].dimm;
+
+ dimm->nr_pages = nr_pages / (drc_chan + 1);
+ dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
+ dimm->mtype = MEM_RDDR; /* only one type supported */
+ dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+
+ /*
+ * if single channel or x8 devices then SECDED
+ * if dual channel and x4 then S4ECD4ED
+ */
+ if (drc_ddim) {
+ if (drc_chan && mem_dev) {
+ dimm->edac_mode = EDAC_S4ECD4ED;
+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
+ } else {
+ dimm->edac_mode = EDAC_SECDED;
+ mci->edac_cap |= EDAC_FLAG_SECDED;
+ }
+ } else
+ dimm->edac_mode = EDAC_NONE;
+ }
}
}
@@ -406,6 +422,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
{
u16 pci_data;
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
struct e7xxx_pvt *pvt = NULL;
u32 drc;
int drc_chan;
@@ -416,8 +433,21 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
pci_read_config_dword(pdev, E7XXX_DRC, &drc);
drc_chan = dual_channel_active(drc, dev_idx);
- mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0);
-
+ /*
+ * According with the datasheet, this device has a maximum of
+ * 4 DIMMS per channel, either single-rank or dual-rank. So, the
+ * total amount of dimms is 8 (E7XXX_NR_DIMMS).
+ * That means that the DIMM is mapped as CSROWs, and the channel
+ * will map the rank. So, an error to either channel should be
+ * attributed to the same dimm.
+ */
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = E7XXX_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = drc_chan + 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 5b73941..117490d 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -447,8 +447,10 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
#endif /* CONFIG_PCI */
-extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
- unsigned nr_chans, int edac_index);
+struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
+ unsigned n_layers,
+ struct edac_mc_layer *layers,
+ unsigned sz_pvt);
extern int edac_mc_add_mc(struct mem_ctl_info *mci);
extern void edac_mc_free(struct mem_ctl_info *mci);
extern struct mem_ctl_info *edac_mc_find(int idx);
@@ -456,35 +458,17 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
unsigned long page);
-
-/*
- * The no info errors are used when error overflows are reported.
- * There are a limited number of error logging registers that can
- * be exausted. When all registers are exhausted and an additional
- * error occurs then an error overflow register records that an
- * error occurred and the type of error, but doesn't have any
- * further information. The ce/ue versions make for cleaner
- * reporting logic and function interface - reduces conditional
- * statement clutter and extra function arguments.
- */
-extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page,
- unsigned long syndrome, int row, int channel,
- const char *msg);
-extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
- const char *msg);
-extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, int row,
- const char *msg);
-extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
- const char *msg);
-extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
- unsigned int channel0, unsigned int channel1,
- char *msg);
-extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
- unsigned int channel, char *msg);
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ const unsigned long syndrome,
+ const int layer0,
+ const int layer1,
+ const int layer2,
+ const char *msg,
+ const char *other_detail,
+ const void *mcelog);
/*
* edac_device APIs
@@ -496,6 +480,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
extern int edac_device_alloc_index(void);
+extern const char *edac_layer_name[];
/*
* edac_pci APIs
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 45b8f4b..ee3f1f8 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -79,7 +79,7 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
unsigned total_size;
unsigned count;
unsigned instance, block, attr;
- void *pvt;
+ void *pvt, *p;
int err;
debugf4("%s() instances=%d blocks=%d\n",
@@ -92,35 +92,30 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
* to be at least as stringent as what the compiler would
* provide if we could simply hardcode everything into a single struct.
*/
- dev_ctl = (struct edac_device_ctl_info *)NULL;
+ p = NULL;
+ dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
/* Calc the 'end' offset past end of ONE ctl_info structure
* which will become the start of the 'instance' array
*/
- dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst));
+ dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
/* Calc the 'end' offset past the instance array within the ctl_info
* which will become the start of the block array
*/
- dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk));
+ count = nr_instances * nr_blocks;
+ dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
/* Calc the 'end' offset past the dev_blk array
* which will become the start of the attrib array, if any.
*/
- count = nr_instances * nr_blocks;
- dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib));
-
- /* Check for case of when an attribute array is specified */
- if (nr_attrib > 0) {
- /* calc how many nr_attrib we need */
+ /* calc how many nr_attrib we need */
+ if (nr_attrib > 0)
count *= nr_attrib;
+ dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
- /* Calc the 'end' offset past the attributes array */
- pvt = edac_align_ptr(&dev_attrib[count], sz_private);
- } else {
- /* no attribute array specified */
- pvt = edac_align_ptr(dev_attrib, sz_private);
- }
+ /* Calc the 'end' offset past the attributes array */
+ pvt = edac_align_ptr(&p, sz_private, 1);
/* 'pvt' now points to where the private data area is.
* At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index feef773..10f3750 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -43,9 +43,26 @@ static void edac_mc_dump_channel(struct rank_info *chan)
{
debugf4("\tchannel = %p\n", chan);
debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
- debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
- debugf4("\tchannel->label = '%s'\n", chan->label);
debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
+ debugf4("\tchannel->dimm = %p\n", chan->dimm);
+}
+
+static void edac_mc_dump_dimm(struct dimm_info *dimm)
+{
+ int i;
+
+ debugf4("\tdimm = %p\n", dimm);
+ debugf4("\tdimm->label = '%s'\n", dimm->label);
+ debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
+ debugf4("\tdimm location ");
+ for (i = 0; i < dimm->mci->n_layers; i++) {
+ printk(KERN_CONT "%d", dimm->location[i]);
+ if (i < dimm->mci->n_layers - 1)
+ printk(KERN_CONT ".");
+ }
+ printk(KERN_CONT "\n");
+ debugf4("\tdimm->grain = %d\n", dimm->grain);
+ debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}
static void edac_mc_dump_csrow(struct csrow_info *csrow)
@@ -55,7 +72,6 @@ static void edac_mc_dump_csrow(struct csrow_info *csrow)
debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
- debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
debugf4("\tcsrow->channels = %p\n", csrow->channels);
debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
@@ -70,6 +86,8 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci)
debugf4("\tmci->edac_check = %p\n", mci->edac_check);
debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
mci->nr_csrows, mci->csrows);
+ debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
+ mci->tot_dimms, mci->dimms);
debugf3("\tdev = %p\n", mci->dev);
debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
@@ -101,18 +119,37 @@ const char *edac_mem_types[] = {
};
EXPORT_SYMBOL_GPL(edac_mem_types);
-/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
- * Adjust 'ptr' so that its alignment is at least as stringent as what the
- * compiler would provide for X and return the aligned result.
+/**
+ * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
+ * @p: pointer to a pointer with the memory offset to be used. At
+ * return, this will be incremented to point to the next offset
+ * @size: Size of the data structure to be reserved
+ * @n_elems: Number of elements that should be reserved
*
* If 'size' is a constant, the compiler will optimize this whole function
- * down to either a no-op or the addition of a constant to the value of 'ptr'.
+ * down to either a no-op or the addition of a constant to the value of '*p'.
+ *
+ * The 'p' pointer is absolutely needed to keep the proper advancing
+ * further in memory to the proper offsets when allocating the struct along
+ * with its embedded structs, as edac_device_alloc_ctl_info() does it
+ * above, for example.
+ *
+ * At return, the pointer 'p' will be incremented to be used on a next call
+ * to this function.
*/
-void *edac_align_ptr(void *ptr, unsigned size)
+void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
unsigned align, r;
+ void *ptr = *p;
+
+ *p += size * n_elems;
- /* Here we assume that the alignment of a "long long" is the most
+ /*
+ * 'p' can possibly be an unaligned item X such that sizeof(X) is
+ * 'size'. Adjust 'p' so that its alignment is at least as
+ * stringent as what the compiler would provide for X and return
+ * the aligned result.
+ * Here we assume that the alignment of a "long long" is the most
* stringent alignment that the compiler will ever provide by default.
* As far as I know, this is a reasonable assumption.
*/
@@ -132,14 +169,18 @@ void *edac_align_ptr(void *ptr, unsigned size)
if (r == 0)
return (char *)ptr;
+ *p += align - r;
+
return (void *)(((unsigned long)ptr) + align - r);
}
/**
- * edac_mc_alloc: Allocate a struct mem_ctl_info structure
- * @size_pvt: size of private storage needed
- * @nr_csrows: Number of CWROWS needed for this MC
- * @nr_chans: Number of channels for the MC
+ * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
+ * @mc_num: Memory controller number
+ * @n_layers: Number of MC hierarchy layers
+ * layers: Describes each layer as seen by the Memory Controller
+ * @size_pvt: size of private storage needed
+ *
*
* Everything is kmalloc'ed as one big chunk - more efficient.
* Only can be used if all structures have the same lifetime - otherwise
@@ -147,32 +188,77 @@ void *edac_align_ptr(void *ptr, unsigned size)
*
* Use edac_mc_free() to free mc structures allocated by this function.
*
+ * NOTE: drivers handle multi-rank memories in different ways: in some
+ * drivers, one multi-rank memory stick is mapped as one entry, while, in
+ * others, a single multi-rank memory stick would be mapped into several
+ * entries. Currently, this function will allocate multiple struct dimm_info
+ * on such scenarios, as grouping the multiple ranks require drivers change.
+ *
* Returns:
- * NULL allocation failed
- * struct mem_ctl_info pointer
+ * On failure: NULL
+ * On success: struct mem_ctl_info pointer
*/
-struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
- unsigned nr_chans, int edac_index)
+struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
+ unsigned n_layers,
+ struct edac_mc_layer *layers,
+ unsigned sz_pvt)
{
struct mem_ctl_info *mci;
- struct csrow_info *csi, *csrow;
+ struct edac_mc_layer *layer;
+ struct csrow_info *csi, *csr;
struct rank_info *chi, *chp, *chan;
- void *pvt;
- unsigned size;
- int row, chn;
- int err;
+ struct dimm_info *dimm;
+ u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+ unsigned pos[EDAC_MAX_LAYERS];
+ unsigned size, tot_dimms = 1, count = 1;
+ unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+ void *pvt, *p, *ptr = NULL;
+ int i, j, err, row, chn, n, len;
+ bool per_rank = false;
+
+ BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
+ /*
+ * Calculate the total amount of dimms and csrows/cschannels while
+ * in the old API emulation mode
+ */
+ for (i = 0; i < n_layers; i++) {
+ tot_dimms *= layers[i].size;
+ if (layers[i].is_virt_csrow)
+ tot_csrows *= layers[i].size;
+ else
+ tot_channels *= layers[i].size;
+
+ if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
+ per_rank = true;
+ }
/* Figure out the offsets of the various items from the start of an mc
* structure. We want the alignment of each item to be at least as
* stringent as what the compiler would provide if we could simply
* hardcode everything into a single struct.
*/
- mci = (struct mem_ctl_info *)0;
- csi = edac_align_ptr(&mci[1], sizeof(*csi));
- chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
- pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
+ chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
+ dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
+ for (i = 0; i < n_layers; i++) {
+ count *= layers[i].size;
+ debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
+ ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+ ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+ tot_errcount += 2 * count;
+ }
+
+ debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
size = ((unsigned long)pvt) + sz_pvt;
+ debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+ __func__, size,
+ tot_dimms,
+ per_rank ? "ranks" : "dimms",
+ tot_csrows * tot_channels);
mci = kzalloc(size, GFP_KERNEL);
if (mci == NULL)
return NULL;
@@ -180,28 +266,103 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
/* Adjust pointers so they point within the memory we just allocated
* rather than an imaginary chunk of memory located at address 0.
*/
+ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
+ dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
+ for (i = 0; i < n_layers; i++) {
+ mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
+ mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
+ }
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
/* setup index and various internal pointers */
- mci->mc_idx = edac_index;
+ mci->mc_idx = mc_num;
mci->csrows = csi;
+ mci->dimms = dimm;
+ mci->tot_dimms = tot_dimms;
mci->pvt_info = pvt;
- mci->nr_csrows = nr_csrows;
-
- for (row = 0; row < nr_csrows; row++) {
- csrow = &csi[row];
- csrow->csrow_idx = row;
- csrow->mci = mci;
- csrow->nr_channels = nr_chans;
- chp = &chi[row * nr_chans];
- csrow->channels = chp;
+ mci->n_layers = n_layers;
+ mci->layers = layer;
+ memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
+ mci->nr_csrows = tot_csrows;
+ mci->num_cschannel = tot_channels;
+ mci->mem_is_per_rank = per_rank;
- for (chn = 0; chn < nr_chans; chn++) {
+ /*
+ * Fill the csrow struct
+ */
+ for (row = 0; row < tot_csrows; row++) {
+ csr = &csi[row];
+ csr->csrow_idx = row;
+ csr->mci = mci;
+ csr->nr_channels = tot_channels;
+ chp = &chi[row * tot_channels];
+ csr->channels = chp;
+
+ for (chn = 0; chn < tot_channels; chn++) {
chan = &chp[chn];
chan->chan_idx = chn;
- chan->csrow = csrow;
+ chan->csrow = csr;
+ }
+ }
+
+ /*
+ * Fill the dimm struct
+ */
+ memset(&pos, 0, sizeof(pos));
+ row = 0;
+ chn = 0;
+ debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
+ per_rank ? "ranks" : "dimms");
+ for (i = 0; i < tot_dimms; i++) {
+ chan = &csi[row].channels[chn];
+ dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
+ pos[0], pos[1], pos[2]);
+ dimm->mci = mci;
+
+ debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
+ i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
+ pos[0], pos[1], pos[2], row, chn);
+
+ /*
+ * Copy DIMM location and initialize it.
+ */
+ len = sizeof(dimm->label);
+ p = dimm->label;
+ n = snprintf(p, len, "mc#%u", mc_num);
+ p += n;
+ len -= n;
+ for (j = 0; j < n_layers; j++) {
+ n = snprintf(p, len, "%s#%u",
+ edac_layer_name[layers[j].type],
+ pos[j]);
+ p += n;
+ len -= n;
+ dimm->location[j] = pos[j];
+
+ if (len <= 0)
+ break;
+ }
+
+ /* Link it to the csrows old API data */
+ chan->dimm = dimm;
+ dimm->csrow = row;
+ dimm->cschannel = chn;
+
+ /* Increment csrow location */
+ row++;
+ if (row == tot_csrows) {
+ row = 0;
+ chn++;
+ }
+
+ /* Increment dimm location */
+ for (j = n_layers - 1; j >= 0; j--) {
+ pos[j]++;
+ if (pos[j] < layers[j].size)
+ break;
+ pos[j] = 0;
}
}
@@ -490,7 +651,6 @@ EXPORT_SYMBOL(edac_mc_find);
* edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
* create sysfs entries associated with mci structure
* @mci: pointer to the mci structure to be added to the list
- * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
*
* Return:
* 0 Success
@@ -517,6 +677,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
edac_mc_dump_channel(&mci->csrows[i].
channels[j]);
}
+ for (i = 0; i < mci->tot_dimms; i++)
+ edac_mc_dump_dimm(&mci->dimms[i]);
}
#endif
mutex_lock(&mem_ctls_mutex);
@@ -636,15 +798,19 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
struct csrow_info *csrows = mci->csrows;
- int row, i;
+ int row, i, j, n;
debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
row = -1;
for (i = 0; i < mci->nr_csrows; i++) {
struct csrow_info *csrow = &csrows[i];
-
- if (csrow->nr_pages == 0)
+ n = 0;
+ for (j = 0; j < csrow->nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j].dimm;
+ n += dimm->nr_pages;
+ }
+ if (n == 0)
continue;
debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
@@ -670,249 +836,307 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
-/* FIXME - setable log (warning/emerg) levels */
-/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
-void edac_mc_handle_ce(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, unsigned long syndrome,
- int row, int channel, const char *msg)
-{
- unsigned long remapped_page;
+const char *edac_layer_name[] = {
+ [EDAC_MC_LAYER_BRANCH] = "branch",
+ [EDAC_MC_LAYER_CHANNEL] = "channel",
+ [EDAC_MC_LAYER_SLOT] = "slot",
+ [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+};
+EXPORT_SYMBOL_GPL(edac_layer_name);
- debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
+static void edac_inc_ce_error(struct mem_ctl_info *mci,
+ bool enable_per_layer_report,
+ const int pos[EDAC_MAX_LAYERS])
+{
+ int i, index = 0;
- /* FIXME - maybe make panic on INTERNAL ERROR an option */
- if (row >= mci->nr_csrows || row < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range "
- "(%d >= %d)\n", row, mci->nr_csrows);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
- return;
- }
+ mci->ce_mc++;
- if (channel >= mci->csrows[row].nr_channels || channel < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel out of range "
- "(%d >= %d)\n", channel,
- mci->csrows[row].nr_channels);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
+ if (!enable_per_layer_report) {
+ mci->ce_noinfo_count++;
return;
}
- if (edac_mc_get_log_ce())
- /* FIXME - put in DIMM location */
- edac_mc_printk(mci, KERN_WARNING,
- "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
- "0x%lx, row %d, channel %d, label \"%s\": %s\n",
- page_frame_number, offset_in_page,
- mci->csrows[row].grain, syndrome, row, channel,
- mci->csrows[row].channels[channel].label, msg);
-
- mci->ce_count++;
- mci->csrows[row].ce_count++;
- mci->csrows[row].channels[channel].ce_count++;
-
- if (mci->scrub_mode & SCRUB_SW_SRC) {
- /*
- * Some MC's can remap memory so that it is still available
- * at a different address when PCI devices map into memory.
- * MC's that can't do this lose the memory where PCI devices
- * are mapped. This mapping is MC dependent and so we call
- * back into the MC driver for it to map the MC page to
- * a physical (CPU) page which can then be mapped to a virtual
- * page - which can then be scrubbed.
- */
- remapped_page = mci->ctl_page_to_phys ?
- mci->ctl_page_to_phys(mci, page_frame_number) :
- page_frame_number;
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ break;
+ index += pos[i];
+ mci->ce_per_layer[i][index]++;
- edac_mc_scrub_block(remapped_page, offset_in_page,
- mci->csrows[row].grain);
+ if (i < mci->n_layers - 1)
+ index *= mci->layers[i + 1].size;
}
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
-void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
+static void edac_inc_ue_error(struct mem_ctl_info *mci,
+ bool enable_per_layer_report,
+ const int pos[EDAC_MAX_LAYERS])
{
- if (edac_mc_get_log_ce())
- edac_mc_printk(mci, KERN_WARNING,
- "CE - no information available: %s\n", msg);
+ int i, index = 0;
- mci->ce_noinfo_count++;
- mci->ce_count++;
-}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
+ mci->ue_mc++;
-void edac_mc_handle_ue(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, int row, const char *msg)
-{
- int len = EDAC_MC_LABEL_LEN * 4;
- char labels[len + 1];
- char *pos = labels;
- int chan;
- int chars;
-
- debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
-
- /* FIXME - maybe make panic on INTERNAL ERROR an option */
- if (row >= mci->nr_csrows || row < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range "
- "(%d >= %d)\n", row, mci->nr_csrows);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
+ if (!enable_per_layer_report) {
+ mci->ce_noinfo_count++;
return;
}
- chars = snprintf(pos, len + 1, "%s",
- mci->csrows[row].channels[0].label);
- len -= chars;
- pos += chars;
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ break;
+ index += pos[i];
+ mci->ue_per_layer[i][index]++;
- for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
- chan++) {
- chars = snprintf(pos, len + 1, ":%s",
- mci->csrows[row].channels[chan].label);
- len -= chars;
- pos += chars;
+ if (i < mci->n_layers - 1)
+ index *= mci->layers[i + 1].size;
}
+}
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_EMERG,
- "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
- "labels \"%s\": %s\n", page_frame_number,
- offset_in_page, mci->csrows[row].grain, row,
- labels, msg);
+static void edac_ce_error(struct mem_ctl_info *mci,
+ const int pos[EDAC_MAX_LAYERS],
+ const char *msg,
+ const char *location,
+ const char *label,
+ const char *detail,
+ const char *other_detail,
+ const bool enable_per_layer_report,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ u32 grain)
+{
+ unsigned long remapped_page;
- if (edac_mc_get_panic_on_ue())
- panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
- "row %d, labels \"%s\": %s\n", mci->mc_idx,
- page_frame_number, offset_in_page,
- mci->csrows[row].grain, row, labels, msg);
+ if (edac_mc_get_log_ce()) {
+ if (other_detail && *other_detail)
+ edac_mc_printk(mci, KERN_WARNING,
+ "CE %s on %s (%s%s - %s)\n",
+ msg, label, location,
+ detail, other_detail);
+ else
+ edac_mc_printk(mci, KERN_WARNING,
+ "CE %s on %s (%s%s)\n",
+ msg, label, location,
+ detail);
+ }
+ edac_inc_ce_error(mci, enable_per_layer_report, pos);
- mci->ue_count++;
- mci->csrows[row].ue_count++;
+ if (mci->scrub_mode & SCRUB_SW_SRC) {
+ /*
+ * Some memory controllers (called MCs below) can remap
+ * memory so that it is still available at a different
+ * address when PCI devices map into memory.
+ * MC's that can't do this, lose the memory where PCI
+ * devices are mapped. This mapping is MC-dependent
+ * and so we call back into the MC driver for it to
+ * map the MC page to a physical (CPU) page which can
+ * then be mapped to a virtual page - which can then
+ * be scrubbed.
+ */
+ remapped_page = mci->ctl_page_to_phys ?
+ mci->ctl_page_to_phys(mci, page_frame_number) :
+ page_frame_number;
+
+ edac_mc_scrub_block(remapped_page,
+ offset_in_page, grain);
+ }
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
-void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
+static void edac_ue_error(struct mem_ctl_info *mci,
+ const int pos[EDAC_MAX_LAYERS],
+ const char *msg,
+ const char *location,
+ const char *label,
+ const char *detail,
+ const char *other_detail,
+ const bool enable_per_layer_report)
{
- if (edac_mc_get_panic_on_ue())
- panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
+ if (edac_mc_get_log_ue()) {
+ if (other_detail && *other_detail)
+ edac_mc_printk(mci, KERN_WARNING,
+ "UE %s on %s (%s%s - %s)\n",
+ msg, label, location, detail,
+ other_detail);
+ else
+ edac_mc_printk(mci, KERN_WARNING,
+ "UE %s on %s (%s%s)\n",
+ msg, label, location, detail);
+ }
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_WARNING,
- "UE - no information available: %s\n", msg);
- mci->ue_noinfo_count++;
- mci->ue_count++;
+ if (edac_mc_get_panic_on_ue()) {
+ if (other_detail && *other_detail)
+ panic("UE %s on %s (%s%s - %s)\n",
+ msg, label, location, detail, other_detail);
+ else
+ panic("UE %s on %s (%s%s)\n",
+ msg, label, location, detail);
+ }
+
+ edac_inc_ue_error(mci, enable_per_layer_report, pos);
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process UE events
- */
-void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
- unsigned int csrow,
- unsigned int channela,
- unsigned int channelb, char *msg)
+#define OTHER_LABEL " or "
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ const unsigned long syndrome,
+ const int layer0,
+ const int layer1,
+ const int layer2,
+ const char *msg,
+ const char *other_detail,
+ const void *mcelog)
{
- int len = EDAC_MC_LABEL_LEN * 4;
- char labels[len + 1];
- char *pos = labels;
- int chars;
+ /* FIXME: too much for stack: move it to some pre-alocated area */
+ char detail[80], location[80];
+ char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
+ char *p;
+ int row = -1, chan = -1;
+ int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
+ int i;
+ u32 grain;
+ bool enable_per_layer_report = false;
- if (csrow >= mci->nr_csrows) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range (%d >= %d)\n",
- csrow, mci->nr_csrows);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
- }
+ debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
- if (channela >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel-a out of range "
- "(%d >= %d)\n",
- channela, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
+ /*
+ * Check if the event report is consistent and if the memory
+ * location is known. If it is known, enable_per_layer_report will be
+ * true, the DIMM(s) label info will be filled and the per-layer
+ * error counters will be incremented.
+ */
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] >= (int)mci->layers[i].size) {
+ if (type == HW_EVENT_ERR_CORRECTED)
+ p = "CE";
+ else
+ p = "UE";
+
+ edac_mc_printk(mci, KERN_ERR,
+ "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
+ edac_layer_name[mci->layers[i].type],
+ pos[i], mci->layers[i].size);
+ /*
+ * Instead of just returning it, let's use what's
+ * known about the error. The increment routines and
+ * the DIMM filter logic will do the right thing by
+ * pointing the likely damaged DIMMs.
+ */
+ pos[i] = -1;
+ }
+ if (pos[i] >= 0)
+ enable_per_layer_report = true;
}
- if (channelb >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel-b out of range "
- "(%d >= %d)\n",
- channelb, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
- }
+ /*
+ * Get the dimm label/grain that applies to the match criteria.
+ * As the error algorithm may not be able to point to just one memory
+ * stick, the logic here will get all possible labels that could
+ * pottentially be affected by the error.
+ * On FB-DIMM memory controllers, for uncorrected errors, it is common
+ * to have only the MC channel and the MC dimm (also called "branch")
+ * but the channel is not known, as the memory is arranged in pairs,
+ * where each memory belongs to a separate channel within the same
+ * branch.
+ */
+ grain = 0;
+ p = label;
+ *p = '\0';
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm = &mci->dimms[i];
- mci->ue_count++;
- mci->csrows[csrow].ue_count++;
+ if (layer0 >= 0 && layer0 != dimm->location[0])
+ continue;
+ if (layer1 >= 0 && layer1 != dimm->location[1])
+ continue;
+ if (layer2 >= 0 && layer2 != dimm->location[2])
+ continue;
- /* Generate the DIMM labels from the specified channels */
- chars = snprintf(pos, len + 1, "%s",
- mci->csrows[csrow].channels[channela].label);
- len -= chars;
- pos += chars;
- chars = snprintf(pos, len + 1, "-%s",
- mci->csrows[csrow].channels[channelb].label);
+ /* get the max grain, over the error match range */
+ if (dimm->grain > grain)
+ grain = dimm->grain;
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_EMERG,
- "UE row %d, channel-a= %d channel-b= %d "
- "labels \"%s\": %s\n", csrow, channela, channelb,
- labels, msg);
+ /*
+ * If the error is memory-controller wide, there's no need to
+ * seek for the affected DIMMs because the whole
+ * channel/memory controller/... may be affected.
+ * Also, don't show errors for empty DIMM slots.
+ */
+ if (enable_per_layer_report && dimm->nr_pages) {
+ if (p != label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
+ *p = '\0';
+
+ /*
+ * get csrow/channel of the DIMM, in order to allow
+ * incrementing the compat API counters
+ */
+ debugf4("%s: %s csrows map: (%d,%d)\n",
+ __func__,
+ mci->mem_is_per_rank ? "rank" : "dimm",
+ dimm->csrow, dimm->cschannel);
+
+ if (row == -1)
+ row = dimm->csrow;
+ else if (row >= 0 && row != dimm->csrow)
+ row = -2;
+
+ if (chan == -1)
+ chan = dimm->cschannel;
+ else if (chan >= 0 && chan != dimm->cschannel)
+ chan = -2;
+ }
+ }
- if (edac_mc_get_panic_on_ue())
- panic("UE row %d, channel-a= %d channel-b= %d "
- "labels \"%s\": %s\n", csrow, channela,
- channelb, labels, msg);
-}
-EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
+ if (!enable_per_layer_report) {
+ strcpy(label, "any memory");
+ } else {
+ debugf4("%s: csrow/channel to increment: (%d,%d)\n",
+ __func__, row, chan);
+ if (p == label)
+ strcpy(label, "unknown memory");
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ if (row >= 0) {
+ mci->csrows[row].ce_count++;
+ if (chan >= 0)
+ mci->csrows[row].channels[chan].ce_count++;
+ }
+ } else
+ if (row >= 0)
+ mci->csrows[row].ue_count++;
+ }
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process CE events
- */
-void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
- unsigned int csrow, unsigned int channel, char *msg)
-{
+ /* Fill the RAM location data */
+ p = location;
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ continue;
- /* Ensure boundary values */
- if (csrow >= mci->nr_csrows) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range (%d >= %d)\n",
- csrow, mci->nr_csrows);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
- return;
- }
- if (channel >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel out of range (%d >= %d)\n",
- channel, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
- return;
+ p += sprintf(p, "%s:%d ",
+ edac_layer_name[mci->layers[i].type],
+ pos[i]);
}
- if (edac_mc_get_log_ce())
- /* FIXME - put in DIMM location */
- edac_mc_printk(mci, KERN_WARNING,
- "CE row %d, channel %d, label \"%s\": %s\n",
- csrow, channel,
- mci->csrows[csrow].channels[channel].label, msg);
+ /* Memory type dependent details about the error */
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ snprintf(detail, sizeof(detail),
+ "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
+ page_frame_number, offset_in_page,
+ grain, syndrome);
+ edac_ce_error(mci, pos, msg, location, label, detail,
+ other_detail, enable_per_layer_report,
+ page_frame_number, offset_in_page, grain);
+ } else {
+ snprintf(detail, sizeof(detail),
+ "page:0x%lx offset:0x%lx grain:%d",
+ page_frame_number, offset_in_page, grain);
- mci->ce_count++;
- mci->csrows[csrow].ce_count++;
- mci->csrows[csrow].channels[channel].ce_count++;
+ edac_ue_error(mci, pos, msg, location, label, detail,
+ other_detail, enable_per_layer_report);
+ }
}
-EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
+EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index e9a28f5..f6a29b0 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -144,25 +144,31 @@ static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data,
static ssize_t csrow_size_show(struct csrow_info *csrow, char *data,
int private)
{
- return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
+ int i;
+ u32 nr_pages = 0;
+
+ for (i = 0; i < csrow->nr_channels; i++)
+ nr_pages += csrow->channels[i].dimm->nr_pages;
+
+ return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
}
static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data,
int private)
{
- return sprintf(data, "%s\n", mem_types[csrow->mtype]);
+ return sprintf(data, "%s\n", mem_types[csrow->channels[0].dimm->mtype]);
}
static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data,
int private)
{
- return sprintf(data, "%s\n", dev_types[csrow->dtype]);
+ return sprintf(data, "%s\n", dev_types[csrow->channels[0].dimm->dtype]);
}
static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
int private)
{
- return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]);
+ return sprintf(data, "%s\n", edac_caps[csrow->channels[0].dimm->edac_mode]);
}
/* show/store functions for DIMM Label attributes */
@@ -170,11 +176,11 @@ static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
char *data, int channel)
{
/* if field has not been initialized, there is nothing to send */
- if (!csrow->channels[channel].label[0])
+ if (!csrow->channels[channel].dimm->label[0])
return 0;
return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
- csrow->channels[channel].label);
+ csrow->channels[channel].dimm->label);
}
static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
@@ -184,8 +190,8 @@ static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
ssize_t max_size = 0;
max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1);
- strncpy(csrow->channels[channel].label, data, max_size);
- csrow->channels[channel].label[max_size] = '\0';
+ strncpy(csrow->channels[channel].dimm->label, data, max_size);
+ csrow->channels[channel].dimm->label[max_size] = '\0';
return max_size;
}
@@ -419,8 +425,8 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
mci->ue_noinfo_count = 0;
mci->ce_noinfo_count = 0;
- mci->ue_count = 0;
- mci->ce_count = 0;
+ mci->ue_mc = 0;
+ mci->ce_mc = 0;
for (row = 0; row < mci->nr_csrows; row++) {
struct csrow_info *ri = &mci->csrows[row];
@@ -489,12 +495,12 @@ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data)
/* default attribute files for the MCI object */
static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
{
- return sprintf(data, "%d\n", mci->ue_count);
+ return sprintf(data, "%d\n", mci->ue_mc);
}
static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
{
- return sprintf(data, "%d\n", mci->ce_count);
+ return sprintf(data, "%d\n", mci->ce_mc);
}
static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
@@ -519,16 +525,16 @@ static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
{
- int total_pages, csrow_idx;
+ int total_pages = 0, csrow_idx, j;
- for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
- csrow_idx++) {
+ for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
struct csrow_info *csrow = &mci->csrows[csrow_idx];
- if (!csrow->nr_pages)
- continue;
+ for (j = 0; j < csrow->nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j].dimm;
- total_pages += csrow->nr_pages;
+ total_pages += dimm->nr_pages;
+ }
}
return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages));
@@ -900,7 +906,7 @@ static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci,
*/
int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
{
- int i;
+ int i, j;
int err;
struct csrow_info *csrow;
struct kobject *kobj_mci = &mci->edac_mci_kobj;
@@ -934,10 +940,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
/* Make directories for each CSROW object under the mc<id> kobject
*/
for (i = 0; i < mci->nr_csrows; i++) {
+ int nr_pages = 0;
+
csrow = &mci->csrows[i];
+ for (j = 0; j < csrow->nr_channels; j++)
+ nr_pages += csrow->channels[j].dimm->nr_pages;
- /* Only expose populated CSROWs */
- if (csrow->nr_pages > 0) {
+ if (nr_pages > 0) {
err = edac_create_csrow_object(mci, csrow, i);
if (err) {
debugf1("%s() failure: create csrow %d obj\n",
@@ -949,12 +958,15 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
return 0;
- /* CSROW error: backout what has already been registered, */
fail1:
for (i--; i >= 0; i--) {
- if (csrow->nr_pages > 0) {
+ int nr_pages = 0;
+
+ csrow = &mci->csrows[i];
+ for (j = 0; j < csrow->nr_channels; j++)
+ nr_pages += csrow->channels[j].dimm->nr_pages;
+ if (nr_pages > 0)
kobject_put(&mci->csrows[i].kobj);
- }
}
/* remove the mci instance's attributes, if any */
@@ -973,14 +985,20 @@ fail0:
*/
void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
{
- int i;
+ struct csrow_info *csrow;
+ int i, j;
debugf0("%s()\n", __func__);
/* remove all csrow kobjects */
debugf4("%s() unregister this mci kobj\n", __func__);
for (i = 0; i < mci->nr_csrows; i++) {
- if (mci->csrows[i].nr_pages > 0) {
+ int nr_pages = 0;
+
+ csrow = &mci->csrows[i];
+ for (j = 0; j < csrow->nr_channels; j++)
+ nr_pages += csrow->channels[j].dimm->nr_pages;
+ if (nr_pages > 0) {
debugf0("%s() unreg csrow-%d\n", __func__, i);
kobject_put(&mci->csrows[i].kobj);
}
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 00f81b4..0ea7d14 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -50,7 +50,7 @@ extern void edac_device_reset_delay_period(struct edac_device_ctl_info
*edac_dev, unsigned long value);
extern void edac_mc_reset_delay_period(int value);
-extern void *edac_align_ptr(void *ptr, unsigned size);
+extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
/*
* EDAC PCI functions
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 63af1c5..f1ac866 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -42,13 +42,13 @@ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
const char *edac_pci_name)
{
struct edac_pci_ctl_info *pci;
- void *pvt;
+ void *p = NULL, *pvt;
unsigned int size;
debugf1("%s()\n", __func__);
- pci = (struct edac_pci_ctl_info *)0;
- pvt = edac_align_ptr(&pci[1], sz_pvt);
+ pci = edac_align_ptr(&p, sizeof(*pci), 1);
+ pvt = edac_align_ptr(&p, 1, sz_pvt);
size = ((unsigned long)pvt) + sz_pvt;
/* Alloc the needed control struct memory */
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index 277689a..8ad1744 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -245,7 +245,9 @@ static int i3000_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
@@ -256,10 +258,15 @@ static int i3000_process_error_info(struct mem_ctl_info *mci,
row = edac_mc_find_csrow_by_page(mci, pfn);
if (info->errsts & I3000_ERRSTS_UE)
- edac_mc_handle_ue(mci, pfn, offset, row, "i3000 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ pfn, offset, 0,
+ row, -1, -1,
+ "i3000 UE", "", NULL);
else
- edac_mc_handle_ce(mci, pfn, offset, info->derrsyn, row,
- multi_chan ? channel : 0, "i3000 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ pfn, offset, info->derrsyn,
+ row, multi_chan ? channel : 0, -1,
+ "i3000 CE", "", NULL);
return 1;
}
@@ -304,9 +311,10 @@ static int i3000_is_interleaved(const unsigned char *c0dra,
static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_cumul_size;
+ struct edac_mc_layer layers[2];
+ unsigned long last_cumul_size, nr_pages;
int interleaved, nr_channels;
unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS];
unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
@@ -347,7 +355,14 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
*/
interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb);
nr_channels = interleaved ? 2 : 1;
- mci = edac_mc_alloc(0, I3000_RANKS / nr_channels, nr_channels, 0);
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I3000_RANKS / nr_channels;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
@@ -386,19 +401,23 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
cumul_size <<= 1;
debugf3("MC: %s(): (%d) cumul_size 0x%x\n",
__func__, i, cumul_size);
- if (cumul_size == last_cumul_size) {
- csrow->mtype = MEM_EMPTY;
+ if (cumul_size == last_cumul_size)
continue;
- }
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = I3000_DEAP_GRAIN;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+
+ for (j = 0; j < nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j].dimm;
+
+ dimm->nr_pages = nr_pages / nr_channels;
+ dimm->grain = I3000_DEAP_GRAIN;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
/*
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 046808c..bbe43ef 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -23,6 +23,7 @@
#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0
+#define I3200_DIMMS 4
#define I3200_RANKS 8
#define I3200_RANKS_PER_CHANNEL 4
#define I3200_CHANNELS 2
@@ -217,21 +218,25 @@ static void i3200_process_error_info(struct mem_ctl_info *mci,
return;
if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
if (log & I3200_ECCERRLOG_UE) {
- edac_mc_handle_ue(mci, 0, 0,
- eccerrlog_row(channel, log),
- "i3200 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ 0, 0, 0,
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "i3000 UE", "", NULL);
} else if (log & I3200_ECCERRLOG_CE) {
- edac_mc_handle_ce(mci, 0, 0,
- eccerrlog_syndrome(log),
- eccerrlog_row(channel, log), 0,
- "i3200 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ 0, 0, eccerrlog_syndrome(log),
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "i3000 UE", "", NULL);
}
}
}
@@ -319,9 +324,9 @@ static unsigned long drb_to_nr_pages(
static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_page;
+ struct edac_mc_layer layers[2];
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
@@ -336,8 +341,14 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
i3200_get_drbs(window, drbs);
nr_channels = how_many_channels(pdev);
- mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
- nr_channels, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I3200_DIMMS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(struct i3200_priv));
if (!mci)
return -ENOMEM;
@@ -366,7 +377,6 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
- last_page = -1UL;
for (i = 0; i < mci->nr_csrows; i++) {
unsigned long nr_pages;
struct csrow_info *csrow = &mci->csrows[i];
@@ -375,20 +385,18 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
i / I3200_RANKS_PER_CHANNEL,
i % I3200_RANKS_PER_CHANNEL);
- if (nr_pages == 0) {
- csrow->mtype = MEM_EMPTY;
+ if (nr_pages == 0)
continue;
- }
- csrow->first_page = last_page + 1;
- last_page += nr_pages;
- csrow->last_page = last_page;
- csrow->nr_pages = nr_pages;
+ for (j = 0; j < nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j].dimm;
- csrow->grain = nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+ dimm->nr_pages = nr_pages / nr_channels;
+ dimm->grain = nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
i3200_clear_error_info(mci);
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index a2680d8..11ea835 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -270,7 +270,8 @@
#define MTR3 0x8C
#define NUM_MTRS 4
-#define CHANNELS_PER_BRANCH (2)
+#define CHANNELS_PER_BRANCH 2
+#define MAX_BRANCHES 2
/* Defines to extract the vaious fields from the
* MTRx - Memory Technology Registers
@@ -473,7 +474,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
char msg[EDAC_MC_LABEL_LEN + 1 + 160];
char *specific = NULL;
u32 allErrors;
- int branch;
int channel;
int bank;
int rank;
@@ -485,8 +485,7 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
if (!allErrors)
return; /* if no error, return now */
- branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
- channel = branch;
+ channel = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
/* Use the NON-Recoverable macros to extract data */
bank = NREC_BANK(info->nrecmema);
@@ -495,9 +494,9 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
ras = NREC_RAS(info->nrecmemb);
cas = NREC_CAS(info->nrecmemb);
- debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
- "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, channel + 1, branch >> 1, bank,
+ debugf0("\t\tCSROW= %d Channel= %d "
+ "(DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, bank,
rdwr ? "Write" : "Read", ras, cas);
/* Only 1 bit will be on */
@@ -533,13 +532,14 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "
- "FATAL Err=0x%x (%s))",
- branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
- allErrors, specific);
+ "Bank=%d RAS=%d CAS=%d FATAL Err=0x%x (%s)",
+ bank, ras, cas, allErrors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 0, 0, 0,
+ channel >> 1, channel & 1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg, NULL);
}
/*
@@ -633,13 +633,14 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
- "CAS=%d, UE Err=0x%x (%s))",
- branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
- ue_errors, specific);
+ "Rank=%d Bank=%d RAS=%d CAS=%d, UE Err=0x%x (%s)",
+ rank, bank, ras, cas, ue_errors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ channel >> 1, -1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg, NULL);
}
/* Check correctable errors */
@@ -685,13 +686,16 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
+ "Rank=%d Bank=%d RDWR=%s RAS=%d "
"CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
rdwr ? "Write" : "Read", ras, cas, ce_errors,
specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, rank, channel, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
+ channel >> 1, channel % 2, rank,
+ rdwr ? "Write error" : "Read error",
+ msg, NULL);
}
if (!misc_messages)
@@ -731,11 +735,12 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d Err=%#x (%s))", branch >> 1,
- misc_errors, specific);
+ "Err=%#x (%s)", misc_errors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, 0, 0, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
+ branch >> 1, -1, -1,
+ "Misc error", msg, NULL);
}
}
@@ -956,14 +961,14 @@ static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel)
*
* return the proper MTR register as determine by the csrow and channel desired
*/
-static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel)
+static int determine_mtr(struct i5000_pvt *pvt, int slot, int channel)
{
int mtr;
if (channel < CHANNELS_PER_BRANCH)
- mtr = pvt->b0_mtr[csrow >> 1];
+ mtr = pvt->b0_mtr[slot];
else
- mtr = pvt->b1_mtr[csrow >> 1];
+ mtr = pvt->b1_mtr[slot];
return mtr;
}
@@ -988,37 +993,34 @@ static void decode_mtr(int slot_row, u16 mtr)
debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
}
-static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel,
+static void handle_channel(struct i5000_pvt *pvt, int slot, int channel,
struct i5000_dimm_info *dinfo)
{
int mtr;
int amb_present_reg;
int addrBits;
- mtr = determine_mtr(pvt, csrow, channel);
+ mtr = determine_mtr(pvt, slot, channel);
if (MTR_DIMMS_PRESENT(mtr)) {
amb_present_reg = determine_amb_present_reg(pvt, channel);
- /* Determine if there is a DIMM present in this DIMM slot */
- if (amb_present_reg & (1 << (csrow >> 1))) {
+ /* Determine if there is a DIMM present in this DIMM slot */
+ if (amb_present_reg) {
dinfo->dual_rank = MTR_DIMM_RANK(mtr);
- if (!((dinfo->dual_rank == 0) &&
- ((csrow & 0x1) == 0x1))) {
- /* Start with the number of bits for a Bank
- * on the DRAM */
- addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
- /* Add thenumber of ROW bits */
- addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
- /* add the number of COLUMN bits */
- addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
-
- addrBits += 6; /* add 64 bits per DIMM */
- addrBits -= 20; /* divide by 2^^20 */
- addrBits -= 3; /* 8 bits per bytes */
-
- dinfo->megabytes = 1 << addrBits;
- }
+ /* Start with the number of bits for a Bank
+ * on the DRAM */
+ addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
+ /* Add the number of ROW bits */
+ addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
+ /* add the number of COLUMN bits */
+ addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
+
+ addrBits += 6; /* add 64 bits per DIMM */
+ addrBits -= 20; /* divide by 2^^20 */
+ addrBits -= 3; /* 8 bits per bytes */
+
+ dinfo->megabytes = 1 << addrBits;
}
}
}
@@ -1032,10 +1034,9 @@ static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel,
static void calculate_dimm_size(struct i5000_pvt *pvt)
{
struct i5000_dimm_info *dinfo;
- int csrow, max_csrows;
+ int slot, channel, branch;
char *p, *mem_buffer;
int space, n;
- int channel;
/* ================= Generate some debug output ================= */
space = PAGE_SIZE;
@@ -1046,22 +1047,17 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)
return;
}
- n = snprintf(p, space, "\n");
- p += n;
- space -= n;
-
- /* Scan all the actual CSROWS (which is # of DIMMS * 2)
+ /* Scan all the actual slots
* and calculate the information for each DIMM
- * Start with the highest csrow first, to display it first
- * and work toward the 0th csrow
+ * Start with the highest slot first, to display it first
+ * and work toward the 0th slot
*/
- max_csrows = pvt->maxdimmperch * 2;
- for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
+ for (slot = pvt->maxdimmperch - 1; slot >= 0; slot--) {
- /* on an odd csrow, first output a 'boundary' marker,
+ /* on an odd slot, first output a 'boundary' marker,
* then reset the message buffer */
- if (csrow & 0x1) {
- n = snprintf(p, space, "---------------------------"
+ if (slot & 0x1) {
+ n = snprintf(p, space, "--------------------------"
"--------------------------------");
p += n;
space -= n;
@@ -1069,30 +1065,39 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)
p = mem_buffer;
space = PAGE_SIZE;
}
- n = snprintf(p, space, "csrow %2d ", csrow);
+ n = snprintf(p, space, "slot %2d ", slot);
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
- dinfo = &pvt->dimm_info[csrow][channel];
- handle_channel(pvt, csrow, channel, dinfo);
- n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
+ dinfo = &pvt->dimm_info[slot][channel];
+ handle_channel(pvt, slot, channel, dinfo);
+ if (dinfo->megabytes)
+ n = snprintf(p, space, "%4d MB %dR| ",
+ dinfo->megabytes, dinfo->dual_rank + 1);
+ else
+ n = snprintf(p, space, "%4d MB | ", 0);
p += n;
space -= n;
}
- n = snprintf(p, space, "\n");
p += n;
space -= n;
+ debugf2("%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
}
/* Output the last bottom 'boundary' marker */
- n = snprintf(p, space, "---------------------------"
- "--------------------------------\n");
+ n = snprintf(p, space, "--------------------------"
+ "--------------------------------");
p += n;
space -= n;
+ debugf2("%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
/* now output the 'channel' labels */
- n = snprintf(p, space, " ");
+ n = snprintf(p, space, " ");
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
@@ -1100,9 +1105,17 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)
p += n;
space -= n;
}
- n = snprintf(p, space, "\n");
+ debugf2("%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
+
+ n = snprintf(p, space, " ");
p += n;
- space -= n;
+ for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ n = snprintf(p, space, " branch %d | ", branch);
+ p += n;
+ space -= n;
+ }
/* output the last message and free buffer */
debugf2("%s\n", mem_buffer);
@@ -1235,13 +1248,13 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
static int i5000_init_csrows(struct mem_ctl_info *mci)
{
struct i5000_pvt *pvt;
- struct csrow_info *p_csrow;
+ struct dimm_info *dimm;
int empty, channel_count;
int max_csrows;
- int mtr, mtr1;
+ int mtr;
int csrow_megs;
int channel;
- int csrow;
+ int slot;
pvt = mci->pvt_info;
@@ -1250,43 +1263,40 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
empty = 1; /* Assume NO memory */
- for (csrow = 0; csrow < max_csrows; csrow++) {
- p_csrow = &mci->csrows[csrow];
-
- p_csrow->csrow_idx = csrow;
-
- /* use branch 0 for the basis */
- mtr = pvt->b0_mtr[csrow >> 1];
- mtr1 = pvt->b1_mtr[csrow >> 1];
-
- /* if no DIMMS on this row, continue */
- if (!MTR_DIMMS_PRESENT(mtr) && !MTR_DIMMS_PRESENT(mtr1))
- continue;
+ /*
+ * FIXME: The memory layout used to map slot/channel into the
+ * real memory architecture is weird: branch+slot are "csrows"
+ * and channel is channel. That required an extra array (dimm_info)
+ * to map the dimms. A good cleanup would be to remove this array,
+ * and do a loop here with branch, channel, slot
+ */
+ for (slot = 0; slot < max_csrows; slot++) {
+ for (channel = 0; channel < pvt->maxch; channel++) {
- /* FAKE OUT VALUES, FIXME */
- p_csrow->first_page = 0 + csrow * 20;
- p_csrow->last_page = 9 + csrow * 20;
- p_csrow->page_mask = 0xFFF;
+ mtr = determine_mtr(pvt, slot, channel);
- p_csrow->grain = 8;
+ if (!MTR_DIMMS_PRESENT(mtr))
+ continue;
- csrow_megs = 0;
- for (channel = 0; channel < pvt->maxch; channel++) {
- csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
- }
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ channel / MAX_BRANCHES,
+ channel % MAX_BRANCHES, slot);
- p_csrow->nr_pages = csrow_megs << 8;
+ csrow_megs = pvt->dimm_info[slot][channel].megabytes;
+ dimm->grain = 8;
- /* Assume DDR2 for now */
- p_csrow->mtype = MEM_FB_DDR2;
+ /* Assume DDR2 for now */
+ dimm->mtype = MEM_FB_DDR2;
- /* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
- p_csrow->dtype = DEV_X8;
- else
- p_csrow->dtype = DEV_X4;
+ /* ask what device type on this row */
+ if (MTR_DRAM_WIDTH(mtr))
+ dimm->dtype = DEV_X8;
+ else
+ dimm->dtype = DEV_X4;
- p_csrow->edac_mode = EDAC_S8ECD8ED;
+ dimm->edac_mode = EDAC_S8ECD8ED;
+ dimm->nr_pages = csrow_megs << 8;
+ }
empty = 0;
}
@@ -1317,7 +1327,7 @@ static void i5000_enable_error_reporting(struct mem_ctl_info *mci)
}
/*
- * i5000_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels)
+ * i5000_get_dimm_and_channel_counts(pdev, &nr_csrows, &num_channels)
*
* ask the device how many channels are present and how many CSROWS
* as well
@@ -1332,7 +1342,7 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev,
* supported on this memory controller
*/
pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
- *num_dimms_per_channel = (int)value *2;
+ *num_dimms_per_channel = (int)value;
pci_read_config_byte(pdev, MAXCH, &value);
*num_channels = (int)value;
@@ -1348,10 +1358,10 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev,
static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[3];
struct i5000_pvt *pvt;
int num_channels;
int num_dimms_per_channel;
- int num_csrows;
debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n",
__FILE__, __func__,
@@ -1377,14 +1387,22 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
*/
i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
&num_channels);
- num_csrows = num_dimms_per_channel * 2;
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
+ debugf0("MC: %s(): Number of Branches=2 Channels= %d DIMMS= %d\n",
+ __func__, num_channels, num_dimms_per_channel);
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = num_channels / MAX_BRANCHES;
+ layers[1].is_virt_csrow = false;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = num_dimms_per_channel;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d500749..e9e7c2a 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -14,6 +14,11 @@
* rows for each respective channel are laid out one after another,
* the first half belonging to channel 0, the second half belonging
* to channel 1.
+ *
+ * This driver is for DDR2 DIMMs, and it uses chip select to select among the
+ * several ranks. However, instead of showing memories as ranks, it outputs
+ * them as DIMM's. An internal table creates the association between ranks
+ * and DIMM's.
*/
#include <linux/module.h>
#include <linux/init.h>
@@ -410,14 +415,6 @@ static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
return csrow / priv->ranksperchan;
}
-static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
- int chan, int rank)
-{
- const struct i5100_priv *priv = mci->pvt_info;
-
- return chan * priv->ranksperchan + rank;
-}
-
static void i5100_handle_ce(struct mem_ctl_info *mci,
int chan,
unsigned bank,
@@ -427,17 +424,17 @@ static void i5100_handle_ce(struct mem_ctl_info *mci,
unsigned ras,
const char *msg)
{
- const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+ char detail[80];
- printk(KERN_ERR
- "CE chan %d, bank %u, rank %u, syndrome 0x%lx, "
- "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
- chan, bank, rank, syndrome, cas, ras,
- csrow, mci->csrows[csrow].channels[0].label, msg);
+ /* Form out message */
+ snprintf(detail, sizeof(detail),
+ "bank %u, cas %u, ras %u\n",
+ bank, cas, ras);
- mci->ce_count++;
- mci->csrows[csrow].ce_count++;
- mci->csrows[csrow].channels[0].ce_count++;
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ 0, 0, syndrome,
+ chan, rank, -1,
+ msg, detail, NULL);
}
static void i5100_handle_ue(struct mem_ctl_info *mci,
@@ -449,16 +446,17 @@ static void i5100_handle_ue(struct mem_ctl_info *mci,
unsigned ras,
const char *msg)
{
- const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+ char detail[80];
- printk(KERN_ERR
- "UE chan %d, bank %u, rank %u, syndrome 0x%lx, "
- "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
- chan, bank, rank, syndrome, cas, ras,
- csrow, mci->csrows[csrow].channels[0].label, msg);
+ /* Form out message */
+ snprintf(detail, sizeof(detail),
+ "bank %u, cas %u, ras %u\n",
+ bank, cas, ras);
- mci->ue_count++;
- mci->csrows[csrow].ue_count++;
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ 0, 0, syndrome,
+ chan, rank, -1,
+ msg, detail, NULL);
}
static void i5100_read_log(struct mem_ctl_info *mci, int chan,
@@ -835,10 +833,10 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
{
int i;
- unsigned long total_pages = 0UL;
struct i5100_priv *priv = mci->pvt_info;
- for (i = 0; i < mci->nr_csrows; i++) {
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm;
const unsigned long npages = i5100_npages(mci, i);
const unsigned chan = i5100_csrow_to_chan(mci, i);
const unsigned rank = i5100_csrow_to_rank(mci, i);
@@ -846,33 +844,23 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
if (!npages)
continue;
- /*
- * FIXME: these two are totally bogus -- I don't see how to
- * map them correctly to this structure...
- */
- mci->csrows[i].first_page = total_pages;
- mci->csrows[i].last_page = total_pages + npages - 1;
- mci->csrows[i].page_mask = 0UL;
-
- mci->csrows[i].nr_pages = npages;
- mci->csrows[i].grain = 32;
- mci->csrows[i].csrow_idx = i;
- mci->csrows[i].dtype =
- (priv->mtr[chan][rank].width == 4) ? DEV_X4 : DEV_X8;
- mci->csrows[i].ue_count = 0;
- mci->csrows[i].ce_count = 0;
- mci->csrows[i].mtype = MEM_RDDR2;
- mci->csrows[i].edac_mode = EDAC_SECDED;
- mci->csrows[i].mci = mci;
- mci->csrows[i].nr_channels = 1;
- mci->csrows[i].channels[0].chan_idx = 0;
- mci->csrows[i].channels[0].ce_count = 0;
- mci->csrows[i].channels[0].csrow = mci->csrows + i;
- snprintf(mci->csrows[i].channels[0].label,
- sizeof(mci->csrows[i].channels[0].label),
- "DIMM%u", i5100_rank_to_slot(mci, chan, rank));
-
- total_pages += npages;
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ chan, rank, 0);
+
+ dimm->nr_pages = npages;
+ if (npages) {
+ dimm->grain = 32;
+ dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
+ DEV_X4 : DEV_X8;
+ dimm->mtype = MEM_RDDR2;
+ dimm->edac_mode = EDAC_SECDED;
+ snprintf(dimm->label, sizeof(dimm->label),
+ "DIMM%u",
+ i5100_rank_to_slot(mci, chan, rank));
+ }
+
+ debugf2("dimm channel %d, rank %d, size %ld\n",
+ chan, rank, (long)PAGES_TO_MiB(npages));
}
}
@@ -881,6 +869,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
{
int rc;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i5100_priv *priv;
struct pci_dev *ch0mm, *ch1mm;
int ret = 0;
@@ -941,7 +930,14 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
goto bail_ch1;
}
- mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 2;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = ranksperch;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(*priv));
if (!mci) {
ret = -ENOMEM;
goto bail_disable_ch1;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 1869a10..6640c29 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -18,6 +18,10 @@
* Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet
* http://developer.intel.com/design/chipsets/datashts/313070.htm
*
+ * This Memory Controller manages DDR2 FB-DIMMs. It has 2 branches, each with
+ * 2 channels operating in lockstep no-mirror mode. Each channel can have up to
+ * 4 dimm's, each with up to 8GB.
+ *
*/
#include <linux/module.h>
@@ -44,12 +48,10 @@
edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
/* Limits for i5400 */
-#define NUM_MTRS_PER_BRANCH 4
+#define MAX_BRANCHES 2
#define CHANNELS_PER_BRANCH 2
-#define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH
-#define MAX_CHANNELS 4
-/* max possible csrows per channel */
-#define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL)
+#define DIMMS_PER_CHANNEL 4
+#define MAX_CHANNELS (MAX_BRANCHES * CHANNELS_PER_BRANCH)
/* Device 16,
* Function 0: System Address
@@ -347,16 +349,16 @@ struct i5400_pvt {
u16 mir0, mir1;
- u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
+ u16 b0_mtr[DIMMS_PER_CHANNEL]; /* Memory Technlogy Reg */
u16 b0_ambpresent0; /* Branch 0, Channel 0 */
u16 b0_ambpresent1; /* Brnach 0, Channel 1 */
- u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
+ u16 b1_mtr[DIMMS_PER_CHANNEL]; /* Memory Technlogy Reg */
u16 b1_ambpresent0; /* Branch 1, Channel 8 */
u16 b1_ambpresent1; /* Branch 1, Channel 1 */
/* DIMM information matrix, allocating architecture maximums */
- struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS];
+ struct i5400_dimm_info dimm_info[DIMMS_PER_CHANNEL][MAX_CHANNELS];
/* Actual values for this controller */
int maxch; /* Max channels */
@@ -532,13 +534,15 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
int ras, cas;
int errnum;
char *type = NULL;
+ enum hw_event_mc_err_type tp_event = HW_EVENT_ERR_UNCORRECTED;
if (!allErrors)
return; /* if no error, return now */
- if (allErrors & ERROR_FAT_MASK)
+ if (allErrors & ERROR_FAT_MASK) {
type = "FATAL";
- else if (allErrors & FERR_NF_UNCORRECTABLE)
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else if (allErrors & FERR_NF_UNCORRECTABLE)
type = "NON-FATAL uncorrected";
else
type = "NON-FATAL recoverable";
@@ -556,7 +560,7 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
ras = nrec_ras(info);
cas = nrec_cas(info);
- debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
+ debugf0("\t\tDIMM= %d Channels= %d,%d (Branch= %d "
"DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
rank, channel, channel + 1, branch >> 1, bank,
buf_id, rdwr_str(rdwr), ras, cas);
@@ -566,13 +570,13 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
- "RAS=%d CAS=%d %s Err=0x%lx (%s))",
- type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
- type, allErrors, error_name[errnum]);
+ "Bank=%d Buffer ID = %d RAS=%d CAS=%d Err=0x%lx (%s)",
+ bank, buf_id, ras, cas, allErrors, error_name[errnum]);
- /* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(tp_event, mci, 0, 0, 0,
+ branch >> 1, -1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg, NULL);
}
/*
@@ -630,7 +634,7 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Only 1 bit will be on */
errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
- debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
+ debugf0("\t\tDIMM= %d Channel= %d (Branch %d "
"DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
rank, channel, branch >> 1, bank,
rdwr_str(rdwr), ras, cas);
@@ -642,8 +646,10 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
branch >> 1, bank, rdwr_str(rdwr), ras, cas,
allErrors, error_name[errnum]);
- /* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, rank, channel, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
+ branch >> 1, channel % 2, rank,
+ rdwr ? "Write error" : "Read error",
+ msg, NULL);
return;
}
@@ -831,8 +837,8 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
/*
* determine_amb_present
*
- * the information is contained in NUM_MTRS_PER_BRANCH different
- * registers determining which of the NUM_MTRS_PER_BRANCH requires
+ * the information is contained in DIMMS_PER_CHANNEL different
+ * registers determining which of the DIMMS_PER_CHANNEL requires
* knowing which channel is in question
*
* 2 branches, each with 2 channels
@@ -861,11 +867,11 @@ static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
}
/*
- * determine_mtr(pvt, csrow, channel)
+ * determine_mtr(pvt, dimm, channel)
*
- * return the proper MTR register as determine by the csrow and desired channel
+ * return the proper MTR register as determine by the dimm and desired channel
*/
-static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
+static int determine_mtr(struct i5400_pvt *pvt, int dimm, int channel)
{
int mtr;
int n;
@@ -873,11 +879,11 @@ static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
/* There is one MTR for each slot pair of FB-DIMMs,
Each slot pair may be at branch 0 or branch 1.
*/
- n = csrow;
+ n = dimm;
- if (n >= NUM_MTRS_PER_BRANCH) {
- debugf0("ERROR: trying to access an invalid csrow: %d\n",
- csrow);
+ if (n >= DIMMS_PER_CHANNEL) {
+ debugf0("ERROR: trying to access an invalid dimm: %d\n",
+ dimm);
return 0;
}
@@ -913,19 +919,19 @@ static void decode_mtr(int slot_row, u16 mtr)
debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
}
-static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
+static void handle_channel(struct i5400_pvt *pvt, int dimm, int channel,
struct i5400_dimm_info *dinfo)
{
int mtr;
int amb_present_reg;
int addrBits;
- mtr = determine_mtr(pvt, csrow, channel);
+ mtr = determine_mtr(pvt, dimm, channel);
if (MTR_DIMMS_PRESENT(mtr)) {
amb_present_reg = determine_amb_present_reg(pvt, channel);
/* Determine if there is a DIMM present in this DIMM slot */
- if (amb_present_reg & (1 << csrow)) {
+ if (amb_present_reg & (1 << dimm)) {
/* Start with the number of bits for a Bank
* on the DRAM */
addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
@@ -954,10 +960,10 @@ static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
static void calculate_dimm_size(struct i5400_pvt *pvt)
{
struct i5400_dimm_info *dinfo;
- int csrow, max_csrows;
+ int dimm, max_dimms;
char *p, *mem_buffer;
int space, n;
- int channel;
+ int channel, branch;
/* ================= Generate some debug output ================= */
space = PAGE_SIZE;
@@ -968,32 +974,32 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
return;
}
- /* Scan all the actual CSROWS
+ /* Scan all the actual DIMMS
* and calculate the information for each DIMM
- * Start with the highest csrow first, to display it first
- * and work toward the 0th csrow
+ * Start with the highest dimm first, to display it first
+ * and work toward the 0th dimm
*/
- max_csrows = pvt->maxdimmperch;
- for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
+ max_dimms = pvt->maxdimmperch;
+ for (dimm = max_dimms - 1; dimm >= 0; dimm--) {
- /* on an odd csrow, first output a 'boundary' marker,
+ /* on an odd dimm, first output a 'boundary' marker,
* then reset the message buffer */
- if (csrow & 0x1) {
+ if (dimm & 0x1) {
n = snprintf(p, space, "---------------------------"
- "--------------------------------");
+ "-------------------------------");
p += n;
space -= n;
debugf2("%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;
}
- n = snprintf(p, space, "csrow %2d ", csrow);
+ n = snprintf(p, space, "dimm %2d ", dimm);
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
- dinfo = &pvt->dimm_info[csrow][channel];
- handle_channel(pvt, csrow, channel, dinfo);
+ dinfo = &pvt->dimm_info[dimm][channel];
+ handle_channel(pvt, dimm, channel, dinfo);
n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
p += n;
space -= n;
@@ -1005,7 +1011,7 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
/* Output the last bottom 'boundary' marker */
n = snprintf(p, space, "---------------------------"
- "--------------------------------");
+ "-------------------------------");
p += n;
space -= n;
debugf2("%s\n", mem_buffer);
@@ -1013,7 +1019,7 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
space = PAGE_SIZE;
/* now output the 'channel' labels */
- n = snprintf(p, space, " ");
+ n = snprintf(p, space, " ");
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
@@ -1022,6 +1028,19 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
space -= n;
}
+ space -= n;
+ debugf2("%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
+
+ n = snprintf(p, space, " ");
+ p += n;
+ for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ n = snprintf(p, space, " branch %d | ", branch);
+ p += n;
+ space -= n;
+ }
+
/* output the last message and free buffer */
debugf2("%s\n", mem_buffer);
kfree(mem_buffer);
@@ -1080,7 +1099,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
/* Get the set of MTR[0-3] regs by each branch */
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) {
int where = MTR0 + (slot_row * sizeof(u16));
/* Branch 0 set of MTR registers */
@@ -1105,7 +1124,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
/* Read and dump branch 0's MTRs */
debugf2("\nMemory Technology Registers:\n");
debugf2(" Branch 0:\n");
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
pci_read_config_word(pvt->branch_0, AMBPRESENT_0,
@@ -1122,7 +1141,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
} else {
/* Read and dump branch 1's MTRs */
debugf2(" Branch 1:\n");
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
pci_read_config_word(pvt->branch_1, AMBPRESENT_0,
@@ -1141,7 +1160,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
}
/*
- * i5400_init_csrows Initialize the 'csrows' table within
+ * i5400_init_dimms Initialize the 'dimms' table within
* the mci control structure with the
* addressing of memory.
*
@@ -1149,64 +1168,68 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
* 0 success
* 1 no actual memory found on this MC
*/
-static int i5400_init_csrows(struct mem_ctl_info *mci)
+static int i5400_init_dimms(struct mem_ctl_info *mci)
{
struct i5400_pvt *pvt;
- struct csrow_info *p_csrow;
- int empty, channel_count;
- int max_csrows;
+ struct dimm_info *dimm;
+ int ndimms, channel_count;
+ int max_dimms;
int mtr;
- int csrow_megs;
- int channel;
- int csrow;
+ int size_mb;
+ int channel, slot;
pvt = mci->pvt_info;
channel_count = pvt->maxch;
- max_csrows = pvt->maxdimmperch;
+ max_dimms = pvt->maxdimmperch;
- empty = 1; /* Assume NO memory */
+ ndimms = 0;
- for (csrow = 0; csrow < max_csrows; csrow++) {
- p_csrow = &mci->csrows[csrow];
-
- p_csrow->csrow_idx = csrow;
-
- /* use branch 0 for the basis */
- mtr = determine_mtr(pvt, csrow, 0);
-
- /* if no DIMMS on this row, continue */
- if (!MTR_DIMMS_PRESENT(mtr))
- continue;
-
- /* FAKE OUT VALUES, FIXME */
- p_csrow->first_page = 0 + csrow * 20;
- p_csrow->last_page = 9 + csrow * 20;
- p_csrow->page_mask = 0xFFF;
-
- p_csrow->grain = 8;
-
- csrow_megs = 0;
- for (channel = 0; channel < pvt->maxch; channel++)
- csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
-
- p_csrow->nr_pages = csrow_megs << 8;
-
- /* Assume DDR2 for now */
- p_csrow->mtype = MEM_FB_DDR2;
-
- /* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
- p_csrow->dtype = DEV_X8;
- else
- p_csrow->dtype = DEV_X4;
-
- p_csrow->edac_mode = EDAC_S8ECD8ED;
-
- empty = 0;
+ /*
+ * FIXME: remove pvt->dimm_info[slot][channel] and use the 3
+ * layers here.
+ */
+ for (channel = 0; channel < mci->layers[0].size * mci->layers[1].size;
+ channel++) {
+ for (slot = 0; slot < mci->layers[2].size; slot++) {
+ mtr = determine_mtr(pvt, slot, channel);
+
+ /* if no DIMMS on this slot, continue */
+ if (!MTR_DIMMS_PRESENT(mtr))
+ continue;
+
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ channel / 2, channel % 2, slot);
+
+ size_mb = pvt->dimm_info[slot][channel].megabytes;
+
+ debugf2("%s: dimm%zd (branch %d channel %d slot %d): %d.%03d GB\n",
+ __func__, dimm - mci->dimms,
+ channel / 2, channel % 2, slot,
+ size_mb / 1000, size_mb % 1000);
+
+ dimm->nr_pages = size_mb << 8;
+ dimm->grain = 8;
+ dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+ dimm->mtype = MEM_FB_DDR2;
+ /*
+ * The eccc mechanism is SDDC (aka SECC), with
+ * is similar to Chipkill.
+ */
+ dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ?
+ EDAC_S8ECD8ED : EDAC_S4ECD4ED;
+ ndimms++;
+ }
}
- return empty;
+ /*
+ * When just one memory is provided, it should be at location (0,0,0).
+ * With such single-DIMM mode, the SDCC algorithm degrades to SECDEC+.
+ */
+ if (ndimms == 1)
+ mci->dimms[0].edac_mode = EDAC_SECDED;
+
+ return (ndimms == 0);
}
/*
@@ -1242,9 +1265,7 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct i5400_pvt *pvt;
- int num_channels;
- int num_dimms_per_channel;
- int num_csrows;
+ struct edac_mc_layer layers[3];
if (dev_idx >= ARRAY_SIZE(i5400_devs))
return -EINVAL;
@@ -1258,23 +1279,21 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
if (PCI_FUNC(pdev->devfn) != 0)
return -ENODEV;
- /* As we don't have a motherboard identification routine to determine
- * actual number of slots/dimms per channel, we thus utilize the
- * resource as specified by the chipset. Thus, we might have
- * have more DIMMs per channel than actually on the mobo, but this
- * allows the driver to support up to the chipset max, without
- * some fancy mobo determination.
+ /*
+ * allocate a new MC control structure
+ *
+ * This drivers uses the DIMM slot as "csrow" and the rest as "channel".
*/
- num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL;
- num_channels = MAX_CHANNELS;
- num_csrows = num_dimms_per_channel;
-
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
-
- /* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
-
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = CHANNELS_PER_BRANCH;
+ layers[1].is_virt_csrow = false;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = DIMMS_PER_CHANNEL;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
@@ -1284,8 +1303,8 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
pvt = mci->pvt_info;
pvt->system_address = pdev; /* Record this device in our private */
- pvt->maxch = num_channels;
- pvt->maxdimmperch = num_dimms_per_channel;
+ pvt->maxch = MAX_CHANNELS;
+ pvt->maxdimmperch = DIMMS_PER_CHANNEL;
/* 'get' the pci devices we want to reserve for our use */
if (i5400_get_devices(mci, dev_idx))
@@ -1307,13 +1326,13 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
/* Set the function pointer to an actual operation function */
mci->edac_check = i5400_check_error;
- /* initialize the MC control structure 'csrows' table
+ /* initialize the MC control structure 'dimms' table
* with the mapping and control information */
- if (i5400_init_csrows(mci)) {
+ if (i5400_init_dimms(mci)) {
debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
- " because i5400_init_csrows() returned nonzero "
+ " because i5400_init_dimms() returned nonzero "
"value\n");
- mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
+ mci->edac_cap = EDAC_FLAG_NONE; /* no dimms found */
} else {
debugf1("MC: Enable error reporting now\n");
i5400_enable_error_reporting(mci);
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 3bafa3b..97c22fd 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -464,17 +464,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
FERR_FAT_FBD, error_reg);
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
- "FATAL (Branch=%d DRAM-Bank=%d %s "
- "RAS=%d CAS=%d Err=0x%lx (%s))",
- branch, bank,
- is_wr ? "RDWR" : "RD",
- ras, cas,
- errors, specific);
-
- /* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, branch << 1,
- (branch << 1) + 1,
- pvt->tmp_prt_buffer);
+ "Bank=%d RAS=%d CAS=%d Err=0x%lx (%s))",
+ bank, ras, cas, errors, specific);
+
+ edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 0, 0, 0,
+ branch, -1, rank,
+ is_wr ? "Write error" : "Read error",
+ pvt->tmp_prt_buffer, NULL);
+
}
/* read in the 1st NON-FATAL error register */
@@ -513,23 +510,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* Form out message */
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
- "Corrected error (Branch=%d, Channel %d), "
- " DRAM-Bank=%d %s "
- "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))",
- branch, channel,
- bank,
- is_wr ? "RDWR" : "RD",
- ras, cas,
- errors, syndrome, specific);
-
- /*
- * Call the helper to output message
- * NOTE: Errors are reported per-branch, and not per-channel
- * Currently, we don't know how to identify the right
- * channel.
- */
- edac_mc_handle_fbd_ce(mci, rank, channel,
- pvt->tmp_prt_buffer);
+ "DRAM-Bank=%d RAS=%d CAS=%d, Err=0x%lx (%s))",
+ bank, ras, cas, errors, specific);
+
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0,
+ syndrome,
+ branch >> 1, channel % 2, rank,
+ is_wr ? "Write error" : "Read error",
+ pvt->tmp_prt_buffer, NULL);
}
return;
}
@@ -617,8 +605,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
static int decode_mtr(struct i7300_pvt *pvt,
int slot, int ch, int branch,
struct i7300_dimm_info *dinfo,
- struct csrow_info *p_csrow,
- u32 *nr_pages)
+ struct dimm_info *dimm)
{
int mtr, ans, addrBits, channel;
@@ -650,7 +637,6 @@ static int decode_mtr(struct i7300_pvt *pvt,
addrBits -= 3; /* 8 bits per bytes */
dinfo->megabytes = 1 << addrBits;
- *nr_pages = dinfo->megabytes << 8;
debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
@@ -663,11 +649,6 @@ static int decode_mtr(struct i7300_pvt *pvt,
debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
- p_csrow->grain = 8;
- p_csrow->mtype = MEM_FB_DDR2;
- p_csrow->csrow_idx = slot;
- p_csrow->page_mask = 0;
-
/*
* The type of error detection actually depends of the
* mode of operation. When it is just one single memory chip, at
@@ -677,15 +658,18 @@ static int decode_mtr(struct i7300_pvt *pvt,
* See datasheet Sections 7.3.6 to 7.3.8
*/
+ dimm->nr_pages = MiB_TO_PAGES(dinfo->megabytes);
+ dimm->grain = 8;
+ dimm->mtype = MEM_FB_DDR2;
if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
- p_csrow->edac_mode = EDAC_SECDED;
+ dimm->edac_mode = EDAC_SECDED;
debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
} else {
debugf2("\t\tECC code is on Lockstep mode\n");
if (MTR_DRAM_WIDTH(mtr) == 8)
- p_csrow->edac_mode = EDAC_S8ECD8ED;
+ dimm->edac_mode = EDAC_S8ECD8ED;
else
- p_csrow->edac_mode = EDAC_S4ECD4ED;
+ dimm->edac_mode = EDAC_S4ECD4ED;
}
/* ask what device type on this row */
@@ -694,9 +678,9 @@ static int decode_mtr(struct i7300_pvt *pvt,
IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
"enhanced" : "normal");
- p_csrow->dtype = DEV_X8;
+ dimm->dtype = DEV_X8;
} else
- p_csrow->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
return mtr;
}
@@ -774,11 +758,10 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
struct i7300_dimm_info *dinfo;
- struct csrow_info *p_csrow;
int rc = -ENODEV;
int mtr;
int ch, branch, slot, channel;
- u32 last_page = 0, nr_pages;
+ struct dimm_info *dimm;
pvt = mci->pvt_info;
@@ -809,25 +792,23 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
where,
&pvt->mtr[slot][branch]);
- for (ch = 0; ch < MAX_BRANCHES; ch++) {
+ for (ch = 0; ch < MAX_CH_PER_BRANCH; ch++) {
int channel = to_channel(ch, branch);
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+ mci->n_layers, branch, ch, slot);
+
dinfo = &pvt->dimm_info[slot][channel];
- p_csrow = &mci->csrows[slot];
mtr = decode_mtr(pvt, slot, ch, branch,
- dinfo, p_csrow, &nr_pages);
+ dinfo, dimm);
+
/* if no DIMMS on this row, continue */
if (!MTR_DIMMS_PRESENT(mtr))
continue;
- /* Update per_csrow memory count */
- p_csrow->nr_pages += nr_pages;
- p_csrow->first_page = last_page;
- last_page += nr_pages;
- p_csrow->last_page = last_page;
-
rc = 0;
+
}
}
}
@@ -1042,10 +1023,8 @@ static int __devinit i7300_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[3];
struct i7300_pvt *pvt;
- int num_channels;
- int num_dimms_per_channel;
- int num_csrows;
int rc;
/* wake up device */
@@ -1062,23 +1041,17 @@ static int __devinit i7300_init_one(struct pci_dev *pdev,
if (PCI_FUNC(pdev->devfn) != 0)
return -ENODEV;
- /* As we don't have a motherboard identification routine to determine
- * actual number of slots/dimms per channel, we thus utilize the
- * resource as specified by the chipset. Thus, we might have
- * have more DIMMs per channel than actually on the mobo, but this
- * allows the driver to support up to the chipset max, without
- * some fancy mobo determination.
- */
- num_dimms_per_channel = MAX_SLOTS;
- num_channels = MAX_CHANNELS;
- num_csrows = MAX_SLOTS * MAX_CHANNELS;
-
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
-
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
-
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = MAX_CH_PER_BRANCH;
+ layers[1].is_virt_csrow = true;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = MAX_SLOTS;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 7f1dfcc..d27778f 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -221,7 +221,9 @@ struct i7core_inject {
};
struct i7core_channel {
- u32 ranks;
+ bool is_3dimms_present;
+ bool is_single_4rank;
+ bool has_4rank;
u32 dimms;
};
@@ -257,7 +259,6 @@ struct i7core_pvt {
struct i7core_channel channel[NUM_CHANS];
int ce_count_available;
- int csrow_map[NUM_CHANS][MAX_DIMMS];
/* ECC corrected errors counts per udimm */
unsigned long udimm_ce_count[MAX_DIMMS];
@@ -492,116 +493,15 @@ static void free_i7core_dev(struct i7core_dev *i7core_dev)
/****************************************************************************
Memory check routines
****************************************************************************/
-static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
- unsigned func)
-{
- struct i7core_dev *i7core_dev = get_i7core_dev(socket);
- int i;
-
- if (!i7core_dev)
- return NULL;
-
- for (i = 0; i < i7core_dev->n_devs; i++) {
- if (!i7core_dev->pdev[i])
- continue;
-
- if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
- PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
- return i7core_dev->pdev[i];
- }
- }
-
- return NULL;
-}
-
-/**
- * i7core_get_active_channels() - gets the number of channels and csrows
- * @socket: Quick Path Interconnect socket
- * @channels: Number of channels that will be returned
- * @csrows: Number of csrows found
- *
- * Since EDAC core needs to know in advance the number of available channels
- * and csrows, in order to allocate memory for csrows/channels, it is needed
- * to run two similar steps. At the first step, implemented on this function,
- * it checks the number of csrows/channels present at one socket.
- * this is used in order to properly allocate the size of mci components.
- *
- * It should be noticed that none of the current available datasheets explain
- * or even mention how csrows are seen by the memory controller. So, we need
- * to add a fake description for csrows.
- * So, this driver is attributing one DIMM memory for one csrow.
- */
-static int i7core_get_active_channels(const u8 socket, unsigned *channels,
- unsigned *csrows)
-{
- struct pci_dev *pdev = NULL;
- int i, j;
- u32 status, control;
-
- *channels = 0;
- *csrows = 0;
-
- pdev = get_pdev_slot_func(socket, 3, 0);
- if (!pdev) {
- i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
- socket);
- return -ENODEV;
- }
-
- /* Device 3 function 0 reads */
- pci_read_config_dword(pdev, MC_STATUS, &status);
- pci_read_config_dword(pdev, MC_CONTROL, &control);
-
- for (i = 0; i < NUM_CHANS; i++) {
- u32 dimm_dod[3];
- /* Check if the channel is active */
- if (!(control & (1 << (8 + i))))
- continue;
-
- /* Check if the channel is disabled */
- if (status & (1 << i))
- continue;
-
- pdev = get_pdev_slot_func(socket, i + 4, 1);
- if (!pdev) {
- i7core_printk(KERN_ERR, "Couldn't find socket %d "
- "fn %d.%d!!!\n",
- socket, i + 4, 1);
- return -ENODEV;
- }
- /* Devices 4-6 function 1 */
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM0, &dimm_dod[0]);
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM1, &dimm_dod[1]);
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM2, &dimm_dod[2]);
- (*channels)++;
-
- for (j = 0; j < 3; j++) {
- if (!DIMM_PRESENT(dimm_dod[j]))
- continue;
- (*csrows)++;
- }
- }
-
- debugf0("Number of active channels on socket %d: %d\n",
- socket, *channels);
-
- return 0;
-}
-
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
{
struct i7core_pvt *pvt = mci->pvt_info;
- struct csrow_info *csr;
struct pci_dev *pdev;
int i, j;
- int csrow = 0;
- unsigned long last_page = 0;
enum edac_type mode;
enum mem_type mtype;
+ struct dimm_info *dimm;
/* Get data from the MC register, function 0 */
pdev = pvt->pci_mcr[0];
@@ -657,21 +557,20 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_ch[i][0],
MC_CHANNEL_DIMM_INIT_PARAMS, &data);
- pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
- 4 : 2;
+
+ if (data & THREE_DIMMS_PRESENT)
+ pvt->channel[i].is_3dimms_present = true;
+
+ if (data & SINGLE_QUAD_RANK_PRESENT)
+ pvt->channel[i].is_single_4rank = true;
+
+ if (data & QUAD_RANK_PRESENT)
+ pvt->channel[i].has_4rank = true;
if (data & REGISTERED_DIMM)
mtype = MEM_RDDR3;
else
mtype = MEM_DDR3;
-#if 0
- if (data & THREE_DIMMS_PRESENT)
- pvt->channel[i].dimms = 3;
- else if (data & SINGLE_QUAD_RANK_PRESENT)
- pvt->channel[i].dimms = 1;
- else
- pvt->channel[i].dimms = 2;
-#endif
/* Devices 4-6 function 1 */
pci_read_config_dword(pvt->pci_ch[i][1],
@@ -682,11 +581,13 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
MC_DOD_CH_DIMM2, &dimm_dod[2]);
debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
- "%d ranks, %cDIMMs\n",
+ "%s%s%s%cDIMMs\n",
i,
RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
data,
- pvt->channel[i].ranks,
+ pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
+ pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
+ pvt->channel[i].has_4rank ? "HAS_4R " : "",
(data & REGISTERED_DIMM) ? 'R' : 'U');
for (j = 0; j < 3; j++) {
@@ -696,6 +597,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
if (!DIMM_PRESENT(dimm_dod[j]))
continue;
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ i, j, 0);
banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
@@ -704,8 +607,6 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
/* DDR3 has 8 I/O banks */
size = (rows * cols * banks * ranks) >> (20 - 3);
- pvt->channel[i].dimms++;
-
debugf0("\tdimm %d %d Mb offset: %x, "
"bank: %d, rank: %d, row: %#x, col: %#x\n",
j, size,
@@ -714,44 +615,28 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
npages = MiB_TO_PAGES(size);
- csr = &mci->csrows[csrow];
- csr->first_page = last_page + 1;
- last_page += npages;
- csr->last_page = last_page;
- csr->nr_pages = npages;
-
- csr->page_mask = 0;
- csr->grain = 8;
- csr->csrow_idx = csrow;
- csr->nr_channels = 1;
-
- csr->channels[0].chan_idx = i;
- csr->channels[0].ce_count = 0;
-
- pvt->csrow_map[i][j] = csrow;
+ dimm->nr_pages = npages;
switch (banks) {
case 4:
- csr->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
break;
case 8:
- csr->dtype = DEV_X8;
+ dimm->dtype = DEV_X8;
break;
case 16:
- csr->dtype = DEV_X16;
+ dimm->dtype = DEV_X16;
break;
default:
- csr->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
}
- csr->edac_mode = mode;
- csr->mtype = mtype;
- snprintf(csr->channels[0].label,
- sizeof(csr->channels[0].label),
- "CPU#%uChannel#%u_DIMM#%u",
- pvt->i7core_dev->socket, i, j);
-
- csrow++;
+ snprintf(dimm->label, sizeof(dimm->label),
+ "CPU#%uChannel#%u_DIMM#%u",
+ pvt->i7core_dev->socket, i, j);
+ dimm->grain = 8;
+ dimm->edac_mode = mode;
+ dimm->mtype = mtype;
}
pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
@@ -1567,22 +1452,16 @@ error:
/****************************************************************************
Error check routines
****************************************************************************/
-static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
+static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
const int chan,
const int dimm,
const int add)
{
- char *msg;
- struct i7core_pvt *pvt = mci->pvt_info;
- int row = pvt->csrow_map[chan][dimm], i;
+ int i;
for (i = 0; i < add; i++) {
- msg = kasprintf(GFP_KERNEL, "Corrected error "
- "(Socket=%d channel=%d dimm=%d)",
- pvt->i7core_dev->socket, chan, dimm);
-
- edac_mc_handle_fbd_ce(mci, row, 0, msg);
- kfree (msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
+ chan, dimm, -1, "error", "", NULL);
}
}
@@ -1623,11 +1502,11 @@ static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
/*updated the edac core */
if (add0 != 0)
- i7core_rdimm_update_csrow(mci, chan, 0, add0);
+ i7core_rdimm_update_errcount(mci, chan, 0, add0);
if (add1 != 0)
- i7core_rdimm_update_csrow(mci, chan, 1, add1);
+ i7core_rdimm_update_errcount(mci, chan, 1, add1);
if (add2 != 0)
- i7core_rdimm_update_csrow(mci, chan, 2, add2);
+ i7core_rdimm_update_errcount(mci, chan, 2, add2);
}
@@ -1747,20 +1626,30 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- char *type, *optype, *err, *msg;
+ char *type, *optype, *err, msg[80];
+ enum hw_event_mc_err_type tp_event;
unsigned long error = m->status & 0x1ff0000l;
+ bool uncorrected_error = m->mcgstatus & 1ll << 61;
+ bool ripv = m->mcgstatus & 1;
u32 optypenum = (m->status >> 4) & 0x07;
u32 core_err_cnt = (m->status >> 38) & 0x7fff;
u32 dimm = (m->misc >> 16) & 0x3;
u32 channel = (m->misc >> 18) & 0x3;
u32 syndrome = m->misc >> 32;
u32 errnum = find_first_bit(&error, 32);
- int csrow;
- if (m->mcgstatus & 1)
- type = "FATAL";
- else
- type = "NON_FATAL";
+ if (uncorrected_error) {
+ if (ripv) {
+ type = "FATAL";
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else {
+ type = "NON_FATAL";
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ }
+ } else {
+ type = "CORRECTED";
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
switch (optypenum) {
case 0:
@@ -1815,27 +1704,20 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
err = "unknown";
}
- /* FIXME: should convert addr into bank and rank information */
- msg = kasprintf(GFP_ATOMIC,
- "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
- "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
- type, (long long) m->addr, m->cpu, dimm, channel,
- syndrome, core_err_cnt, (long long)m->status,
- (long long)m->misc, optype, err);
-
- debugf0("%s", msg);
-
- csrow = pvt->csrow_map[channel][dimm];
+ snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
- /* Call the helper to output message */
- if (m->mcgstatus & 1)
- edac_mc_handle_fbd_ue(mci, csrow, 0,
- 0 /* FIXME: should be channel here */, msg);
- else if (!pvt->is_registered)
- edac_mc_handle_fbd_ce(mci, csrow,
- 0 /* FIXME: should be channel here */, msg);
-
- kfree(msg);
+ /*
+ * Call the helper to output message
+ * FIXME: what to do if core_err_cnt > 1? Currently, it generates
+ * only one event
+ */
+ if (uncorrected_error || !pvt->is_registered)
+ edac_mc_handle_error(tp_event, mci,
+ m->addr >> PAGE_SHIFT,
+ m->addr & ~PAGE_MASK,
+ syndrome,
+ channel, dimm, -1,
+ err, msg, m);
}
/*
@@ -2252,15 +2134,19 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
struct mem_ctl_info *mci;
struct i7core_pvt *pvt;
- int rc, channels, csrows;
-
- /* Check the number of active and not disabled channels */
- rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
- if (unlikely(rc < 0))
- return rc;
+ int rc;
+ struct edac_mc_layer layers[2];
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
+
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = MAX_DIMMS;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
if (unlikely(!mci))
return -ENOMEM;
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index 3bf2b2f..52072c2 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -12,7 +12,7 @@
* 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
*
* Written with reference to 82443BX Host Bridge Datasheet:
- * http://download.intel.com/design/chipsets/datashts/29063301.pdf
+ * http://download.intel.com/design/chipsets/datashts/29063301.pdf
* references to this document given in [].
*
* This module doesn't support the 440LX, but it may be possible to
@@ -156,19 +156,19 @@ static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci,
if (info->eap & I82443BXGX_EAP_OFFSET_SBE) {
error_found = 1;
if (handle_errors)
- edac_mc_handle_ce(mci, page, pageoffset,
- /* 440BX/GX don't make syndrome information
- * available */
- 0, edac_mc_find_csrow_by_page(mci, page), 0,
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, pageoffset, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1, mci->ctl_name, "", NULL);
}
if (info->eap & I82443BXGX_EAP_OFFSET_MBE) {
error_found = 1;
if (handle_errors)
- edac_mc_handle_ue(mci, page, pageoffset,
- edac_mc_find_csrow_by_page(mci, page),
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, pageoffset, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1, mci->ctl_name, "", NULL);
}
return error_found;
@@ -189,6 +189,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
enum mem_type mtype)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
u8 drbar, dramc;
u32 row_base, row_high_limit, row_high_limit_last;
@@ -197,6 +198,8 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
row_high_limit_last = 0;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
+
pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
debugf1("MC%d: %s: %s() Row=%d DRB = %#0x\n",
mci->mc_idx, __FILE__, __func__, index, drbar);
@@ -217,14 +220,14 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
row_base = row_high_limit_last;
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
- csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+ dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* EAP reports in 4kilobyte granularity [61] */
- csrow->grain = 1 << 12;
- csrow->mtype = mtype;
+ dimm->grain = 1 << 12;
+ dimm->mtype = mtype;
/* I don't think 440BX can tell you device type? FIXME? */
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
/* Mode is global to all rows on 440BX */
- csrow->edac_mode = edac_mode;
+ dimm->edac_mode = edac_mode;
row_high_limit_last = row_high_limit;
}
}
@@ -232,6 +235,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u8 dramc;
u32 nbxcfg, ecc_mode;
enum mem_type mtype;
@@ -245,8 +249,13 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg))
return -EIO;
- mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82443BXGX_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = I82443BXGX_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index c779092..0804505 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -99,6 +99,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info,
int handle_errors)
{
+ struct dimm_info *dimm;
int row;
if (!(info->errsts2 & 0x0003))
@@ -108,18 +109,25 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
info->eap >>= PAGE_SHIFT;
row = edac_mc_find_csrow_by_page(mci, info->eap);
+ dimm = mci->csrows[row].channels[0].dimm;
if (info->errsts & 0x0002)
- edac_mc_handle_ue(mci, info->eap, 0, row, "i82860 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ info->eap, 0, 0,
+ dimm->location[0], dimm->location[1], -1,
+ "i82860 UE", "", NULL);
else
- edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, 0,
- "i82860 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ info->eap, 0, info->derrsyn,
+ dimm->location[0], dimm->location[1], -1,
+ "i82860 CE", "", NULL);
return 1;
}
@@ -140,6 +148,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
u16 value;
u32 cumul_size;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
@@ -153,6 +162,8 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
*/
for (index = 0; index < mci->nr_csrows; index++) {
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
+
pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
cumul_size = (value & I82860_GBA_MASK) <<
(I82860_GBA_SHIFT - PAGE_SHIFT);
@@ -164,30 +175,38 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ dimm->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */
- csrow->mtype = MEM_RMBS;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
+ dimm->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */
+ dimm->mtype = MEM_RMBS;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
}
}
static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82860_error_info discard;
- /* RDRAM has channels but these don't map onto the abstractions that
- edac uses.
- The device groups from the GRA registers seem to map reasonably
- well onto the notion of a chip select row.
- There are 16 GRA registers and since the name is associated with
- the channel and the GRA registers map to physical devices so we are
- going to make 1 channel for group.
+ /*
+ * RDRAM has channels but these don't map onto the csrow abstraction.
+ * According with the datasheet, there are 2 Rambus channels, supporting
+ * up to 16 direct RDRAM devices.
+ * The device groups from the GRA registers seem to map reasonably
+ * well onto the notion of a chip select row.
+ * There are 16 GRA registers and since the name is associated with
+ * the channel and the GRA registers map to physical devices so we are
+ * going to make 1 channel for group.
*/
- mci = edac_mc_alloc(0, 16, 1, 0);
-
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 2;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = 8;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index 10f15d8..b613e31 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -38,7 +38,8 @@
#endif /* PCI_DEVICE_ID_INTEL_82875_6 */
/* four csrows in dual channel, eight in single channel */
-#define I82875P_NR_CSROWS(nr_chans) (8/(nr_chans))
+#define I82875P_NR_DIMMS 8
+#define I82875P_NR_CSROWS(nr_chans) (I82875P_NR_DIMMS / (nr_chans))
/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */
#define I82875P_EAP 0x58 /* Error Address Pointer (32b)
@@ -235,7 +236,9 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0081) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
@@ -243,11 +246,15 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci,
row = edac_mc_find_csrow_by_page(mci, info->eap);
if (info->errsts & 0x0080)
- edac_mc_handle_ue(mci, info->eap, 0, row, "i82875p UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ info->eap, 0, 0,
+ row, -1, -1,
+ "i82875p UE", "", NULL);
else
- edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row,
- multi_chan ? (info->des & 0x1) : 0,
- "i82875p CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ info->eap, 0, info->derrsyn,
+ row, multi_chan ? (info->des & 0x1) : 0,
+ -1, "i82875p CE", "", NULL);
return 1;
}
@@ -342,11 +349,13 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
void __iomem * ovrfl_window, u32 drc)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
+ unsigned nr_chans = dual_channel_active(drc) + 1;
unsigned long last_cumul_size;
u8 value;
u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */
- u32 cumul_size;
- int index;
+ u32 cumul_size, nr_pages;
+ int index, j;
drc_ddim = (drc >> 18) & 0x1;
last_cumul_size = 0;
@@ -369,12 +378,18 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */
- csrow->mtype = MEM_DDR;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+
+ for (j = 0; j < nr_chans; j++) {
+ dimm = csrow->channels[j].dimm;
+
+ dimm->nr_pages = nr_pages / nr_chans;
+ dimm->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */
+ dimm->mtype = MEM_DDR;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+ }
}
}
@@ -382,6 +397,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82875p_pvt *pvt;
struct pci_dev *ovrfl_pdev;
void __iomem *ovrfl_window;
@@ -397,9 +413,14 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
return -ENODEV;
drc = readl(ovrfl_window + I82875P_DRC);
nr_chans = dual_channel_active(drc) + 1;
- mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans),
- nr_chans, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82875P_NR_CSROWS(nr_chans);
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail0;
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 0cd8368..433332c 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -29,7 +29,8 @@
#define PCI_DEVICE_ID_INTEL_82975_0 0x277c
#endif /* PCI_DEVICE_ID_INTEL_82975_0 */
-#define I82975X_NR_CSROWS(nr_chans) (8/(nr_chans))
+#define I82975X_NR_DIMMS 8
+#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans))
/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */
#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b)
@@ -287,7 +288,8 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
@@ -309,13 +311,18 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci,
chan = (mci->csrows[row].nr_channels == 1) ? 0 : info->eap & 1;
offst = info->eap
& ((1 << PAGE_SHIFT) -
- (1 << mci->csrows[row].grain));
+ (1 << mci->csrows[row].channels[chan].dimm->grain));
if (info->errsts & 0x0002)
- edac_mc_handle_ue(mci, page, offst , row, "i82975x UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, offst, 0,
+ row, -1, -1,
+ "i82975x UE", "", NULL);
else
- edac_mc_handle_ce(mci, page, offst, info->derrsyn, row,
- chan, "i82975x CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offst, info->derrsyn,
+ row, chan ? chan : 0, -1,
+ "i82975x CE", "", NULL);
return 1;
}
@@ -370,8 +377,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
struct csrow_info *csrow;
unsigned long last_cumul_size;
u8 value;
- u32 cumul_size;
+ u32 cumul_size, nr_pages;
int index, chan;
+ struct dimm_info *dimm;
+ enum dev_type dtype;
last_cumul_size = 0;
@@ -400,28 +409,33 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
cumul_size);
+ nr_pages = cumul_size - last_cumul_size;
+ if (!nr_pages)
+ continue;
+
/*
* Initialise dram labels
* index values:
* [0-7] for single-channel; i.e. csrow->nr_channels = 1
* [0-3] for dual-channel; i.e. csrow->nr_channels = 2
*/
- for (chan = 0; chan < csrow->nr_channels; chan++)
- strncpy(csrow->channels[chan].label,
+ dtype = i82975x_dram_type(mch_window, index);
+ for (chan = 0; chan < csrow->nr_channels; chan++) {
+ dimm = mci->csrows[index].channels[chan].dimm;
+
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ strncpy(csrow->channels[chan].dimm->label,
labels[(index >> 1) + (chan * 2)],
EDAC_MC_LABEL_LEN);
-
- if (cumul_size == last_cumul_size)
- continue; /* not populated */
+ dimm->grain = 1 << 7; /* 128Byte cache-line resolution */
+ dimm->dtype = i82975x_dram_type(mch_window, index);
+ dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
+ dimm->edac_mode = EDAC_SECDED; /* only supported */
+ }
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 7; /* 128Byte cache-line resolution */
- csrow->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
- csrow->dtype = i82975x_dram_type(mch_window, index);
- csrow->edac_mode = EDAC_SECDED; /* only supported */
}
}
@@ -463,6 +477,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82975x_pvt *pvt;
void __iomem *mch_window;
u32 mchbar;
@@ -531,8 +546,13 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
chans = dual_channel_active(mch_window) + 1;
/* assuming only one controller, index thus is 0 */
- mci = edac_mc_alloc(sizeof(*pvt), I82975X_NR_CSROWS(chans),
- chans, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82975X_NR_DIMMS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = I82975X_NR_CSROWS(chans);
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail1;
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 73464a6..4c40235 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -854,12 +854,16 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n");
if (err_detect & DDR_EDE_SBE)
- edac_mc_handle_ce(mci, pfn, err_addr & ~PAGE_MASK,
- syndrome, row_index, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ pfn, err_addr & ~PAGE_MASK, syndrome,
+ row_index, 0, -1,
+ mci->ctl_name, "", NULL);
if (err_detect & DDR_EDE_MBE)
- edac_mc_handle_ue(mci, pfn, err_addr & ~PAGE_MASK,
- row_index, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ pfn, err_addr & ~PAGE_MASK, syndrome,
+ row_index, 0, -1,
+ mci->ctl_name, "", NULL);
out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect);
}
@@ -883,6 +887,7 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
{
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 sdram_ctl;
u32 sdtype;
enum mem_type mtype;
@@ -929,6 +934,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
u32 end;
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
+
cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
(index * MPC85XX_MC_CS_BNDS_OFS));
@@ -944,19 +951,21 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
csrow->first_page = start;
csrow->last_page = end;
- csrow->nr_pages = end + 1 - start;
- csrow->grain = 8;
- csrow->mtype = mtype;
- csrow->dtype = DEV_UNKNOWN;
+
+ dimm->nr_pages = end + 1 - start;
+ dimm->grain = 8;
+ dimm->mtype = mtype;
+ dimm->dtype = DEV_UNKNOWN;
if (sdram_ctl & DSC_X32_EN)
- csrow->dtype = DEV_X32;
- csrow->edac_mode = EDAC_SECDED;
+ dimm->dtype = DEV_X32;
+ dimm->edac_mode = EDAC_SECDED;
}
}
static int __devinit mpc85xx_mc_err_probe(struct platform_device *op)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct mpc85xx_mc_pdata *pdata;
struct resource r;
u32 sdram_ctl;
@@ -965,7 +974,13 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op)
if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL))
return -ENOMEM;
- mci = edac_mc_alloc(sizeof(*pdata), 4, 1, edac_mc_idx);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 4;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), sizeof(*pdata));
if (!mci) {
devres_release_group(&op->dev, mpc85xx_mc_err_probe);
return -ENOMEM;
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 7e5ff36..b0bb5a3 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -611,12 +611,17 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
if (!(reg & 0x1))
- edac_mc_handle_ce(mci, err_addr >> PAGE_SHIFT,
- err_addr & PAGE_MASK, syndrome, 0, 0,
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ err_addr >> PAGE_SHIFT,
+ err_addr & PAGE_MASK, syndrome,
+ 0, 0, -1,
+ mci->ctl_name, "", NULL);
else /* 2 bit error, UE */
- edac_mc_handle_ue(mci, err_addr >> PAGE_SHIFT,
- err_addr & PAGE_MASK, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ err_addr >> PAGE_SHIFT,
+ err_addr & PAGE_MASK, 0,
+ 0, 0, -1,
+ mci->ctl_name, "", NULL);
/* clear the error */
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
@@ -656,6 +661,8 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
struct mv64x60_mc_pdata *pdata)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
+
u32 devtype;
u32 ctl;
@@ -664,35 +671,36 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
csrow = &mci->csrows[0];
- csrow->first_page = 0;
- csrow->nr_pages = pdata->total_mem >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- csrow->grain = 8;
+ dimm = csrow->channels[0].dimm;
+
+ dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+ dimm->grain = 8;
- csrow->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+ dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
devtype = (ctl >> 20) & 0x3;
switch (devtype) {
case 0x0:
- csrow->dtype = DEV_X32;
+ dimm->dtype = DEV_X32;
break;
case 0x2: /* could be X8 too, but no way to tell */
- csrow->dtype = DEV_X16;
+ dimm->dtype = DEV_X16;
break;
case 0x3:
- csrow->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
break;
default:
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
break;
}
- csrow->edac_mode = EDAC_SECDED;
+ dimm->edac_mode = EDAC_SECDED;
}
static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct mv64x60_mc_pdata *pdata;
struct resource *r;
u32 ctl;
@@ -701,7 +709,14 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
if (!devres_open_group(&pdev->dev, mv64x60_mc_err_probe, GFP_KERNEL))
return -ENOMEM;
- mci = edac_mc_alloc(sizeof(struct mv64x60_mc_pdata), 1, 1, edac_mc_idx);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+ sizeof(struct mv64x60_mc_pdata));
if (!mci) {
printk(KERN_ERR "%s: No memory for CPU err\n", __func__);
devres_release_group(&pdev->dev, mv64x60_mc_err_probe);
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 7f71ee4..b095a90 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -110,15 +110,16 @@ static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta)
/* uncorrectable/multi-bit errors */
if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS |
MCDEBUG_ERRSTA_RFL_STATUS)) {
- edac_mc_handle_ue(mci, mci->csrows[cs].first_page, 0,
- cs, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ mci->csrows[cs].first_page, 0, 0,
+ cs, 0, -1, mci->ctl_name, "", NULL);
}
/* correctable/single-bit errors */
- if (errsta & MCDEBUG_ERRSTA_SBE_STATUS) {
- edac_mc_handle_ce(mci, mci->csrows[cs].first_page, 0,
- 0, cs, 0, mci->ctl_name);
- }
+ if (errsta & MCDEBUG_ERRSTA_SBE_STATUS)
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ mci->csrows[cs].first_page, 0, 0,
+ cs, 0, -1, mci->ctl_name, "", NULL);
}
static void pasemi_edac_check(struct mem_ctl_info *mci)
@@ -135,11 +136,13 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 rankcfg;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
pci_read_config_dword(pdev,
MCDRAM_RANKCFG + (index * 12),
@@ -151,20 +154,20 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >>
MCDRAM_RANKCFG_TYPE_SIZE_S) {
case 0:
- csrow->nr_pages = 128 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 128 << (20 - PAGE_SHIFT);
break;
case 1:
- csrow->nr_pages = 256 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 256 << (20 - PAGE_SHIFT);
break;
case 2:
case 3:
- csrow->nr_pages = 512 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 512 << (20 - PAGE_SHIFT);
break;
case 4:
- csrow->nr_pages = 1024 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 1024 << (20 - PAGE_SHIFT);
break;
case 5:
- csrow->nr_pages = 2048 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 2048 << (20 - PAGE_SHIFT);
break;
default:
edac_mc_printk(mci, KERN_ERR,
@@ -174,13 +177,13 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
}
csrow->first_page = last_page_in_mmc;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- last_page_in_mmc += csrow->nr_pages;
+ csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
+ last_page_in_mmc += dimm->nr_pages;
csrow->page_mask = 0;
- csrow->grain = PASEMI_EDAC_ERROR_GRAIN;
- csrow->mtype = MEM_DDR;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = edac_mode;
+ dimm->grain = PASEMI_EDAC_ERROR_GRAIN;
+ dimm->mtype = MEM_DDR;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = edac_mode;
}
return 0;
}
@@ -189,6 +192,7 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
u32 errctl1, errcor, scrub, mcen;
pci_read_config_dword(pdev, MCCFG_MCEN, &mcen);
@@ -205,9 +209,14 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev,
MCDEBUG_ERRCTL1_RFL_LOG_EN;
pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1);
- mci = edac_mc_alloc(0, PASEMI_EDAC_NR_CSROWS, PASEMI_EDAC_NR_CHANS,
- system_mmc_id++);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = PASEMI_EDAC_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = PASEMI_EDAC_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(system_mmc_id++, ARRAY_SIZE(layers), layers,
+ 0);
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index d427c69..f3f9fed 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -727,7 +727,10 @@ ppc4xx_edac_handle_ce(struct mem_ctl_info *mci,
for (row = 0; row < mci->nr_csrows; row++)
if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_ce_no_info(mci, message);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ 0, 0, 0,
+ row, 0, -1,
+ message, "", NULL);
}
/**
@@ -755,7 +758,10 @@ ppc4xx_edac_handle_ue(struct mem_ctl_info *mci,
for (row = 0; row < mci->nr_csrows; row++)
if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_ue(mci, page, offset, row, message);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, offset, 0,
+ row, 0, -1,
+ message, "", NULL);
}
/**
@@ -895,9 +901,8 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
enum mem_type mtype;
enum dev_type dtype;
enum edac_type edac_mode;
- int row;
- u32 mbxcf, size;
- static u32 ppc4xx_last_page;
+ int row, j;
+ u32 mbxcf, size, nr_pages;
/* Establish the memory type and width */
@@ -948,7 +953,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
case SDRAM_MBCF_SZ_2GB:
case SDRAM_MBCF_SZ_4GB:
case SDRAM_MBCF_SZ_8GB:
- csi->nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
+ nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
break;
default:
ppc4xx_edac_mc_printk(KERN_ERR, mci,
@@ -959,10 +964,6 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
goto done;
}
- csi->first_page = ppc4xx_last_page;
- csi->last_page = csi->first_page + csi->nr_pages - 1;
- csi->page_mask = 0;
-
/*
* It's unclear exactly what grain should be set to
* here. The SDRAM_ECCES register allows resolution of
@@ -975,15 +976,17 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
* possible values would be the PLB width (16), the
* page size (PAGE_SIZE) or the memory width (2 or 4).
*/
+ for (j = 0; j < csi->nr_channels; j++) {
+ struct dimm_info *dimm = csi->channels[j].dimm;
- csi->grain = 1;
-
- csi->mtype = mtype;
- csi->dtype = dtype;
+ dimm->nr_pages = nr_pages / csi->nr_channels;
+ dimm->grain = 1;
- csi->edac_mode = edac_mode;
+ dimm->mtype = mtype;
+ dimm->dtype = dtype;
- ppc4xx_last_page += csi->nr_pages;
+ dimm->edac_mode = edac_mode;
+ }
}
done:
@@ -1236,6 +1239,7 @@ static int __devinit ppc4xx_edac_probe(struct platform_device *op)
dcr_host_t dcr_host;
const struct device_node *np = op->dev.of_node;
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
static int ppc4xx_edac_instance;
/*
@@ -1281,12 +1285,14 @@ static int __devinit ppc4xx_edac_probe(struct platform_device *op)
* controller instance and perform the appropriate
* initialization.
*/
-
- mci = edac_mc_alloc(sizeof(struct ppc4xx_edac_pdata),
- ppc4xx_edac_nr_csrows,
- ppc4xx_edac_nr_chans,
- ppc4xx_edac_instance);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = ppc4xx_edac_nr_csrows;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = ppc4xx_edac_nr_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers,
+ sizeof(struct ppc4xx_edac_pdata));
if (mci == NULL) {
ppc4xx_edac_printk(KERN_ERR, "%s: "
"Failed to allocate EDAC MC instance!\n",
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index 6d908ad..e1cacd1 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -179,10 +179,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci,
error_found = 1;
if (handle_errors)
- edac_mc_handle_ce(mci, page, 0, /* not avail */
- syndrome,
- edac_mc_find_csrow_by_page(mci, page),
- 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, 0, syndrome,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1,
+ mci->ctl_name, "", NULL);
}
if (info->eapr & BIT(1)) { /* UE? */
@@ -190,9 +191,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci,
if (handle_errors)
/* 82600 doesn't give enough info */
- edac_mc_handle_ue(mci, page, 0,
- edac_mc_find_csrow_by_page(mci, page),
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ page, 0, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1,
+ mci->ctl_name, "", NULL);
}
return error_found;
@@ -216,6 +219,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
u8 dramcr)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
u8 drbar; /* SDRAM Row Boundary Address Register */
u32 row_high_limit, row_high_limit_last;
@@ -227,6 +231,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
for (index = 0; index < mci->nr_csrows; index++) {
csrow = &mci->csrows[index];
+ dimm = csrow->channels[0].dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_byte(pdev, R82600_DRBA + index, &drbar);
@@ -247,16 +252,17 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
- csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+
+ dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* Error address is top 19 bits - so granularity is *
* 14 bits */
- csrow->grain = 1 << 14;
- csrow->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
+ dimm->grain = 1 << 14;
+ dimm->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
/* FIXME - check that this is unknowable with this chipset */
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
/* Mode is global on 82600 */
- csrow->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
+ dimm->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
row_high_limit_last = row_high_limit;
}
}
@@ -264,6 +270,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u8 dramcr;
u32 eapr;
u32 scrub_disabled;
@@ -278,8 +285,13 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
debugf2("%s(): sdram refresh rate = %#0x\n", __func__,
sdram_refresh_rate);
debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr);
- mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS, 0);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = R82600_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = R82600_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 123204f..4adaf4b 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -314,8 +314,6 @@ struct sbridge_pvt {
struct sbridge_info info;
struct sbridge_channel channel[NUM_CHANNELS];
- int csrow_map[NUM_CHANNELS][MAX_DIMMS];
-
/* Memory type detection */
bool is_mirrored, is_lockstep, is_close_pg;
@@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
}
/**
- * sbridge_get_active_channels() - gets the number of channels and csrows
+ * check_if_ecc_is_active() - Checks if ECC is active
* bus: Device bus
- * @channels: Number of channels that will be returned
- * @csrows: Number of csrows found
- *
- * Since EDAC core needs to know in advance the number of available channels
- * and csrows, in order to allocate memory for csrows/channels, it is needed
- * to run two similar steps. At the first step, implemented on this function,
- * it checks the number of csrows/channels present at one socket, identified
- * by the associated PCI bus.
- * this is used in order to properly allocate the size of mci components.
- * Note: one csrow is one dimm.
*/
-static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
- unsigned *csrows)
+static int check_if_ecc_is_active(const u8 bus)
{
struct pci_dev *pdev = NULL;
- int i, j;
u32 mcmtr;
- *channels = 0;
- *csrows = 0;
-
pdev = get_pdev_slot_func(bus, 15, 0);
if (!pdev) {
sbridge_printk(KERN_ERR, "Couldn't find PCI device "
@@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
return -ENODEV;
}
-
- for (i = 0; i < NUM_CHANNELS; i++) {
- u32 mtr;
-
- /* Device 15 functions 2 - 5 */
- pdev = get_pdev_slot_func(bus, 15, 2 + i);
- if (!pdev) {
- sbridge_printk(KERN_ERR, "Couldn't find PCI device "
- "%2x.%02d.%d!!!\n",
- bus, 15, 2 + i);
- return -ENODEV;
- }
- (*channels)++;
-
- for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
- pci_read_config_dword(pdev, mtr_regs[j], &mtr);
- debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr);
- if (IS_DIMM_PRESENT(mtr))
- (*csrows)++;
- }
- }
-
- debugf0("Number of active channels: %d, number of active dimms: %d\n",
- *channels, *csrows);
-
return 0;
}
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
{
struct sbridge_pvt *pvt = mci->pvt_info;
- struct csrow_info *csr;
+ struct dimm_info *dimm;
int i, j, banks, ranks, rows, cols, size, npages;
- int csrow = 0;
- unsigned long last_page = 0;
u32 reg;
enum edac_type mode;
enum mem_type mtype;
@@ -616,6 +572,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
u32 mtr;
for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ i, j, 0);
pci_read_config_dword(pvt->pci_tad[i],
mtr_regs[j], &mtr);
debugf4("Channel #%d MTR%d = %x\n", i, j, mtr);
@@ -634,29 +592,15 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pvt->sbridge_dev->mc, i, j,
size, npages,
banks, ranks, rows, cols);
- csr = &mci->csrows[csrow];
-
- csr->first_page = last_page;
- csr->last_page = last_page + npages - 1;
- csr->page_mask = 0UL; /* Unused */
- csr->nr_pages = npages;
- csr->grain = 32;
- csr->csrow_idx = csrow;
- csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
- csr->ce_count = 0;
- csr->ue_count = 0;
- csr->mtype = mtype;
- csr->edac_mode = mode;
- csr->nr_channels = 1;
- csr->channels[0].chan_idx = i;
- csr->channels[0].ce_count = 0;
- pvt->csrow_map[i][j] = csrow;
- snprintf(csr->channels[0].label,
- sizeof(csr->channels[0].label),
+
+ dimm->nr_pages = npages;
+ dimm->grain = 32;
+ dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+ dimm->mtype = mtype;
+ dimm->edac_mode = mode;
+ snprintf(dimm->label, sizeof(dimm->label),
"CPU_SrcID#%u_Channel#%u_DIMM#%u",
pvt->sbridge_dev->source_id, i, j);
- last_page += npages;
- csrow++;
}
}
}
@@ -844,11 +788,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
u8 *socket,
long *channel_mask,
u8 *rank,
- char *area_type)
+ char **area_type, char *msg)
{
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
- char msg[256];
int n_rir, n_sads, n_tads, sad_way, sck_xch;
int sad_interl, idx, base_ch;
int interleave_mode;
@@ -870,12 +813,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
*/
if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr >= (u64)pvt->tohm) {
sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
@@ -892,7 +833,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
limit = SAD_LIMIT(reg);
if (limit <= prv) {
sprintf(msg, "Can't discover the memory socket");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr <= limit)
@@ -901,10 +841,9 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
}
if (n_sads == MAX_SAD) {
sprintf(msg, "Can't discover the memory socket");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
- area_type = get_dram_attr(reg);
+ *area_type = get_dram_attr(reg);
interleave_mode = INTERLEAVE_MODE(reg);
pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -942,7 +881,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Can't discover socket interleave");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
*socket = sad_interleave[idx];
@@ -957,7 +895,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (!new_mci) {
sprintf(msg, "Struct for socket #%u wasn't initialized",
*socket);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
mci = new_mci;
@@ -973,7 +910,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
limit = TAD_LIMIT(reg);
if (limit <= prv) {
sprintf(msg, "Can't discover the memory channel");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr <= limit)
@@ -1013,7 +949,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Can't discover the TAD target");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
*channel_mask = 1 << base_ch;
@@ -1027,7 +962,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Invalid mirror set. Can't decode addr");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
} else
@@ -1055,7 +989,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (offset > addr) {
sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
offset, addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
addr -= offset;
@@ -1095,7 +1028,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (n_rir == MAX_RIR_RANGES) {
sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
ch_addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
rir_way = RIR_WAY(reg);
@@ -1409,7 +1341,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
{
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
- char *type, *optype, *msg, *recoverable_msg;
+ enum hw_event_mc_err_type tp_event;
+ char *type, *optype, msg[256];
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1421,13 +1354,21 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
long channel_mask, first_channel;
u8 rank, socket;
- int csrow, rc, dimm;
- char *area_type = "Unknown";
-
- if (ripv)
- type = "NON_FATAL";
- else
- type = "FATAL";
+ int rc, dimm;
+ char *area_type = NULL;
+
+ if (uncorrected_error) {
+ if (ripv) {
+ type = "FATAL";
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else {
+ type = "NON_FATAL";
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ }
+ } else {
+ type = "CORRECTED";
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
/*
* According with Table 15-9 of the Intel Architecture spec vol 3A,
@@ -1445,19 +1386,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
} else {
switch (optypenum) {
case 0:
- optype = "generic undef request";
+ optype = "generic undef request error";
break;
case 1:
- optype = "memory read";
+ optype = "memory read error";
break;
case 2:
- optype = "memory write";
+ optype = "memory write error";
break;
case 3:
- optype = "addr/cmd";
+ optype = "addr/cmd error";
break;
case 4:
- optype = "memory scrubbing";
+ optype = "memory scrubbing error";
break;
default:
optype = "reserved";
@@ -1466,13 +1407,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
}
rc = get_memory_error_data(mci, m->addr, &socket,
- &channel_mask, &rank, area_type);
+ &channel_mask, &rank, &area_type, msg);
if (rc < 0)
- return;
+ goto err_parsing;
new_mci = get_mci_for_node_id(socket);
if (!new_mci) {
- edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!");
- return;
+ strcpy(msg, "Error: socket got corrupted!");
+ goto err_parsing;
}
mci = new_mci;
pvt = mci->pvt_info;
@@ -1486,45 +1427,39 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
else
dimm = 2;
- csrow = pvt->csrow_map[first_channel][dimm];
-
- if (uncorrected_error && recoverable)
- recoverable_msg = " recoverable";
- else
- recoverable_msg = "";
/*
- * FIXME: What should we do with "channel" information on mcelog?
- * Probably, we can just discard it, as the channel information
- * comes from the get_memory_error_data() address decoding
+ * FIXME: On some memory configurations (mirror, lockstep), the
+ * Memory Controller can't point the error to a single DIMM. The
+ * EDAC core should be handling the channel mask, in order to point
+ * to the group of dimm's where the error may be happening.
*/
- msg = kasprintf(GFP_ATOMIC,
- "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), "
- "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
- core_err_cnt,
- area_type,
- optype,
- type,
- recoverable_msg,
- overflow ? "OVERFLOW" : "",
- m->cpu,
- mscod, errcode,
- channel, /* 1111b means not specified */
- (long long) m->addr,
- socket,
- first_channel, /* This is the real channel on SB */
- channel_mask,
- rank);
+ snprintf(msg, sizeof(msg),
+ "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+ core_err_cnt,
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ area_type,
+ mscod, errcode,
+ socket,
+ channel_mask,
+ rank);
debugf0("%s", msg);
+ /* FIXME: need support for channel mask */
+
/* Call the helper to output message */
- if (uncorrected_error)
- edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg);
- else
- edac_mc_handle_fbd_ce(mci, csrow, 0, msg);
+ edac_mc_handle_error(tp_event, mci,
+ m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+ channel, dimm, -1,
+ optype, msg, m);
+ return;
+err_parsing:
+ edac_mc_handle_error(tp_event, mci, 0, 0, 0,
+ -1, -1, -1,
+ msg, "", m);
- kfree(msg);
}
/*
@@ -1683,16 +1618,25 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct sbridge_pvt *pvt;
- int rc, channels, csrows;
+ int rc;
/* Check the number of active and not disabled channels */
- rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows);
+ rc = check_if_ecc_is_active(sbridge_dev->bus);
if (unlikely(rc < 0))
return rc;
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc);
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANNELS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = MAX_DIMMS;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
+
if (unlikely(!mci))
return -ENOMEM;
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c
index e99d009..7bb4614 100644
--- a/drivers/edac/tile_edac.c
+++ b/drivers/edac/tile_edac.c
@@ -71,7 +71,10 @@ static void tile_edac_check(struct mem_ctl_info *mci)
if (mem_error.sbe_count != priv->ce_count) {
dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node);
priv->ce_count = mem_error.sbe_count;
- edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ 0, 0, 0,
+ 0, 0, -1,
+ mci->ctl_name, "", NULL);
}
}
@@ -84,6 +87,7 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
struct csrow_info *csrow = &mci->csrows[0];
struct tile_edac_priv *priv = mci->pvt_info;
struct mshim_mem_info mem_info;
+ struct dimm_info *dimm = csrow->channels[0].dimm;
if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
@@ -93,27 +97,25 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
}
if (mem_info.mem_ecc)
- csrow->edac_mode = EDAC_SECDED;
+ dimm->edac_mode = EDAC_SECDED;
else
- csrow->edac_mode = EDAC_NONE;
+ dimm->edac_mode = EDAC_NONE;
switch (mem_info.mem_type) {
case DDR2:
- csrow->mtype = MEM_DDR2;
+ dimm->mtype = MEM_DDR2;
break;
case DDR3:
- csrow->mtype = MEM_DDR3;
+ dimm->mtype = MEM_DDR3;
break;
default:
return -1;
}
- csrow->first_page = 0;
- csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- csrow->grain = TILE_EDAC_ERROR_GRAIN;
- csrow->dtype = DEV_UNKNOWN;
+ dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
+ dimm->grain = TILE_EDAC_ERROR_GRAIN;
+ dimm->dtype = DEV_UNKNOWN;
return 0;
}
@@ -123,6 +125,7 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev)
char hv_file[32];
int hv_devhdl;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct tile_edac_priv *priv;
int rc;
@@ -132,8 +135,14 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev)
return -EINVAL;
/* A TILE MC has a single channel and one chip-select row. */
- mci = edac_mc_alloc(sizeof(struct tile_edac_priv),
- TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = TILE_EDAC_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = TILE_EDAC_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
+ sizeof(struct tile_edac_priv));
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index a438297..1ac7962 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -215,19 +215,26 @@ static void x38_process_error_info(struct mem_ctl_info *mci,
return;
if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "", NULL);
info->errsts = info->errsts2;
}
for (channel = 0; channel < x38_channel_num; channel++) {
log = info->eccerrlog[channel];
if (log & X38_ECCERRLOG_UE) {
- edac_mc_handle_ue(mci, 0, 0,
- eccerrlog_row(channel, log), "x38 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ 0, 0, 0,
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "x38 UE", "", NULL);
} else if (log & X38_ECCERRLOG_CE) {
- edac_mc_handle_ce(mci, 0, 0,
- eccerrlog_syndrome(log),
- eccerrlog_row(channel, log), 0, "x38 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ 0, 0, eccerrlog_syndrome(log),
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "x38 CE", "", NULL);
}
}
}
@@ -317,9 +324,9 @@ static unsigned long drb_to_nr_pages(
static int x38_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_page;
+ struct edac_mc_layer layers[2];
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
@@ -335,7 +342,13 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
how_many_channel(pdev);
/* FIXME: unconventional pvt_info usage */
- mci = edac_mc_alloc(0, X38_RANKS, x38_channel_num, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = X38_RANKS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = x38_channel_num;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
@@ -363,7 +376,6 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
- last_page = -1UL;
for (i = 0; i < mci->nr_csrows; i++) {
unsigned long nr_pages;
struct csrow_info *csrow = &mci->csrows[i];
@@ -372,20 +384,18 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
i / X38_RANKS_PER_CHANNEL,
i % X38_RANKS_PER_CHANNEL);
- if (nr_pages == 0) {
- csrow->mtype = MEM_EMPTY;
+ if (nr_pages == 0)
continue;
- }
- csrow->first_page = last_page + 1;
- last_page += nr_pages;
- csrow->last_page = last_page;
- csrow->nr_pages = nr_pages;
+ for (j = 0; j < x38_channel_num; j++) {
+ struct dimm_info *dimm = csrow->channels[j].dimm;
- csrow->grain = nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+ dimm->nr_pages = nr_pages / x38_channel_num;
+ dimm->grain = nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
x38_clear_error_info(mci);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index c621d76..91ba3ba 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -71,6 +71,25 @@ enum dev_type {
#define DEV_FLAG_X64 BIT(DEV_X64)
/**
+ * enum hw_event_mc_err_type - type of the detected error
+ *
+ * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC
+ * corrected error was detected
+ * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that
+ * can't be corrected by ECC, but it is not
+ * fatal (maybe it is on an unused memory area,
+ * or the memory controller could recover from
+ * it for example, by re-trying the operation).
+ * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not
+ * be recovered.
+ */
+enum hw_event_mc_err_type {
+ HW_EVENT_ERR_CORRECTED,
+ HW_EVENT_ERR_UNCORRECTED,
+ HW_EVENT_ERR_FATAL,
+};
+
+/**
* enum mem_type - memory types. For a more detailed reference, please see
* http://en.wikipedia.org/wiki/DRAM
*
@@ -313,38 +332,141 @@ enum scrub_type {
*/
/**
+ * enum edac_mc_layer - memory controller hierarchy layer
+ *
+ * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch"
+ * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel"
+ * @EDAC_MC_LAYER_SLOT: memory layer is named "slot"
+ * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select"
+ *
+ * This enum is used by the drivers to tell edac_mc_sysfs what name should
+ * be used when describing a memory stick location.
+ */
+enum edac_mc_layer_type {
+ EDAC_MC_LAYER_BRANCH,
+ EDAC_MC_LAYER_CHANNEL,
+ EDAC_MC_LAYER_SLOT,
+ EDAC_MC_LAYER_CHIP_SELECT,
+};
+
+/**
+ * struct edac_mc_layer - describes the memory controller hierarchy
+ * @layer: layer type
+ * @size: number of components per layer. For example,
+ * if the channel layer has two channels, size = 2
+ * @is_virt_csrow: This layer is part of the "csrow" when old API
+ * compatibility mode is enabled. Otherwise, it is
+ * a channel
+ */
+struct edac_mc_layer {
+ enum edac_mc_layer_type type;
+ unsigned size;
+ bool is_virt_csrow;
+};
+
+/*
+ * Maximum number of layers used by the memory controller to uniquely
+ * identify a single memory stick.
+ * NOTE: Changing this constant requires not only to change the constant
+ * below, but also to change the existing code at the core, as there are
+ * some code there that are optimized for 3 layers.
+ */
+#define EDAC_MAX_LAYERS 3
+
+/**
+ * EDAC_DIMM_PTR - Macro responsible to find a pointer inside a pointer array
+ * for the element given by [layer0,layer1,layer2] position
+ *
+ * @layers: a struct edac_mc_layer array, describing how many elements
+ * were allocated for each layer
+ * @var: name of the var where we want to get the pointer
+ * (like mci->dimms)
+ * @n_layers: Number of layers at the @layers array
+ * @layer0: layer0 position
+ * @layer1: layer1 position. Unused if n_layers < 2
+ * @layer2: layer2 position. Unused if n_layers < 3
+ *
+ * For 1 layer, this macro returns &var[layer0]
+ * For 2 layers, this macro is similar to allocate a bi-dimensional array
+ * and to return "&var[layer0][layer1]"
+ * For 3 layers, this macro is similar to allocate a tri-dimensional array
+ * and to return "&var[layer0][layer1][layer2]"
+ *
+ * A loop could be used here to make it more generic, but, as we only have
+ * 3 layers, this is a little faster.
+ * By design, layers can never be 0 or more than 3. If that ever happens,
+ * a NULL is returned, causing an OOPS during the memory allocation routine,
+ * with would point to the developer that he's doing something wrong.
+ */
+#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \
+ typeof(var) __p; \
+ if ((nlayers) == 1) \
+ __p = &var[layer0]; \
+ else if ((nlayers) == 2) \
+ __p = &var[(layer1) + ((layers[1]).size * (layer0))]; \
+ else if ((nlayers) == 3) \
+ __p = &var[(layer2) + ((layers[2]).size * ((layer1) + \
+ ((layers[1]).size * (layer0))))]; \
+ else \
+ __p = NULL; \
+ __p; \
+})
+
+
+/* FIXME: add the proper per-location error counts */
+struct dimm_info {
+ char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
+
+ /* Memory location data */
+ unsigned location[EDAC_MAX_LAYERS];
+
+ struct mem_ctl_info *mci; /* the parent */
+
+ u32 grain; /* granularity of reported error in bytes */
+ enum dev_type dtype; /* memory device type */
+ enum mem_type mtype; /* memory dimm type */
+ enum edac_type edac_mode; /* EDAC mode for this dimm */
+
+ u32 nr_pages; /* number of pages on this dimm */
+
+ unsigned csrow, cschannel; /* Points to the old API data */
+};
+
+/**
* struct rank_info - contains the information for one DIMM rank
*
* @chan_idx: channel number where the rank is (typically, 0 or 1)
* @ce_count: number of correctable errors for this rank
- * @label: DIMM label. Different ranks for the same DIMM should be
- * filled, on userspace, with the same label.
- * FIXME: The core currently won't enforce it.
* @csrow: A pointer to the chip select row structure (the parent
* structure). The location of the rank is given by
* the (csrow->csrow_idx, chan_idx) vector.
+ * @dimm: A pointer to the DIMM structure, where the DIMM label
+ * information is stored.
+ *
+ * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
+ * This is a bad assumption, but it makes this patch easier. Later
+ * patches in this series will fix this issue.
*/
struct rank_info {
int chan_idx;
- u32 ce_count;
- char label[EDAC_MC_LABEL_LEN + 1];
- struct csrow_info *csrow; /* the parent */
+ struct csrow_info *csrow;
+ struct dimm_info *dimm;
+
+ u32 ce_count; /* Correctable Errors for this csrow */
};
struct csrow_info {
- unsigned long first_page; /* first page number in dimm */
- unsigned long last_page; /* last page number in dimm */
+ /* Used only by edac_mc_find_csrow_by_page() */
+ unsigned long first_page; /* first page number in csrow */
+ unsigned long last_page; /* last page number in csrow */
unsigned long page_mask; /* used for interleaving -
- * 0UL for non intlv
- */
- u32 nr_pages; /* number of pages in csrow */
- u32 grain; /* granularity of reported error in bytes */
- int csrow_idx; /* the chip-select row */
- enum dev_type dtype; /* memory device type */
+ * 0UL for non intlv */
+
+ int csrow_idx; /* the chip-select row */
+
u32 ue_count; /* Uncorrectable Errors for this csrow */
u32 ce_count; /* Correctable Errors for this csrow */
- enum mem_type mtype; /* memory csrow type */
- enum edac_type edac_mode; /* EDAC mode for this csrow */
+
struct mem_ctl_info *mci; /* the parent */
struct kobject kobj; /* sysfs kobject for this csrow */
@@ -426,8 +548,20 @@ struct mem_ctl_info {
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
unsigned long page);
int mc_idx;
- int nr_csrows;
struct csrow_info *csrows;
+ unsigned nr_csrows, num_cschannel;
+
+ /* Memory Controller hierarchy */
+ unsigned n_layers;
+ struct edac_mc_layer *layers;
+ bool mem_is_per_rank;
+
+ /*
+ * DIMM info. Will eventually remove the entire csrows_info some day
+ */
+ unsigned tot_dimms;
+ struct dimm_info *dimms;
+
/*
* FIXME - what about controllers on other busses? - IDs must be
* unique. dev pointer should be sufficiently unique, but
@@ -440,12 +574,16 @@ struct mem_ctl_info {
const char *dev_name;
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
unsigned long start_time; /* mci load start time (in jiffies) */
+ /*
+ * drivers shouldn't access those fields directly, as the core
+ * already handles that.
+ */
+ u32 ce_noinfo_count, ue_noinfo_count;
+ u32 ue_mc, ce_mc;
+ u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+
struct completion complete;
/* edac sysfs device control */
@@ -458,7 +596,7 @@ struct mem_ctl_info {
* by the low level driver.
*
* Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
+ * controller level.
* An array of structures, NULL terminated
*
* If attributes are desired, then set to array of attributes