From 63b6a6758eede2f9283c3594265b6e32e75d7456 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sat, 23 Apr 2011 00:57:42 +0200
Subject: perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions

The Intel Nehalem offcore bits implemented in:

  e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted
OTHER_CORE_HIT* as MISS even though it's clearly documented as an
L3 hit ...

Fix them and the Westmere definitions as well.

Cc: Andi Kleen
Cc: Lin Ming
Cc: Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker
Cc: Mike Galbraith
Cc: Steven Rostedt
Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel.c | 87 ++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 35 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e61539b..447a28d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 },
 },
 [ C(LL ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 },
 [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS) ] = 0x01b7,
 	},
 },
 [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };

 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
-#define DMND_DATA_RD		(1 << 0)
-#define DMND_RFO		(1 << 1)
-#define DMND_WB			(1 << 3)
-#define PF_DATA_RD		(1 << 4)
-#define PF_DATA_RFO		(1 << 5)
-#define RESP_UNCORE_HIT		(1 << 8)
-#define RESP_MISS		(0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+				/* reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
 }
 };
--
cgit v1.1
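As a sanity check on the hunk above, the composed masks can be recomputed outside the kernel. The sketch below is illustration only (a hypothetical userspace program, not part of the patch): it replays the NHM_* definitions and prints the extra-register values that end up in the LL table. Note that _ACCESS is now the union of the hit and miss bits, which is exactly the property the old RESP_UNCORE_HIT-based definitions lacked.

/*
 * Hypothetical userspace sketch, not kernel code: replays the NHM_*
 * definitions from the patch and prints the composed extra-register
 * values used in the LL table.
 */
#include <stdio.h>

#define NHM_DMND_DATA_RD	(1 << 0)
#define NHM_DMND_RFO		(1 << 1)
#define NHM_DMND_WB		(1 << 3)
#define NHM_PF_DATA_RD		(1 << 4)
#define NHM_PF_DATA_RFO		(1 << 5)
#define NHM_UNCORE_HIT		(1 << 8)
#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
#define NHM_OTHER_CORE_HITM	(1 << 10)
#define NHM_REMOTE_CACHE_FWD	(1 << 12)
#define NHM_REMOTE_DRAM		(1 << 13)
#define NHM_LOCAL_DRAM		(1 << 14)
#define NHM_NON_DRAM		(1 << 15)

#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)

/* an L3 access is an L3 hit of any flavor *or* an L3 miss */
#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

int main(void)
{
	/* prints: read 0xf701/0xf001, write 0xf70a/0xf00a, pf 0xf730/0xf030 */
	printf("read  access=0x%04x miss=0x%04x\n",
	       NHM_DMND_READ | NHM_L3_ACCESS, NHM_DMND_READ | NHM_L3_MISS);
	printf("write access=0x%04x miss=0x%04x\n",
	       NHM_DMND_WRITE | NHM_L3_ACCESS, NHM_DMND_WRITE | NHM_L3_MISS);
	printf("pf    access=0x%04x miss=0x%04x\n",
	       NHM_DMND_PREFETCH | NHM_L3_ACCESS,
	       NHM_DMND_PREFETCH | NHM_L3_MISS);
	return 0;
}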
From d9a5ac9ef306eb5cc874f285185a15c303c50009 Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Fri, 13 May 2011 15:52:09 +0200
Subject: x86, mce, AMD: Fix leaving freed data in a list

b may be added to a list, but it is not removed before being freed
in the case of an error. This is done in the corresponding
deallocation function, so the code here has been changed to follow
that.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E,E1,E2;
identifier l;
@@

*list_add(&E->l,E1);
... when != E1
    when != list_del(&E->l)
    when != list_del_init(&E->l)
    when != E = E2
*kfree(E);
// </smpl>

Signed-off-by: Julia Lawall
Cc: Borislav Petkov
Cc: Robert Richter
Cc: Yinghai Lu
Cc: Andreas Herrmann
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/1305294731-12127-1-git-send-email-julia@diku.dk
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b..bb0adad 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
--
cgit v1.1
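The bug class that the semantic patch flags is easier to see in isolation. Below is a minimal, self-contained C sketch of the same shape, using a toy intrusive list in place of the kernel's list.h; the names (struct node, bank_list, miscj) are invented for the example and only echo the mce_amd code:

/*
 * Toy intrusive list in userspace; setup_bank(), bank_list, struct
 * node and miscj are invented names for illustration.
 */
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

struct node { int data; struct list_head miscj; };

static struct list_head bank_list = { &bank_list, &bank_list };

static int setup_bank(int fail)
{
	struct node *b = malloc(sizeof(*b));

	if (!b)
		return -1;
	list_add(&b->miscj, &bank_list);

	if (fail) {
		/* the fix: unlink before freeing; without list_del(),
		 * bank_list would keep a pointer into freed memory */
		list_del(&b->miscj);
		free(b);
		return -1;
	}
	return 0;
}

int main(void)
{
	(void)setup_bank(1);	/* take the error path; the list stays consistent */
	return 0;
}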
From e503f9e4b092e2349a9477a333543de8f3c7f5d9 Mon Sep 17 00:00:00 2001
From: Youquan Song
Date: Fri, 22 Apr 2011 00:22:43 +0800
Subject: x86, apic: Fix spurious error interrupts triggering on all non-boot APs

This patch fixes a bug reported by a customer, who found that many
spurious error interrupts were reported on all non-boot CPUs (APs)
during the system boot stage.

According to Chapter 10 of the Intel Software Developer's Manual,
Volume 3A, the local APIC may signal an illegal vector error when an
LVT entry is set to an illegal vector value (0-15) under FIXED
delivery mode (bits 8-11 are 0), regardless of whether the mask bit
is set or whether an interrupt actually happens. These errors are
seen as error interrupts.

The initial value of the thermal LVT entry on all APs always reads
0x10000, because the APs are woken up by the BSP issuing an
INIT-SIPI-SIPI sequence to them, and the LVT registers are reset to
0s, except for the mask bits, which are set to 1s, when the APs
receive the INIT IPI.

When the BIOS takes over the thermal throttling interrupt, the LVT
thermal delivery mode should be SMI, and the kernel is required to
keep each AP's LVT thermal monitoring register programmed that way
as well.

The issue happens when the BIOS does not take over the thermal
throttling interrupt: each AP's LVT thermal monitor register is then
restored to 0x10000, which means vector 0 and fixed delivery mode,
so all APs signal illegal vector error interrupts.

This patch checks that the interrupt delivery mode is not fixed mode
before restoring an AP's LVT thermal monitor register.

Signed-off-by: Youquan Song
Acked-by: Suresh Siddha
Acked-by: Yong Wang
Cc: hpa@linux.intel.com
Cc: joe@perches.com
Cc: jbaron@redhat.com
Cc: trenn@suse.de
Cc: kent.liu@intel.com
Cc: chaohong.guo@intel.com
Cc: <stable@kernel.org> # As far back as possible
Link: http://lkml.kernel.org/r/1303402963-17738-1-git-send-email-youquan.song@intel.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9..0f03446 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
 	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);

-	h = lvtthmr_init;
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
--
cgit v1.1
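A small standalone sketch makes the delivery-mode logic concrete. The APIC_DM_* constants below are assumed to match their apicdef.h values; treat them as an assumption of this illustration, not a quote of the header:

/*
 * Standalone sketch; the APIC_DM_* values below are assumed to match
 * apicdef.h. An LVT entry keeps the vector in bits 0-7, the delivery
 * mode in bits 8-10 and the mask bit in bit 16.
 */
#include <stdio.h>

#define APIC_DM_FIXED		0x00000
#define APIC_DM_SMI		0x00200
#define APIC_DM_FIXED_MASK	0x00700
#define APIC_LVT_MASKED		0x10000

static void decode(unsigned int lvt)
{
	printf("lvt=0x%05x vector=%u mode=%s masked=%u -> %s\n",
	       lvt,
	       lvt & 0xff,
	       (lvt & APIC_DM_FIXED_MASK) == APIC_DM_SMI ? "SMI" : "fixed/other",
	       (lvt >> 16) & 1,
	       (lvt & APIC_DM_FIXED_MASK) != APIC_DM_FIXED ?
			"restore on AP" : "skip: fixed mode + vector 0 signals errors");
}

int main(void)
{
	decode(0x10000);	/* post-INIT reset value: masked, fixed mode, vector 0 */
	decode(APIC_DM_SMI);	/* BIOS took over the thermal interrupt */
	return 0;
}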
From 328935e6348c6a7cb34798a68c326f4b8372e68a Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 17 May 2011 14:55:18 +0200
Subject: Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors"

This reverts commit e20a2d205c05cef6b5783df339a7d54adeb50962, as it
crashes certain boxes with specific AMD CPU models.

Moving the lower endpoint of the Erratum 400 check to accommodate
earlier K8 revisions (A-E) opens a can of worms which is simply not
worth fixing properly by tweaking the errata checking framework:

* the missing IntPending MSR on revisions < CG causes a #GP:

  http://marc.info/?l=linux-kernel&m=130541471818831

* it makes earlier revisions use the LAPIC timer instead of the C1E
  idle routine which switches to HPET, thus not waking up in deeper
  C-states:

  http://lkml.org/lkml/2011/4/24/20

Therefore, leave the original boundary starting with K8-revF.

Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29..3532d3b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
 			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
--
cgit v1.1
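For illustration, the family/model/stepping range test behind cpu_has_amd_erratum() can be modeled in a few lines. The bit packing below is an assumption that mirrors the kernel's AMD_MODEL_RANGE() scheme as read here, not a verbatim copy of the header:

/*
 * Illustrative model of an OSVW erratum range check; RANGE() packing
 * is an assumption, chosen so that (model << 4 | stepping) keys can
 * be compared against the range endpoints.
 */
#include <stdio.h>
#include <stdbool.h>

#define RANGE(f, m_start, s_start, m_end, s_end) \
	(((f) << 24) | ((m_start) << 16) | ((s_start) << 12) | \
	 ((m_end) << 4) | (s_end))
#define RANGE_FAMILY(r)	(((r) >> 24) & 0xff)
#define RANGE_START(r)	(((r) >> 12) & 0xfff)
#define RANGE_END(r)	((r) & 0xfff)

static bool in_range(int family, int model, int stepping, int range)
{
	int ms = (model << 4) | stepping;	/* model/stepping key */

	return family == RANGE_FAMILY(range) &&
	       ms >= RANGE_START(range) && ms <= RANGE_END(range);
}

int main(void)
{
	/* the restored lower endpoint: K8 from revision F (model 0x41) on */
	int r = RANGE(0xf, 0x41, 0x2, 0xff, 0xf);

	printf("K8 rev E (model 0x20): %d\n", in_range(0xf, 0x20, 0x0, r));
	printf("K8 rev F (model 0x41): %d\n", in_range(0xf, 0x41, 0x2, r));
	return 0;
}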
From 14fb57dccb6e1defe9f89a66f548fcb24c374c1d Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 17 May 2011 14:55:19 +0200
Subject: x86, AMD: Fix ARAT feature setting again

Trying to enable the local APIC timer on early K8 revisions uncovers
a number of other issues with it, in conjunction with the C1E enter
path on AMD. Fixing those causes much more churn and trouble than
the benefit of using that timer brings, so don't enable it on K8 at
all, falling back to the original functionality the kernel had with
respect to that.

Reported-and-bisected-by: Nick Bowler
Cc: Boris Ostrovsky
Cc: Andreas Herrmann
Cc: Greg Kroah-Hartman
Cc: Hans Rosenfeld
Cc: Nick Bowler
Cc: Joerg-Volker-Peetz
Signed-off-by: Borislav Petkov
Link: http://lkml.kernel.org/r/1305636919-31165-3-git-send-email-bp@amd64.org
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3b..6f9d1f6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif

 	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);

 	/*
--
cgit v1.1