From 9a6655e49fd98f3748bb80da20705448aad9ee57 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 31 Aug 2010 13:05:22 +0100 Subject: ARM: Improve the L2 cache performance when PL310 is used With this L2 cache controller, the cache maintenance by PA and sync operations are atomic and do not require a "wait" loop. This patch conditionally defines the cache_wait() function. Since L2x0 cache controllers do not work with ARMv7 CPUs, the patch automatically enables CACHE_PL310 when only CPU_V7 is defined. Signed-off-by: Catalin Marinas --- arch/arm/mm/Kconfig | 8 ++++++++ arch/arm/mm/cache-l2x0.c | 15 ++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index a0a2928..4414a01 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -779,6 +779,14 @@ config CACHE_L2X0 help This option enables the L2x0 PrimeCell. +config CACHE_PL310 + bool + depends on CACHE_L2X0 + default y if CPU_V7 && !CPU_V6 + help + This option enables optimisations for the PL310 cache + controller. + config CACHE_TAUROS2 bool "Enable the Tauros2 L2 cache controller" depends on (ARCH_DOVE || ARCH_MMP) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 9982eb3..edb43ff 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -29,13 +29,22 @@ static void __iomem *l2x0_base; static DEFINE_SPINLOCK(l2x0_lock); static uint32_t l2x0_way_mask; /* Bitmask of active ways */ -static inline void cache_wait(void __iomem *reg, unsigned long mask) +static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { - /* wait for the operation to complete */ + /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) ; } +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) +{ + /* cache operations by line are atomic on PL310 */ +} +#else +#define cache_wait cache_wait_way +#endif + static inline void cache_sync(void) { void __iomem *base = l2x0_base; @@ -110,7 +119,7 @@ static inline void l2x0_inv_all(void) /* invalidate all ways */ spin_lock_irqsave(&l2x0_lock, flags); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); - cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); + cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); spin_unlock_irqrestore(&l2x0_lock, flags); } -- cgit v1.1 From ae360a78f41164e7f9c4cf846696b5b6d8dae5c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <[tglx@linutronix.de]> Date: Sat, 31 Jul 2010 21:06:06 +0530 Subject: arm: Disable outer (L2) cache in kexec kexec does not disable the outer cache before disabling the inner caches in cpu_proc_fin(). So L2 is enabled across the kexec jump. When the new kernel enables chaches again, it randomly crashes. Disabling L2 before calling cpu_proc_fin() cures the problem. Disabling L2 requires the following new functions: flush_all(), inv_all() and disable(). Add them to outer_cache_fns and call them from the kexec code. Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Acked-by: Linus Walleij --- arch/arm/include/asm/outercache.h | 24 ++++++++++++++++++++++++ arch/arm/kernel/machine_kexec.c | 3 +++ 2 files changed, 27 insertions(+) diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index 25f76ba..fc19009 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -25,6 +25,9 @@ struct outer_cache_fns { void (*inv_range)(unsigned long, unsigned long); void (*clean_range)(unsigned long, unsigned long); void (*flush_range)(unsigned long, unsigned long); + void (*flush_all)(void); + void (*inv_all)(void); + void (*disable)(void); #ifdef CONFIG_OUTER_CACHE_SYNC void (*sync)(void); #endif @@ -50,6 +53,24 @@ static inline void outer_flush_range(unsigned long start, unsigned long end) outer_cache.flush_range(start, end); } +static inline void outer_flush_all(void) +{ + if (outer_cache.flush_all) + outer_cache.flush_all(); +} + +static inline void outer_inv_all(void) +{ + if (outer_cache.inv_all) + outer_cache.inv_all(); +} + +static inline void outer_disable(void) +{ + if (outer_cache.disable) + outer_cache.disable(); +} + #else static inline void outer_inv_range(unsigned long start, unsigned long end) @@ -58,6 +79,9 @@ static inline void outer_clean_range(unsigned long start, unsigned long end) { } static inline void outer_flush_range(unsigned long start, unsigned long end) { } +static inline void outer_flush_all(void) { } +static inline void outer_inv_all(void) { } +static inline void outer_disable(void) { } #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 1fc74cb..3a8fd51 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -78,7 +78,10 @@ void machine_kexec(struct kimage *image) local_fiq_disable(); setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/ flush_cache_all(); + outer_flush_all(); + outer_disable(); cpu_proc_fin(); + outer_inv_all(); flush_cache_all(); cpu_reset(reboot_code_buffer_phys); } -- cgit v1.1 From 2fd8658931193599c867fd6974fa184ec34af16c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <[tglx@linutronix.de]> Date: Sat, 31 Jul 2010 21:05:24 +0530 Subject: arm: Implement l2x0 cache disable functions Add flush_all, inv_all and disable functions to the l2x0 code. These functions are called from kexec code to prevent random crashes in the new kernel. Platforms like OMAP which control L2 enable/disable via SMI mode can override the outer_cache.disable() function to implement their own. Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Acked-by: Linus Walleij --- arch/arm/mm/cache-l2x0.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index edb43ff..9310d61 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -112,12 +112,26 @@ static void l2x0_cache_sync(void) spin_unlock_irqrestore(&l2x0_lock, flags); } -static inline void l2x0_inv_all(void) +static void l2x0_flush_all(void) +{ + unsigned long flags; + + /* clean all ways */ + spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); + cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); + cache_sync(); + spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_inv_all(void) { unsigned long flags; /* invalidate all ways */ spin_lock_irqsave(&l2x0_lock, flags); + /* Invalidating when L2 is enabled is a nono */ + BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); @@ -215,6 +229,15 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) spin_unlock_irqrestore(&l2x0_lock, flags); } +static void l2x0_disable(void) +{ + unsigned long flags; + + spin_lock_irqsave(&l2x0_lock, flags); + writel(0, l2x0_base + L2X0_CTRL); + spin_unlock_irqrestore(&l2x0_lock, flags); +} + void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) { __u32 aux; @@ -272,6 +295,9 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) outer_cache.clean_range = l2x0_clean_range; outer_cache.flush_range = l2x0_flush_range; outer_cache.sync = l2x0_cache_sync; + outer_cache.flush_all = l2x0_flush_all; + outer_cache.inv_all = l2x0_inv_all; + outer_cache.disable = l2x0_disable; printk(KERN_INFO "%s cache controller enabled\n", type); printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", -- cgit v1.1 From 4e803c40b33822b52389952040f490e79973e94a Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 31 Jul 2010 21:40:10 +0530 Subject: omap4: l2x0: Override the default l2x0_disable The machine_kexec() calls outer_disable which can crash on OMAP4 becasue of trustzone restrictions. This patch overrides the default l2x0_disable with a OMAP4 specific implementation taking care of trustzone Signed-off-by: Santosh Shilimkar Reviewed-by: Tony Lindgren Acked-by: Linus Walleij --- arch/arm/mach-omap2/omap4-common.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 13dc979..b557cc2 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -44,6 +44,13 @@ void __init gic_init_irq(void) } #ifdef CONFIG_CACHE_L2X0 + +static void omap4_l2x0_disable(void) +{ + /* Disable PL310 L2 Cache controller */ + omap_smc1(0x102, 0x0); +} + static int __init omap_l2_cache_init(void) { /* @@ -66,6 +73,12 @@ static int __init omap_l2_cache_init(void) */ l2x0_init(l2cache_base, 0x0e050000, 0xc0000fff); + /* + * Override default outer_cache.disable with a OMAP4 + * specific one + */ + outer_cache.disable = omap4_l2x0_disable; + return 0; } early_initcall(omap_l2_cache_init); -- cgit v1.1 From 7db27e864abe0110cec5087b77de777455581e8f Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sun, 11 Jul 2010 14:13:26 +0530 Subject: ARM: l2x0: Fix coding-style in the cache-l2x0.h Replace tab with space after #define to be consisten with other define in the file. Also move the bit mask below the register offsets. Signed-off-by: Santosh Shilimkar Acked-by: Catalin Marinas Acked-by: Linus Walleij --- arch/arm/include/asm/hardware/cache-l2x0.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 6bcba48..d833355 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -21,9 +21,6 @@ #define __ASM_ARM_HARDWARE_L2X0_H #define L2X0_CACHE_ID 0x000 -#define L2X0_CACHE_ID_PART_MASK (0xf << 6) -#define L2X0_CACHE_ID_PART_L210 (1 << 6) -#define L2X0_CACHE_ID_PART_L310 (3 << 6) #define L2X0_CACHE_TYPE 0x004 #define L2X0_CTRL 0x100 #define L2X0_AUX_CTRL 0x104 @@ -54,6 +51,11 @@ #define L2X0_LINE_TAG 0xF30 #define L2X0_DEBUG_CTRL 0xF40 +/* Registers shifts and masks */ +#define L2X0_CACHE_ID_PART_MASK (0xf << 6) +#define L2X0_CACHE_ID_PART_L210 (1 << 6) +#define L2X0_CACHE_ID_PART_L310 (3 << 6) + #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask); #endif -- cgit v1.1 From 5ba70372289a1fb378b95cee2cf46b0203d65291 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sun, 11 Jul 2010 14:35:37 +0530 Subject: ARM: l2x0: Determine the cache size The cache size is needed for to optimise range based maintainance operations Signed-off-by: Santosh Shilimkar Acked-by: Catalin Marinas Acked-by: Linus Walleij --- arch/arm/include/asm/hardware/cache-l2x0.h | 1 + arch/arm/mm/cache-l2x0.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index d833355..4633d2a 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -55,6 +55,7 @@ #define L2X0_CACHE_ID_PART_MASK (0xf << 6) #define L2X0_CACHE_ID_PART_L210 (1 << 6) #define L2X0_CACHE_ID_PART_L310 (3 << 6) +#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x3 << 17) #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 9310d61..262c752 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -28,6 +28,7 @@ static void __iomem *l2x0_base; static DEFINE_SPINLOCK(l2x0_lock); static uint32_t l2x0_way_mask; /* Bitmask of active ways */ +static uint32_t l2x0_size; static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { @@ -242,6 +243,7 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) { __u32 aux; __u32 cache_id; + __u32 way_size = 0; int ways; const char *type; @@ -276,6 +278,13 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) l2x0_way_mask = (1 << ways) - 1; /* + * L2 cache Size = Way size * Number of ways + */ + way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; + way_size = 1 << (way_size + 3); + l2x0_size = ways * way_size * SZ_1K; + + /* * Check if l2x0 controller is already enabled. * If you are booting from non-secure mode * accessing the below registers will fault. @@ -300,6 +309,6 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) outer_cache.disable = l2x0_disable; printk(KERN_INFO "%s cache controller enabled\n", type); - printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", - ways, cache_id, aux); + printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", + ways, cache_id, aux, l2x0_size); } -- cgit v1.1 From 444457c1f59d58bc48acf5b4fc585225106c11ff Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sun, 11 Jul 2010 14:58:41 +0530 Subject: ARM: l2x0: Optimise the range based operations For the big buffers which are in excess of cache size, the maintaince operations by PA are very slow. For such buffers the maintainace operations can be speeded up by using the WAY based method. Signed-off-by: Santosh Shilimkar Acked-by: Catalin Marinas Acked-by: Linus Walleij --- arch/arm/mm/cache-l2x0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 262c752..170c9bb 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -125,6 +125,18 @@ static void l2x0_flush_all(void) spin_unlock_irqrestore(&l2x0_lock, flags); } +static void l2x0_clean_all(void) +{ + unsigned long flags; + + /* clean all ways */ + spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); + cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); + cache_sync(); + spin_unlock_irqrestore(&l2x0_lock, flags); +} + static void l2x0_inv_all(void) { unsigned long flags; @@ -183,6 +195,11 @@ static void l2x0_clean_range(unsigned long start, unsigned long end) void __iomem *base = l2x0_base; unsigned long flags; + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + spin_lock_irqsave(&l2x0_lock, flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { @@ -208,6 +225,11 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) void __iomem *base = l2x0_base; unsigned long flags; + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + spin_lock_irqsave(&l2x0_lock, flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { -- cgit v1.1 From ae6948048c417d429b8a0f85fad13e483f7cc1a3 Mon Sep 17 00:00:00 2001 From: Per Fransson Date: Wed, 8 Sep 2010 21:21:40 +0530 Subject: ARM: ux500 specific L2 cache code The generic version of l2x0_inv_all is only called just after disabling the L2 cache and is surrounded by a spinlock. However, we're not really turning off the L2 cache right now, and the PL310 does not support exclusive accesses (used to implement the spinlock). So, the invalidation needs to be done without the spinlock. Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Hans-Juergen Koch Cc: Santosh Shilimkar Signed-off-by: Per Fransson Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/cpu.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index e0fd747..73fb1a5 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -71,6 +72,46 @@ void __init ux500_init_irq(void) } #ifdef CONFIG_CACHE_L2X0 +static inline void ux500_cache_wait(void __iomem *reg, unsigned long mask) +{ + /* wait for the operation to complete */ + while (readl(reg) & mask) + ; +} + +static inline void ux500_cache_sync(void) +{ + void __iomem *base = __io_address(UX500_L2CC_BASE); + writel(0, base + L2X0_CACHE_SYNC); + ux500_cache_wait(base + L2X0_CACHE_SYNC, 1); +} + +/* + * The L2 cache cannot be turned off in the non-secure world. + * Dummy until a secure service is in place. + */ +static void ux500_l2x0_disable(void) +{ +} + +/* + * This is only called when doing a kexec, just after turning off the L2 + * and L1 cache, and it is surrounded by a spinlock in the generic version. + * However, we're not really turning off the L2 cache right now and the + * PL310 does not support exclusive accesses (used to implement the spinlock). + * So, the invalidation needs to be done without the spinlock. + */ +static void ux500_l2x0_inv_all(void) +{ + void __iomem *l2x0_base = __io_address(UX500_L2CC_BASE); + uint32_t l2x0_way_mask = (1<<16) - 1; /* Bitmask of active ways */ + + /* invalidate all ways */ + writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); + ux500_cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); + ux500_cache_sync(); +} + static int ux500_l2x0_init(void) { void __iomem *l2x0_base; @@ -80,6 +121,10 @@ static int ux500_l2x0_init(void) /* 64KB way size, 8 way associativity, force WA */ l2x0_init(l2x0_base, 0x3e060000, 0xc0000fff); + /* Override invalidate function */ + outer_cache.disable = ux500_l2x0_disable; + outer_cache.inv_all = ux500_l2x0_inv_all; + return 0; } early_initcall(ux500_l2x0_init); -- cgit v1.1