diff options
author | Stuart Menefy <stuart.menefy@st.com> | 2006-11-24 11:42:24 +0900 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2006-12-06 10:45:38 +0900 |
commit | 9b3a53ab76771e3669e50086c131e1574fe25847 (patch) | |
tree | 07dab1cd3972c7b82ddd5b7ad1e28628d7756dbb /arch/sh/kernel | |
parent | 9daa0c257d6c200b58092e0bfc32b77c4618a8af (diff) | |
download | kernel_samsung_crespo-9b3a53ab76771e3669e50086c131e1574fe25847.zip kernel_samsung_crespo-9b3a53ab76771e3669e50086c131e1574fe25847.tar.gz kernel_samsung_crespo-9b3a53ab76771e3669e50086c131e1574fe25847.tar.bz2 |
sh: TLB miss fast-path optimizations.
Handle simple TLB miss faults which can be resolved completely
from the page table in assembler.
Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/kernel')
-rw-r--r-- | arch/sh/kernel/cpu/sh3/entry.S | 206 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/probe.c | 19 |
2 files changed, 197 insertions, 28 deletions
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 869d56f..5de99b4 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -13,8 +13,10 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/cpu/mmu_context.h> #include <asm/unistd.h> +#include <asm/cpu/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/page.h> ! NOTE: ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address @@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store) call_dpf: mov.l 1f, r0 - mov r5, r8 - mov.l @r0, r6 - mov r6, r9 - mov.l 2f, r0 - sts pr, r10 - jsr @r0 - mov r15, r4 - ! - tst r0, r0 - bf/s 0f - lds r10, pr - rts - nop -0: sti + mov.l @r0, r6 ! address mov.l 3f, r0 - mov r9, r6 - mov r8, r5 + sti jmp @r0 - mov r15, r4 + mov r15, r4 ! regs .align 2 1: .long MMU_TEA -2: .long __do_page_fault 3: .long do_page_fault .align 2 @@ -344,9 +331,176 @@ general_exception: 2: .long ret_from_exception ! ! + +/* This code makes some assumptions to improve performance. + * Make sure they are stil true. */ +#if PTRS_PER_PGD != PTRS_PER_PTE +#error PDG and PTE sizes don't match +#endif + +/* gas doesn't flag impossible values for mov #immediate as an error */ +#if (_PAGE_PRESENT >> 2) > 0x7f +#error cannot load PAGE_PRESENT as an immediate +#endif +#if _PAGE_DIRTY > 0x7f +#error cannot load PAGE_DIRTY as an immediate +#endif +#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED +#error cannot derive PAGE_ACCESSED from PAGE_PRESENT +#endif + +#if defined(CONFIG_CPU_SH4) +#define ldmmupteh(r) mov.l 8f, r +#else +#define ldmmupteh(r) mov #MMU_PTEH, r +#endif + .balign 1024,0,1024 tlb_miss: - mov.l 1f, k2 +#ifdef COUNT_EXCEPTIONS + ! Increment the counts + mov.l 9f, k1 + mov.l @k1, k2 + add #1, k2 + mov.l k2, @k1 +#endif + + ! k0 scratch + ! k1 pgd and pte pointers + ! k2 faulting address + ! k3 pgd and pte index masks + ! k4 shift + + ! Load up the pgd entry (k1) + + ldmmupteh(k0) ! 9 LS (latency=2) MMU_PTEH + + mov.w 4f, k3 ! 8 LS (latency=2) (PTRS_PER_PGD-1) << 2 + mov #-(PGDIR_SHIFT-2), k4 ! 6 EX + + mov.l @(MMU_TEA-MMU_PTEH,k0), k2 ! 18 LS (latency=2) + + mov.l @(MMU_TTB-MMU_PTEH,k0), k1 ! 18 LS (latency=2) + + mov k2, k0 ! 5 MT (latency=0) + shld k4, k0 ! 99 EX + + and k3, k0 ! 78 EX + + mov.l @(k0, k1), k1 ! 21 LS (latency=2) + mov #-(PAGE_SHIFT-2), k4 ! 6 EX + + ! Load up the pte entry (k2) + + mov k2, k0 ! 5 MT (latency=0) + shld k4, k0 ! 99 EX + + tst k1, k1 ! 86 MT + + bt 20f ! 110 BR + + and k3, k0 ! 78 EX + mov.w 5f, k4 ! 8 LS (latency=2) _PAGE_PRESENT + + mov.l @(k0, k1), k2 ! 21 LS (latency=2) + add k0, k1 ! 49 EX + +#ifdef CONFIG_CPU_HAS_PTEA + ! Test the entry for present and _PAGE_ACCESSED + + mov #-28, k3 ! 6 EX + mov k2, k0 ! 5 MT (latency=0) + + tst k4, k2 ! 68 MT + shld k3, k0 ! 99 EX + + bt 20f ! 110 BR + + ! Set PTEA register + ! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1) + ! + ! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT + + and #0xe, k0 ! 79 EX + + mov k0, k3 ! 5 MT (latency=0) + mov k2, k0 ! 5 MT (latency=0) + + and #1, k0 ! 79 EX + + or k0, k3 ! 82 EX + + ldmmupteh(k0) ! 9 LS (latency=2) + shll2 k4 ! 101 EX _PAGE_ACCESSED + + tst k4, k2 ! 68 MT + + mov.l k3, @(MMU_PTEA-MMU_PTEH,k0) ! 27 LS + + mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK + + ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED +#else + + ! Test the entry for present and _PAGE_ACCESSED + + mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK + tst k4, k2 ! 68 MT + + shll2 k4 ! 101 EX _PAGE_ACCESSED + ldmmupteh(k0) ! 9 LS (latency=2) + + bt 20f ! 110 BR + tst k4, k2 ! 68 MT + + ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED + +#endif + + ! Set up the entry + + and k2, k3 ! 78 EX + bt/s 10f ! 108 BR + + mov.l k3, @(MMU_PTEL-MMU_PTEH,k0) ! 27 LS + + ldtlb ! 128 CO + + ! At least one instruction between ldtlb and rte + nop ! 119 NOP + + rte ! 126 CO + + nop ! 119 NOP + + +10: or k4, k2 ! 82 EX + + ldtlb ! 128 CO + + ! At least one instruction between ldtlb and rte + mov.l k2, @k1 ! 27 LS + + rte ! 126 CO + + ! Note we cannot execute mov here, because it is executed after + ! restoring SSR, so would be executed in user space. + nop ! 119 NOP + + + .align 5 + ! Once cache line if possible... +1: .long swapper_pg_dir +4: .short (PTRS_PER_PGD-1) << 2 +5: .short _PAGE_PRESENT +7: .long _PAGE_FLAGS_HARDWARE_MASK +8: .long MMU_PTEH +#ifdef COUNT_EXCEPTIONS +9: .long exception_count_miss +#endif + + ! Either pgd or pte not present +20: mov.l 1f, k2 mov.l 4f, k3 bra handle_exception mov.l @k2, k2 @@ -496,6 +650,15 @@ skip_save: bf interrupt_exception shlr2 r8 shlr r8 + +#ifdef COUNT_EXCEPTIONS + mov.l 5f, r9 + add r8, r9 + mov.l @r9, r10 + add #1, r10 + mov.l r10, @r9 +#endif + mov.l 4f, r9 add r8, r9 mov.l @r9, r9 @@ -509,6 +672,9 @@ skip_save: 2: .long 0x000080f0 ! FD=1, IMASK=15 3: .long 0xcfffffff ! RB=0, BL=0 4: .long exception_handling_table +#ifdef COUNT_EXCEPTIONS +5: .long exception_count_table +#endif interrupt_exception: mov.l 1f, r9 diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c index c294de1..afe0f1b 100644 --- a/arch/sh/kernel/cpu/sh4/probe.c +++ b/arch/sh/kernel/cpu/sh4/probe.c @@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void) case 0x205: cpu_data->type = CPU_SH7750; cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | - CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; + CPU_HAS_PERF_COUNTER; break; case 0x206: cpu_data->type = CPU_SH7750S; cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | - CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; + CPU_HAS_PERF_COUNTER; break; case 0x1100: cpu_data->type = CPU_SH7751; - cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; + cpu_data->flags |= CPU_HAS_FPU; break; case 0x2000: cpu_data->type = CPU_SH73180; @@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void) break; case 0x8000: cpu_data->type = CPU_ST40RA; - cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; + cpu_data->flags |= CPU_HAS_FPU; break; case 0x8100: cpu_data->type = CPU_ST40GX1; - cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; + cpu_data->flags |= CPU_HAS_FPU; break; case 0x700: cpu_data->type = CPU_SH4_501; cpu_data->icache.ways = 2; cpu_data->dcache.ways = 2; - cpu_data->flags |= CPU_HAS_PTEA; break; case 0x600: cpu_data->type = CPU_SH4_202; cpu_data->icache.ways = 2; cpu_data->dcache.ways = 2; - cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; + cpu_data->flags |= CPU_HAS_FPU; break; case 0x500 ... 0x501: switch (prr) { @@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void) cpu_data->icache.ways = 2; cpu_data->dcache.ways = 2; - cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; + cpu_data->flags |= CPU_HAS_FPU; break; default: @@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void) cpu_data->dcache.ways = 1; #endif +#ifdef CONFIG_CPU_HAS_PTEA + cpu_data->flags |= CPU_HAS_PTEA; +#endif + /* * On anything that's not a direct-mapped cache, look to the CVR * for I/D-cache specifics. |