Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/fault.c          |  46
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c  |  95
-rw-r--r--  arch/powerpc/mm/mem.c            |  33
-rw-r--r--  arch/powerpc/mm/mmap.c           |   6
-rw-r--r--  arch/powerpc/mm/numa.c           |  40
-rw-r--r--  arch/powerpc/mm/pgtable.c        | 131
6 files changed, 201 insertions(+), 150 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b86..7699394 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,45 +253,33 @@ good_area:
#endif /* CONFIG_8xx */
if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- /* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+ /* Protection faults on exec go straight to failure on
+ * Hash based MMUs as they either don't support per-page
+ * execute permission, or if they do, it's handled already
+ * at the hash level. This test would probably have to
+ * be removed if we change the way this works to make hash
+ * processors use the same I/D cache coherency mechanism
+ * as embedded.
+ */
if (error_code & DSISR_PROTFAULT)
goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
/*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
+ *
+ * Note: This code was previously not enabled for 4xx/BookE.
+ * It is now, as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
*/
if (!(vma->vm_flags & VM_EXEC) &&
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
-#else
- pte_t *ptep;
- pmd_t *pmdp;
-
- /* Since 4xx/Book-E supports per-page execute permission,
- * we lazily flush dcache to icache. */
- ptep = NULL;
- if (get_pteptr(mm, address, &ptep, &pmdp)) {
- spinlock_t *ptl = pte_lockptr(mm, pmdp);
- spin_lock(ptl);
- if (pte_present(*ptep)) {
- struct page *page = pte_page(*ptep);
-
- if (!test_bit(PG_arch_1, &page->flags)) {
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- }
- pte_update(ptep, 0, _PAGE_HWEXEC |
- _PAGE_ACCESSED);
- local_flush_tlb_page(vma, address);
- pte_unmap_unlock(ptep, ptl);
- up_read(&mm->mmap_sem);
- return 0;
- }
- pte_unmap_unlock(ptep, ptl);
- }
-#endif
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
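
The new exec-fault policy above reduces to two checks: hash MMUs bail out immediately on a protection fault, and every MMU type still tolerates execution from a merely readable mapping unless the CPU enforces per-page no-execute. A minimal stand-alone sketch of that decision follows; the flag values and parameter names are illustrative assumptions, not the kernel's definitions.

/* Stand-alone sketch of the exec-fault decision above; flag values and
 * parameter names are illustrative assumptions, not kernel definitions. */
#include <stdbool.h>
#include <stdio.h>

#define VM_READ  0x1
#define VM_WRITE 0x2
#define VM_EXEC  0x4

static bool exec_fault_is_bad_area(unsigned long vm_flags, bool hash_mmu,
				   bool prot_fault, bool cpu_noexec)
{
	/* Hash MMUs: exec protection is handled (or unsupported) at hash level */
	if (hash_mmu && prot_fault)
		return true;
	/* No VM_EXEC: only tolerated on CPUs without per-page no-execute,
	 * and only if the mapping is at least readable or writable */
	if (!(vm_flags & VM_EXEC) &&
	    (cpu_noexec || !(vm_flags & (VM_READ | VM_WRITE))))
		return true;
	return false;
}

int main(void)
{
	/* read-only mapping, CPU without no-execute: old ABIs keep working */
	printf("%d\n", exec_fault_is_bad_area(VM_READ, false, false, false)); /* 0 */
	/* same mapping once the CPU enforces no-execute: fault */
	printf("%d\n", exec_fault_is_bad_area(VM_READ, false, false, true));  /* 1 */
	return 0;
}
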
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ea6e41e..985b6c3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -56,10 +56,14 @@
extern void loadcam_entry(unsigned int index);
unsigned int tlbcam_index;
-static unsigned long __cam0, __cam1, __cam2;
+static unsigned long cam[CONFIG_LOWMEM_CAM_NUM];
#define NUM_TLBCAMS (16)
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
struct tlbcam TLBCAM[NUM_TLBCAMS];
struct tlbcamrange {
@@ -107,7 +111,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys,
unsigned int tsize, lz;
asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
- tsize = (21 - lz) / 2;
+ tsize = 21 - lz;
#ifdef CONFIG_SMP
if ((flags & _PAGE_NO_CACHE) == 0)
@@ -152,19 +156,19 @@ void invalidate_tlbcam_entry(int index)
loadcam_entry(index);
}
-void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
- unsigned long cam2)
+unsigned long __init mmu_mapin_ram(void)
{
- settlbcam(0, PAGE_OFFSET, memstart_addr, cam0, _PAGE_KERNEL, 0);
- tlbcam_index++;
- if (cam1) {
- tlbcam_index++;
- settlbcam(1, PAGE_OFFSET+cam0, memstart_addr+cam0, cam1, _PAGE_KERNEL, 0);
- }
- if (cam2) {
+ unsigned long virt = PAGE_OFFSET;
+ phys_addr_t phys = memstart_addr;
+
+ while (tlbcam_index < ARRAY_SIZE(cam) && cam[tlbcam_index]) {
+ settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], _PAGE_KERNEL, 0);
+ virt += cam[tlbcam_index];
+ phys += cam[tlbcam_index];
tlbcam_index++;
- settlbcam(2, PAGE_OFFSET+cam0+cam1, memstart_addr+cam0+cam1, cam2, _PAGE_KERNEL, 0);
}
+
+ return virt - PAGE_OFFSET;
}
/*
@@ -175,51 +179,46 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
-unsigned long __init mmu_mapin_ram(void)
-{
- cam_mapin_ram(__cam0, __cam1, __cam2);
-
- return __cam0 + __cam1 + __cam2;
-}
-
-
void __init
adjust_total_lowmem(void)
{
- phys_addr_t max_lowmem_size = __max_low_memory;
- phys_addr_t cam_max_size = 0x10000000;
phys_addr_t ram;
+ unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff;
+ char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf;
+ int i;
+ unsigned long virt = PAGE_OFFSET & 0xffffffffUL;
+ unsigned long phys = memstart_addr & 0xffffffffUL;
- /* adjust CAM size to max_lowmem_size */
- if (max_lowmem_size < cam_max_size)
- cam_max_size = max_lowmem_size;
+ /* TLB1CFG gives the max CAM size as (4^max) kB; convert to log2(bytes) */
+ max_cam = max_cam * 2 + 10;
- /* adjust lowmem size to max_lowmem_size */
- ram = min(max_lowmem_size, total_lowmem);
+ /* adjust lowmem size to __max_low_memory */
+ ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
/* Calculate CAM values */
- __cam0 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam0 > cam_max_size)
- __cam0 = cam_max_size;
- ram -= __cam0;
- if (ram) {
- __cam1 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam1 > cam_max_size)
- __cam1 = cam_max_size;
- ram -= __cam1;
- }
- if (ram) {
- __cam2 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam2 > cam_max_size)
- __cam2 = cam_max_size;
- ram -= __cam2;
+ __max_low_memory = 0;
+ for (i = 0; ram && i < ARRAY_SIZE(cam); i++) {
+ unsigned int camsize = __ilog2(ram) & ~1U;
+ unsigned int align = __ffs(virt | phys) & ~1U;
+
+ if (camsize > align)
+ camsize = align;
+ if (camsize > max_cam)
+ camsize = max_cam;
+
+ cam[i] = 1UL << camsize;
+ ram -= cam[i];
+ __max_low_memory += cam[i];
+ virt += cam[i];
+ phys += cam[i];
+
+ p += sprintf(p, "%lu/", cam[i] >> 20);
}
+ for (; i < ARRAY_SIZE(cam); i++)
+ p += sprintf(p, "0/");
+ p[-1] = '\0';
- printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
- " CAM2=%ldMb residual: %ldMb\n",
- __cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
- (long int)((total_lowmem - __cam0 - __cam1 - __cam2)
- >> 20));
- __max_low_memory = __cam0 + __cam1 + __cam2;
+ pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf,
+ (unsigned int)((total_lowmem - __max_low_memory) >> 20));
__initial_memory_limit_addr = memstart_addr + __max_low_memory;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f00f09a..f668fa9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -472,40 +472,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
{
#ifdef CONFIG_PPC_STD_MMU
unsigned long access = 0, trap;
-#endif
- unsigned long pfn = pte_pfn(pte);
-
- /* handle i-cache coherency */
- if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
- !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
- pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-#ifdef CONFIG_8xx
- /* On 8xx, cache control instructions (particularly
- * "dcbst" from flush_dcache_icache) fault as write
- * operation if there is an unpopulated TLB entry
- * for the address in question. To workaround that,
- * we invalidate the TLB here, thus avoiding dcbst
- * misbehaviour.
- */
- _tlbil_va(address, 0 /* 8xx doesn't care about PID */);
-#endif
- /* The _PAGE_USER test should really be _PAGE_EXEC, but
- * older glibc versions execute some code from no-exec
- * pages, which for now we are supporting. If exec-only
- * pages are ever implemented, this will have to change.
- */
- if (!PageReserved(page) && (pte_val(pte) & _PAGE_USER)
- && !test_bit(PG_arch_1, &page->flags)) {
- if (vma->vm_mm == current->active_mm) {
- __flush_dcache_icache((void *) address);
- } else
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- }
- }
-#ifdef CONFIG_PPC_STD_MMU
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 86010fc..7db8abc 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -48,12 +48,6 @@ static inline unsigned long mmap_base(void)
static inline int mmap_is_legacy(void)
{
- /*
- * Force standard allocation for 64 bit programs.
- */
- if (!test_thread_flag(TIF_32BIT))
- return 1;
-
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 7393bd7..0507faa 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -19,6 +19,7 @@
#include <linux/notifier.h>
#include <linux/lmb.h>
#include <linux/of.h>
+#include <linux/pfn.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
@@ -157,35 +158,6 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
-{
- unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
- struct device_node *cpu_node = NULL;
- const unsigned int *interrupt_server, *reg;
- int len;
-
- while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
- /* Try interrupt server first */
- interrupt_server = of_get_property(cpu_node,
- "ibm,ppc-interrupt-server#s", &len);
-
- len = len / sizeof(u32);
-
- if (interrupt_server && (len > 0)) {
- while (len--) {
- if (interrupt_server[len] == hw_cpuid)
- return cpu_node;
- }
- } else {
- reg = of_get_property(cpu_node, "reg", &len);
- if (reg && (len > 0) && (reg[0] == hw_cpuid))
- return cpu_node;
- }
- }
-
- return NULL;
-}
-
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
@@ -289,7 +261,7 @@ static int __init find_min_common_depth(void)
ref_points = of_get_property(rtas_root,
"ibm,associativity-reference-points", &len);
- if ((len >= 1) && ref_points) {
+ if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
depth = ref_points[1];
} else {
dbg("NUMA: ibm,associativity-reference-points not found.\n");
@@ -469,7 +441,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
int nid = 0;
- struct device_node *cpu = find_cpu_node(lcpu);
+ struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
if (!cpu) {
WARN_ON(1);
@@ -651,7 +623,7 @@ static int __init parse_numa_properties(void)
for_each_present_cpu(i) {
int nid;
- cpu = find_cpu_node(i);
+ cpu = of_get_cpu_node(i, NULL);
BUG_ON(!cpu);
nid = of_node_to_nid_single(cpu);
of_node_put(cpu);
@@ -882,7 +854,7 @@ static void mark_reserved_regions_for_nid(int nid)
unsigned long physbase = lmb.reserved.region[i].base;
unsigned long size = lmb.reserved.region[i].size;
unsigned long start_pfn = physbase >> PAGE_SHIFT;
- unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
+ unsigned long end_pfn = PFN_UP(physbase + size);
struct node_active_region node_ar;
unsigned long node_end_pfn = node->node_start_pfn +
node->node_spanned_pages;
@@ -908,7 +880,7 @@ static void mark_reserved_regions_for_nid(int nid)
*/
if (end_pfn > node_ar.end_pfn)
reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
- - (start_pfn << PAGE_SHIFT);
+ - physbase;
/*
* Only worry about *this* node, others may not
* yet have valid NODE_DATA().
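
The PFN_UP() change above matters when a reserved LMB region ends in the middle of a page: rounding up keeps that last, partially covered page inside the reservation, and the reserve_size fix subtracts the exact physbase instead of a page-truncated start. A small sketch under an assumed 4 kB page size; PFN_UP/PFN_DOWN are re-declared locally rather than taken from <linux/pfn.h>.

/* Minimal sketch of the rounding fix, assuming 4 kB pages. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long physbase = 0x2000, size = 0x1800;	/* ends mid-page */

	/* old: the partially covered last page falls outside the reservation */
	printf("old end_pfn = %lu\n", PFN_DOWN(physbase + size));	/* 3 */
	/* new: round up so the reservation still covers it */
	printf("new end_pfn = %lu\n", PFN_UP(physbase + size));	/* 4 */
	return 0;
}
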
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6d94116..a27ded3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -1,5 +1,6 @@
/*
* This file contains common routines for dealing with free of page tables
+ * along with common page table handling code.
*
* Derived from arch/powerpc/mm/tlb_64.c:
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -115,3 +116,133 @@ void pte_free_finish(void)
pte_free_submit(*batchp);
*batchp = NULL;
}
+
+/*
+ * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
+ */
+static pte_t do_dcache_icache_coherency(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+ struct page *page;
+
+ if (unlikely(!pfn_valid(pfn)))
+ return pte;
+ page = pfn_to_page(pfn);
+
+ if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
+ pr_debug("do_dcache_icache_coherency... flushing\n");
+ flush_dcache_icache_page(page);
+ set_bit(PG_arch_1, &page->flags);
+ }
+ else
+ pr_debug("do_dcache_icache_coherency... already clean\n");
+ return __pte(pte_val(pte) | _PAGE_HWEXEC);
+}
+
+static inline int is_exec_fault(void)
+{
+ return current->thread.regs && TRAP(current->thread.regs) == 0x400;
+}
+
+/* We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+ return (pte_val(pte) &
+ (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+ (_PAGE_PRESENT);
+}
+
+#if defined(CONFIG_PPC_STD_MMU)
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supported per page (currently 64-bit only). Otherwise, we always flush
+ * valid PTEs in set_pte.
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return set_pte && pte_looks_normal(pte) &&
+ !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+ cpu_has_feature(CPU_FTR_NOEXECUTE));
+}
+#elif _PAGE_HWEXEC == 0
+/* Embedded type MMU without HW exec support (8xx only so far), we flush
+ * the cache for any present PTE
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return set_pte && pte_looks_normal(pte);
+}
+#else
+/* Other embedded CPUs with HW exec support per-page, we flush on exec
+ * fault if HWEXEC is not set
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return pte_looks_normal(pte) && is_exec_fault() &&
+ !(pte_val(pte) & _PAGE_HWEXEC);
+}
+#endif
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(pte_present(*ptep));
+#endif
+ /* Note: mm->context.id might not yet have been assigned as
+ * this context might not have been activated yet when this
+ * is called.
+ */
+ pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+ if (pte_need_exec_flush(pte, 1))
+ pte = do_dcache_icache_coherency(pte);
+
+ /* Perform the setting of the PTE */
+ __set_pte_at(mm, addr, ptep, pte, 0);
+}
+
+/*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, i.e., a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc. The generic code will have
+ * handled those two for us; we additionally deal with missing execute
+ * permission here on some processors.
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, pte_t entry, int dirty)
+{
+ int changed;
+ if (!dirty && pte_need_exec_flush(entry, 0))
+ entry = do_dcache_icache_coherency(entry);
+ changed = !pte_same(*(ptep), entry);
+ if (changed) {
+ assert_pte_locked(vma->vm_mm, address);
+ __ptep_set_access_flags(ptep, entry);
+ flush_tlb_page_nohash(vma, address);
+ }
+ return changed;
+}
+
+#ifdef CONFIG_DEBUG_VM
+void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (mm == &init_mm)
+ return;
+ pgd = mm->pgd + pgd_index(addr);
+ BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, addr);
+ BUG_ON(!pmd_present(*pmd));
+ BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd)));
+}
+#endif /* CONFIG_DEBUG_VM */
+
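
Taken together, the three pte_need_exec_flush() variants implement one policy: flush the data cache to the instruction cache (and, where it exists, grant _PAGE_HWEXEC) only when the hardware cannot keep the icache coherent by itself. A stand-alone model of that decision follows; the enum, parameter names and the main() scenarios are illustrative assumptions, not kernel definitions.

/* Stand-alone model of the pte_need_exec_flush() policy above. */
#include <stdbool.h>
#include <stdio.h>

enum mmu_kind {
	HASH_MMU,		/* CONFIG_PPC_STD_MMU */
	EMBEDDED_NO_HWEXEC,	/* _PAGE_HWEXEC == 0, e.g. 8xx */
	EMBEDDED_HWEXEC,	/* per-page HW exec, e.g. 4xx/BookE */
};

static bool need_exec_flush(enum mmu_kind mmu, bool set_pte, bool pte_normal,
			    bool hw_coherent, bool pte_hwexec, bool exec_fault)
{
	switch (mmu) {
	case HASH_MMU:		/* flush in set_pte unless COHERENT_ICACHE/NOEXECUTE */
		return set_pte && pte_normal && !hw_coherent;
	case EMBEDDED_NO_HWEXEC: /* flush every normal PTE we set */
		return set_pte && pte_normal;
	case EMBEDDED_HWEXEC:	/* flush lazily, on the first exec fault only */
		return pte_normal && exec_fault && !pte_hwexec;
	}
	return false;
}

int main(void)
{
	/* BookE: set_pte_at() leaves the page alone ... */
	printf("%d\n", need_exec_flush(EMBEDDED_HWEXEC, true, true, false, false, false)); /* 0 */
	/* ... and the first exec fault triggers the flush + HWEXEC grant */
	printf("%d\n", need_exec_flush(EMBEDDED_HWEXEC, false, true, false, false, true)); /* 1 */
	return 0;
}

In the new flow, the embedded exec fault falls through to ptep_set_access_flags(), which is where do_dcache_icache_coherency() runs and _PAGE_HWEXEC is set, replacing the manual fixup removed from fault.c above.
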