From dde5828f56cb2c1aa70365c476e6830482127258 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Aug 2009 12:36:00 +0100 Subject: ARM: Fix broken highmem support Currently, highmem is selectable, and you can request an increased vmalloc area. However, none of this has any effect on the memory layout since a patch in the highmem series was accidentally dropped. Moreover, even if you did want highmem, all memory would still be registered as lowmem, possibly resulting in overflow of the available virtual mapping space. The highmem boundary is determined by the highest allowed beginning of the vmalloc area, which depends on its configurable minimum size (see commit 60296c71f6c5063e3c1f1d2619ca0b60940162e7 for details on this). We should create mappings and initialize bootmem only for low memory, while the zone allocator must still be told about highmem. Currently, memory nodes which are completely located in high memory are not supported. This is not a huge limitation since systems relying on highmem support are unlikely to have discontiguous memory with large holes. [ A similar patch was meant to be merged before commit 5f0fbf9ecaf3 and be available in Linux v2.6.30, however some git rebase screw-up of mine dropped the first commit of the series, and that goofage escaped testing somehow as well. -- Nico ] Signed-off-by: Russell King Reviewed-by: Nicolas Pitre --- arch/arm/mm/init.c | 118 +++++++++++++++++++++++++++++++++-------------------- arch/arm/mm/mmu.c | 9 +++- 2 files changed, 81 insertions(+), 46 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8277802..3a7279c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -120,6 +120,32 @@ void show_mem(void) printk("%d pages swap cached\n", cached); } +static void __init find_node_limits(int node, struct meminfo *mi, + unsigned long *min, unsigned long *max_low, unsigned long *max_high) +{ + int i; + + *min = -1UL; + *max_low = *max_high = 0; + + for_each_nodebank(i, mi, node) { + struct membank *bank = &mi->bank[i]; + unsigned long start, end; + + start = bank_pfn_start(bank); + end = bank_pfn_end(bank); + + if (*min > start) + *min = start; + if (*max_high < end) + *max_high = end; + if (bank->highmem) + continue; + if (*max_low < end) + *max_low = end; + } +} + /* * FIXME: We really want to avoid allocating the bootmap bitmap * over the top of the initrd. Hopefully, this is located towards @@ -210,41 +236,25 @@ static inline void map_memory_bank(struct membank *bank) #endif } -static unsigned long __init bootmem_init_node(int node, struct meminfo *mi) +static void __init bootmem_init_node(int node, struct meminfo *mi, + unsigned long start_pfn, unsigned long end_pfn) { - unsigned long start_pfn, end_pfn, boot_pfn; + unsigned long boot_pfn; unsigned int boot_pages; pg_data_t *pgdat; int i; - start_pfn = -1UL; - end_pfn = 0; - /* - * Calculate the pfn range, and map the memory banks for this node. + * Map the memory banks for this node. */ for_each_nodebank(i, mi, node) { struct membank *bank = &mi->bank[i]; - unsigned long start, end; - start = bank_pfn_start(bank); - end = bank_pfn_end(bank); - - if (start_pfn > start) - start_pfn = start; - if (end_pfn < end) - end_pfn = end; - - map_memory_bank(bank); + if (!bank->highmem) + map_memory_bank(bank); } /* - * If there is no memory in this node, ignore it. - */ - if (end_pfn == 0) - return end_pfn; - - /* * Allocate the bootmem bitmap page. */ boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); @@ -260,7 +270,8 @@ static unsigned long __init bootmem_init_node(int node, struct meminfo *mi) for_each_nodebank(i, mi, node) { struct membank *bank = &mi->bank[i]; - free_bootmem_node(pgdat, bank_phys_start(bank), bank_phys_size(bank)); + if (!bank->highmem) + free_bootmem_node(pgdat, bank_phys_start(bank), bank_phys_size(bank)); memory_present(node, bank_pfn_start(bank), bank_pfn_end(bank)); } @@ -269,8 +280,6 @@ static unsigned long __init bootmem_init_node(int node, struct meminfo *mi) */ reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT, boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT); - - return end_pfn; } static void __init bootmem_reserve_initrd(int node) @@ -297,33 +306,39 @@ static void __init bootmem_reserve_initrd(int node) static void __init bootmem_free_node(int node, struct meminfo *mi) { unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long start_pfn, end_pfn; - pg_data_t *pgdat = NODE_DATA(node); + unsigned long min, max_low, max_high; int i; - start_pfn = pgdat->bdata->node_min_pfn; - end_pfn = pgdat->bdata->node_low_pfn; + find_node_limits(node, mi, &min, &max_low, &max_high); /* * initialise the zones within this node. */ memset(zone_size, 0, sizeof(zone_size)); - memset(zhole_size, 0, sizeof(zhole_size)); /* * The size of this node has already been determined. If we need * to do anything fancy with the allocation of this memory to the * zones, now is the time to do it. */ - zone_size[0] = end_pfn - start_pfn; + zone_size[0] = max_low - min; +#ifdef CONFIG_HIGHMEM + zone_size[ZONE_HIGHMEM] = max_high - max_low; +#endif /* * For each bank in this node, calculate the size of the holes. * holes = node_size - sum(bank_sizes_in_node) */ - zhole_size[0] = zone_size[0]; - for_each_nodebank(i, mi, node) - zhole_size[0] -= bank_pfn_size(&mi->bank[i]); + memcpy(zhole_size, zone_size, sizeof(zhole_size)); + for_each_nodebank(i, mi, node) { + int idx = 0; +#ifdef CONFIG_HIGHMEM + if (mi->bank[i].highmem) + idx = ZONE_HIGHMEM; +#endif + zhole_size[idx] -= bank_pfn_size(&mi->bank[i]); + } /* * Adjust the sizes according to any special requirements for @@ -331,13 +346,13 @@ static void __init bootmem_free_node(int node, struct meminfo *mi) */ arch_adjust_zones(node, zone_size, zhole_size); - free_area_init_node(node, zone_size, start_pfn, zhole_size); + free_area_init_node(node, zone_size, min, zhole_size); } void __init bootmem_init(void) { struct meminfo *mi = &meminfo; - unsigned long memend_pfn = 0; + unsigned long min, max_low, max_high; int node, initrd_node; /* @@ -345,11 +360,29 @@ void __init bootmem_init(void) */ initrd_node = check_initrd(mi); + max_low = max_high = 0; + /* * Run through each node initialising the bootmem allocator. */ for_each_node(node) { - unsigned long end_pfn = bootmem_init_node(node, mi); + unsigned long node_low, node_high; + + find_node_limits(node, mi, &min, &node_low, &node_high); + + if (node_low > max_low) + max_low = node_low; + if (node_high > max_high) + max_high = node_high; + + /* + * If there is no memory in this node, ignore it. + * (We can't have nodes which have no lowmem) + */ + if (node_low == 0) + continue; + + bootmem_init_node(node, mi, min, node_low); /* * Reserve any special node zero regions. @@ -362,12 +395,6 @@ void __init bootmem_init(void) */ if (node == initrd_node) bootmem_reserve_initrd(node); - - /* - * Remember the highest memory PFN. - */ - if (end_pfn > memend_pfn) - memend_pfn = end_pfn; } /* @@ -383,7 +410,7 @@ void __init bootmem_init(void) for_each_node(node) bootmem_free_node(node, mi); - high_memory = __va((memend_pfn << PAGE_SHIFT) - 1) + 1; + high_memory = __va((max_low << PAGE_SHIFT) - 1) + 1; /* * This doesn't seem to be used by the Linux memory manager any @@ -393,7 +420,8 @@ void __init bootmem_init(void) * Note: max_low_pfn and max_pfn reflect the number of _pages_ in * the system, not the maximum PFN. */ - max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET; + max_low_pfn = max_low - PHYS_PFN_OFFSET; + max_pfn = max_high - PHYS_PFN_OFFSET; } static inline int free_area(unsigned long pfn, unsigned long end, char *s) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4722582..4426ee6 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -687,13 +687,19 @@ __early_param("vmalloc=", early_vmalloc); static void __init sanity_check_meminfo(void) { - int i, j; + int i, j, highmem = 0; for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; *bank = meminfo.bank[i]; #ifdef CONFIG_HIGHMEM + if (__va(bank->start) > VMALLOC_MIN || + __va(bank->start) < (void *)PAGE_OFFSET) + highmem = 1; + + bank->highmem = highmem; + /* * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. @@ -714,6 +720,7 @@ static void __init sanity_check_meminfo(void) i++; bank[1].size -= VMALLOC_MIN - __va(bank->start); bank[1].start = __pa(VMALLOC_MIN - 1) + 1; + bank[1].highmem = highmem = 1; j++; } bank->size = VMALLOC_MIN - __va(bank->start); -- cgit v1.1 From 13f96d8f4c5a3f6a6b5e578d08869d79d690e0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Sep 2009 22:01:27 +0100 Subject: ARM: 5687/1: fix an oops with highmem In xdr_partial_copy_from_skb() there is that sequence: kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); [...] flush_dcache_page(*ppage); kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); Mixing flush_dcache_page() and kmap_atomic() is a bit odd, especially since kunmap_atomic() must deal with cache issues already. OTOH the non-highmem case must use flush_dcache_page() as kunmap_atomic() becomes a no op with no cache maintenance. Problem is that with highmem the implementation of kmap_atomic() doesn't set page->virtual, and page_address(page) returns 0 in that case. Here flush_dcache_page() calls __flush_dcache_page() which calls __cpuc_flush_dcache_page(page_address(page)) resulting in a kernel oops. None of the kmap_atomic() implementations uses set_page_address(). Hence we can assume page_address() is always expected to return 0 in that case. Let's conditionally call __cpuc_flush_dcache_page() only when the page address is non zero, and perform that test only when highmem is configured. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/flush.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index c07222e..575f3ad 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -144,7 +144,14 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * page. This ensures that data in the physical page is mutually * coherent with the kernels mapping. */ - __cpuc_flush_dcache_page(page_address(page)); +#ifdef CONFIG_HIGHMEM + /* + * kmap_atomic() doesn't set the page virtual address, and + * kunmap_atomic() takes care of cache flushing already. + */ + if (page_address(page)) +#endif + __cpuc_flush_dcache_page(page_address(page)); /* * If this is a page cache page, and we have an aliasing VIPT cache, -- cgit v1.1 From 7929eb9cf643ae416e5081b2a6fa558d37b9854c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 3 Sep 2009 21:45:59 +0100 Subject: ARM: 5691/1: fix cache aliasing issues between kmap() and kmap_atomic() with highmem Let's suppose a highmem page is kmap'd with kmap(). A pkmap entry is used, the page mapped to it, and the virtual cache is dirtied. Then kunmap() is used which does virtually nothing except for decrementing a usage count. Then, let's suppose the _same_ page gets mapped using kmap_atomic(). It is therefore mapped onto a fixmap entry instead, which has a different virtual address unaware of the dirty cache data for that page sitting in the pkmap mapping. Fortunately it is easy to know if a pkmap mapping still exists for that page and use it directly with kmap_atomic(), thanks to kmap_high_get(). And actual testing with a printk in the added code path shows that this condition is actually met *extremely* frequently. Seems that we've been quite lucky that things have worked so well with highmem so far. Cc: stable@kernel.org Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/highmem.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index a34954d..73cae57 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -40,11 +40,16 @@ void *kmap_atomic(struct page *page, enum km_type type) { unsigned int idx; unsigned long vaddr; + void *kmap; pagefault_disable(); if (!PageHighMem(page)) return page_address(page); + kmap = kmap_high_get(page); + if (kmap) + return kmap; + idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); #ifdef CONFIG_DEBUG_HIGHMEM @@ -80,6 +85,9 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #else (void) idx; /* to kill a warning */ #endif + } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + /* this address was obtained through kmap_high_get() */ + kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); } -- cgit v1.1 From b7cfda9fc3d7aa60cffab5367f2a72a4a70060cd Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2009 15:06:42 +0100 Subject: ARM: Fix pfn_valid() for sparse memory On OMAP platforms, some people want to declare to segment up the memory between the kernel and a separate application such that there is a hole in the middle of the memory as far as Linux is concerned. However, they want to be able to mmap() the hole. This currently causes problems, because update_mmu_cache() thinks that there are valid struct pages for the "hole". Fix this by making pfn_valid() slightly more expensive, by checking whether the PFN is contained within the meminfo array. Signed-off-by: Russell King Tested-by: Khasim Syed Mohammed --- arch/arm/mm/init.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 3a7279c..ea36186 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -349,12 +350,43 @@ static void __init bootmem_free_node(int node, struct meminfo *mi) free_area_init_node(node, zone_size, min, zhole_size); } +#ifndef CONFIG_SPARSEMEM +int pfn_valid(unsigned long pfn) +{ + struct meminfo *mi = &meminfo; + unsigned int left = 0, right = mi->nr_banks; + + do { + unsigned int mid = (right + left) / 2; + struct membank *bank = &mi->bank[mid]; + + if (pfn < bank_pfn_start(bank)) + right = mid; + else if (pfn >= bank_pfn_end(bank)) + left = mid + 1; + else + return 1; + } while (left < right); + return 0; +} +EXPORT_SYMBOL(pfn_valid); +#endif + +static int __init meminfo_cmp(const void *_a, const void *_b) +{ + const struct membank *a = _a, *b = _b; + long cmp = bank_pfn_start(a) - bank_pfn_start(b); + return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; +} + void __init bootmem_init(void) { struct meminfo *mi = &meminfo; unsigned long min, max_low, max_high; int node, initrd_node; + sort(&mi->bank, mi->nr_banks, sizeof(mi->bank[0]), meminfo_cmp, NULL); + /* * Locate which node contains the ramdisk image, if any. */ -- cgit v1.1