From e3ebadd95cb621e2c7436f3d3646447ac9d5c16d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 7 May 2007 08:44:24 -0700 Subject: Revert "[PATCH] x86: __pa and __pa_symbol address space separation" This was broken. It adds complexity, for no good reason. Rather than separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(), and preferably __pa() too - and just use "virt_to_phys()" instead, which is more readable and has nicer semantics. However, right now, just undo the separation, and make __pa_symbol() be the exact same as __pa(). That fixes the bugs this patch introduced, and we can do the fairly obvious cleanups later. Do the new __phys_addr() function (which is now the actual workhorse for the unified __pa()/__pa_symbol()) as a real external function, that way all the potential issues with compile/link-time optimizations of constant symbol addresses go away, and we can also, if we choose to, add more sanity-checking of the argument. Cc: Eric W. Biederman Cc: Vivek Goyal Cc: Andi Kleen Cc: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/alternative.c | 4 ++-- arch/i386/mm/init.c | 15 +++++++-------- arch/x86_64/kernel/machine_kexec.c | 16 ++++++++-------- arch/x86_64/kernel/setup.c | 9 ++++----- arch/x86_64/kernel/smp.c | 2 +- arch/x86_64/mm/init.c | 31 ++++++++++++++++++------------- arch/x86_64/mm/ioremap.c | 9 +++++++++ arch/x86_64/mm/pageattr.c | 16 ++++++++-------- include/asm-x86_64/page.h | 18 +++++++----------- include/asm-x86_64/pgtable.h | 4 ++-- 10 files changed, 66 insertions(+), 58 deletions(-) diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index e5cec66..d8cda14 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -390,8 +390,8 @@ void __init alternative_instructions(void) _text, _etext); } free_init_pages("SMP alternatives", - __pa_symbol(&__smp_locks), - __pa_symbol(&__smp_locks_end)); + (unsigned long)__smp_locks, + (unsigned long)__smp_locks_end); 
} else { alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index dbe16f6..1a7197e 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -843,11 +843,10 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) unsigned long addr; for (addr = begin; addr < end; addr += PAGE_SIZE) { - struct page *page = pfn_to_page(addr >> PAGE_SHIFT); - ClearPageReserved(page); - init_page_count(page); - memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); - __free_page(page); + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_page(addr); totalram_pages++; } printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); @@ -856,14 +855,14 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) void free_initmem(void) { free_init_pages("unused kernel memory", - __pa_symbol(&__init_begin), - __pa_symbol(&__init_end)); + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", __pa(start), __pa(end)); + free_init_pages("initrd memory", start, end); } #endif diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index a8bb33c..c3a5547 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -189,21 +189,21 @@ NORET_TYPE void machine_kexec(struct kimage *image) control_page = page_address(image->control_code_page) + PAGE_SIZE; memcpy(control_page, relocate_kernel, PAGE_SIZE); - page_list[PA_CONTROL_PAGE] = __pa(control_page); + page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = __pa_symbol(&kexec_pgd); + page_list[PA_PGD] = virt_to_phys(&kexec_pgd); page_list[VA_PGD] 
= (unsigned long)kexec_pgd; - page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0); + page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); page_list[VA_PUD_0] = (unsigned long)kexec_pud0; - page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0); + page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0); + page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1); + page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); page_list[VA_PUD_1] = (unsigned long)kexec_pud1; - page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1); + page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; - page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1); + page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; page_list[PA_TABLE_PAGE] = diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index db30b5b..db51577 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -245,12 +245,11 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - init_mm.pgd = __va(__pa_symbol(&init_level4_pgt)); - code_resource.start = __pa_symbol(&_text); - code_resource.end = __pa_symbol(&_etext)-1; - data_resource.start = __pa_symbol(&_etext); - data_resource.end = __pa_symbol(&_edata)-1; + code_resource.start = virt_to_phys(&_text); + code_resource.end = virt_to_phys(&_etext)-1; + data_resource.start = virt_to_phys(&_etext); + data_resource.end = virt_to_phys(&_edata)-1; early_identify_cpu(&boot_cpu_data); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 22abae4..bd1d123 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -76,7 +76,7 @@ static inline void leave_mm(int cpu) if 
(read_pda(mmu_state) == TLBSTATE_OK) BUG(); cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); - load_cr3(init_mm.pgd); + load_cr3(swapper_pg_dir); } /* diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 282b0a8..c082268 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -572,13 +572,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (addr = begin; addr < end; addr += PAGE_SIZE) { - struct page *page = pfn_to_page(addr >> PAGE_SHIFT); - ClearPageReserved(page); - init_page_count(page); - memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)(addr & ~(PAGE_SIZE-1)), + POISON_FREE_INITMEM, PAGE_SIZE); if (addr >= __START_KERNEL_map) change_page_attr_addr(addr, 1, __pgprot(0)); - __free_page(page); + free_page(addr); totalram_pages++; } if (addr > __START_KERNEL_map) @@ -588,26 +588,31 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) void free_initmem(void) { free_init_pages("unused kernel memory", - __pa_symbol(&__init_begin), - __pa_symbol(&__init_end)); + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); } #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void) { - unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size; + unsigned long start = (unsigned long)_stext, end; #ifdef CONFIG_HOTPLUG_CPU /* It must still be possible to apply SMP alternatives. 
*/ if (num_possible_cpus() > 1) - start = PFN_ALIGN(__va(__pa_symbol(&_etext))); + start = (unsigned long)_etext; #endif - size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start; - change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO); + end = (unsigned long)__end_rodata; + start = (start + PAGE_SIZE - 1) & PAGE_MASK; + end &= PAGE_MASK; + if (end <= start) + return; + + change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", - size >> 10); + (end - start) >> 10); /* * change_page_attr_addr() requires a global_flush_tlb() call after it. @@ -622,7 +627,7 @@ void mark_rodata_ro(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", __pa(start), __pa(end)); + free_init_pages("initrd memory", start, end); } #endif diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index c6e5e8d..6cac90a 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -13,12 +13,21 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/io.h> + #include <asm/pgalloc.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <asm/proto.h> +unsigned long __phys_addr(unsigned long x) +{ + if (x >= __START_KERNEL_map) + return x - __START_KERNEL_map + phys_base; + return x - PAGE_OFFSET; +} +EXPORT_SYMBOL(__phys_addr); + #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index bf4aa8d..d653d0b 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -51,6 +51,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, SetPagePrivate(base); page_private(base) = 0; + address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { @@ -100,12 +101,13 @@ static inline void save_page(struct page *fpage) * No more special protections in this 2/4MB area - revert to a * 
large page again. */ -static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot) +static void revert_page(unsigned long address, pgprot_t ref_prot) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t large_pte; + unsigned long pfn; pgd = pgd_offset_k(address); BUG_ON(pgd_none(*pgd)); @@ -113,6 +115,7 @@ static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_p BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); BUG_ON(pmd_val(*pmd) & _PAGE_PSE); + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; large_pte = pfn_pte(pfn, ref_prot); large_pte = pte_mkhuge(large_pte); set_pte((pte_t *)pmd, large_pte); @@ -138,8 +141,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, */ struct page *split; ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); - split = split_large_page(pfn << PAGE_SHIFT, prot, - ref_prot2); + split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; set_pte(kpte, mk_pte(split, ref_prot2)); @@ -158,7 +160,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, if (page_private(kpte_page) == 0) { save_page(kpte_page); - revert_page(address, pfn, ref_prot); + revert_page(address, ref_prot); } return 0; } @@ -178,7 +180,6 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, */ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) { - unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT; int err = 0, kernel_map = 0; int i; @@ -199,11 +200,10 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) } /* Handle kernel mapping too which aliases part of the * lowmem */ - if ((pfn >= phys_base_pfn) && - ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) { + if (__pa(address) < KERNEL_TEXT_SIZE) { unsigned long addr2; pgprot_t prot2; - addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT); + addr2 = __START_KERNEL_map + 
__pa(address); /* Make sure the kernel mappings stay executable */ prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); err = __change_page_attr(addr2, pfn, prot2, diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index b17fc16..4d04e24 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -94,26 +94,22 @@ extern unsigned long phys_base; #define KERNEL_TEXT_SIZE (40*1024*1024) #define KERNEL_TEXT_START 0xffffffff80000000 +#define PAGE_OFFSET __PAGE_OFFSET #ifndef __ASSEMBLY__ #include <asm/bug.h> -#endif /* __ASSEMBLY__ */ +extern unsigned long __phys_addr(unsigned long); -#define PAGE_OFFSET __PAGE_OFFSET +#endif /* __ASSEMBLY__ */ -/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. - Otherwise you risk miscompilation. */ -#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) -/* __pa_symbol should be used for C visible symbols. - This seems to be the official gcc blessed way to do such arithmetic. */ -#define __pa_symbol(x) \ - ({unsigned long v; \ - asm("" : "=r" (v) : "0" (x)); \ - ((v - __START_KERNEL_map) + phys_base); }) +#define __pa(x) __phys_addr((unsigned long)(x)) +#define __pa_symbol(x) __phys_addr((unsigned long)(x)) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define __boot_va(x) __va(x) +#define __boot_pa(x) __pa(x) #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < end_pfn) #endif diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 599993f..da3390f 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -19,7 +19,7 @@ extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; extern unsigned long __supported_pte_mask; -#define swapper_pg_dir ((pgd_t *)NULL) +#define swapper_pg_dir init_level4_pgt extern void paging_init(void); extern void clear_kernel_mapping(unsigned long addr, unsigned long size); @@ -29,7 +29,7 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size); * for zero-mapped memory areas 
etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (pfn_to_page(__pa_symbol(&empty_zero_page) >> PAGE_SHIFT)) +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) #endif /* !__ASSEMBLY__ */ -- cgit v1.1