From bcb9b99d1fb6a1cbe592f131dc95450d2f18c91f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 5 Feb 2007 15:43:42 +0900 Subject: [IA64] kexec: Fix CONFIG_SMP=n compilation Kexec support for 2.6.20 on ia64 does not build properly using a config made up by CONFIG_SMP=n and CONFIG_HOTPLUG_CPU=n: Signed-off-by: Magnus Damm Acked-by: Simon Horman Acked-by: Jay Lan Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 11 +++++++---- arch/ia64/kernel/machine_kexec.c | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index bc2f64d..2018e62 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -79,6 +79,7 @@ crash_save_this_cpu() final_note(buf); } +#ifdef CONFIG_SMP static int kdump_wait_cpu_freeze(void) { @@ -91,6 +92,7 @@ kdump_wait_cpu_freeze(void) } return 1; } +#endif void machine_crash_shutdown(struct pt_regs *pt) @@ -132,11 +134,12 @@ kdump_cpu_freeze(struct unw_frame_info *info, void *arg) atomic_inc(&kdump_cpu_freezed); kdump_status[cpuid] = 1; mb(); - if (cpuid == 0) { - for (;;) - cpu_relax(); - } else +#ifdef CONFIG_HOTPLUG_CPU + if (cpuid != 0) ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]); +#endif + for (;;) + cpu_relax(); } static int diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index e2ccc9f..7141795 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -70,12 +70,14 @@ void machine_kexec_cleanup(struct kimage *image) void machine_shutdown(void) { +#ifdef CONFIG_HOTPLUG_CPU int cpu; for_each_online_cpu(cpu) { if (cpu != smp_processor_id()) cpu_down(cpu); } +#endif kexec_disable_iosapic(); } -- cgit v1.1 From 475c63bded322545d1e9ccc5930c8903d2c97c4c Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 10:59:03 +0900 Subject: [IA64] Zero size /proc/vmcore on ia64 Set saved_max_pfn when discontig memory is in use. This sets up saved_max_pfn when disctontig memory is in use. This mirrors the code for contig memory. This patch does not entirely solve the problem of making vmcore work, however it does appear to be neccessary. Please consider applying. Signed-off-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/mm/discontig.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 96722cb..d3edb12 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -506,6 +506,12 @@ void __init find_memory(void) max_pfn = max_low_pfn; find_initrd(); + +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we still need to know the real mem + * size before original memory map is reset. */ + saved_max_pfn = max_pfn; +#endif } #ifdef CONFIG_SMP -- cgit v1.1 From 233c2f99d6605343fa4a4c68560a4f74882b2693 Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 11:05:29 +0900 Subject: [IA64] kexec: typo in the saved_max_pfn description in contig.c Fix a typo in the saved_max_pfn description in contig.c Signed-off-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/mm/contig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 1e79551..ec8657b 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -177,7 +177,7 @@ find_memory (void) #ifdef CONFIG_CRASH_DUMP /* If we are doing a crash dump, we still need to know the real mem - * size before original memory map is * reset. */ + * size before original memory map is reset. */ saved_max_pfn = max_pfn; #endif } -- cgit v1.1 From 8a697d0a4c8e7ed51cf71a467ad59c25bfb85b44 Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 10:17:22 +0900 Subject: [IA64] kexec: Minor enhancement to includes in crash.c linux/uaccess.h was being included, but it seems that really the following includes are needed. asm/page.h: for __va() and PAGE_SHIFT asm/uaccess.h: for copy_to_user() I guess that linux/uaccess.h pulls in both asm/page.h and asm/uaccess.h. I notices this while backporting the code to xen's linux-2.6.16.33, which does not have linux/uaccess.h. I'm posting it as I think it is a correct, though somewhat cosmetic fix. Signed-off-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/kernel/crash_dump.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c index 83b8c91..da60e90 100644 --- a/arch/ia64/kernel/crash_dump.c +++ b/arch/ia64/kernel/crash_dump.c @@ -9,7 +9,8 @@ #include #include -#include +#include +#include /** * copy_oldmem_page - copy one page from "oldmem" -- cgit v1.1 From abac08dbb4739f417f570e5bdf03af36150b28c3 Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 10:16:20 +0900 Subject: [IA64] kexec: Remove inline declaration of efi_get_pal_addr() Remove the Remove inline declaration of efi_get_pal_addr() as it is declared in linux/efi.h. Signed-Off-By: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/kernel/machine_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 7141795..655195d 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -85,7 +86,6 @@ void machine_shutdown(void) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -extern void *efi_get_pal_addr(void); static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) { struct kimage *image = arg; -- cgit v1.1 From 9473252f20e8482464415d9030b3957b5593796d Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 10:17:38 +0900 Subject: [IA64] add newline to PAL-code warning message Signed-off-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 0b25a7d..6c03928 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -380,7 +380,7 @@ efi_get_pal_addr (void) #endif return __va(md->phys_addr); } - printk(KERN_WARNING "%s: no PAL-code memory-descriptor found", + printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n", __FUNCTION__); return NULL; } -- cgit v1.1 From c237508afa5d47282d3047784864013eebdc68ab Mon Sep 17 00:00:00 2001 From: Horms Date: Mon, 5 Feb 2007 13:49:10 -0800 Subject: [IA64] kexec: Move machine_shutdown from machine_kexec.c to process.c This moves the ia64 implementation of machine_shutdown() from machine_kexec.c to process.c, which is in keeping with the implelmentation on other architectures, and seems like a much more appropriate home for it. Signed-off-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/kernel/machine_kexec.c | 13 ------------- arch/ia64/kernel/process.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 655195d..e51cd90 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -69,19 +69,6 @@ void machine_kexec_cleanup(struct kimage *image) { } -void machine_shutdown(void) -{ -#ifdef CONFIG_HOTPLUG_CPU - int cpu; - - for_each_online_cpu(cpu) { - if (cpu != smp_processor_id()) - cpu_down(cpu); - } -#endif - kexec_disable_iosapic(); -} - /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 17685ab..ae96d41 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -803,6 +804,21 @@ cpu_halt (void) ia64_pal_halt(min_power_state); } +void machine_shutdown(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } +#endif +#ifdef CONFIG_KEXEC + kexec_disable_iosapic(); +#endif +} + void machine_restart (char *restart_cmd) { -- cgit v1.1 From 90f9d70a582c02f50b4dd847166cd5b037219891 Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Wed, 31 Jan 2007 17:50:31 +0800 Subject: [IA64] enable singlestep on system call As is pointed out in http://www.gelato.org/community/view_linear.php?id=1_1036&from=authors&value=Ian%20Wienand#1_1039, if single step on break instruction, the break fault has higher priority than the single-step trap. When the break fault handler is entered, it advances the IP by 1 instruction so break instruction single-stepping is skipped, actually it is next instruction which is single stepped. This patch modifies this, it adds TIF_SINGLESTEP bit for thread flags, and generate a fake sigtrap when single stepping break instruction. Test case in attachment can verify this. Any comments is welcome. Signed-off-by: bibo, mao Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 12 ++++++------ include/asm-ia64/thread_info.h | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index aa705e4..f1ec129 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1405,6 +1405,7 @@ ptrace_disable (struct task_struct *child) struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); /* make sure the single step/taken-branch trap bits are not set: */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child_psr->ss = 0; child_psr->tb = 0; } @@ -1525,6 +1526,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) * Make sure the single step/taken-branch trap bits * are not set: */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); ia64_psr(pt)->ss = 0; ia64_psr(pt)->tb = 0; @@ -1556,6 +1558,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) goto out_tsk; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + set_tsk_thread_flag(child, TIF_SINGLESTEP); if (request == PTRACE_SINGLESTEP) { ia64_psr(pt)->ss = 1; } else { @@ -1595,13 +1598,9 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) } -void +static void syscall_trace (void) { - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - if (!(current->ptrace & PT_PTRACED)) - return; /* * The 0x80 provides a way for the tracing parent to * distinguish between a syscall stop and SIGTRAP delivery. @@ -1664,7 +1663,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, audit_syscall_exit(success, result); } - if (test_thread_flag(TIF_SYSCALL_TRACE) + if ((test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SINGLESTEP)) && (current->ptrace & PT_PTRACED)) syscall_trace(); } diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 9b505b2..9169859 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -84,6 +84,7 @@ struct thread_info { #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_SINGLESTEP 5 /* restore singlestep on return to user mode */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ @@ -92,7 +93,8 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -- cgit v1.1 From ae0af3e3462fdada42deba30479aba70c6cf8b72 Mon Sep 17 00:00:00 2001 From: Aron Griffis Date: Mon, 5 Feb 2007 13:54:31 -0800 Subject: [IA64] use snprintf() on features field of /proc/cpuinfo Some patches have turned up on xen-devel recently to convert strcpy() to safer alternatives and so forth. While reviewing those patches I noticed that the features string building could be cleaned up. This patch uses snprintf() instead of strcpy() and direct character pointer manipulation. It makes the features string building safe and gets rid of the special case for features output in show_cpuinfo() Additionally I removed the (int) cast of ARRAY_SIZE, which seems to serve no purpose. Signed-off-by: Aron Griffis Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ad567b8..83c2629 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -569,34 +569,31 @@ show_cpuinfo (struct seq_file *m, void *v) { 1UL << 1, "spontaneous deferral"}, { 1UL << 2, "16-byte atomic ops" } }; - char features[128], *cp, sep; + char features[128], *cp, *sep; struct cpuinfo_ia64 *c = v; unsigned long mask; unsigned long proc_freq; - int i; + int i, size; mask = c->features; /* build the feature string: */ - memcpy(features, " standard", 10); + memcpy(features, "standard", 9); cp = features; - sep = 0; - for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { + size = sizeof(features); + sep = ""; + for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) { if (mask & feature_bits[i].mask) { - if (sep) - *cp++ = sep; - sep = ','; - *cp++ = ' '; - strcpy(cp, feature_bits[i].feature_name); - cp += strlen(feature_bits[i].feature_name); + cp += snprintf(cp, size, "%s%s", sep, + feature_bits[i].feature_name), + sep = ", "; mask &= ~feature_bits[i].mask; + size = sizeof(features) - (cp - features); } } - if (mask) { - /* print unknown features as a hex value: */ - if (sep) - *cp++ = sep; - sprintf(cp, " 0x%lx", mask); + if (mask && size > 1) { + /* print unknown features as a hex value */ + snprintf(cp, size, "%s0x%lx", sep, mask); } proc_freq = cpufreq_quick_get(cpunum); @@ -612,7 +609,7 @@ show_cpuinfo (struct seq_file *m, void *v) "model name : %s\n" "revision : %u\n" "archrev : %u\n" - "features :%s\n" /* don't change this---it _is_ right! */ + "features : %s\n" "cpu number : %lu\n" "cpu regs : %u\n" "cpu MHz : %lu.%06lu\n" -- cgit v1.1 From 87f76d3aafe5b5e0a1d6d857088a0263b35afa6b Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Tue, 30 Jan 2007 11:02:19 +0800 Subject: [IA64] find thread for user rbs address I encountered one problem when running ptrace test case the situation is this: traced process's syscall parameter needs to be accessed, but for sys_clone system call with clone_flag (CLONE_VFORK | CLONE_VM | SIGCHLD) parameter. This syscall's parameter accessing result is wrong. The reason is that vforked child process mm point is the same, but tgid is different. Without this patch find_thread_for_addr will return vforked process if vforked process is also stopped, but not the thread which calls vfork syscall. Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index f1ec129..3f89187 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -607,7 +607,7 @@ find_thread_for_addr (struct task_struct *child, unsigned long addr) */ list_for_each_safe(this, next, ¤t->children) { p = list_entry(this, struct task_struct, sibling); - if (p->mm != mm) + if (p->tgid != child->tgid) continue; if (thread_matches(p, addr)) { child = p; -- cgit v1.1 From c2c77fe8df3e0322a613ba1540910632ad14d96d Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Sun, 28 Jan 2007 13:47:02 +0100 Subject: [IA64] Fix NULL-pointer dereference in ia64_machine_kexec() This patch fixes a NULL-pointer dereference in ia64_machine_kexec(). The variable ia64_kimage is set in machine_kexec_prepare() which is called from sys_kexec_load(). If kdump wasn't configured before, ia64_kimage is NULL. machine_kdump_on_init() passes ia64_kimage() to machine_kexec() which assumes a valid value. The patch also adds a few sanity checks for the image to simplify debugging of similar problems in future. Signed-off-by: Bernhard Walle Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 5 +++++ arch/ia64/kernel/machine_kexec.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 2018e62..9d92097 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -118,6 +118,11 @@ machine_crash_shutdown(struct pt_regs *pt) static void machine_kdump_on_init(void) { + if (!ia64_kimage) { + printk(KERN_NOTICE "machine_kdump_on_init(): " + "kdump not configured\n"); + return; + } local_irq_disable(); kexec_disable_iosapic(); machine_kexec(ia64_kimage); diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index e51cd90..4f0f3b8 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -82,6 +82,7 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) unsigned long vector; int ii; + BUG_ON(!image); if (image->type == KEXEC_TYPE_CRASH) { crash_save_this_cpu(); current->thread.ksp = (__u64)info->sw - 16; @@ -120,6 +121,7 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) void machine_kexec(struct kimage *image) { + BUG_ON(!image); unw_init_running(ia64_machine_kexec, image); for(;;); } -- cgit v1.1 From 06f87adff12e52429390b22c57443665b073cd82 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 26 Jan 2007 00:38:53 -0500 Subject: [IA64] fix ACPI Kconfig issues All IA64 systems except IA64_HP_SIM include ACPI and PCI. So prevent IA64 Kconfigs that try to do irritating things like building PCI without building ACPI. Signed-off-by: Len Brown Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fcacfe2..4b35c93 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -11,6 +11,8 @@ menu "Processor type and features" config IA64 bool + select PCI if (!IA64_HP_SIM) + select ACPI if (!IA64_HP_SIM) default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -84,8 +86,6 @@ choice config IA64_GENERIC bool "generic" - select ACPI - select PCI select NUMA select ACPI_NUMA help -- cgit v1.1 From 451fe00cf7fd48ba55acd1c8b891e7a65e1b3f81 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 24 Jan 2007 22:48:04 -0700 Subject: [IA64] Clear IRQ affinity when unregistered When we offline a CPU, migrate_irqs() tries to determine whether the affinity bits of the IRQ descriptor match any of the remaining online CPUs. If not, it fixes up the interrupt to point somewhere else. Unfortunately, if an IRQ is unregistered the IRQ descriptor may still have affinity to the CPU being offlined, but the no_irq_chip handler doesn't provide a set_affinity function. This causes us to hit the WARN_ON in migrate_irqs(). The easiest solution seems to be setting all the bits in the affinity mask when the last interrupt is removed from the vector. I hit this on an older kernel with Xen/ia64 using driver domains (so it probably needs more testing on upstream). Xen essentially uses the bind/unbind interface in sysfs to unregister a device from a driver and thus unregister the interrupt. Signed-off-by: Alex Williamson Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 0fc5fb7..d6aab40 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -925,6 +925,11 @@ iosapic_unregister_intr (unsigned int gsi) /* Clear the interrupt controller descriptor */ idesc->chip = &no_irq_type; +#ifdef CONFIG_SMP + /* Clear affinity */ + cpus_setall(idesc->affinity); +#endif + /* Clear the interrupt information */ memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); -- cgit v1.1 From f43691ef8a816018a0294c5a9fa9d22512886c49 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 15 Jan 2007 09:33:55 -0700 Subject: [IA64] remove bogus prototype ia64_esi_init() This function doesn't exist. Signed-off-by: Alex Williamson Signed-off-by: Tony Luck --- include/asm-ia64/esi.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/asm-ia64/esi.h b/include/asm-ia64/esi.h index 84aac0e..40991c6 100644 --- a/include/asm-ia64/esi.h +++ b/include/asm-ia64/esi.h @@ -19,7 +19,6 @@ enum esi_proc_type { ESI_PROC_REENTRANT /* MP-safe and reentrant */ }; -extern int ia64_esi_init (void); extern struct ia64_sal_retval esi_call_phys (void *, u64 *); extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *, enum esi_proc_type, -- cgit v1.1 From 980dbfd421c8d33edbd2fbc8f5a6233ccbefb052 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Mon, 8 Jan 2007 16:05:08 -0600 Subject: [IA64-SGI] Check for TIO errors on shub2 Altix The shub2 error interrupt handler must check for TIO errors. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/huberror.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index abca6bd..fcf7f93 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000,2002-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved. */ #include @@ -38,12 +38,20 @@ static irqreturn_t hub_eint_handler(int irq, void *arg) (u64) nasid, 0, 0, 0, 0, 0, 0); if ((int)ret_stuff.v0) - panic("hubii_eint_handler(): Fatal TIO Error"); + panic("%s: Fatal %s Error", __FUNCTION__, + ((nasid & 1) ? "TIO" : "HUBII")); if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ (void)hubiio_crb_error_handler(hubdev_info); - } else - bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); + } else + if (nasid & 1) { /* TIO errors */ + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal TIO Error", __FUNCTION__); + } else + bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); return IRQ_HANDLED; } -- cgit v1.1 From d1598e05faa11d9f04e0a226122dd57674fb1dab Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 3 Jan 2007 09:26:21 +0000 Subject: [IA64] Enable SWIOTLB only when needed Don't force CONFIG_SWIOTLB on when not actually needed (i.e. HP_ZX1 and SGI_SN2). Signed-off-by: Jan Beulich Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 4b35c93..f1d2899 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -30,7 +30,6 @@ config MMU config SWIOTLB bool - default y config RWSEM_XCHGADD_ALGORITHM bool @@ -88,6 +87,7 @@ config IA64_GENERIC bool "generic" select NUMA select ACPI_NUMA + select SWIOTLB help This selects the system type of your hardware. A "generic" kernel will run on any supported IA-64 system. However, if you configure @@ -104,6 +104,7 @@ config IA64_GENERIC config IA64_DIG bool "DIG-compliant" + select SWIOTLB config IA64_HP_ZX1 bool "HP-zx1/sx1000" @@ -113,6 +114,7 @@ config IA64_HP_ZX1 config IA64_HP_ZX1_SWIOTLB bool "HP-zx1/sx1000 with software I/O TLB" + select SWIOTLB help Build a kernel that runs on HP zx1 and sx1000 systems even when they have broken PCI devices which cannot DMA to full 32 bits. Apart @@ -131,6 +133,7 @@ config IA64_SGI_SN2 config IA64_HP_SIM bool "Ski-simulator" + select SWIOTLB endchoice -- cgit v1.1 From 139b830477ccdca21b68c40f9a83ec327e65eb56 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Tue, 30 Jan 2007 02:11:09 -0800 Subject: [IA64] register memory ranges in a consistent manner While pursuing and unrelated issue with 64Mb granules I noticed a problem related to inconsistent use of add_active_range. There doesn't appear any reason to me why FLATMEM versus DISCONTIG_MEM should register memory to add_active_range with different code. So I've changed the code into a common implementation. The other subtle issue fixed by this patch was calling add_active_range in count_node_pages before granule aligning is performed. We were lucky with 16MB granules but not so with 64MB granules. count_node_pages has reserved regions filtered out and as a consequence linked kernel text and data aren't covered by calls to count_node_pages. So linked kernel regions wasn't reported to add_active_regions. This resulted in free_initmem causing numerous bad_page reports. This won't occur with this patch because now all known memory regions are reported by register_active_ranges. Acked-by: Mel Gorman Signed-off-by: Bob Picco Acked-by: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/mm/discontig.c | 4 +++- arch/ia64/mm/init.c | 19 +++++++++++++++++-- include/asm-ia64/meminit.h | 3 ++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d3edb12..999cefd 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -473,6 +473,9 @@ void __init find_memory(void) node_clear(node, memory_less_mask); mem_data[node].min_pfn = ~0UL; } + + efi_memmap_walk(register_active_ranges, NULL); + /* * Initialize the boot memory maps in reverse order since that's * what the bootmem allocator expects @@ -660,7 +663,6 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n { unsigned long end = start + len; - add_active_range(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); mem_data[node].num_physpages += len >> PAGE_SHIFT; if (start <= __pa(MAX_DMA_ADDRESS)) mem_data[node].num_dma_physpages += diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1373fae..8b75998 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -595,13 +596,27 @@ find_largest_hole (u64 start, u64 end, void *arg) return 0; } +#endif /* CONFIG_VIRTUAL_MEM_MAP */ + int __init register_active_ranges(u64 start, u64 end, void *arg) { - add_active_range(0, __pa(start) >> PAGE_SHIFT, __pa(end) >> PAGE_SHIFT); + int nid = paddr_to_nid(__pa(start)); + + if (nid < 0) + nid = 0; +#ifdef CONFIG_KEXEC + if (start > crashk_res.start && start < crashk_res.end) + start = crashk_res.end; + if (end > crashk_res.start && end < crashk_res.end) + end = crashk_res.start; +#endif + + if (start < end) + add_active_range(nid, __pa(start) >> PAGE_SHIFT, + __pa(end) >> PAGE_SHIFT); return 0; } -#endif /* CONFIG_VIRTUAL_MEM_MAP */ static int __init count_reserved_pages (u64 start, u64 end, void *arg) diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h index c8df759..6dd476b 100644 --- a/include/asm-ia64/meminit.h +++ b/include/asm-ia64/meminit.h @@ -51,12 +51,13 @@ extern void efi_memmap_init(unsigned long *, unsigned long *); #define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */ +extern int register_active_ranges(u64 start, u64 end, void *arg); + #ifdef CONFIG_VIRTUAL_MEM_MAP # define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */ extern unsigned long vmalloc_end; extern struct page *vmem_map; extern int find_largest_hole (u64 start, u64 end, void *arg); - extern int register_active_ranges (u64 start, u64 end, void *arg); extern int create_mem_map_page_table (u64 start, u64 end, void *arg); extern int vmemmap_find_next_valid_pfn(int, int); #else -- cgit v1.1 From d00195ebc18049f067c8e389c186aa6f5d2b659f Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Mon, 5 Feb 2007 16:19:59 -0800 Subject: [IA64] alignment bug in ldscript Occasionally the FSYS_RETURN patch list can have an odd length, causing other data structures to get out of alignment. In OpenVZ it is odd and we get misaligned kernel image, which does not boot. Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index d6083a0..8f3d006 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -157,6 +157,7 @@ SECTIONS } #endif + . = ALIGN(8); __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { *(.con_initcall.init) } -- cgit v1.1 From 71120061f271f00d8280659bf12e065ca6533d4d Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Mon, 5 Feb 2007 16:20:00 -0800 Subject: [IA64] virt_to_page() can be called with NULL arg It does not return NULL when arg is NULL. Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- include/asm-ia64/pgalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 393e04c..560c287 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -137,7 +137,8 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr) { - return virt_to_page(pgtable_quicklist_alloc()); + void *pg = pgtable_quicklist_alloc(); + return pg ? virt_to_page(pg) : NULL; } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, -- cgit v1.1 From 671496affdb5228786896864c3f900f66563e8c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 5 Feb 2007 16:20:03 -0800 Subject: [IA64] missing exports hwsw_sync_... Add missing exports to allow several drivers to be built as module with CONFIG_IA64_HP_ZX1_SWIOTLB. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/hp/common/hwsw_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index a5a5637..2153bca 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -192,3 +192,7 @@ EXPORT_SYMBOL(hwsw_unmap_sg); EXPORT_SYMBOL(hwsw_dma_supported); EXPORT_SYMBOL(hwsw_alloc_coherent); EXPORT_SYMBOL(hwsw_free_coherent); +EXPORT_SYMBOL(hwsw_sync_single_for_cpu); +EXPORT_SYMBOL(hwsw_sync_single_for_device); +EXPORT_SYMBOL(hwsw_sync_sg_for_cpu); +EXPORT_SYMBOL(hwsw_sync_sg_for_device); -- cgit v1.1 From f1c0afa2e8802c01cf82c915e2bb3cb2a81570d4 Mon Sep 17 00:00:00 2001 From: George Beshers Date: Mon, 5 Feb 2007 16:20:04 -0800 Subject: [IA64] show_mem() for IA64 sparsemem NUMA On the ia64 architecture only this patch upgrades show_mem() for sparse memory to be the same as it was for discontig memory. It has been shown to work on NUMA and flatmem architectures. Signed-off-by: George Beshers Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/mm/contig.c | 74 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index ec8657b..63e6d49 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -30,47 +30,69 @@ static unsigned long max_gap; #endif /** - * show_mem - display a memory statistics summary + * show_mem - give short summary of memory stats * - * Just walks the pages in the system and describes where they're allocated. + * Shows a simple page count of reserved and used pages in the system. + * For discontig machines, it does this on a per-pgdat basis. */ -void -show_mem (void) +void show_mem(void) { - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; + int i, total_reserved = 0; + int total_shared = 0, total_cached = 0; + unsigned long total_present = 0; + pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - i = max_mapnr; - for (i = 0; i < max_mapnr; i++) { - if (!pfn_valid(i)) { + printk(KERN_INFO "Node memory in pages:\n"); + for_each_online_pgdat(pgdat) { + unsigned long present; + unsigned long flags; + int shared = 0, cached = 0, reserved = 0; + + pgdat_resize_lock(pgdat, &flags); + present = pgdat->node_present_pages; + for(i = 0; i < pgdat->node_spanned_pages; i++) { + struct page *page; + if (pfn_valid(pgdat->node_start_pfn + i)) + page = pfn_to_page(pgdat->node_start_pfn + i); + else { #ifdef CONFIG_VIRTUAL_MEM_MAP - if (max_gap < LARGE_GAP) - continue; - i = vmemmap_find_next_valid_pfn(0, i) - 1; + if (max_gap < LARGE_GAP) + continue; #endif - continue; + i = vmemmap_find_next_valid_pfn(pgdat->node_id, + i) - 1; + continue; + } + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page)-1; } - total++; - if (PageReserved(mem_map+i)) - reserved++; - else if (PageSwapCache(mem_map+i)) - cached++; - else if (page_count(mem_map + i)) - shared += page_count(mem_map + i) - 1; + pgdat_resize_unlock(pgdat, &flags); + total_present += present; + total_reserved += reserved; + total_cached += cached; + total_shared += shared; + printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " + "shrd: %10d, swpd: %10d\n", pgdat->node_id, + present, reserved, shared, cached); } - printk(KERN_INFO "%d pages of RAM\n", total); - printk(KERN_INFO "%d reserved pages\n", reserved); - printk(KERN_INFO "%d pages shared\n", shared); - printk(KERN_INFO "%d pages swap cached\n", cached); - printk(KERN_INFO "%ld pages in page table cache\n", + printk(KERN_INFO "%ld pages of RAM\n", total_present); + printk(KERN_INFO "%d reserved pages\n", total_reserved); + printk(KERN_INFO "%d pages shared\n", total_shared); + printk(KERN_INFO "%d pages swap cached\n", total_cached); + printk(KERN_INFO "Total of %ld pages in page table cache\n", pgtable_quicklist_total_size()); + printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } + /* physical address where the bootmem map is located */ unsigned long bootmap_start; -- cgit v1.1 From 524fd988bb83153ddc9cfea867129eb6efb7ac23 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Mon, 5 Feb 2007 16:20:08 -0800 Subject: [IA64] clean up sparsemem memory_present call Eliminate arch specific memory_present call ia64 NUMA by utilizing sparse_memory_present_with_active_regions. Acked-by: Mel Gorman Signed-off-by: Bob Picco Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/mm/discontig.c | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 999cefd..6eae596 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -412,37 +412,6 @@ static void __init memory_less_nodes(void) return; } -#ifdef CONFIG_SPARSEMEM -/** - * register_sparse_mem - notify SPARSEMEM that this memory range exists. - * @start: physical start of range - * @end: physical end of range - * @arg: unused - * - * Simply calls SPARSEMEM to register memory section(s). - */ -static int __init register_sparse_mem(unsigned long start, unsigned long end, - void *arg) -{ - int nid; - - start = __pa(start) >> PAGE_SHIFT; - end = __pa(end) >> PAGE_SHIFT; - nid = early_pfn_to_nid(start); - memory_present(nid, start, end); - - return 0; -} - -static void __init arch_sparse_init(void) -{ - efi_memmap_walk(register_sparse_mem, NULL); - sparse_init(); -} -#else -#define arch_sparse_init() do {} while (0) -#endif - /** * find_memory - walk the EFI memory map and setup the bootmem allocator * @@ -694,10 +663,11 @@ void __init paging_init(void) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - arch_sparse_init(); - efi_memmap_walk(filter_rsvd_memory, count_node_pages); + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); + #ifdef CONFIG_VIRTUAL_MEM_MAP vmalloc_end -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page)); -- cgit v1.1 From 86afa9eb88af2248bcc91d5b3568c63fdea65d6c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 5 Feb 2007 16:07:57 -0800 Subject: [IA64] Hook up getcpu system call for IA64 getcpu system call returns cpu# and node# on which this system call and its caller are running. This patch hooks up its implementation on IA64. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 2 ++ include/asm-ia64/unistd.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 15234ed..e7873ee 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1610,5 +1610,7 @@ sys_call_table: data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice + data8 sys_ni_syscall // reserved for move_pages + data8 sys_getcpu .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 53c5c0e..a9e1fa4 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -291,11 +291,13 @@ #define __NR_sync_file_range 1300 #define __NR_tee 1301 #define __NR_vmsplice 1302 +/* 1303 reserved for move_pages */ +#define __NR_getcpu 1304 #ifdef __KERNEL__ -#define NR_syscalls 279 /* length of syscall table */ +#define NR_syscalls 281 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION -- cgit v1.1 From cde14bbfb3aa79b479db35bd29e6c083513d8614 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 5 Feb 2007 18:46:40 -0800 Subject: [IA64] swiotlb bug fixes This patch fixes - marking I-cache clean of pages DMAed to now only done for IA64 - broken multiple inclusion in include/asm-x86_64/swiotlb.h - missing call to mark_clean in swiotlb_sync_sg() - a (perhaps only theoretical) issue in swiotlb_dma_supported() when io_tlb_end is exactly at the end of memory Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/mm/init.c | 19 +++++++++++++++++++ include/asm-ia64/dma.h | 2 ++ include/asm-x86_64/swiotlb.h | 7 ++++--- lib/swiotlb.c | 33 ++++++++------------------------- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8b75998..faaca21 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -129,6 +129,25 @@ lazy_mmu_prot_update (pte_t pte) set_bit(PG_arch_1, &page->flags); /* mark page as clean */ } +/* + * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +void +dma_mark_clean(void *addr, size_t size) +{ + unsigned long pg_addr, end; + + pg_addr = PAGE_ALIGN((unsigned long) addr); + end = (unsigned long) addr + size; + while (pg_addr + PAGE_SIZE <= end) { + struct page *page = virt_to_page(pg_addr); + set_bit(PG_arch_1, &page->flags); + pg_addr += PAGE_SIZE; + } +} + inline void ia64_set_rbs_bot (void) { diff --git a/include/asm-ia64/dma.h b/include/asm-ia64/dma.h index dad3a73..4d97f60 100644 --- a/include/asm-ia64/dma.h +++ b/include/asm-ia64/dma.h @@ -19,4 +19,6 @@ extern unsigned long MAX_DMA_ADDRESS; #define free_dma(x) +void dma_mark_clean(void *addr, size_t size); + #endif /* _ASM_IA64_DMA_H */ diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h index ba94ab3..f9c5895 100644 --- a/include/asm-x86_64/swiotlb.h +++ b/include/asm-x86_64/swiotlb.h @@ -1,6 +1,5 @@ #ifndef _ASM_SWIOTLB_H -#define _ASM_SWTIOLB_H 1 - +#define _ASM_SWIOTLB_H 1 #include @@ -52,4 +51,6 @@ extern int swiotlb; extern void pci_swiotlb_init(void); -#endif /* _ASM_SWTIOLB_H */ +static inline void dma_mark_clean(void *addr, size_t size) {} + +#endif /* _ASM_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 1062578..3427833 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -558,25 +558,6 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) } /* - * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -static void -mark_clean(void *addr, size_t size) -{ - unsigned long pg_addr, end; - - pg_addr = PAGE_ALIGN((unsigned long) addr); - end = (unsigned long) addr + size; - while (pg_addr + PAGE_SIZE <= end) { - struct page *page = virt_to_page(pg_addr); - set_bit(PG_arch_1, &page->flags); - pg_addr += PAGE_SIZE; - } -} - -/* * Unmap a single streaming mode DMA translation. The dma_addr and size must * match what was provided for in a previous swiotlb_map_single call. All * other usages are undefined. @@ -594,7 +575,7 @@ swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) unmap_single(hwdev, dma_addr, size, dir); else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); + dma_mark_clean(dma_addr, size); } /* @@ -617,7 +598,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) sync_single(hwdev, dma_addr, size, dir, target); else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); + dma_mark_clean(dma_addr, size); } void @@ -648,7 +629,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) sync_single(hwdev, dma_addr, size, dir, target); else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); + dma_mark_clean(dma_addr, size); } void @@ -698,7 +679,6 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, dev_addr = virt_to_phys(addr); if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { void *map = map_single(hwdev, addr, sg->length, dir); - sg->dma_address = virt_to_bus(map); if (!map) { /* Don't panic here, we expect map_sg users to do proper error handling. */ @@ -707,6 +687,7 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, sg[0].dma_length = 0; return 0; } + sg->dma_address = virt_to_bus(map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; @@ -730,7 +711,7 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) - mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); + dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); } /* @@ -752,6 +733,8 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) sync_single(hwdev, (void *) sg->dma_address, sg->dma_length, dir, target); + else if (dir == DMA_FROM_DEVICE) + dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); } void @@ -783,7 +766,7 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr) int swiotlb_dma_supported (struct device *hwdev, u64 mask) { - return (virt_to_phys (io_tlb_end) - 1) <= mask; + return virt_to_phys(io_tlb_end - 1) <= mask; } EXPORT_SYMBOL(swiotlb_init); -- cgit v1.1 From 93fbff63e62b87fe450814db41f859d60b048fb8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 5 Feb 2007 18:49:45 -0800 Subject: [IA64] make swiotlb use bus_to_virt/virt_to_bus Convert all phys_to_virt/virt_to_phys uses to bus_to_virt/virt_to_bus, as is what is meant and what is needed in (at least) some virtualized environments like Xen. Signed-off-by: Jan Beulich Acked-by: Muli Ben-Yehuda Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- lib/swiotlb.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 3427833..bc684d1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -36,7 +36,7 @@ ( (val) & ( (align) - 1))) #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) -#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) +#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)) /* * Maximum allowable number of contiguous slabs to map, @@ -163,7 +163,7 @@ swiotlb_init_with_default_size (size_t default_size) */ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); + virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); } void @@ -244,7 +244,7 @@ swiotlb_late_init_with_default_size (size_t default_size) printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - " "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20, - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); + virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); return 0; @@ -445,7 +445,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, flags |= GFP_DMA; ret = (void *)__get_free_pages(flags, order); - if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) { + if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) { /* * The allocated memory isn't reachable by the device. * Fall back on swiotlb_map_single(). @@ -465,11 +465,11 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, if (swiotlb_dma_mapping_error(handle)) return NULL; - ret = phys_to_virt(handle); + ret = bus_to_virt(handle); } memset(ret, 0, size); - dev_addr = virt_to_phys(ret); + dev_addr = virt_to_bus(ret); /* Confirm address can be DMA'd by device */ if (address_needs_mapping(hwdev, dev_addr)) { @@ -525,7 +525,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) { - unsigned long dev_addr = virt_to_phys(ptr); + unsigned long dev_addr = virt_to_bus(ptr); void *map; BUG_ON(dir == DMA_NONE); @@ -546,7 +546,7 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) map = io_tlb_overflow_buffer; } - dev_addr = virt_to_phys(map); + dev_addr = virt_to_bus(map); /* * Ensure that the address returned is DMA'ble @@ -569,7 +569,7 @@ void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - char *dma_addr = phys_to_virt(dev_addr); + char *dma_addr = bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) @@ -592,7 +592,7 @@ static inline void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = phys_to_virt(dev_addr); + char *dma_addr = bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) @@ -623,7 +623,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, int dir, int target) { - char *dma_addr = phys_to_virt(dev_addr) + offset; + char *dma_addr = bus_to_virt(dev_addr) + offset; BUG_ON(dir == DMA_NONE); if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) @@ -676,7 +676,7 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, for (i = 0; i < nelems; i++, sg++) { addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = virt_to_phys(addr); + dev_addr = virt_to_bus(addr); if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { void *map = map_single(hwdev, addr, sg->length, dir); if (!map) { @@ -709,7 +709,8 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, for (i = 0; i < nelems; i++, sg++) if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir); + unmap_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); } @@ -731,7 +732,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, for (i = 0; i < nelems; i++, sg++) if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - sync_single(hwdev, (void *) sg->dma_address, + sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir, target); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); @@ -754,7 +755,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int swiotlb_dma_mapping_error(dma_addr_t dma_addr) { - return (dma_addr == virt_to_phys(io_tlb_overflow_buffer)); + return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); } /* @@ -766,7 +767,7 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr) int swiotlb_dma_supported (struct device *hwdev, u64 mask) { - return virt_to_phys(io_tlb_end - 1) <= mask; + return virt_to_bus(io_tlb_end - 1) <= mask; } EXPORT_SYMBOL(swiotlb_init); -- cgit v1.1 From 563aaf064f3776623ff5e7aef511ac2eb7e5f0bb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 5 Feb 2007 18:51:25 -0800 Subject: [IA64] swiotlb cleanup - add proper __init decoration to swiotlb's init code (and the code calling it, where not already the case) - replace uses of 'unsigned long' with dma_addr_t where appropriate - do miscellaneous simplicfication and cleanup Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/x86_64/kernel/pci-swiotlb.c | 2 +- lib/swiotlb.c | 56 +++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index 697f0aa..eb18be5 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c @@ -29,7 +29,7 @@ struct dma_mapping_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void pci_swiotlb_init(void) +void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bc684d1..067eed5 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -1,7 +1,7 @@ /* * Dynamic DMA mapping support. * - * This implementation is for IA-64 and EM64T platforms that do not support + * This implementation is a fallback for platforms that do not support * I/O TLBs (aka DMA address translation hardware). * Copyright (C) 2000 Asit Mallick * Copyright (C) 2000 Goutham Rao @@ -129,23 +129,25 @@ __setup("swiotlb=", setup_io_tlb_npages); * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void -swiotlb_init_with_default_size (size_t default_size) +void __init +swiotlb_init_with_default_size(size_t default_size) { - unsigned long i; + unsigned long i, bytes; if (!io_tlb_nslabs) { io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); } + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + /* * Get IO TLB memory from the low pages */ - io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + io_tlb_start = alloc_bootmem_low_pages(bytes); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + io_tlb_end = io_tlb_start + bytes; /* * Allocate and initialize the free list array. This array is used @@ -162,12 +164,15 @@ swiotlb_init_with_default_size (size_t default_size) * Get the overflow emergency buffer */ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + if (!io_tlb_overflow_buffer) + panic("Cannot allocate SWIOTLB overflow buffer!\n"); + printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); } -void -swiotlb_init (void) +void __init +swiotlb_init(void) { swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ } @@ -178,9 +183,9 @@ swiotlb_init (void) * This should be just like above, but with some error catching. */ int -swiotlb_late_init_with_default_size (size_t default_size) +swiotlb_late_init_with_default_size(size_t default_size) { - unsigned long i, req_nslabs = io_tlb_nslabs; + unsigned long i, bytes, req_nslabs = io_tlb_nslabs; unsigned int order; if (!io_tlb_nslabs) { @@ -191,8 +196,9 @@ swiotlb_late_init_with_default_size (size_t default_size) /* * Get IO TLB memory from the low pages */ - order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, @@ -205,13 +211,14 @@ swiotlb_late_init_with_default_size (size_t default_size) if (!io_tlb_start) goto cleanup1; - if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) { + if (order != get_order(bytes)) { printk(KERN_WARNING "Warning: only able to allocate %ld MB " "for software IO TLB\n", (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = io_tlb_nslabs << IO_TLB_SHIFT; } - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + io_tlb_end = io_tlb_start + bytes; + memset(io_tlb_start, 0, bytes); /* * Allocate and initialize the free list array. This array is used @@ -242,8 +249,8 @@ swiotlb_late_init_with_default_size (size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - " - "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20, + printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - " + "0x%lx\n", bytes >> 20, virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); return 0; @@ -256,8 +263,8 @@ cleanup3: free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * sizeof(int))); io_tlb_list = NULL; - io_tlb_end = NULL; cleanup2: + io_tlb_end = NULL; free_pages((unsigned long)io_tlb_start, order); io_tlb_start = NULL; cleanup1: @@ -433,7 +440,7 @@ void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { - unsigned long dev_addr; + dma_addr_t dev_addr; void *ret; int order = get_order(size); @@ -473,8 +480,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, /* Confirm address can be DMA'd by device */ if (address_needs_mapping(hwdev, dev_addr)) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n", - (unsigned long long)*hwdev->dma_mask, dev_addr); + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)*hwdev->dma_mask, + (unsigned long long)dev_addr); panic("swiotlb_alloc_coherent: allocated memory is out of " "range for device"); } @@ -504,7 +512,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * When the mapping is small enough return a static buffer to limit * the damage, or panic when the transfer is too big. */ - printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at " + printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " "device %s\n", size, dev ? dev->bus_id : "?"); if (size > io_tlb_overflow && do_panic) { @@ -525,7 +533,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) { - unsigned long dev_addr = virt_to_bus(ptr); + dma_addr_t dev_addr = virt_to_bus(ptr); void *map; BUG_ON(dir == DMA_NONE); @@ -669,7 +677,7 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, int dir) { void *addr; - unsigned long dev_addr; + dma_addr_t dev_addr; int i; BUG_ON(dir == DMA_NONE); @@ -765,7 +773,7 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr) * this function. */ int -swiotlb_dma_supported (struct device *hwdev, u64 mask) +swiotlb_dma_supported(struct device *hwdev, u64 mask) { return virt_to_bus(io_tlb_end - 1) <= mask; } -- cgit v1.1 From 51099005ab8e09d68a13fea8d55bc739c1040ca6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 5 Feb 2007 18:53:04 -0800 Subject: [IA64] swiotlb abstraction (e.g. for Xen) Add abstraction so that the file can be used by environments other than IA64 and EM64T, namely for Xen. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- include/asm-ia64/swiotlb.h | 9 +++ include/asm-x86_64/swiotlb.h | 1 + lib/swiotlb.c | 184 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 159 insertions(+), 35 deletions(-) create mode 100644 include/asm-ia64/swiotlb.h diff --git a/include/asm-ia64/swiotlb.h b/include/asm-ia64/swiotlb.h new file mode 100644 index 0000000..452c162 --- /dev/null +++ b/include/asm-ia64/swiotlb.h @@ -0,0 +1,9 @@ +#ifndef _ASM_SWIOTLB_H +#define _ASM_SWIOTLB_H 1 + +#include + +#define SWIOTLB_ARCH_NEED_LATE_INIT +#define SWIOTLB_ARCH_NEED_ALLOC + +#endif /* _ASM_SWIOTLB_H */ diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h index f9c5895..ab913ff 100644 --- a/include/asm-x86_64/swiotlb.h +++ b/include/asm-x86_64/swiotlb.h @@ -44,6 +44,7 @@ extern void swiotlb_init(void); extern int swiotlb_force; #ifdef CONFIG_SWIOTLB +#define SWIOTLB_ARCH_NEED_ALLOC extern int swiotlb; #else #define swiotlb 0 diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 067eed5..50a4380 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -35,8 +36,10 @@ #define OFFSET(val,align) ((unsigned long) \ ( (val) & ( (align) - 1))) +#ifndef SG_ENT_VIRT_ADDRESS #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) #define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)) +#endif /* * Maximum allowable number of contiguous slabs to map, @@ -101,13 +104,25 @@ static unsigned int io_tlb_index; * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ -static unsigned char **io_tlb_orig_addr; +#ifndef SWIOTLB_ARCH_HAS_IO_TLB_ADDR_T +typedef char *io_tlb_addr_t; +#define swiotlb_orig_addr_null(buffer) (!(buffer)) +#define ptr_to_io_tlb_addr(ptr) (ptr) +#define page_to_io_tlb_addr(pg, off) (page_address(pg) + (off)) +#define sg_to_io_tlb_addr(sg) SG_ENT_VIRT_ADDRESS(sg) +#endif +static io_tlb_addr_t *io_tlb_orig_addr; /* * Protect the above data structures in the map and unmap calls */ static DEFINE_SPINLOCK(io_tlb_lock); +#ifdef SWIOTLB_EXTRA_VARIABLES +SWIOTLB_EXTRA_VARIABLES; +#endif + +#ifndef SWIOTLB_ARCH_HAS_SETUP_IO_TLB_NPAGES static int __init setup_io_tlb_npages(char *str) { @@ -122,9 +137,25 @@ setup_io_tlb_npages(char *str) swiotlb_force = 1; return 1; } +#endif __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ +#ifndef swiotlb_adjust_size +#define swiotlb_adjust_size(size) ((void)0) +#endif + +#ifndef swiotlb_adjust_seg +#define swiotlb_adjust_seg(start, size) ((void)0) +#endif + +#ifndef swiotlb_print_info +#define swiotlb_print_info(bytes) \ + printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - " \ + "0x%lx\n", bytes >> 20, \ + virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)) +#endif + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -138,6 +169,8 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); } + swiotlb_adjust_size(io_tlb_nslabs); + swiotlb_adjust_size(io_tlb_overflow); bytes = io_tlb_nslabs << IO_TLB_SHIFT; @@ -155,10 +188,14 @@ swiotlb_init_with_default_size(size_t default_size) * between io_tlb_start and io_tlb_end. */ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); - for (i = 0; i < io_tlb_nslabs; i++) + for (i = 0; i < io_tlb_nslabs; i++) { + if ( !(i % IO_TLB_SEGSIZE) ) + swiotlb_adjust_seg(io_tlb_start + (i << IO_TLB_SHIFT), + IO_TLB_SEGSIZE << IO_TLB_SHIFT); io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + } io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(io_tlb_addr_t)); /* * Get the overflow emergency buffer @@ -166,17 +203,21 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); + swiotlb_adjust_seg(io_tlb_overflow_buffer, io_tlb_overflow); - printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", - virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); + swiotlb_print_info(bytes); } +#ifndef __swiotlb_init_with_default_size +#define __swiotlb_init_with_default_size swiotlb_init_with_default_size +#endif void __init swiotlb_init(void) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + __swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ } +#ifdef SWIOTLB_ARCH_NEED_LATE_INIT /* * Systems with larger DMA zones (those that don't support ISA) can * initialize the swiotlb later using the slab allocator if needed. @@ -234,12 +275,12 @@ swiotlb_late_init_with_default_size(size_t default_size) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(char *))); + io_tlb_orig_addr = (io_tlb_addr_t *)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(io_tlb_addr_t))); if (!io_tlb_orig_addr) goto cleanup3; - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *)); + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(io_tlb_addr_t)); /* * Get the overflow emergency buffer @@ -249,19 +290,17 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - " - "0x%lx\n", bytes >> 20, - virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); + swiotlb_print_info(bytes); return 0; cleanup4: - free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * - sizeof(char *))); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(io_tlb_addr_t))); io_tlb_orig_addr = NULL; cleanup3: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); + free_pages((unsigned long)io_tlb_list, + get_order(io_tlb_nslabs * sizeof(int))); io_tlb_list = NULL; cleanup2: io_tlb_end = NULL; @@ -271,7 +310,9 @@ cleanup1: io_tlb_nslabs = req_nslabs; return -ENOMEM; } +#endif +#ifndef SWIOTLB_ARCH_HAS_NEEDS_MAPPING static inline int address_needs_mapping(struct device *hwdev, dma_addr_t addr) { @@ -282,11 +323,35 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr) return (addr & ~mask) != 0; } +static inline int range_needs_mapping(const void *ptr, size_t size) +{ + return swiotlb_force; +} + +static inline int order_needs_mapping(unsigned int order) +{ + return 0; +} +#endif + +static void +__sync_single(io_tlb_addr_t buffer, char *dma_addr, size_t size, int dir) +{ +#ifndef SWIOTLB_ARCH_HAS_SYNC_SINGLE + if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, buffer, size); + else + memcpy(buffer, dma_addr, size); +#else + __swiotlb_arch_sync_single(buffer, dma_addr, size, dir); +#endif +} + /* * Allocates bounce buffer and returns its kernel virtual address. */ static void * -map_single(struct device *hwdev, char *buffer, size_t size, int dir) +map_single(struct device *hwdev, io_tlb_addr_t buffer, size_t size, int dir) { unsigned long flags; char *dma_addr; @@ -359,7 +424,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) */ io_tlb_orig_addr[index] = buffer; if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - memcpy(dma_addr, buffer, size); + __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); return dma_addr; } @@ -373,17 +438,18 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; + io_tlb_addr_t buffer = io_tlb_orig_addr[index]; /* * First, sync the memory before unmapping the entry */ - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + if (!swiotlb_orig_addr_null(buffer) + && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) /* * bounce... copy the data back into the original buffer * and * delete the bounce buffer. */ - memcpy(buffer, dma_addr, size); + __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); /* * Return the buffer to the free list by setting the corresponding @@ -416,18 +482,18 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir, int target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; + io_tlb_addr_t buffer = io_tlb_orig_addr[index]; switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - memcpy(buffer, dma_addr, size); + __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); else BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - memcpy(dma_addr, buffer, size); + __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); else BUG_ON(dir != DMA_FROM_DEVICE); break; @@ -436,6 +502,8 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, } } +#ifdef SWIOTLB_ARCH_NEED_ALLOC + void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) @@ -451,7 +519,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags |= GFP_DMA; - ret = (void *)__get_free_pages(flags, order); + if (!order_needs_mapping(order)) + ret = (void *)__get_free_pages(flags, order); + else + ret = NULL; if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) { /* * The allocated memory isn't reachable by the device. @@ -489,6 +560,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, *dma_handle = dev_addr; return ret; } +EXPORT_SYMBOL(swiotlb_alloc_coherent); void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, @@ -501,6 +573,9 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); } +EXPORT_SYMBOL(swiotlb_free_coherent); + +#endif static void swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) @@ -542,13 +617,14 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) + if (!range_needs_mapping(ptr, size) + && !address_needs_mapping(hwdev, dev_addr)) return dev_addr; /* * Oh well, have to allocate and map a bounce buffer. */ - map = map_single(hwdev, ptr, size, dir); + map = map_single(hwdev, ptr_to_io_tlb_addr(ptr), size, dir); if (!map) { swiotlb_full(hwdev, size, dir, 1); map = io_tlb_overflow_buffer; @@ -676,17 +752,16 @@ int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, int dir) { - void *addr; dma_addr_t dev_addr; int i; BUG_ON(dir == DMA_NONE); for (i = 0; i < nelems; i++, sg++) { - addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = virt_to_bus(addr); - if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { - void *map = map_single(hwdev, addr, sg->length, dir); + dev_addr = SG_ENT_PHYS_ADDRESS(sg); + if (range_needs_mapping(SG_ENT_VIRT_ADDRESS(sg), sg->length) + || address_needs_mapping(hwdev, dev_addr)) { + void *map = map_single(hwdev, sg_to_io_tlb_addr(sg), sg->length, dir); if (!map) { /* Don't panic here, we expect map_sg users to do proper error handling. */ @@ -760,6 +835,44 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } +#ifdef SWIOTLB_ARCH_NEED_MAP_PAGE + +dma_addr_t +swiotlb_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_addr_t dev_addr; + char *map; + + dev_addr = page_to_bus(page) + offset; + if (address_needs_mapping(hwdev, dev_addr)) { + map = map_single(hwdev, page_to_io_tlb_addr(page, offset), size, direction); + if (!map) { + swiotlb_full(hwdev, size, direction, 1); + map = io_tlb_overflow_buffer; + } + dev_addr = virt_to_bus(map); + } + + return dev_addr; +} + +void +swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction direction) +{ + char *dma_addr = bus_to_virt(dev_addr); + + BUG_ON(direction == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + unmap_single(hwdev, dma_addr, size, direction); + else if (direction == DMA_FROM_DEVICE) + dma_mark_clean(dma_addr, size); +} + +#endif + int swiotlb_dma_mapping_error(dma_addr_t dma_addr) { @@ -772,10 +885,13 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr) * during bus mastering, then you would pass 0x00ffffff as the mask to * this function. */ +#ifndef __swiotlb_dma_supported +#define __swiotlb_dma_supported(hwdev, mask) (virt_to_bus(io_tlb_end - 1) <= (mask)) +#endif int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return virt_to_bus(io_tlb_end - 1) <= mask; + return __swiotlb_dma_supported(hwdev, mask); } EXPORT_SYMBOL(swiotlb_init); @@ -790,6 +906,4 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); EXPORT_SYMBOL(swiotlb_sync_sg_for_device); EXPORT_SYMBOL(swiotlb_dma_mapping_error); -EXPORT_SYMBOL(swiotlb_alloc_coherent); -EXPORT_SYMBOL(swiotlb_free_coherent); EXPORT_SYMBOL(swiotlb_dma_supported); -- cgit v1.1