diff options
Diffstat (limited to 'arch')
383 files changed, 12503 insertions, 4643 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 0c79b9d..f7c9663 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -280,6 +280,10 @@ config ISA (MCA) or VESA. ISA is an older system, now being displaced by PCI; newer boards don't support it. If you have ISA, say Y, otherwise N. +config ISA_DMA_API + bool + default y + config PCI bool depends on !ALPHA_JENSEN diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 64e450d..167fd89f 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1150,16 +1150,13 @@ osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remai if (get_tv32(&tmp, sleep)) goto fault; - ticks = tmp.tv_usec; - ticks = (ticks + (1000000 / HZ) - 1) / (1000000 / HZ); - ticks += tmp.tv_sec * HZ; + ticks = timeval_to_jiffies(&tmp); current->state = TASK_INTERRUPTIBLE; ticks = schedule_timeout(ticks); if (remain) { - tmp.tv_sec = ticks / HZ; - tmp.tv_usec = ticks % HZ; + jiffies_to_timeval(ticks, &tmp); if (put_tv32(remain, &tmp)) goto fault; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4055115..475950c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -85,6 +85,7 @@ choice config ARCH_CLPS7500 bool "Cirrus-CL-PS7500FE" select TIMER_ACORN + select ISA config ARCH_CLPS711X bool "CLPS711x/EP721x-based" @@ -96,6 +97,7 @@ config ARCH_CO285 config ARCH_EBSA110 bool "EBSA-110" + select ISA help This is an evaluation board for the StrongARM processor available from Digital. It has limited hardware on-board, including an onboard @@ -120,13 +122,16 @@ config ARCH_INTEGRATOR config ARCH_IOP3XX bool "IOP3xx-based" + select PCI config ARCH_IXP4XX bool "IXP4xx-based" select DMABOUNCE + select PCI config ARCH_IXP2000 bool "IXP2400/2800-based" + select PCI config ARCH_L7200 bool "LinkUp-L7200" @@ -155,6 +160,8 @@ config ARCH_RPC config ARCH_SA1100 bool "SA1100-based" + select ISA + select DISCONTIGMEM config ARCH_S3C2410 bool "Samsung S3C2410" @@ -165,6 +172,9 @@ config ARCH_S3C2410 config ARCH_SHARK bool "Shark" + select ISA + select ISA_DMA + select PCI config ARCH_LH7A40X bool "Sharp LH7A40X" @@ -252,8 +262,6 @@ config ARM_AMBA config ISA bool - depends on FOOTBRIDGE_HOST || ARCH_SHARK || ARCH_CLPS7500 || ARCH_EBSA110 || ARCH_CDB89712 || ARCH_EDB7211 || ARCH_SA1100 || ARCH_MX1ADS - default y help Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff @@ -263,12 +271,13 @@ config ISA config ISA_DMA bool - depends on FOOTBRIDGE_HOST || ARCH_SHARK + +config ISA_DMA_API + bool default y config PCI bool "PCI support" if ARCH_INTEGRATOR_AP - default y if ARCH_SHARK || FOOTBRIDGE_HOST || ARCH_IOP3XX || ARCH_IXP4XX || ARCH_IXP2000 help Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside @@ -296,7 +305,7 @@ menu "Kernel Features" config SMP bool "Symmetric Multi-Processing (EXPERIMENTAL)" - depends on EXPERIMENTAL && n + depends on EXPERIMENTAL #&& n help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -336,8 +345,7 @@ config PREEMPT config DISCONTIGMEM bool - depends on ARCH_EDB7211 || ARCH_SA1100 || (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) - default y + default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) help Say Y to support efficient handling of discontiguous physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) @@ -489,7 +497,7 @@ source "drivers/cpufreq/Kconfig" config CPU_FREQ_SA1100 bool - depends on CPU_FREQ && (SA1100_LART || SA1100_PLEB) + depends on CPU_FREQ && (SA1100_H3100 || SA1100_H3600 || SA1100_H3800 || SA1100_LART || SA1100_PLEB || SA1100_BADGE4 || SA1100_HACKKIT) default y config CPU_FREQ_SA1110 @@ -681,7 +689,9 @@ source "drivers/block/Kconfig" source "drivers/acorn/block/Kconfig" -if ARCH_CLPS7500 || ARCH_IOP3XX || ARCH_IXP4XX || ARCH_L7200 || ARCH_LH7A40X || ARCH_PXA || ARCH_RPC || ARCH_S3C2410 || ARCH_SA1100 || ARCH_SHARK || FOOTBRIDGE +if PCMCIA || ARCH_CLPS7500 || ARCH_IOP3XX || ARCH_IXP4XX \ + || ARCH_L7200 || ARCH_LH7A40X || ARCH_PXA || ARCH_RPC \ + || ARCH_S3C2410 || ARCH_SA1100 || ARCH_SHARK || FOOTBRIDGE source "drivers/ide/Kconfig" endif diff --git a/arch/arm/boot/compressed/head-xscale.S b/arch/arm/boot/compressed/head-xscale.S index 665bd2c..d3fe253 100644 --- a/arch/arm/boot/compressed/head-xscale.S +++ b/arch/arm/boot/compressed/head-xscale.S @@ -47,3 +47,10 @@ __XScale_start: orr r7, r7, #(MACH_TYPE_GTWX5715 & 0xff00) #endif +#ifdef CONFIG_ARCH_IXP2000 + mov r1, #-1 + mov r0, #0xd6000000 + str r1, [r0, #0x14] + str r1, [r0, #0x18] +#endif + diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 2b4059d..5d92af9 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sat Mar 26 21:32:26 2005 +# Linux kernel version: 2.6.12-rc6-git3 +# Thu Jun 9 19:00:50 2005 # CONFIG_ARM=y CONFIG_MMU=y @@ -16,6 +16,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -34,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -109,7 +112,6 @@ CONFIG_CPU_ABRT_EV4=y CONFIG_CPU_CACHE_V4WB=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WB=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -122,6 +124,7 @@ CONFIG_FORCE_MAX_ZONEORDER=9 # Bus support # CONFIG_ISA=y +CONFIG_ISA_DMA_API=y # # PCCARD (PCMCIA/CardBus) support @@ -131,6 +134,7 @@ CONFIG_ISA=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_DISCONTIGMEM=y # CONFIG_LEDS is not set @@ -152,12 +156,14 @@ CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_SA1100=y # # Floating point emulation @@ -294,7 +300,6 @@ CONFIG_PARPORT_NOT_PC=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_DEV_XD is not set # CONFIG_PARIDE is not set # CONFIG_BLK_DEV_COW_COMMON is not set @@ -428,7 +433,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -526,6 +530,7 @@ CONFIG_IRDA_ULTRA=y # CONFIG_SMC_IRCC_FIR is not set # CONFIG_ALI_FIR is not set CONFIG_SA1100_FIR=y +# CONFIG_VIA_FIR is not set CONFIG_BT=m CONFIG_BT_L2CAP=m # CONFIG_BT_SCO is not set @@ -618,7 +623,6 @@ CONFIG_NET_WIRELESS=y # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -687,7 +691,6 @@ CONFIG_RTC=m # # TPM devices # -# CONFIG_TCG_TPM is not set # # I2C support @@ -736,6 +739,7 @@ CONFIG_I2C_ELEKTOR=m # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -747,6 +751,7 @@ CONFIG_I2C_ELEKTOR=m # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -871,7 +876,6 @@ CONFIG_USB_PRINTER=m # CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y -# CONFIG_USB_STORAGE_RW_DETECT is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set @@ -954,9 +958,11 @@ CONFIG_USB_USS720=m # CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y +# CONFIG_USB_SERIAL_AIRPRIME is not set CONFIG_USB_SERIAL_BELKIN=m CONFIG_USB_SERIAL_WHITEHEAT=m CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +# CONFIG_USB_SERIAL_CP2101 is not set # CONFIG_USB_SERIAL_CYPRESS_M8 is not set CONFIG_USB_SERIAL_EMPEG=m CONFIG_USB_SERIAL_FTDI_SIO=m @@ -985,6 +991,7 @@ CONFIG_USB_SERIAL_KEYSPAN=m # CONFIG_USB_SERIAL_KOBIL_SCT is not set CONFIG_USB_SERIAL_MCT_U232=m CONFIG_USB_SERIAL_PL2303=m +# CONFIG_USB_SERIAL_HP4X is not set # CONFIG_USB_SERIAL_SAFE is not set # CONFIG_USB_SERIAL_TI is not set CONFIG_USB_SERIAL_CYBERJACK=m diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig index b4e297d..b9de07d 100644 --- a/arch/arm/configs/h3600_defconfig +++ b/arch/arm/configs/h3600_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Mon Mar 28 00:02:26 2005 +# Linux kernel version: 2.6.12-rc4 +# Thu Jun 9 01:59:03 2005 # CONFIG_ARM=y CONFIG_MMU=y @@ -16,6 +16,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -33,6 +34,8 @@ CONFIG_KOBJECT_UEVENT=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -120,6 +123,7 @@ CONFIG_CPU_MINICACHE=y # Bus support # CONFIG_ISA=y +CONFIG_ISA_DMA_API=y # # PCCARD (PCMCIA/CardBus) support @@ -138,6 +142,7 @@ CONFIG_PCMCIA_SA1100=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_DISCONTIGMEM=y # CONFIG_LEDS is not set @@ -159,12 +164,13 @@ CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +CONFIG_CPU_FREQ_SA1100=y # # Floating point emulation @@ -298,7 +304,6 @@ CONFIG_MTD_SA1100=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_DEV_XD is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m @@ -379,7 +384,6 @@ CONFIG_NET=y # Networking options # # CONFIG_PACKET is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -476,6 +480,7 @@ CONFIG_IRCOMM=m # CONFIG_SMC_IRCC_FIR is not set # CONFIG_ALI_FIR is not set CONFIG_SA1100_FIR=m +# CONFIG_VIA_FIR is not set # CONFIG_BT is not set CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set @@ -647,7 +652,6 @@ CONFIG_LEGACY_PTY_COUNT=256 # # TPM devices # -# CONFIG_TCG_TPM is not set # # I2C support @@ -676,9 +680,11 @@ CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y CONFIG_FB_SOFT_CURSOR=y +# CONFIG_FB_MACMODES is not set # CONFIG_FB_MODE_HELPERS is not set # CONFIG_FB_TILEBLITTING is not set CONFIG_FB_SA1100=y +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # diff --git a/arch/arm/configs/hackkit_defconfig b/arch/arm/configs/hackkit_defconfig index 6987c8c..fb41a36 100644 --- a/arch/arm/configs/hackkit_defconfig +++ b/arch/arm/configs/hackkit_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Mon Mar 28 00:22:34 2005 +# Linux kernel version: 2.6.12-rc6-git3 +# Thu Jun 9 20:58:58 2005 # CONFIG_ARM=y CONFIG_MMU=y @@ -16,6 +16,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -34,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -109,7 +112,6 @@ CONFIG_CPU_ABRT_EV4=y CONFIG_CPU_CACHE_V4WB=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WB=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,6 +121,7 @@ CONFIG_CPU_MINICACHE=y # Bus support # CONFIG_ISA=y +CONFIG_ISA_DMA_API=y # # PCCARD (PCMCIA/CardBus) support @@ -128,6 +131,7 @@ CONFIG_ISA=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_DISCONTIGMEM=y CONFIG_LEDS=y @@ -151,12 +155,14 @@ CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_SA1100=y # # Floating point emulation @@ -280,7 +286,6 @@ CONFIG_MTD_CFI_UTIL=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_DEV_XD is not set # CONFIG_BLK_DEV_COW_COMMON is not set # CONFIG_BLK_DEV_LOOP is not set @@ -338,7 +343,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -484,7 +488,6 @@ CONFIG_SERIO=y CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -533,7 +536,6 @@ CONFIG_LEGACY_PTY_COUNT=256 # # TPM devices # -# CONFIG_TCG_TPM is not set # # I2C support diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 080df90..e14278d 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -17,8 +17,8 @@ #include <asm/glue.h> #include <asm/vfpmacros.h> -#include <asm/hardware.h> @ should be moved into entry-macro.S -#include <asm/arch/irqs.h> @ should be moved into entry-macro.S +#include <asm/hardware.h> /* should be moved into entry-macro.S */ +#include <asm/arch/irqs.h> /* should be moved into entry-macro.S */ #include <asm/arch/entry-macro.S> #include "entry-header.S" @@ -269,7 +269,7 @@ __pabt_svc: add r5, sp, #S_PC ldmia r7, {r2 - r4} @ Get USR pc, cpsr -#if __LINUX_ARM_ARCH__ < 6 +#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) @ make sure our user space atomic helper is aborted cmp r2, #VIRT_OFFSET bichs r3, r3, #PSR_Z_BIT @@ -505,9 +505,9 @@ ENTRY(__switch_to) mra r4, r5, acc0 stmia ip, {r4, r5} #endif -#ifdef CONFIG_HAS_TLS_REG +#if defined(CONFIG_HAS_TLS_REG) mcr p15, 0, r3, c13, c0, 3 @ set TLS register -#else +#elif !defined(CONFIG_TLS_REG_EMUL) mov r4, #0xffff0fff str r3, [r4, #-15] @ TLS val at 0xffff0ff0 #endif @@ -616,11 +616,17 @@ __kuser_helper_start: __kuser_cmpxchg: @ 0xffff0fc0 -#if __LINUX_ARM_ARCH__ < 6 +#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) -#ifdef CONFIG_SMP /* sanity check */ -#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?" -#endif + /* + * Poor you. No fast solution possible... + * The kernel itself must perform the operation. + * A special ghost syscall is used for that (see traps.c). + */ + swi #0x9ffff0 + mov pc, lr + +#elif __LINUX_ARM_ARCH__ < 6 /* * Theory of operation: @@ -690,11 +696,7 @@ __kuser_cmpxchg: @ 0xffff0fc0 __kuser_get_tls: @ 0xffff0fe0 -#ifndef CONFIG_HAS_TLS_REG - -#ifdef CONFIG_SMP /* sanity check */ -#error "CONFIG_SMP without CONFIG_HAS_TLS_REG is wrong" -#endif +#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL) ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0 mov pc, lr diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 171b3e8..4733877 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -19,6 +19,7 @@ #include <asm/procinfo.h> #include <asm/ptrace.h> #include <asm/constants.h> +#include <asm/thread_info.h> #include <asm/system.h> #define PROCINFO_MMUFLAGS 8 @@ -131,7 +132,7 @@ __switch_data: .long processor_id @ r4 .long __machine_arch_type @ r5 .long cr_alignment @ r6 - .long init_thread_union+8192 @ sp + .long init_thread_union + THREAD_START_SP @ sp /* * The following fragment of code is executed with the MMU on, and uses diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 26eacd3..8f146a4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -256,8 +256,6 @@ static unsigned long *thread_info_head; static unsigned int nr_thread_info; #define EXTRA_TASK_STRUCT 4 -#define ll_alloc_task_struct() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define ll_free_task_struct(p) free_pages((unsigned long)(p),1) struct thread_info *alloc_thread_info(struct task_struct *task) { @@ -274,17 +272,16 @@ struct thread_info *alloc_thread_info(struct task_struct *task) } if (!thread) - thread = ll_alloc_task_struct(); + thread = (struct thread_info *) + __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#ifdef CONFIG_MAGIC_SYSRQ +#ifdef CONFIG_DEBUG_STACK_USAGE /* * The stack must be cleared if you want SYSRQ-T to * give sensible stack usage information */ - if (thread) { - char *p = (char *)thread; - memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE); - } + if (thread) + memzero(thread, THREAD_SIZE); #endif return thread; } @@ -297,7 +294,7 @@ void free_thread_info(struct thread_info *thread) thread_info_head = p; nr_thread_info += 1; } else - ll_free_task_struct(thread); + free_pages((unsigned long)thread, THREAD_SIZE_ORDER); } /* @@ -350,7 +347,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, struct thread_info *thread = p->thread_info; struct pt_regs *childregs; - childregs = ((struct pt_regs *)((unsigned long)thread + THREAD_SIZE - 8)) - 1; + childregs = ((struct pt_regs *)((unsigned long)thread + THREAD_START_SP)) - 1; *childregs = *regs; childregs->ARM_r0 = 0; childregs->ARM_sp = stack_start; @@ -447,15 +444,17 @@ EXPORT_SYMBOL(kernel_thread); unsigned long get_wchan(struct task_struct *p) { unsigned long fp, lr; - unsigned long stack_page; + unsigned long stack_start, stack_end; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack_page = 4096 + (unsigned long)p->thread_info; + stack_start = (unsigned long)(p->thread_info + 1); + stack_end = ((unsigned long)p->thread_info) + THREAD_SIZE; + fp = thread_saved_fp(p); do { - if (fp < stack_page || fp > 4092+stack_page) + if (fp < stack_start || fp > stack_end) return 0; lr = pc_pointer (((unsigned long *)fp)[-1]); if (!in_sched_functions(lr)) diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index ef32577..f897ce2 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -302,7 +302,7 @@ long execve(const char *filename, char **argv, char **envp) "b ret_to_user" : : "r" (current_thread_info()), - "Ir" (THREAD_SIZE - 8 - sizeof(regs)), + "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) : "r0", "r1", "r2", "r3", "ip", "memory"); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3a001fe..45d2a03 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -218,7 +218,8 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) tsk->comm, tsk->pid, tsk->thread_info + 1); if (!user_mode(regs) || in_interrupt()) { - dump_mem("Stack: ", regs->ARM_sp, 8192+(unsigned long)tsk->thread_info); + dump_mem("Stack: ", regs->ARM_sp, + THREAD_SIZE + (unsigned long)tsk->thread_info); dump_backtrace(regs, tsk); dump_instr(regs); } @@ -450,9 +451,9 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) case NR(set_tls): thread->tp_value = regs->ARM_r0; -#ifdef CONFIG_HAS_TLS_REG +#if defined(CONFIG_HAS_TLS_REG) asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) ); -#else +#elif !defined(CONFIG_TLS_REG_EMUL) /* * User space must never try to access this directly. * Expect your app to break eventually if you do so. @@ -463,6 +464,55 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) #endif return 0; +#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG + /* + * Atomically store r1 in *r2 if *r2 is equal to r0 for user space. + * Return zero in r0 if *MEM was changed or non-zero if no exchange + * happened. Also set the user C flag accordingly. + * If access permissions have to be fixed up then non-zero is + * returned and the operation has to be re-attempted. + * + * *NOTE*: This is a ghost syscall private to the kernel. Only the + * __kuser_cmpxchg code in entry-armv.S should be aware of its + * existence. Don't ever use this from user code. + */ + case 0xfff0: + { + extern void do_DataAbort(unsigned long addr, unsigned int fsr, + struct pt_regs *regs); + unsigned long val; + unsigned long addr = regs->ARM_r2; + struct mm_struct *mm = current->mm; + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + + regs->ARM_cpsr &= ~PSR_C_BIT; + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + goto bad_access; + pmd = pmd_offset(pgd, addr); + if (!pmd_present(*pmd)) + goto bad_access; + pte = pte_offset_map(pmd, addr); + if (!pte_present(*pte) || !pte_write(*pte)) + goto bad_access; + val = *(unsigned long *)addr; + val -= regs->ARM_r0; + if (val == 0) { + *(unsigned long *)addr = regs->ARM_r1; + regs->ARM_cpsr |= PSR_C_BIT; + } + spin_unlock(&mm->page_table_lock); + return val; + + bad_access: + spin_unlock(&mm->page_table_lock); + /* simulate a read access fault */ + do_DataAbort(addr, 15 + (1 << 11), regs); + return -1; + } +#endif + default: /* Calls 9f00xx..9f07ff are defined to return -ENOSYS if not implemented, rather than raising SIGILL. This @@ -497,11 +547,14 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return 0; } -#if defined(CONFIG_CPU_32v6) && !defined(CONFIG_HAS_TLS_REG) +#ifdef CONFIG_TLS_REG_EMUL /* * We might be running on an ARMv6+ processor which should have the TLS - * register, but for some reason we can't use it and have to emulate it. + * register but for some reason we can't use it, or maybe an SMP system + * using a pre-ARMv6 processor (there are apparently a few prototypes like + * that in existence) and therefore access to that register must be + * emulated. */ static int get_tp_trap(struct pt_regs *regs, unsigned int instr) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index a39c6a4..ad2d66c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ #include <asm-generic/vmlinux.lds.h> #include <linux/config.h> +#include <asm/thread_info.h> OUTPUT_ARCH(arm) ENTRY(stext) @@ -103,7 +104,7 @@ SECTIONS __data_loc = ALIGN(4); /* location in binary */ . = DATAADDR; #else - . = ALIGN(8192); + . = ALIGN(THREAD_SIZE); __data_loc = .; #endif diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index 6d1d7c2..5e240e4 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -87,9 +87,9 @@ ENTRY(__raw_writesw) subs r2, r2, #2 orr ip, ip, r3, push_hbyte1 strh ip, [r0] - bpl 2b + bpl 1b -3: tst r2, #1 -2: movne ip, r3, lsr #8 + tst r2, #1 +3: movne ip, r3, lsr #8 strneh ip, [r0] mov pc, lr diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig index f6e6763..45c930c 100644 --- a/arch/arm/mach-clps711x/Kconfig +++ b/arch/arm/mach-clps711x/Kconfig @@ -10,6 +10,7 @@ config ARCH_AUTCPU12 config ARCH_CDB89712 bool "CDB89712" + select ISA help This is an evaluation board from Cirrus for the CS89712 processor. The board includes 2 serial ports, Ethernet, IRDA, and expansion @@ -26,6 +27,8 @@ config ARCH_CLEP7312 config ARCH_EDB7211 bool "EDB7211" + select ISA + select DISCONTIGMEM help Say Y here if you intend to run this kernel on a Cirrus Logic EDB-7211 evaluation board. diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index 1090c68..324d9ed 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -5,6 +5,9 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the CATS. @@ -13,6 +16,9 @@ config ARCH_CATS config ARCH_PERSONAL_SERVER bool "Compaq Personal Server" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI ---help--- Say Y here if you intend to run this kernel on the Compaq Personal Server. @@ -42,6 +48,9 @@ config ARCH_EBSA285_HOST bool "EBSA285 (host mode)" select ARCH_EBSA285 select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the EBSA285 card in host ("central function") mode. @@ -51,6 +60,9 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the Rebel.COM NetWinder. Information about this machine can be found at: diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index ec85813..cddd194 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -4,6 +4,7 @@ menu "IMX Implementations" config ARCH_MX1ADS bool "mx1ads" depends on ARCH_IMX + select ISA help Say Y here if you are using the Motorola MX1ADS board diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 94bcdb9..aa92e37 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -502,15 +502,6 @@ pci_set_dma_mask(struct pci_dev *dev, u64 mask) } int -pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - if (mask >= SZ_64M - 1 ) - return 0; - - return -EIO; -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) { if (mask >= SZ_64M - 1 ) @@ -520,7 +511,6 @@ pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) } EXPORT_SYMBOL(pci_set_dma_mask); -EXPORT_SYMBOL(pci_dac_set_dma_mask); EXPORT_SYMBOL(pci_set_consistent_dma_mask); EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 3f95223..6823ae2 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -304,6 +304,15 @@ static void __init mainstone_map_io(void) PWER = 0xC0000002; PRER = 0x00000002; PFER = 0x00000002; + /* for use I SRAM as framebuffer. */ + PSLR |= 0xF04; + PCFR = 0x66; + /* For Keypad wakeup. */ + KPC &=~KPC_ASACT; + KPC |=KPC_AS; + PKWR = 0x000FD000; + /* Need read PKWR back after set it. */ + PKWR; } MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)") diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c index 82a4bf3..9799fe8 100644 --- a/arch/arm/mach-pxa/pm.c +++ b/arch/arm/mach-pxa/pm.c @@ -29,9 +29,6 @@ */ #undef DEBUG -extern void pxa_cpu_suspend(void); -extern void pxa_cpu_resume(void); - #define SAVE(x) sleep_save[SLEEP_SAVE_##x] = x #define RESTORE(x) x = sleep_save[SLEEP_SAVE_##x] @@ -63,6 +60,12 @@ enum { SLEEP_SAVE_START = 0, SLEEP_SAVE_ICMR, SLEEP_SAVE_CKEN, +#ifdef CONFIG_PXA27x + SLEEP_SAVE_MDREFR, + SLEEP_SAVE_PWER, SLEEP_SAVE_PCFR, SLEEP_SAVE_PRER, + SLEEP_SAVE_PFER, SLEEP_SAVE_PKWR, +#endif + SLEEP_SAVE_CKSUM, SLEEP_SAVE_SIZE @@ -75,9 +78,7 @@ static int pxa_pm_enter(suspend_state_t state) unsigned long checksum = 0; struct timespec delta, rtc; int i; - - if (state != PM_SUSPEND_MEM) - return -EINVAL; + extern void pxa_cpu_pm_enter(suspend_state_t state); #ifdef CONFIG_IWMMXT /* force any iWMMXt context to ram **/ @@ -100,16 +101,17 @@ static int pxa_pm_enter(suspend_state_t state) SAVE(GAFR2_L); SAVE(GAFR2_U); #ifdef CONFIG_PXA27x + SAVE(MDREFR); SAVE(GPLR3); SAVE(GPDR3); SAVE(GRER3); SAVE(GFER3); SAVE(PGSR3); SAVE(GAFR3_L); SAVE(GAFR3_U); + SAVE(PWER); SAVE(PCFR); SAVE(PRER); + SAVE(PFER); SAVE(PKWR); #endif SAVE(ICMR); ICMR = 0; SAVE(CKEN); - CKEN = 0; - SAVE(PSTR); /* Note: wake up source are set up in each machine specific files */ @@ -123,16 +125,13 @@ static int pxa_pm_enter(suspend_state_t state) /* Clear sleep reset status */ RCSR = RCSR_SMR; - /* set resume return address */ - PSPR = virt_to_phys(pxa_cpu_resume); - /* before sleeping, calculate and save a checksum */ for (i = 0; i < SLEEP_SAVE_SIZE - 1; i++) checksum += sleep_save[i]; sleep_save[SLEEP_SAVE_CKSUM] = checksum; /* *** go zzz *** */ - pxa_cpu_suspend(); + pxa_cpu_pm_enter(state); /* after sleeping, validate the checksum */ checksum = 0; @@ -145,7 +144,7 @@ static int pxa_pm_enter(suspend_state_t state) LUB_HEXLED = 0xbadbadc5; #endif while (1) - pxa_cpu_suspend(); + pxa_cpu_pm_enter(state); } /* ensure not to come back here if it wasn't intended */ @@ -162,8 +161,11 @@ static int pxa_pm_enter(suspend_state_t state) RESTORE(PGSR0); RESTORE(PGSR1); RESTORE(PGSR2); #ifdef CONFIG_PXA27x + RESTORE(MDREFR); RESTORE(GAFR3_L); RESTORE(GAFR3_U); RESTORE_GPLEVEL(3); RESTORE(GPDR3); RESTORE(GRER3); RESTORE(GFER3); RESTORE(PGSR3); + RESTORE(PWER); RESTORE(PCFR); RESTORE(PRER); + RESTORE(PFER); RESTORE(PKWR); #endif PSSR = PSSR_RDH | PSSR_PH; @@ -197,7 +199,9 @@ unsigned long sleep_phys_sp(void *sp) */ static int pxa_pm_prepare(suspend_state_t state) { - return 0; + extern int pxa_cpu_pm_prepare(suspend_state_t state); + + return pxa_cpu_pm_prepare(state); } /* diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index e887b71..7869c3b 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -16,6 +16,7 @@ * initialization stuff for PXA machines which can be overridden later if * need be. */ +#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -102,3 +103,35 @@ unsigned int get_lcdclk_frequency_10khz(void) } EXPORT_SYMBOL(get_lcdclk_frequency_10khz); + +#ifdef CONFIG_PM + +int pxa_cpu_pm_prepare(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_MEM: + break; + default: + return -EINVAL; + } + + return 0; +} + +void pxa_cpu_pm_enter(suspend_state_t state) +{ + extern void pxa_cpu_suspend(unsigned int); + extern void pxa_cpu_resume(void); + + CKEN = 0; + + switch (state) { + case PM_SUSPEND_MEM: + /* set resume return address */ + PSPR = virt_to_phys(pxa_cpu_resume); + pxa_cpu_suspend(3); + break; + } +} + +#endif diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 7e863af..893964f 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -120,6 +120,42 @@ EXPORT_SYMBOL(get_clk_frequency_khz); EXPORT_SYMBOL(get_memclk_frequency_10khz); EXPORT_SYMBOL(get_lcdclk_frequency_10khz); +#ifdef CONFIG_PM + +int pxa_cpu_pm_prepare(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_MEM: + return 0; + default: + return -EINVAL; + } +} + +void pxa_cpu_pm_enter(suspend_state_t state) +{ + extern void pxa_cpu_standby(void); + extern void pxa_cpu_suspend(unsigned int); + extern void pxa_cpu_resume(void); + + CKEN = CKEN22_MEMC | CKEN9_OSTIMER; + + /* ensure voltage-change sequencer not initiated, which hangs */ + PCFR &= ~PCFR_FVC; + + /* Clear edge-detect status register. */ + PEDR = 0xDF12FE1B; + + switch (state) { + case PM_SUSPEND_MEM: + /* set resume return address */ + PSPR = virt_to_phys(pxa_cpu_resume); + pxa_cpu_suspend(3); + break; + } +} + +#endif /* * device registration specific to PXA27x. diff --git a/arch/arm/mach-s3c2410/clock.c b/arch/arm/mach-s3c2410/clock.c index e23f534..8d986b8 100644 --- a/arch/arm/mach-s3c2410/clock.c +++ b/arch/arm/mach-s3c2410/clock.c @@ -478,7 +478,7 @@ static int s3c2440_clk_add(struct sys_device *sysdev) { unsigned long upllcon = __raw_readl(S3C2410_UPLLCON); - s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate) * 2; + s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate); printk("S3C2440: Clock Support, UPLL %ld.%03ld MHz\n", print_mhz(s3c2440_clk_upll.rate)); diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index bc229fa..c7c2889 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -785,6 +785,10 @@ int s3c2410_dma_free(dmach_t channel, s3c2410_dma_client_t *client) chan->client = NULL; chan->in_use = 0; + if (chan->irq_claimed) + free_irq(chan->irq, (void *)chan); + chan->irq_claimed = 0; + local_irq_restore(flags); return 0; diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c index 9a8cc5a..d4c8281 100644 --- a/arch/arm/mach-s3c2410/s3c2440.c +++ b/arch/arm/mach-s3c2410/s3c2440.c @@ -192,9 +192,11 @@ void __init s3c2440_map_io(struct map_desc *mach_desc, int size) iotable_init(s3c2440_iodesc, ARRAY_SIZE(s3c2440_iodesc)); iotable_init(mach_desc, size); + /* rename any peripherals used differing from the s3c2410 */ - s3c_device_i2c.name = "s3c2440-i2c"; + s3c_device_i2c.name = "s3c2440-i2c"; + s3c_device_nand.name = "s3c2440-nand"; /* change irq for watchdog */ @@ -225,7 +227,7 @@ void __init s3c2440_init_clocks(int xtal) break; case S3C2440_CLKDIVN_HDIVN_2: - hdiv = 1; + hdiv = 2; break; case S3C2440_CLKDIVN_HDIVN_4_8: diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 50cde57..6923316 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -150,7 +150,7 @@ config SA1100_SSP config H3600_SLEEVE tristate "Compaq iPAQ Handheld sleeve support" - depends on SA1100_H3600 + depends on SA1100_H3100 || SA1100_H3600 help Choose this option to enable support for extension packs (sleeves) for the Compaq iPAQ H3XXX series of handheld computers. This option diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 27892e3..3fefb43 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -228,7 +228,6 @@ config CPU_SA1100 select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_TLB_V4WB - select CPU_MINICACHE # XScale config CPU_XSCALE @@ -239,7 +238,6 @@ config CPU_XSCALE select CPU_ABRT_EV5T select CPU_CACHE_VIVT select CPU_TLB_V4WBI - select CPU_MINICACHE # ARMv6 config CPU_V6 @@ -345,11 +343,6 @@ config CPU_TLB_V4WBI config CPU_TLB_V6 bool -config CPU_MINICACHE - bool - help - Processor has a minicache. - comment "Processor Features" config ARM_THUMB @@ -410,17 +403,30 @@ config CPU_BPREDICT_DISABLE help Say Y here to disable branch prediction. If unsure, say N. +config TLS_REG_EMUL + bool + default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3) + help + An SMP system using a pre-ARMv6 processor (there are apparently + a few prototypes like that in existence) and therefore access to + that required register must be emulated. + config HAS_TLS_REG bool - depends on CPU_32v6 && !CPU_32v5 && !CPU_32v4 && !CPU_32v3 - default y + depends on !TLS_REG_EMUL + default y if SMP || CPU_32v7 help This selects support for the CP15 thread register. - It is defined to be available on ARMv6 or later. However - if the kernel is configured to support multiple CPUs including - a pre-ARMv6 processors, or if a given ARMv6 processor doesn't - implement the thread register for some reason, then access to - this register from user space must be trapped and emulated. - If user space is relying on the __kuser_get_tls code then - there should not be any impact. + It is defined to be available on some ARMv6 processors (including + all SMP capable ARMv6's) or later processors. User space may + assume directly accessing that register and always obtain the + expected value only on ARMv7 and above. + +config NEEDS_SYSCALL_FOR_CMPXCHG + bool + default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3) + help + SMP on a pre-ARMv6 processor? Well OK then. + Forget about fast user space cmpxchg support. + It is just not possible. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ccf316c..59f47d4 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o -obj-$(CONFIG_CPU_MINICACHE) += minicache.o - obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o diff --git a/arch/arm/mm/copypage-v4mc.S b/arch/arm/mm/copypage-v4mc.S deleted file mode 100644 index 305af3d..0000000 --- a/arch/arm/mm/copypage-v4mc.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * linux/arch/arm/lib/copy_page-armv4mc.S - * - * Copyright (C) 1995-2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/constants.h> - - .text - .align 5 -/* - * ARMv4 mini-dcache optimised copy_user_page - * - * We flush the destination cache lines just before we write the data into the - * corresponding address. Since the Dcache is read-allocate, this removes the - * Dcache aliasing issue. The writes will be forwarded to the write buffer, - * and merged as appropriate. - * - * Note: We rely on all ARMv4 processors implementing the "invalidate D line" - * instruction. If your processor does not supply this, you have to write your - * own copy_user_page that does the right thing. - */ -ENTRY(v4_mc_copy_user_page) - stmfd sp!, {r4, lr} @ 2 - mov r4, r0 - mov r0, r1 - bl map_page_minicache - mov r1, #PAGE_SZ/64 @ 1 - ldmia r0!, {r2, r3, ip, lr} @ 4 -1: mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line - stmia r4!, {r2, r3, ip, lr} @ 4 - ldmia r0!, {r2, r3, ip, lr} @ 4+1 - stmia r4!, {r2, r3, ip, lr} @ 4 - ldmia r0!, {r2, r3, ip, lr} @ 4 - mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line - stmia r4!, {r2, r3, ip, lr} @ 4 - ldmia r0!, {r2, r3, ip, lr} @ 4 - subs r1, r1, #1 @ 1 - stmia r4!, {r2, r3, ip, lr} @ 4 - ldmneia r0!, {r2, r3, ip, lr} @ 4 - bne 1b @ 1 - ldmfd sp!, {r4, pc} @ 3 - - .align 5 -/* - * ARMv4 optimised clear_user_page - * - * Same story as above. - */ -ENTRY(v4_mc_clear_user_page) - str lr, [sp, #-4]! - mov r1, #PAGE_SZ/64 @ 1 - mov r2, #0 @ 1 - mov r3, #0 @ 1 - mov ip, #0 @ 1 - mov lr, #0 @ 1 -1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - subs r1, r1, #1 @ 1 - bne 1b @ 1 - ldr pc, [sp], #4 - - __INITDATA - - .type v4_mc_user_fns, #object -ENTRY(v4_mc_user_fns) - .long v4_mc_clear_user_page - .long v4_mc_copy_user_page - .size v4_mc_user_fns, . - v4_mc_user_fns diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c new file mode 100644 index 0000000..fc69dcc --- /dev/null +++ b/arch/arm/mm/copypage-v4mc.c @@ -0,0 +1,111 @@ +/* + * linux/arch/arm/lib/copypage-armv4mc.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently. + */ +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_CACHEABLE) + +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + +static DEFINE_SPINLOCK(minicache_lock); + +/* + * ARMv4 mini-dcache optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_page that does the right thing. + */ +static void __attribute__((naked)) +mc_copy_user_page(void *from, void *to) +{ + asm volatile( + "stmfd sp!, {r4, lr} @ 2\n\ + mov r4, %2 @ 1\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ +1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4+1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r4, r4, #1 @ 1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmneia %0!, {r2, r3, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64)); +} + +void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +{ + spin_lock(&minicache_lock); + + set_pte(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); + flush_tlb_kernel_page(0xffff8000); + + mc_copy_user_page((void *)0xffff8000, kto); + + spin_unlock(&minicache_lock); +} + +/* + * ARMv4 optimised clear_user_page + */ +void __attribute__((naked)) +v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) +{ + asm volatile( + "str lr, [sp, #-4]!\n\ + mov r1, %0 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r2, r3, ip, lr} @ 4\n\ + stmia r0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r2, r3, ip, lr} @ 4\n\ + stmia r0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + ldr pc, [sp], #4" + : + : "I" (PAGE_SIZE / 64)); +} + +struct cpu_user_fns v4_mc_user_fns __initdata = { + .cpu_clear_user_page = v4_mc_clear_user_page, + .cpu_copy_user_page = v4_mc_copy_user_page, +}; diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 694ac82..a8c0023 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -26,8 +26,8 @@ #define to_address (0xffffc000) #define to_pgprot PAGE_KERNEL -static pte_t *from_pte; -static pte_t *to_pte; +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + static DEFINE_SPINLOCK(v6_lock); #define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) @@ -74,8 +74,8 @@ void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vadd */ spin_lock(&v6_lock); - set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot)); - set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot)); + set_pte(TOP_PTE(from_address) + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot)); + set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot)); from = from_address + (offset << PAGE_SHIFT); to = to_address + (offset << PAGE_SHIFT); @@ -114,7 +114,7 @@ void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr) */ spin_lock(&v6_lock); - set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot)); + set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot)); flush_tlb_kernel_page(to); clear_page((void *)to); @@ -129,21 +129,6 @@ struct cpu_user_fns v6_user_fns __initdata = { static int __init v6_userpage_init(void) { if (cache_is_vipt_aliasing()) { - pgd_t *pgd; - pmd_t *pmd; - - pgd = pgd_offset_k(from_address); - pmd = pmd_alloc(&init_mm, pgd, from_address); - if (!pmd) - BUG(); - from_pte = pte_alloc_kernel(&init_mm, pmd, from_address); - if (!from_pte) - BUG(); - - to_pte = pte_alloc_kernel(&init_mm, pmd, to_address); - if (!to_pte) - BUG(); - cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing; cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing; } @@ -151,5 +136,4 @@ static int __init v6_userpage_init(void) return 0; } -__initcall(v6_userpage_init); - +core_initcall(v6_userpage_init); diff --git a/arch/arm/mm/copypage-xscale.S b/arch/arm/mm/copypage-xscale.S deleted file mode 100644 index bb27731..0000000 --- a/arch/arm/mm/copypage-xscale.S +++ /dev/null @@ -1,113 +0,0 @@ -/* - * linux/arch/arm/lib/copypage-xscale.S - * - * Copyright (C) 2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/constants.h> - -/* - * General note: - * We don't really want write-allocate cache behaviour for these functions - * since that will just eat through 8K of the cache. - */ - - .text - .align 5 -/* - * XScale optimised copy_user_page - * r0 = destination - * r1 = source - * r2 = virtual user address of ultimate destination page - * - * The source page may have some clean entries in the cache already, but we - * can safely ignore them - break_cow() will flush them out of the cache - * if we eventually end up using our copied page. - * - * What we could do is use the mini-cache to buffer reads from the source - * page. We rely on the mini-cache being smaller than one page, so we'll - * cycle through the complete cache anyway. - */ -ENTRY(xscale_mc_copy_user_page) - stmfd sp!, {r4, r5, lr} - mov r5, r0 - mov r0, r1 - bl map_page_minicache - mov r1, r5 - mov lr, #PAGE_SZ/64-1 - - /* - * Strangely enough, best performance is achieved - * when prefetching destination as well. (NP) - */ - pld [r0, #0] - pld [r0, #32] - pld [r1, #0] - pld [r1, #32] - -1: pld [r0, #64] - pld [r0, #96] - pld [r1, #64] - pld [r1, #96] - -2: ldrd r2, [r0], #8 - ldrd r4, [r0], #8 - mov ip, r1 - strd r2, [r1], #8 - ldrd r2, [r0], #8 - strd r4, [r1], #8 - ldrd r4, [r0], #8 - strd r2, [r1], #8 - strd r4, [r1], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - ldrd r2, [r0], #8 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - ldrd r4, [r0], #8 - mov ip, r1 - strd r2, [r1], #8 - ldrd r2, [r0], #8 - strd r4, [r1], #8 - ldrd r4, [r0], #8 - strd r2, [r1], #8 - strd r4, [r1], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - subs lr, lr, #1 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - bgt 1b - beq 2b - - ldmfd sp!, {r4, r5, pc} - - .align 5 -/* - * XScale optimised clear_user_page - * r0 = destination - * r1 = virtual user address of ultimate destination page - */ -ENTRY(xscale_mc_clear_user_page) - mov r1, #PAGE_SZ/32 - mov r2, #0 - mov r3, #0 -1: mov ip, r0 - strd r2, [r0], #8 - strd r2, [r0], #8 - strd r2, [r0], #8 - strd r2, [r0], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - subs r1, r1, #1 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - bne 1b - mov pc, lr - - __INITDATA - - .type xscale_mc_user_fns, #object -ENTRY(xscale_mc_user_fns) - .long xscale_mc_clear_user_page - .long xscale_mc_copy_user_page - .size xscale_mc_user_fns, . - xscale_mc_user_fns diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c new file mode 100644 index 0000000..42a6ee2 --- /dev/null +++ b/arch/arm/mm/copypage-xscale.c @@ -0,0 +1,131 @@ +/* + * linux/arch/arm/lib/copypage-xscale.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently. + */ +#define COPYPAGE_MINICACHE 0xffff8000 + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_CACHEABLE) + +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + +static DEFINE_SPINLOCK(minicache_lock); + +/* + * XScale mini-dcache optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + */ +static void __attribute__((naked)) +mc_copy_user_page(void *from, void *to) +{ + /* + * Strangely enough, best performance is achieved + * when prefetching destination as well. (NP) + */ + asm volatile( + "stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + pld [r0, #0] \n\ + pld [r0, #32] \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r0, #64] \n\ + pld [r0, #96] \n\ + pld [r1, #64] \n\ + pld [r1, #96] \n\ +2: ldrd r2, [r0], #8 \n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + ldrd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs lr, lr, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bgt 1b \n\ + beq 2b \n\ + ldmfd sp!, {r4, r5, pc} " + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); +} + +void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +{ + spin_lock(&minicache_lock); + + set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); + flush_tlb_kernel_page(COPYPAGE_MINICACHE); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + spin_unlock(&minicache_lock); +} + +/* + * XScale optimised clear_user_page + */ +void __attribute__((naked)) +xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) +{ + asm volatile( + "mov r1, %0 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mov ip, r0 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs r1, r1, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bne 1b \n\ + mov pc, lr" + : + : "I" (PAGE_SIZE / 32)); +} + +struct cpu_user_fns xscale_mc_user_fns __initdata = { + .cpu_clear_user_page = xscale_mc_clear_user_page, + .cpu_copy_user_page = xscale_mc_copy_user_page, +}; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index c6de48d..4085ed9 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -13,6 +13,29 @@ #include <asm/cacheflush.h> #include <asm/system.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_CPU_CACHE_VIPT +#define ALIAS_FLUSH_START 0xffff4000 + +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + +static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) +{ + unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + + set_pte(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL)); + flush_tlb_kernel_page(to); + + asm( "mcrr p15, 0, %1, %0, c14\n" + " mcrr p15, 0, %1, %0, c5\n" + : + : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES) + : "cc"); +} +#else +#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#endif static void __flush_dcache_page(struct address_space *mapping, struct page *page) { @@ -37,6 +60,18 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page return; /* + * This is a page cache page. If we have a VIPT cache, we + * only need to do one flush - which would be at the relevant + * userspace colour, which is congruent with page->index. + */ + if (cache_is_vipt()) { + if (cache_is_vipt_aliasing()) + flush_pfn_alias(page_to_pfn(page), + page->index << PAGE_CACHE_SHIFT); + return; + } + + /* * There are possible user space mappings of this page: * - VIVT cache: we need to also write back and invalidate all user * data in the current VM view associated with this page. @@ -57,8 +92,6 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page)); - if (cache_is_vipt()) - break; } flush_dcache_mmap_unlock(mapping); } diff --git a/arch/arm/mm/minicache.c b/arch/arm/mm/minicache.c deleted file mode 100644 index dedf2ab..0000000 --- a/arch/arm/mm/minicache.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * linux/arch/arm/mm/minicache.c - * - * Copyright (C) 2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This handles the mini data cache, as found on SA11x0 and XScale - * processors. When we copy a user page page, we map it in such a way - * that accesses to this page will not touch the main data cache, but - * will be cached in the mini data cache. This prevents us thrashing - * the main data cache on page faults. - */ -#include <linux/init.h> -#include <linux/mm.h> - -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> - -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. - */ -#define minicache_address (0xffff8000) -#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) - -static pte_t *minicache_pte; - -/* - * Note that this is intended to be called only from the copy_user_page - * asm code; anything else will require special locking to prevent the - * mini-cache space being re-used. (Note: probably preempt unsafe). - * - * We rely on the fact that the minicache is 2K, and we'll be pushing - * 4K of data through it, so we don't actually have to specifically - * flush the minicache when we change the mapping. - * - * Note also: assert(PAGE_OFFSET <= virt < high_memory). - * Unsafe: preempt, kmap. - */ -unsigned long map_page_minicache(unsigned long virt) -{ - set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot)); - flush_tlb_kernel_page(minicache_address); - - return minicache_address; -} - -static int __init minicache_init(void) -{ - pgd_t *pgd; - pmd_t *pmd; - - spin_lock(&init_mm.page_table_lock); - - pgd = pgd_offset_k(minicache_address); - pmd = pmd_alloc(&init_mm, pgd, minicache_address); - if (!pmd) - BUG(); - minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address); - if (!minicache_pte) - BUG(); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -core_initcall(minicache_init); diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index 585dfb8..2c2b93d 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -37,6 +37,8 @@ pgprot_t pgprot_kernel; EXPORT_SYMBOL(pgprot_kernel); +pmd_t *top_pmd; + struct cachepolicy { const char policy[16]; unsigned int cr_mask; @@ -142,6 +144,16 @@ __setup("noalign", noalign_setup); #define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) +static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt) +{ + return pmd_offset(pgd, virt); +} + +static inline pmd_t *pmd_off_k(unsigned long virt) +{ + return pmd_off(pgd_offset_k(virt), virt); +} + /* * need to get a 16k page for level 1 */ @@ -220,7 +232,7 @@ void free_pgd_slow(pgd_t *pgd) return; /* pgd is always present and good */ - pmd = (pmd_t *)pgd; + pmd = pmd_off(pgd, 0); if (pmd_none(*pmd)) goto free; if (pmd_bad(*pmd)) { @@ -246,9 +258,8 @@ free: static inline void alloc_init_section(unsigned long virt, unsigned long phys, int prot) { - pmd_t *pmdp; + pmd_t *pmdp = pmd_off_k(virt); - pmdp = pmd_offset(pgd_offset_k(virt), virt); if (virt & (1 << 20)) pmdp++; @@ -283,11 +294,9 @@ alloc_init_supersection(unsigned long virt, unsigned long phys, int prot) static inline void alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot) { - pmd_t *pmdp; + pmd_t *pmdp = pmd_off_k(virt); pte_t *ptep; - pmdp = pmd_offset(pgd_offset_k(virt), virt); - if (pmd_none(*pmdp)) { unsigned long pmdval; ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * @@ -310,7 +319,7 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg */ static inline void clear_mapping(unsigned long virt) { - pmd_clear(pmd_offset(pgd_offset_k(virt), virt)); + pmd_clear(pmd_off_k(virt)); } struct mem_types { @@ -578,7 +587,7 @@ void setup_mm_for_reboot(char mode) PMD_TYPE_SECT; if (cpu_arch <= CPU_ARCH_ARMv5) pmdval |= PMD_BIT4; - pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT); + pmd = pmd_off(pgd, i << PGDIR_SHIFT); pmd[0] = __pmd(pmdval); pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); flush_pmd_entry(pmd); @@ -675,6 +684,8 @@ void __init memtable_init(struct meminfo *mi) flush_cache_all(); flush_tlb_all(); + + top_pmd = pmd_off_k(0xffff0000); } /* diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 3955de5..6caed90 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -89,6 +89,10 @@ config PAGESIZE_16 machine with 4MB of memory. endmenu +config ISA_DMA_API + bool + default y + menu "General setup" # Compressed boot loader in ROM. Yes, we really want to ask about diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 134aec1..b5f83e9 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -54,7 +54,7 @@ asmlinkage void ret_from_fork(void); void default_idle(void) { while(1) { - if (need_resched()) { + if (!need_resched()) { local_irq_enable(); __asm__("sleep"); local_irq_disable(); diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 99b4f29..dfd904f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -183,7 +183,7 @@ config M386 - "Winchip-C6" for original IDT Winchip. - "Winchip-2" for IDT Winchip 2. - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "MediaGX/Geode" for Cyrix MediaGX aka Geode. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). @@ -311,12 +311,10 @@ config MWINCHIP3D stores for this CPU, which can increase performance of some operations. -config MGEODE - bool "MediaGX/Geode" +config MGEODEGX1 + bool "GeodeGX1" help - Select this for a Cyrix MediaGX aka Geode chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. + Select this for a Geode GX1 (Cyrix MediaGX) chip. config MCYRIXIII bool "CyrixIII/VIA-C3" @@ -368,7 +366,7 @@ config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK @@ -387,7 +385,7 @@ config GENERIC_CALIBRATE_DELAY config X86_PPRO_FENCE bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODE + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 default y config X86_F00F_BUG @@ -417,7 +415,7 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODE + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC @@ -442,7 +440,7 @@ config X86_USE_3DNOW config X86_OOSTORE bool - depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MGEODE) && MTRR + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR default y config HPET_TIMER @@ -578,7 +576,7 @@ config X86_VISWS_APIC config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODE) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ default y config X86_MCE @@ -1165,7 +1163,7 @@ config PCI_DIRECT config PCI_MMCONFIG bool - depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI)) + depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) select ACPI_BOOT default y @@ -1173,6 +1171,10 @@ source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" +config ISA_DMA_API + bool + default y + config ISA bool "ISA support" depends on !(X86_VOYAGER || X86_VISWS) diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 04783ce..1c36ca3 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -14,7 +14,7 @@ # 19990713 Artur Skawina <skawina@geocities.com> # Added '-march' and '-mpreferred-stack-boundary' support # -# Kianusch Sayah Karadji <kianusch@sk-tech.net> +# 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> # Added support for GEODE CPU LDFLAGS := -m elf_i386 @@ -54,8 +54,8 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 -# MediaGX aka Geode support -cflags-$(CONFIG_MGEODE) += $(call cc-option,-march=pentium-mmx,-march=i586) +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) # -mregparm=3 works ok on gcc-3.0 and later # diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S index ba9fe14d..011b7a4 100644 --- a/arch/i386/boot/bootsect.S +++ b/arch/i386/boot/bootsect.S @@ -83,7 +83,7 @@ bugger_off_msg: .ascii "\n" .ascii "Remove disk and press any key to reboot . . .\r\n" .byte 0 - + # Kernel attributes; used by setup diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 925d3f5..0587477 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -1924,36 +1924,36 @@ skip10: movb %ah, %al ret store_edid: - pushw %es # just save all registers - pushw %ax + pushw %es # just save all registers + pushw %ax pushw %bx pushw %cx pushw %dx pushw %di - pushw %fs + pushw %fs popw %es movl $0x13131313, %eax # memset block with 0x13 movw $32, %cx movw $0x140, %di cld - rep - stosl + rep + stosl - movw $0x4f15, %ax # do VBE/DDC + movw $0x4f15, %ax # do VBE/DDC movw $0x01, %bx movw $0x00, %cx movw $0x01, %dx movw $0x140, %di - int $0x10 + int $0x10 - popw %di # restore all registers + popw %di # restore all registers popw %dx popw %cx popw %bx popw %ax - popw %es + popw %es ret # VIDEO_SELECT-only variables diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 0fbcfe0..51ecd51 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_SCx200) += scx200.o # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) -targets += vsyscall.lds +targets += vsyscall-note.o vsyscall.lds # The DSO images are built using a special linker script. quiet_cmd_syscall = SYSCALL $@ diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 53eb5cf..848bb97 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -650,7 +650,7 @@ acpi_find_rsdp (void) */ rsdp_phys = acpi_scan_rsdp (0, 0x400); if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); + rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); return rsdp_phys; } diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 45641a8..0ff65ab 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1222,6 +1222,7 @@ static int suspend(int vetoable) save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); + ignore_normal_resume = 1; restore_processor_state(); local_irq_disable(); @@ -1229,7 +1230,6 @@ static int suspend(int vetoable) spin_lock(&i8253_lock); reinit_timer(); set_time(); - ignore_normal_resume = 1; spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 16dbc41..73aeaf5 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -24,9 +24,6 @@ __asm__(".align 4\nvide: ret"); static void __init init_amd(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP - int cpu = c == &boot_cpu_data ? 0 : c - cpu_data; -#endif u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); int r; @@ -198,14 +195,19 @@ static void __init init_amd(struct cpuinfo_x86 *c) c->x86_num_cores = 1; } -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_X86_HT /* * On a AMD dual core setup the lower bits of the APIC id * distingush the cores. Assumes number of cores is a power * of two. */ if (c->x86_num_cores > 1) { - cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); + int cpu = smp_processor_id(); + unsigned bits = 0; + while ((1 << bits) < c->x86_num_cores) + bits++; + cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); + phys_proc_id[cpu] >>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", cpu, c->x86_num_cores, cpu_core_id[cpu]); } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 6be0310..d199e52 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -243,6 +243,10 @@ static void __init early_cpu_detect(void) } early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; +#endif } void __init generic_identify(struct cpuinfo_x86 * c) diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index f25ffd7..0f1eb50 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -23,7 +23,7 @@ config X86_ACPI_CPUFREQ If in doubt, say N. config ELAN_CPUFREQ - tristate "AMD Elan" + tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE depends on X86_ELAN ---help--- @@ -38,6 +38,18 @@ config ELAN_CPUFREQ If in doubt, say N. +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + + config X86_POWERNOW_K6 tristate "AMD Mobile K6-2/K6-3 PowerNow!" select CPU_FREQ_TABLE diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index a922e97..2e894f1 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index ab0f9f5..04e3563d 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,6 +29,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/pci.h> #include <asm/msr.h> #include <asm/timex.h> @@ -119,7 +120,13 @@ static int longhaul_get_cpu_mult(void) static void do_powersaver(union msr_longhaul *longhaul, unsigned int clock_ratio_index) { + struct pci_dev *dev; + unsigned long flags; + unsigned int tmp_mask; int version; + int i; + u16 pci_cmd; + u16 cmd_state[64]; switch (cpu_model) { case CPU_EZRA_T: @@ -137,17 +144,58 @@ static void do_powersaver(union msr_longhaul *longhaul, longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; longhaul->bits.EnableSoftBusRatio = 1; longhaul->bits.RevisionKey = 0; - local_irq_disable(); - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); + + preempt_disable(); + local_irq_save(flags); + + /* + * get current pci bus master state for all devices + * and clear bus master bit + */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_read_config_word(dev, PCI_COMMAND, &pci_cmd); + cmd_state[i++] = pci_cmd; + pci_cmd &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + + tmp_mask=inb(0x21); /* works on C3. save mask. */ + outb(0xFE,0x21); /* TMR0 only */ + outb(0xFF,0x80); /* delay */ + local_irq_enable(); + + __hlt(); + wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); __hlt(); + local_irq_disable(); + + outb(tmp_mask,0x21); /* restore mask */ + + /* restore pci bus master state for all devices */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_cmd = cmd_state[i++]; + pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + local_irq_restore(flags); + preempt_enable(); + + /* disable bus ratio bit */ rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); longhaul->bits.EnableSoftBusRatio = 0; longhaul->bits.RevisionKey = version; - local_irq_disable(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - local_irq_enable(); } /** @@ -578,7 +626,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_setup_voltagescaling(); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; /* nsec */ policy->cur = calc_speed(longhaul_get_cpu_mult()); ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 913f652..5c53006 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -23,6 +23,7 @@ #include <linux/dmi.h> #include <asm/msr.h> +#include <asm/timer.h> #include <asm/timex.h> #include <asm/io.h> #include <asm/system.h> @@ -586,13 +587,17 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - /* A K7 with powernow technology is set to max frequency by BIOS */ - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID]; + /* recalibrate cpu_khz */ + result = recalibrate_cpu_khz(); + if (result) + return result; + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; if (!fsb) { printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; } - dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000); + dprintk("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index a65ff7e..10cc096 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -4,7 +4,7 @@ * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html * - * Support : paul.devriendt@amd.com + * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs @@ -15,12 +15,13 @@ * * Valuable input gratefully received from Dave Jones, Pavel Machek, * Dominik Brodowski, and others. + * Originally developed by Paul Devriendt. * Processor information obtained from Chapter 9 (Power and Thermal Management) * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" available for download from www.amd.com * * Tables for specific CPUs can be infrerred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf */ #include <linux/kernel.h> @@ -30,6 +31,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/cpumask.h> #include <asm/msr.h> #include <asm/io.h> @@ -42,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.00.09e" +#define VERSION "version 1.40.2" #include "powernow-k8.h" /* serialize freq changes */ @@ -50,6 +52,10 @@ static DECLARE_MUTEX(fidvid_sem); static struct powernow_k8_data *powernow_data[NR_CPUS]; +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + /* Return a frequency in MHz, given an input fid */ static u32 find_freq_from_fid(u32 fid) { @@ -274,11 +280,18 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid { u32 rvosteps = data->rvo; u32 savefid = data->currfid; + u32 maxvid, lo; dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + while (data->currvid > reqvid) { dprintk("ph1: curr 0x%x, req vid 0x%x\n", data->currvid, reqvid); @@ -286,8 +299,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid return 1; } - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { - if (data->currvid == 0) { + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { rvosteps = 0; } else { dprintk("ph1: changing vid for rvo, req 0x%x\n", @@ -671,7 +684,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_ERR PFX "BIOS error - no PSB\n"); + printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -695,7 +708,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) struct cpufreq_frequency_table *powernow_table; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed\n"); + dprintk("register performance failed: bad ACPI data\n"); return -EIO; } @@ -746,22 +759,23 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) continue; } - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, ignore this - * one... - */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); - goto err_out_mem; - } - - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } else - cntlofreq = i; + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this + * one... + */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + goto err_out_mem; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; } if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { @@ -816,7 +830,7 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde { u32 fid; u32 vid; - int res; + int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); @@ -841,7 +855,8 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde } if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk("ignoring illegal change in lo freq table-%x to 0x%x\n", + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", data->currfid, fid); return 1; } @@ -850,18 +865,20 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde smp_processor_id(), fid, vid); freqs.cpu = data->cpu; - freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } - down(&fidvid_sem); res = transition_fid_vid(data, fid, vid); - up(&fidvid_sem); freqs.new = find_khz_freq_from_fid(data->currfid); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } return res; } @@ -874,6 +891,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi u32 checkvid = data->currvid; unsigned int newstate; int ret = -EIO; + int i; /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -902,22 +920,41 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi data->currfid, data->currvid); if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_ERR PFX - "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); } if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; + down(&fidvid_sem); + + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + /* make sure the sibling is initialized */ + if (!powernow_data[i]) { + ret = 0; + up(&fidvid_sem); + goto err_out; + } + } + powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; + up(&fidvid_sem); goto err_out; } + /* Update all the fid/vids of our siblings */ + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + powernow_data[i]->currvid = data->currvid; + powernow_data[i]->currfid = data->currfid; + } + up(&fidvid_sem); + pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -962,7 +999,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) */ if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { - printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n"); + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); kfree(data); return -ENODEV; } @@ -1003,6 +1040,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) schedule(); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; + pol->cpus = cpu_core_map[pol->cpu]; /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ @@ -1069,7 +1107,7 @@ static unsigned int powernowk8_get (unsigned int cpu) return 0; } preempt_disable(); - + if (query_current_values_with_pending_wait(data)) goto out; @@ -1127,9 +1165,10 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>"); +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com."); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); late_initcall(powernowk8_init); module_exit(powernowk8_exit); + diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 63ebc84..9ed5bf2 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -174,3 +174,18 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); + +#ifndef for_each_cpu_mask +#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++) +#endif + +#ifdef CONFIG_SMP +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ +} +#else +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ + cpu_set(0, cpu_sharedcore_mask[0]); +} +#endif diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 0000000..ef457d5 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,186 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young <sean@mess.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/delay.h> +#include <linux/cpufreq.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + return cpufreq_register_driver(&sc520_freq_driver); +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young <sean@mess.org>"); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 07d5612..7dcbf70 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -54,6 +54,8 @@ enum { CPU_DOTHAN_A1, CPU_DOTHAN_A2, CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, }; static const struct cpu_id cpu_ids[] = { @@ -61,6 +63,8 @@ static const struct cpu_id cpu_ids[] = { [CPU_DOTHAN_A1] = { 6, 13, 1 }, [CPU_DOTHAN_A2] = { 6, 13, 2 }, [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, }; #define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) @@ -226,6 +230,8 @@ static struct cpu_model models[] = { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, { NULL, } }; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 8ba430a..d368b3f 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -336,7 +336,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (!prev_speed) return -EIO; - dprintk("previous seped is %u\n", prev_speed); + dprintk("previous speed is %u\n", prev_speed); local_irq_save(flags); @@ -348,7 +348,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("low seped is %u\n", *low_speed); + dprintk("low speed is %u\n", *low_speed); /* switch to high state */ set_state(SPEEDSTEP_HIGH); @@ -358,7 +358,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("high seped is %u\n", *high_speed); + dprintk("high speed is %u\n", *high_speed); if (*low_speed == *high_speed) { ret = -ENODEV; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 79440b3..b25fb6b 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -357,6 +357,9 @@ static int __init speedstep_init(void) case SPEEDSTEP_PROCESSOR_PIII_C: case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: break; + case SPEEDSTEP_PROCESSOR_P4M: + printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n"); + break; default: speedstep_processor = 0; } diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index aeb5b4e..a710dc4 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -118,7 +118,7 @@ struct _cpuid4_info { }; #define MAX_CACHE_LEAVES 4 -static unsigned short __devinitdata num_cache_leaves; +static unsigned short num_cache_leaves; static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 933b0dd..9027a98 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -218,12 +218,12 @@ typedef struct { mtrr_type type; } arr_state_t; -static arr_state_t arr_state[8] __initdata = { +static arr_state_t arr_state[8] __devinitdata = { {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} }; -static unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 }; +static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 }; static void cyrix_set_all(void) { diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 14ec354..903190a 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -169,10 +169,6 @@ EXPORT_SYMBOL(rtc_lock); EXPORT_SYMBOL_GPL(set_nmi_callback); EXPORT_SYMBOL_GPL(unset_nmi_callback); -#undef memcmp -extern int memcmp(const void *,const void *,__kernel_size_t); -EXPORT_SYMBOL(memcmp); - EXPORT_SYMBOL(register_die_notifier); #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 6716816..59ff9b4 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -217,6 +217,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) *tos &= ~(TF_MASK | IF_MASK); *tos |= kprobe_old_eflags; break; + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + regs->eflags &= ~TF_MASK; + /* eip is already adjusted, no more changes required*/ + return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 85bd56d..96e3ea6 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -400,11 +400,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, int err; childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -415,7 +410,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, * "struct pt_regs" is possible, but they may contain the * completely wrong values. */ - p->thread.esp0 = (unsigned long) (childregs+1) - 8; + childregs = (struct pt_regs *) ((unsigned long) childregs - 8); + *childregs = *regs; + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); p->thread.eip = (unsigned long) ret_from_fork; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e8c965c..e34f651 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -683,24 +683,18 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) /* do the secure computing check first */ secure_computing(regs->orig_eax); - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->orig_eax, - regs->ebx, regs->ecx, - regs->edx, regs->esi); - else - audit_syscall_exit(current, regs->eax); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); if (!(current->ptrace & PT_PTRACED)) - return; + goto out; /* Fake a debug trap */ if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(current, regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ @@ -715,4 +709,9 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, + regs->ebx, regs->ecx, regs->edx, regs->esi); + } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 945ec73..2bfbdde 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1502,11 +1502,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_ACPI_BOOT /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); acpi_boot_init(); +#endif #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index cbea7ac..bc1bb69 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -888,6 +888,7 @@ void *xquad_portio; cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_core_map); static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -1073,8 +1074,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(cpu, cpu_sibling_map[cpu]); } - if (siblings != smp_num_siblings) + if (siblings != smp_num_siblings) { printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); + smp_num_siblings = siblings; + } if (c->x86_num_cores > 1) { for (i = 0; i < NR_CPUS; i++) { diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index f7f9000..8e20121 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -6,6 +6,7 @@ #include <linux/timex.h> #include <linux/errno.h> #include <linux/jiffies.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/timer.h> @@ -24,7 +25,7 @@ #define CALIBRATE_TIME (5 * 1000020/HZ) -unsigned long __init calibrate_tsc(void) +unsigned long calibrate_tsc(void) { mach_prepare_counter(); @@ -139,7 +140,7 @@ bad_calibration: #endif /* calculate cpu_khz */ -void __init init_cpu_khz(void) +void init_cpu_khz(void) { if (cpu_has_tsc) { unsigned long tsc_quotient = calibrate_tsc(); @@ -158,3 +159,4 @@ void __init init_cpu_khz(void) } } } + diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 7926d96..180444d 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -320,6 +320,26 @@ core_initcall(cpufreq_tsc); static inline void cpufreq_delayed_get(void) { return; } #endif +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + init_cpu_khz(); + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, + cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(recalibrate_cpu_khz); + static void mark_offset_tsc(void) { unsigned long lost,delay; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 903d739..a6e0ddd 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -97,7 +97,6 @@ static void ack_vic_irq(unsigned int irq); static void vic_enable_cpi(void); static void do_boot_cpu(__u8 cpuid); static void do_quad_bootstrap(void); -static inline void wrapper_smp_local_timer_interrupt(struct pt_regs *); int hard_smp_processor_id(void); @@ -126,6 +125,14 @@ send_QIC_CPI(__u32 cpuset, __u8 cpi) } static inline void +wrapper_smp_local_timer_interrupt(struct pt_regs *regs) +{ + irq_enter(); + smp_local_timer_interrupt(regs); + irq_exit(); +} + +static inline void send_one_CPI(__u8 cpu, __u8 cpi) { if(voyager_quad_processors & (1<<cpu)) @@ -1249,14 +1256,6 @@ smp_vic_timer_interrupt(struct pt_regs *regs) smp_local_timer_interrupt(regs); } -static inline void -wrapper_smp_local_timer_interrupt(struct pt_regs *regs) -{ - irq_enter(); - smp_local_timer_interrupt(regs); - irq_exit(); -} - /* local (per CPU) timer interrupt. It does both profiling and * process statistics/rescheduling. * diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index db06f73..ab54279 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -238,19 +238,21 @@ void iounmap(volatile void __iomem *addr) addr < phys_to_virt(ISA_END_ADDRESS)) return; - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); + write_lock(&vmlist_lock); + p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); if (!p) { - printk("__iounmap: bad address %p\n", addr); - return; + printk("iounmap: bad address %p\n", addr); + goto out_unlock; } if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { - /* p->size includes the guard page, but cpa doesn't like that */ change_page_attr(virt_to_page(__va(p->phys_addr)), p->size >> PAGE_SHIFT, PAGE_KERNEL); global_flush_tlb(); } +out_unlock: + write_unlock(&vmlist_lock); kfree(p); } diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index be52c5a..8e8e895 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci #define MAX_PCIEROOT 6 static int quirk_aspm_offset[MAX_PCIEROOT << 3]; -#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b) +#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7)) static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) { diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index d6598da..da21b1d 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -1029,7 +1029,6 @@ void pcibios_penalize_isa_irq(int irq) static int pirq_enable_irq(struct pci_dev *dev) { u8 pin; - extern int via_interrupt_line_quirk; struct pci_dev *temp_dev; pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); @@ -1084,10 +1083,6 @@ static int pirq_enable_irq(struct pci_dev *dev) printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", 'A' + pin, pci_name(dev), msg); } - /* VIA bridges use interrupt line for apic/pci steering across - the V-Link */ - else if (via_interrupt_line_quirk) - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15); return 0; } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 468dbe8..3ad2c4a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -46,6 +46,10 @@ config GENERIC_IOMAP bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + choice prompt "System type" default IA64_GENERIC @@ -217,6 +221,16 @@ config IA64_SGI_SN_SIM If you are compiling a kernel that will run under SGI's IA-64 simulator (Medusa) then say Y, otherwise say N. +config IA64_SGI_SN_XP + tristate "Support communication between SGI SSIs" + depends on MSPEC + help + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + config FORCE_MAX_ZONEORDER int default "18" @@ -261,6 +275,15 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + default off + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here. + config PREEMPT bool "Preemptible Kernel" help diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 99830e8..9086b78 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc2 -# Sat Jan 22 11:17:02 2005 +# Linux kernel version: 2.6.12-rc3 +# Tue May 3 15:55:04 2005 # # @@ -10,6 +10,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -21,24 +22,27 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=20 CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -85,6 +89,7 @@ CONFIG_FORCE_MAX_ZONEORDER=18 CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y +# CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_HAVE_DEC_LOCK=y CONFIG_IA32_SUPPORT=y @@ -135,6 +140,7 @@ CONFIG_PCI_DOMAINS=y # CONFIG_PCI_MSI is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCI Hotplug Support @@ -152,10 +158,6 @@ CONFIG_HOTPLUG_PCI_ACPI=m # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # Device Drivers # @@ -195,9 +197,10 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set -CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set @@ -313,7 +316,6 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -325,7 +327,6 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_ISP is not set CONFIG_SCSI_QLOGIC_FC=y # CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set CONFIG_SCSI_QLOGIC_1280=y @@ -336,6 +337,7 @@ CONFIG_SCSI_QLA22XX=m CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -358,6 +360,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support @@ -386,7 +389,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK_DEV=y CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -446,7 +448,6 @@ CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set # # ARCnet devices @@ -484,7 +485,6 @@ CONFIG_NET_PCI=y # CONFIG_DGRS is not set CONFIG_EEPRO100=m CONFIG_E100=m -# CONFIG_E100_NAPI is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -566,25 +566,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -CONFIG_GAMEPORT=m -CONFIG_SOUND_GAMEPORT=m -# CONFIG_GAMEPORT_NS558 is not set -# CONFIG_GAMEPORT_L4 is not set -# CONFIG_GAMEPORT_EMU10K1 is not set -# CONFIG_GAMEPORT_VORTEX is not set -# CONFIG_GAMEPORT_FM801 is not set -# CONFIG_GAMEPORT_CS461X is not set -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set - -# # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y @@ -602,6 +583,24 @@ CONFIG_MOUSE_PS2=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +CONFIG_GAMEPORT=m +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_VORTEX is not set +# CONFIG_GAMEPORT_FM801 is not set +# CONFIG_GAMEPORT_CS461X is not set +CONFIG_SOUND_GAMEPORT=m + +# # Character devices # CONFIG_VT=y @@ -615,6 +614,8 @@ CONFIG_SERIAL_NONSTANDARD=y # CONFIG_SYNCLINK is not set # CONFIG_SYNCLINKMP is not set # CONFIG_N_HDLC is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set # CONFIG_STALDRV is not set # @@ -635,6 +636,7 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -670,6 +672,12 @@ CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set CONFIG_HPET_MMAP=y CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -705,7 +713,6 @@ CONFIG_MAX_RAW_DEVS=256 # CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -715,6 +722,8 @@ CONFIG_DUMMY_CONSOLE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -726,8 +735,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -736,6 +743,8 @@ CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=m +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set @@ -751,12 +760,11 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_RW_DETECT is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set # CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set @@ -800,6 +808,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_MON is not set # # USB port drivers @@ -824,6 +833,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_TEST is not set # @@ -867,7 +877,12 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y + +# +# XFS support +# CONFIG_XFS_FS=y +CONFIG_XFS_EXPORT=y # CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set # CONFIG_XFS_SECURITY is not set @@ -945,7 +960,7 @@ CONFIG_NFSD_V4=y CONFIG_NFSD_TCP=y CONFIG_LOCKD=m CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=m +CONFIG_EXPORTFS=y CONFIG_SUNRPC=m CONFIG_SUNRPC_GSS=m CONFIG_RPCSEC_GSS_KRB5=m @@ -1042,8 +1057,10 @@ CONFIG_GENERIC_IRQ_PROBE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=20 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1077,6 +1094,7 @@ CONFIG_CRYPTO_MD5=m # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_TWOFISH is not set diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 6a8fcba..b8db6e3 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1944,43 +1944,17 @@ sba_connect_bus(struct pci_bus *bus) static void __init sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) { - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *obj; - acpi_handle phandle; unsigned int node; + int pxm; ioc->node = MAX_NUMNODES; - /* - * Check for a _PXM on this node first. We don't typically see - * one here, so we'll end up getting it from the parent. - */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) { - if (ACPI_FAILURE(acpi_get_parent(handle, &phandle))) - return; - - /* Reset the acpi buffer */ - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; - - if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL, - &buffer))) - return; - } + pxm = acpi_get_pxm(handle); - if (!buffer.length || !buffer.pointer) + if (pxm < 0) return; - obj = buffer.pointer; - - if (obj->type != ACPI_TYPE_INTEGER || - obj->integer.value >= MAX_PXM_DOMAINS) { - acpi_os_free(buffer.pointer); - return; - } - - node = pxm_to_nid_map[obj->integer.value]; - acpi_os_free(buffer.pointer); + node = pxm_to_nid_map[pxm]; if (node >= MAX_NUMNODES || !node_online(node)) return; diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c index 9845dab..164b211 100644 --- a/arch/ia64/ia32/ia32_ioctl.c +++ b/arch/ia64/ia32/ia32_ioctl.c @@ -13,7 +13,6 @@ #define INCLUDES #include "compat_ioctl.c" -#include <asm/ioctl32.h> #define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 247a21c..c1e20d6 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -2427,7 +2427,7 @@ sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents, { struct epoll_event *events64 = NULL; mm_segment_t old_fs = get_fs(); - int error, numevents, size; + int numevents, size; int evt_idx; int do_free_pages = 0; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index a8e99c5..72dfd9e 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -779,7 +779,7 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) union acpi_object *obj; struct acpi_table_iosapic *iosapic; unsigned int gsi_base; - int node; + int pxm, node; /* Only care about objects w/ a method that returns the MADT */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) @@ -805,29 +805,16 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) gsi_base = iosapic->global_irq_base; acpi_os_free(buffer.pointer); - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; /* - * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell + * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell * us which node to associate this with. */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) - return AE_OK; - - if (!buffer.length || !buffer.pointer) - return AE_OK; - - obj = buffer.pointer; - - if (obj->type != ACPI_TYPE_INTEGER || - obj->integer.value >= MAX_PXM_DOMAINS) { - acpi_os_free(buffer.pointer); + pxm = acpi_get_pxm(handle); + if (pxm < 0) return AE_OK; - } - node = pxm_to_nid_map[obj->integer.value]; - acpi_os_free(buffer.pointer); + node = pxm_to_nid_map[pxm]; if (node >= MAX_NUMNODES || !node_online(node) || cpus_empty(node_to_cpumask(node))) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 9353adc..8ebfcc0 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -777,7 +777,7 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit .mem.offset 8,0 st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit -END(ia64_ret_from_ia32_execve_syscall) +END(ia64_ret_from_ia32_execve) // fall through #endif /* CONFIG_IA32_SUPPORT */ GLOBAL_ENTRY(ia64_leave_kernel) @@ -1176,7 +1176,7 @@ ENTRY(notify_resume_user) ;; (pNonSys) mov out2=0 // out2==0 => not a syscall .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch .body @@ -1202,7 +1202,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend) adds out2=8,sp // out2=&sigscratch->ar_pfs ;; .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch .body diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index f566ff4..7d7684a 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set ;; st8 [r2]=r14 // update current->blocked with new mask - cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 ;; - cmp.ne p6,p0=r17,r14 // update failed? + cmp.ne p6,p0=r17,r8 // update failed? (p6) br.cond.spnt.few 1b // yes -> retry #ifdef CONFIG_SMP diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 4d6c7b8f..736e328 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) return IRQ_HANDLED; } -#endif /* CONFIG_ACPI */ - /* * ia64_mca_cpe_poll * @@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy) platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); } +#endif /* CONFIG_ACPI */ + /* * C portion of the OS INIT handler * @@ -1390,8 +1390,7 @@ ia64_mca_init(void) register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); #ifdef CONFIG_ACPI - /* Setup the CPEI/P vector and handler */ - cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); + /* Setup the CPEI/P handler */ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); #endif @@ -1436,6 +1435,7 @@ ia64_mca_late_init(void) #ifdef CONFIG_ACPI /* Setup the CPEI/P vector and handler */ + cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); init_timer(&cpe_poll_timer); cpe_poll_timer.function = ia64_mca_cpe_poll; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index ab47817..abc0113 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -132,8 +132,7 @@ mca_handler_bh(unsigned long paddr) spin_unlock(&mca_bh_lock); /* This process is about to be killed itself */ - force_sig(SIGKILL, current); - schedule(); + do_exit(SIGKILL); } /** @@ -439,6 +438,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; psr2->cpl = 0; psr2->ri = 0; + psr2->i = 0; return 1; } diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S index bcfa05a..2d7e021 100644 --- a/arch/ia64/kernel/mca_drv_asm.S +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -10,6 +10,7 @@ #include <asm/asmmacro.h> #include <asm/processor.h> +#include <asm/ptrace.h> GLOBAL_ENTRY(mca_handler_bhhook) invala // clear RSE ? @@ -20,12 +21,21 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; alloc r16=ar.pfs,0,2,1,0 // make a new frame ;; + mov ar.rsc=0 + ;; mov r13=IA64_KR(CURRENT) // current task pointer ;; - adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 + mov r2=r13 + ;; + addl r22=IA64_RBS_OFFSET,r2 + ;; + mov ar.bspstore=r22 ;; - ld8 r12=[r12] // stack pointer + addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 ;; + adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; + st1 [r2]=r0 // clear current->thread.on_ustack flag mov loc0=r16 movl loc1=mca_handler_bh // recovery C function ;; @@ -34,7 +44,9 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; mov loc1=rp ;; - br.call.sptk.many rp=b6 // not return ... + ssm psr.i + ;; + br.call.sptk.many rp=b6 // does not return ... ;; mov ar.pfs=loc0 mov rp=loc1 diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 1dbc7b2..f6d8a01 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -41,7 +41,7 @@ (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ (pKStk) ld8 r3 = [r3];; \ (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ -(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ +(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ (pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ (pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ ;; \ @@ -50,7 +50,6 @@ (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ (pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ ;; \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ ;; \ (pUStk) mov r18=ar.bsp; \ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index febc091..f1aca7c 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * XXX Should have an arch-hook for running this after final section * addresses have been selected... */ - /* See if gp can cover the entire core module: */ - uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2; - if (mod->core_size >= MAX_LTOFF) + uint64_t gp; + if (mod->core_size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size - MAX_LTOFF / 2; + else + gp = mod->core_size / 2; + gp = (uint64_t) mod->module_core + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 376fcbc..6407bff 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,7 +11,7 @@ * Version Perfmon-2.x is a rewrite of perfmon-1.x * by Stephane Eranian, Hewlett Packard Co. * - * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Copyright (C) 1999-2005 Hewlett Packard Co * Stephane Eranian <eranian@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com> * @@ -497,6 +497,9 @@ typedef struct { static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ +static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED; +static pfm_intr_handler_desc_t *pfm_alt_intr_handler; + static struct proc_dir_entry *perfmon_dir; static pfm_uuid_t pfm_null_uuid = {0,}; @@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); DEFINE_PER_CPU(unsigned long, pmu_activation_number); +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); /* forward declaration */ @@ -1265,6 +1269,8 @@ out: } EXPORT_SYMBOL(pfm_unregister_buffer_fmt); +extern void update_pal_halt_status(int); + static int pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) { @@ -1311,6 +1317,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * disable default_idle() to go to PAL_HALT + */ + update_pal_halt_status(0); + UNLOCK_PFS(flags); return 0; @@ -1318,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) error_conflict: DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", pfm_sessions.pfs_sys_session[cpu]->pid, - smp_processor_id())); + cpu)); abort: UNLOCK_PFS(flags); @@ -1366,6 +1377,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * if possible, enable default_idle() to go into PAL_HALT + */ + if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) + update_pal_halt_status(1); + UNLOCK_PFS(flags); return 0; @@ -4202,7 +4219,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", req->load_pid, ctx->ctx_state)); - return -EINVAL; + return -EBUSY; } DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); @@ -4704,16 +4721,26 @@ recheck: if (task == current || ctx->ctx_fl_system) return 0; /* - * if context is UNLOADED we are safe to go + * we are monitoring another thread */ - if (state == PFM_CTX_UNLOADED) return 0; - - /* - * no command can operate on a zombie context - */ - if (state == PFM_CTX_ZOMBIE) { - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); - return -EINVAL; + switch(state) { + case PFM_CTX_UNLOADED: + /* + * if context is UNLOADED we are safe to go + */ + return 0; + case PFM_CTX_ZOMBIE: + /* + * no command can operate on a zombie context + */ + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + case PFM_CTX_MASKED: + /* + * PMU state has been saved to software even though + * the thread may still be running. + */ + if (cmd != PFM_UNLOAD_CONTEXT) return 0; } /* @@ -5532,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) int ret; this_cpu = get_cpu(); - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + if (likely(!pfm_alt_intr_handler)) { + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - start_cycles = ia64_get_itc(); + start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(irq, arg, regs); - total_cycles = ia64_get_itc(); + total_cycles = ia64_get_itc(); - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } } + else { + (*pfm_alt_intr_handler->handler)(irq, arg, regs); + } + put_cpu_no_resched(); return IRQ_HANDLED; } @@ -6402,6 +6435,141 @@ static struct irqaction perfmon_irqaction = { .name = "perfmon" }; +static void +pfm_alt_save_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * should not be necessary but + * let's take not risk + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * This call is required + * May cause a spurious interrupt on some processors + */ + pfm_freeze_pmu(); + + ia64_srlz_d(); +} + +void +pfm_alt_restore_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * put PMU back in state expected + * by perfmon + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * perfmon runs with PMU unfrozen at all times + */ + pfm_unfreeze_pmu(); + + ia64_srlz_d(); +} + +int +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int ret, i; + int reserve_cpu; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alt_intr_handler) return -EBUSY; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + /* reserve our session */ + for_each_online_cpu(reserve_cpu) { + ret = pfm_reserve_session(NULL, 1, reserve_cpu); + if (ret) goto cleanup_reserve; + } + + /* save the current system wide pmu states */ + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + goto cleanup_reserve; + } + + /* officially change to the alternate interrupt handler */ + pfm_alt_intr_handler = hdl; + + spin_unlock(&pfm_alt_install_check); + + return 0; + +cleanup_reserve: + for_each_online_cpu(i) { + /* don't unreserve more than we reserved */ + if (i >= reserve_cpu) break; + + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return ret; +} +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); + +int +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int i; + int ret; + + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! */ + if (pfm_alt_intr_handler != hdl) return -EINVAL; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + pfm_alt_intr_handler = NULL; + + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + } + + for_each_online_cpu(i) { + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return 0; +} +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); + /* * perfmon initialization routine, called from the initcall() table */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 7c43aea..ebb71f3 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -50,7 +50,7 @@ #include "sigframe.h" void (*ia64_mark_idle)(int); -static cpumask_t cpu_idle_map; +static DEFINE_PER_CPU(unsigned int, cpu_idle_state); unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -173,7 +173,9 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall ia64_do_signal(oldset, scr, in_syscall); } -static int pal_halt = 1; +static int pal_halt = 1; +static int can_do_pal_halt = 1; + static int __init nohalt_setup(char * str) { pal_halt = 0; @@ -181,16 +183,20 @@ static int __init nohalt_setup(char * str) } __setup("nohalt", nohalt_setup); +void +update_pal_halt_status(int status) +{ + can_do_pal_halt = pal_halt && status; +} + /* * We use this if we don't have any better idle routine.. */ void default_idle (void) { - unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP); - while (!need_resched()) - if (pal_halt && !pmu_active) + if (can_do_pal_halt) safe_halt(); else cpu_relax(); @@ -223,20 +229,31 @@ static inline void play_dead(void) } #endif /* CONFIG_HOTPLUG_CPU */ - void cpu_idle_wait(void) { - int cpu; - cpumask_t map; + unsigned int cpu, this_cpu = get_cpu(); + cpumask_t map; - for_each_online_cpu(cpu) - cpu_set(cpu, cpu_idle_map); + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); - wmb(); - do { - ssleep(1); - cpus_and(map, cpu_idle_map, cpu_online_map); - } while (!cpus_empty(map)); + cpus_clear(map); + for_each_online_cpu(cpu) { + per_cpu(cpu_idle_state, cpu) = 1; + cpu_set(cpu, map); + } + + __get_cpu_var(cpu_idle_state) = 0; + + wmb(); + do { + ssleep(1); + for_each_online_cpu(cpu) { + if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) + cpu_clear(cpu, map); + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); } EXPORT_SYMBOL_GPL(cpu_idle_wait); @@ -244,7 +261,6 @@ void __attribute__((noreturn)) cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; - int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -255,12 +271,13 @@ cpu_idle (void) while (!need_resched()) { void (*idle)(void); + if (__get_cpu_var(cpu_idle_state)) + __get_cpu_var(cpu_idle_state) = 0; + + rmb(); if (mark_idle) (*mark_idle)(1); - if (cpu_isset(cpu, cpu_idle_map)) - cpu_clear(cpu, cpu_idle_map); - rmb(); idle = pm_idle; if (!idle) idle = default_idle; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 9e730c7..4c1d2f5 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task) { struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); + /* + * Prevent migrating this task while + * we're fiddling with the FPU state + */ + preempt_disable(); if (ia64_is_local_fpu_owner(task) && psr->mfh) { psr->mfh = 0; task->thread.flags |= IA64_THREAD_FPH_VALID; ia64_save_fpu(&task->thread.fph[0]); } + preempt_enable(); } /* @@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, unsigned long cfm) { struct unw_frame_info info, prev_info; - unsigned long ip, pr; + unsigned long ip, sp, pr; unw_init_from_blocked_task(&info, child); while (1) { prev_info = info; if (unw_unwind(&info) < 0) return; - if (unw_get_rp(&info, &ip) < 0) + + unw_get_sp(&info, &sp); + if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + dprintk("ptrace.%s: ran off the top of the kernel " + "stack\n", __FUNCTION__); + return; + } + if (unw_get_pr (&prev_info, &pr) < 0) { + unw_get_rp(&prev_info, &ip); + dprintk("ptrace.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __FUNCTION__, ip); return; - if (ip < FIXADDR_USER_END) + } + if (unw_is_intr_frame(&info) + && (pr & (1UL << PRED_USER_STACK))) break; } @@ -1616,20 +1636,25 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - long syscall; + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(); if (unlikely(current->audit_context)) { - if (IS_IA32_PROCESS(®s)) + long syscall; + int arch; + + if (IS_IA32_PROCESS(®s)) { syscall = regs.r1; - else + arch = AUDIT_ARCH_I386; + } else { syscall = regs.r15; + arch = AUDIT_ARCH_IA64; + } - audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3); + audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3); } - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); } /* "asmlinkage" so the input arguments are preserved... */ @@ -1640,7 +1665,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, struct pt_regs regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs.r8); + audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8); if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index b7e6b4c..d14692e 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -720,7 +720,8 @@ cpu_init (void) ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); /* - * Initialize default control register to defer all speculative faults. The + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The * kernel MUST NOT depend on a particular setting of these bits (in other words, * the kernel must have recovery code for all speculative accesses). Turn on * dcr.lc as per recommendation by the architecture team. Most IA-32 apps diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 6891d86..499b7e5 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -224,7 +224,8 @@ ia64_rt_sigreturn (struct sigscratch *scr) * could be corrupted. */ retval = (long) &ia64_leave_kernel; - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SYSCALL_AUDIT)) /* * strace expects to be notified after sigreturn returns even though the * context to which we return may not be in the middle of a syscall. diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0d5ee57..3865f08 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -624,7 +624,7 @@ static struct { __u16 thread_id; __u16 proc_fixed_addr; __u8 valid; -}mt_info[NR_CPUS] __devinit; +} mt_info[NR_CPUS] __devinitdata; #ifdef CONFIG_HOTPLUG_CPU static inline void diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a8cf6d8..770fab3 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un } } - /* - * A zero mmap always succeeds in Linux, independent of whether or not the - * remaining arguments are valid. - */ - if (len == 0) - goto out; - /* Careful about overflows.. */ len = PAGE_ALIGN(len); if (!len || len > TASK_SIZE) { diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e82ad78..1861173 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -111,6 +111,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) siginfo_t siginfo; int sig, code; + /* break.b always sets cr.iim to 0, which causes problems for + * debuggers. Get the real break number from the original instruction, + * but only for kernel code. User space break.b is left alone, to + * preserve the existing behaviour. All break codings have the same + * format, so there is no need to check the slot type. + */ + if (break_num == 0 && !user_mode(regs)) { + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle = (unsigned long *)regs->cr_iip; + unsigned long slot; + switch (ipsr->ri) { + case 0: slot = (bundle[0] >> 5); break; + case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break; + default: slot = (bundle[1] >> 23); break; + } + break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff); + } + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); siginfo.si_imm = break_num; @@ -202,13 +220,21 @@ disabled_fph_fault (struct pt_regs *regs) /* first, grant user-level access to fph partition: */ psr->dfh = 0; + + /* + * Make sure that no other task gets in on this processor + * while we're claiming the FPU + */ + preempt_disable(); #ifndef CONFIG_SMP { struct task_struct *fpu_owner = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); - if (ia64_is_local_fpu_owner(current)) + if (ia64_is_local_fpu_owner(current)) { + preempt_enable_no_resched(); return; + } if (fpu_owner) ia64_flush_fph(fpu_owner); @@ -226,6 +252,7 @@ disabled_fph_fault (struct pt_regs *regs) */ psr->mfh = 1; } + preempt_enable_no_resched(); } static inline int diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S index 29c802b..a1af914 100644 --- a/arch/ia64/lib/flush.S +++ b/arch/ia64/lib/flush.S @@ -1,8 +1,8 @@ /* * Cache flushing routines. * - * Copyright (C) 1999-2001 Hewlett-Packard Co - * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> */ #include <asm/asmmacro.h> #include <asm/page.h> @@ -26,7 +26,7 @@ GLOBAL_ENTRY(flush_icache_range) mov ar.lc=r8 ;; -.Loop: fc in0 // issuable on M0 only +.Loop: fc.i in0 // issuable on M2 only add in0=32,in0 br.cloop.sptk.few .Loop ;; diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S index 3c2cd2f..6f308e6 100644 --- a/arch/ia64/lib/memcpy_mck.S +++ b/arch/ia64/lib/memcpy_mck.S @@ -75,6 +75,7 @@ GLOBAL_ENTRY(memcpy) mov f6=f0 br.cond.sptk .common_code ;; +END(memcpy) GLOBAL_ENTRY(__copy_user) .prologue // check dest alignment @@ -524,7 +525,6 @@ EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ #undef B #undef C #undef D -END(memcpy) /* * Due to lack of local tag support in gcc 2.x assembler, it is not clear which diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S index bd8cf90..f26c16a 100644 --- a/arch/ia64/lib/memset.S +++ b/arch/ia64/lib/memset.S @@ -57,10 +57,10 @@ GLOBAL_ENTRY(memset) { .mmi .prologue alloc tmp = ar.pfs, 3, 0, 0, 0 - .body lfetch.nt1 [dest] // .save ar.lc, save_lc mov.i save_lc = ar.lc + .body } { .mmi mov ret0 = dest // return value cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 547785e..4eb2f52 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -305,8 +305,9 @@ setup_gate (void) struct page *page; /* - * Map the gate page twice: once read-only to export the ELF headers etc. and once - * execute-only page to enable privilege-promotion via "epc": + * Map the gate page twice: once read-only to export the ELF + * headers etc. and once execute-only page to enable + * privilege-promotion via "epc": */ page = virt_to_page(ia64_imva(__start_gate_section)); put_kernel_page(page, GATE_ADDR, PAGE_READONLY); @@ -315,6 +316,20 @@ setup_gate (void) put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); #else put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); + /* Fill in the holes (if any) with read-only zero pages: */ + { + unsigned long addr; + + for (addr = GATE_ADDR + PAGE_SIZE; + addr < GATE_ADDR + PERCPU_PAGE_SIZE; + addr += PAGE_SIZE) + { + put_kernel_page(ZERO_PAGE(0), addr, + PAGE_READONLY); + put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE, + PAGE_READONLY); + } + } #endif ia64_patch_gate(); } diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 4f381fb..4351c4f 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -4,10 +4,15 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved. # obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ huberror.o io_init.o iomv.o klconflib.o sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o +xp-y := xp_main.o xp_nofault.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 18160a0..9e07f54 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -174,6 +174,12 @@ static void sn_fixup_ionodes(void) if (status) continue; + /* Attach the error interrupt handlers */ + if (nasid & 1) + ice_error_init(hubdev); + else + hub_error_init(hubdev); + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; @@ -211,10 +217,6 @@ static void sn_fixup_ionodes(void) sn_flush_device_list; } - if (!(i & 1)) - hub_error_init(hubdev); - else - ice_error_init(hubdev); } } diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c index 857774b..6546db6 100644 --- a/arch/ia64/sn/kernel/mca.c +++ b/arch/ia64/sn/kernel/mca.c @@ -37,6 +37,11 @@ static u64 *sn_oemdata_size, sn_oemdata_bufsize; * This function is the callback routine that SAL calls to log error * info for platform errors. buf is appended to sn_oemdata, resizing as * required. + * Note: this is a SAL to OS callback, running under the same rules as the SAL + * code. SAL calls are run with preempt disabled so this routine must not + * sleep. vmalloc can sleep so print_hook cannot resize the output buffer + * itself, instead it must set the required size and return to let the caller + * resize the buffer then redrive the SAL call. */ static int print_hook(const char *fmt, ...) { @@ -47,18 +52,8 @@ static int print_hook(const char *fmt, ...) vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); len = strlen(buf); - while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) { - u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000); - if (!newbuf) { - printk(KERN_ERR "%s: unable to extend sn_oemdata\n", - __FUNCTION__); - return 0; - } - memcpy(newbuf, *sn_oemdata, *sn_oemdata_size); - vfree(*sn_oemdata); - *sn_oemdata = newbuf; - } - memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1); + if (*sn_oemdata_size + len <= sn_oemdata_bufsize) + memcpy(*sn_oemdata + *sn_oemdata_size, buf, len); *sn_oemdata_size += len; return 0; } @@ -98,7 +93,20 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata, sn_oemdata = oemdata; sn_oemdata_size = oemdata_size; sn_oemdata_bufsize = 0; - ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */ + while (*sn_oemdata_size > sn_oemdata_bufsize) { + u8 *newbuf = vmalloc(*sn_oemdata_size); + if (!newbuf) { + printk(KERN_ERR "%s: unable to extend sn_oemdata\n", + __FUNCTION__); + return 1; + } + vfree(*sn_oemdata); + *sn_oemdata = newbuf; + sn_oemdata_bufsize = *sn_oemdata_size; + *sn_oemdata_size = 0; + ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + } up(&sn_oemdata_mutex); return 0; } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index d35f2a6..44bfc7f 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. */ #include <linux/config.h> @@ -73,6 +73,12 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + partid_t sn_partid = -1; EXPORT_SYMBOL(sn_partid); char sn_system_serial_number_string[128]; @@ -216,7 +222,7 @@ void __init early_sn_setup(void) extern int platform_intr_list[]; extern nasid_t master_nasid; -static int shub_1_1_found __initdata; +static int __initdata shub_1_1_found = 0; /* * sn_check_for_wars @@ -245,7 +251,7 @@ static void __init sn_check_for_wars(void) } else { for_each_online_node(cnode) { if (is_shub_1_1(cnodeid_to_nasid(cnode))) - sn_hub_info->shub_1_1_found = 1; + shub_1_1_found = 1; } } } @@ -265,6 +271,8 @@ void __init sn_setup(char **cmdline_p) int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); extern void sn_cpu_init(void); + ia64_sn_plat_set_error_handling_features(); + /* * If the generic code has enabled vga console support - lets * get rid of it again. This is a kludge for the fact that ACPI @@ -373,11 +381,11 @@ static void __init sn_init_pdas(char **cmdline_p) { cnodeid_t cnode; - memset(pda->cnodeid_to_nasid_table, -1, - sizeof(pda->cnodeid_to_nasid_table)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); for_each_online_node(cnode) - pda->cnodeid_to_nasid_table[cnode] = - pxm_to_nasid(nid_to_pxm_map[cnode]); + sn_cnodeid_to_nasid[cnode] = + pxm_to_nasid(nid_to_pxm_map[cnode]); numionodes = num_online_nodes(); scan_for_ionodes(); @@ -477,7 +485,8 @@ void __init sn_cpu_init(void) cnode = nasid_to_cnodeid(nasid); - pda->p_nodepda = nodepdaindr[cnode]; + sn_nodepda = nodepdaindr[cnode]; + pda->led_address = (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); pda->led_state = LED_ALWAYS_SET; @@ -486,15 +495,18 @@ void __init sn_cpu_init(void) pda->idle_flag = 0; if (cpuid != 0) { - memcpy(pda->cnodeid_to_nasid_table, - pdacpu(0)->cnodeid_to_nasid_table, - sizeof(pda->cnodeid_to_nasid_table)); + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); } /* * Check for WARs. * Only needs to be done once, on BSP. - * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i]. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. * Has to be done before assignment below. */ if (!wars_have_been_checked) { @@ -580,8 +592,7 @@ static void __init scan_for_ionodes(void) brd = find_lboard_any(brd, KLTYPE_SNIA); while (brd) { - pda->cnodeid_to_nasid_table[numionodes] = - brd->brd_nasid; + sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; physical_node_map[brd->brd_nasid] = numionodes; root_lboard[numionodes] = brd; numionodes++; @@ -602,8 +613,7 @@ static void __init scan_for_ionodes(void) root_lboard[nasid_to_cnodeid(nasid)], KLTYPE_TIO); while (brd) { - pda->cnodeid_to_nasid_table[numionodes] = - brd->brd_nasid; + sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; physical_node_map[brd->brd_nasid] = numionodes; root_lboard[numionodes] = brd; numionodes++; @@ -614,7 +624,6 @@ static void __init scan_for_ionodes(void) brd = find_lboard_any(brd, KLTYPE_TIO); } } - } int @@ -623,7 +632,8 @@ nasid_slice_to_cpuid(int nasid, int slice) long cpu; for (cpu=0; cpu < NR_CPUS; cpu++) - if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) return cpu; return -1; diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 66190d7..ab9b5f3 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -21,6 +21,8 @@ #include <asm/sn/types.h> #include <asm/sn/shubio.h> #include <asm/sn/tiocx.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> #include "tio.h" #include "xtalk/xwidgetdev.h" #include "xtalk/hubdev.h" @@ -308,14 +310,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info) } } -uint64_t -tiocx_dma_addr(uint64_t addr) +uint64_t tiocx_dma_addr(uint64_t addr) { return PHYS_TO_TIODMA(addr); } -uint64_t -tiocx_swin_base(int nasid) +uint64_t tiocx_swin_base(int nasid) { return TIO_SWIN_BASE(nasid, TIOCX_CORELET); } @@ -330,19 +330,6 @@ EXPORT_SYMBOL(tiocx_bus_type); EXPORT_SYMBOL(tiocx_dma_addr); EXPORT_SYMBOL(tiocx_swin_base); -static uint64_t tiocx_get_hubdev_info(u64 handle, u64 address) -{ - - struct ia64_sal_retval ret_stuff; - ret_stuff.status = 0; - ret_stuff.v0 = 0; - - ia64_sal_oemcall_nolock(&ret_stuff, - SN_SAL_IOIF_GET_HUBDEV_INFO, - handle, address, 0, 0, 0, 0, 0); - return ret_stuff.v0; -} - static void tio_conveyor_set(nasid_t nasid, int enable_flag) { uint64_t ice_frz; @@ -379,7 +366,29 @@ static void tio_corelet_reset(nasid_t nasid, int corelet) udelay(2000); } -static int fpga_attached(nasid_t nasid) +static int tiocx_btchar_get(int nasid) +{ + moduleid_t module_id; + geoid_t geoid; + int cnodeid; + + cnodeid = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnodeid); + module_id = geo_module(geoid); + return MODULE_GET_BTCHAR(module_id); +} + +static int is_fpga_brick(int nasid) +{ + switch (tiocx_btchar_get(nasid)) { + case L1_BRICKTYPE_SA: + case L1_BRICKTYPE_ATHENA: + return 1; + } + return 0; +} + +static int bitstream_loaded(nasid_t nasid) { uint64_t cx_credits; @@ -396,7 +405,7 @@ static int tiocx_reload(struct cx_dev *cx_dev) int mfg_num = CX_DEV_NONE; nasid_t nasid = cx_dev->cx_id.nasid; - if (fpga_attached(nasid)) { + if (bitstream_loaded(nasid)) { uint64_t cx_id; cx_id = @@ -427,9 +436,10 @@ static ssize_t show_cxdev_control(struct device *dev, char *buf) { struct cx_dev *cx_dev = to_cx_dev(dev); - return sprintf(buf, "0x%x 0x%x 0x%x\n", + return sprintf(buf, "0x%x 0x%x 0x%x %d\n", cx_dev->cx_id.nasid, - cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num); + cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, + tiocx_btchar_get(cx_dev->cx_id.nasid)); } static ssize_t store_cxdev_control(struct device *dev, const char *buf, @@ -475,20 +485,14 @@ static int __init tiocx_init(void) if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) break; /* No more nasids .. bail out of loop */ - if (nasid & 0x1) { /* TIO's are always odd */ + if ((nasid & 0x1) && is_fpga_brick(nasid)) { struct hubdev_info *hubdev; - uint64_t status; struct xwidget_info *widgetp; DBG("Found TIO at nasid 0x%x\n", nasid); hubdev = (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); - status = - tiocx_get_hubdev_info(nasid, - (uint64_t) __pa(hubdev)); - if (status) - continue; widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c new file mode 100644 index 0000000..3be52a3 --- /dev/null +++ b/arch/ia64/sn/kernel/xp_main.c @@ -0,0 +1,289 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/xp.h> + + +/* + * Target of nofault PIO read. + */ +u64 xp_nofault_PIOR_target; + + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_NCHANNELS]; + + +/* + * Initialize the XPC interface to indicate that XPC isn't loaded. + */ +static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; } + +struct xpc_interface xpc_interface = { + (void (*)(int)) xpc_notloaded, + (void (*)(int)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *)) + xpc_notloaded, + (void (*)(partid_t, int, void *)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded +}; + + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect)(int), + void (*disconnect)(int), + enum xpc_retval (*allocate)(partid_t, int, u32, void **), + enum xpc_retval (*send)(partid_t, int, void *), + enum xpc_retval (*send_notify)(partid_t, int, void *, + xpc_notify_func, void *), + void (*received)(partid_t, int, void *), + enum xpc_retval (*partid_to_nasids)(partid_t, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.allocate = allocate; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} + + +/* + * XPC calls this when it (the XPC module) is being unloaded. + */ +void +xpc_clear_interface(void) +{ + xpc_interface.connect = (void (*)(int)) xpc_notloaded; + xpc_interface.disconnect = (void (*)(int)) xpc_notloaded; + xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32, + void **)) xpc_notloaded; + xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *)) + xpc_notloaded; + xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *, + xpc_notify_func, void *)) xpc_notloaded; + xpc_interface.received = (void (*)(partid_t, int, void *)) + xpc_notloaded; + xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *)) + xpc_notloaded; +} + + +/* + * Register for automatic establishment of a channel connection whenever + * a partition comes up. + * + * Arguments: + * + * ch_number - channel # to register for connection. + * func - function to call for asynchronous notification of channel + * state changes (i.e., connection, disconnection, error) and + * the arrival of incoming messages. + * key - pointer to optional user-defined value that gets passed back + * to the user on any callouts made to func. + * payload_size - size in bytes of the XPC message's payload area which + * contains a user-defined message. The user should make + * this large enough to hold their largest message. + * nentries - max #of XPC message entries a message queue can contain. + * The actual number, which is determined when a connection + * is established and may be less then requested, will be + * passed to the user via the xpcConnected callout. + * assigned_limit - max number of kthreads allowed to be processing + * messages (per connection) at any given instant. + * idle_limit - max number of kthreads allowed to be idle at any given + * instant. + */ +enum xpc_retval +xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, + u16 nentries, u32 assigned_limit, u32 idle_limit) +{ + struct xpc_registration *registration; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(payload_size == 0 || nentries == 0); + DBUG_ON(func == NULL); + DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + + registration = &xpc_registrations[ch_number]; + + if (down_interruptible(®istration->sema) != 0) { + return xpcInterrupted; + } + + /* if XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func != NULL) { + up(®istration->sema); + return xpcAlreadyRegistered; + } + + /* register the channel for connection */ + registration->msg_size = XPC_MSG_SIZE(payload_size); + registration->nentries = nentries; + registration->assigned_limit = assigned_limit; + registration->idle_limit = idle_limit; + registration->key = key; + registration->func = func; + + up(®istration->sema); + + xpc_interface.connect(ch_number); + + return xpcSuccess; +} + + +/* + * Remove the registration for automatic connection of the specified channel + * when a partition comes up. + * + * Before returning this xpc_disconnect() will wait for all connections on the + * specified channel have been closed/torndown. So the caller can be assured + * that they will not be receiving any more callouts from XPC to their + * function registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_disconnect(int ch_number) +{ + struct xpc_registration *registration; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + registration = &xpc_registrations[ch_number]; + + /* + * We've decided not to make this a down_interruptible(), since we + * figured XPC's users will just turn around and call xpc_disconnect() + * again anyways, so we might as well wait, if need be. + */ + down(®istration->sema); + + /* if !XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func == NULL) { + up(®istration->sema); + return; + } + + /* remove the connection registration for the specified channel */ + registration->func = NULL; + registration->key = NULL; + registration->nentries = 0; + registration->msg_size = 0; + registration->assigned_limit = 0; + registration->idle_limit = 0; + + xpc_interface.disconnect(ch_number); + + up(®istration->sema); + + return; +} + + +int __init +xp_init(void) +{ + int ret, ch_number; + u64 func_addr = *(u64 *) xp_nofault_PIOR; + u64 err_func_addr = *(u64 *) xp_error_PIOR; + + + if (!ia64_platform_is("sn2")) { + return -ENODEV; + } + + /* + * Register a nofault code region which performs a cross-partition + * PIO read. If the PIO read times out, the MCA handler will consume + * the error and return to a kernel-provided instruction to indicate + * an error. This PIO read exists because it is guaranteed to timeout + * if the destination is down (AMO operations do not timeout on at + * least some CPUs on Shubs <= v1.2, which unfortunately we have to + * work around). + */ + if ((ret = sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 1)) != 0) { + printk(KERN_ERR "XP: can't register nofault code, error=%d\n", + ret); + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub2()) { + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + } else { + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + } + + /* initialize the connection registration semaphores */ + for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { + sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */ + } + + return 0; +} +module_init(xp_init); + + +void __exit +xp_exit(void) +{ + u64 func_addr = *(u64 *) xp_nofault_PIOR; + u64 err_func_addr = *(u64 *) xp_error_PIOR; + + + /* unregister the PIO read nofault code region */ + (void) sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} +module_exit(xp_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition (XP) base"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(xp_nofault_PIOR); +EXPORT_SYMBOL(xp_nofault_PIOR_target); +EXPORT_SYMBOL(xpc_registrations); +EXPORT_SYMBOL(xpc_interface); +EXPORT_SYMBOL(xpc_clear_interface); +EXPORT_SYMBOL(xpc_set_interface); +EXPORT_SYMBOL(xpc_connect); +EXPORT_SYMBOL(xpc_disconnect); + diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S new file mode 100644 index 0000000..b772543 --- /dev/null +++ b/arch/ia64/sn/kernel/xp_nofault.S @@ -0,0 +1,31 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * The xp_nofault_PIOR function takes a pointer to a remote PIO register + * and attempts to load and consume a value from it. This function + * will be registered as a nofault code block. In the event that the + * PIO read fails, the MCA handler will force the error to look + * corrected and vector to the xp_error_PIOR which will return an error. + * + * extern int xp_nofault_PIOR(void *remote_register); + */ + + .global xp_nofault_PIOR +xp_nofault_PIOR: + mov r8=r0 // Stage a success return value + ld8.acq r9=[r32];; // PIO Read the specified register + adds r9=1,r9 // Add to force a consume + br.ret.sptk.many b0;; // Return success + + .global xp_error_PIOR +xp_error_PIOR: + mov r8=1 // Return value of 1 + br.ret.sptk.many b0;; // Return failure + diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h new file mode 100644 index 0000000..1a0aed8 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc.h @@ -0,0 +1,991 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _IA64_SN_KERNEL_XPC_H +#define _IA64_SN_KERNEL_XPC_H + + +#include <linux/config.h> +#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/device.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sn/bte.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/addrs.h> +#include <asm/sn/mspec.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/xp.h> + + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. + */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + + +/* + * The next macros define word or bit representations for given + * C-brick nasid in either the SAL provided bit array representing + * nasids in the partition/machine or the AMO_t array used for + * inter-partition initiation communications. + * + * For SN2 machines, C-Bricks are alway even numbered NASIDs. As + * such, some space will be saved by insisting that nasid information + * passed from SAL always be packed for C-Bricks and the + * cross-partition interrupts use the same packing scheme. + */ +#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2) +#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1)) +#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \ + (1UL << XPC_NASID_B_INDEX(_n))) +#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) + +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + + +#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p))) +#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p)) +#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p))) + + +/* + * Reserved Page provided by SAL. + * + * SAL provides one page per partition of reserved memory. When SAL + * initialization is complete, SAL_signature, SAL_version, partid, + * part_nasids, and mach_nasids are set. + * + * Note: Until vars_pa is set, the partition XPC code has not been initialized. + */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL unique signature */ + u64 SAL_version; /* SAL specified version */ + u8 partid; /* partition ID from SAL */ + u8 version; + u8 pad[6]; /* pad to u64 align */ + u64 vars_pa; + u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; + u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; +}; +#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */ + +#define XPC_RSVD_PAGE_ALIGNED_SIZE \ + (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) + + +/* + * Define the structures by which XPC variables can be exported to other + * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) + */ + +/* + * The following structure describes the partition generic variables + * needed by other partitions in order to properly initialize. + * + * struct xpc_vars version number also applies to struct xpc_vars_part. + * Changes to either structure and/or related functionality should be + * reflected by incrementing either the major or minor version numbers + * of struct xpc_vars. + */ +struct xpc_vars { + u8 version; + u64 heartbeat; + u64 heartbeating_to_mask; + u64 kdb_status; /* 0 = machine running */ + int act_nasid; + int act_phys_cpuid; + u64 vars_part_pa; + u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ + AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ + AMO_t *act_amos; /* pointer to the first activation AMO */ +}; +#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */ + +#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) + +/* + * The following structure describes the per partition specific variables. + * + * An array of these structures, one per partition, will be defined. As a + * partition becomes active XPC will copy the array entry corresponding to + * itself from that partition. It is desirable that the size of this + * structure evenly divide into a cacheline, such that none of the entries + * in this array crosses a cacheline boundary. As it is now, each entry + * occupies half a cacheline. + */ +struct xpc_vars_part { + u64 magic; + + u64 openclose_args_pa; /* physical address of open and close args */ + u64 GPs_pa; /* physical address of Get/Put values */ + + u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */ + int IPI_nasid; /* nasid of where to send IPIs */ + int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */ + + u8 nchannels; /* #of defined channels supported */ + + u8 reserved[23]; /* pad to a full 64 bytes */ +}; + +/* + * The vars_part MAGIC numbers play a part in the first contact protocol. + * + * MAGIC1 indicates that the per partition specific variables for a remote + * partition have been initialized by this partition. + * + * MAGIC2 indicates that this partition has pulled the remote partititions + * per partition variables that pertain to this partition. + */ +#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ +#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ + + + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64) _arg1) & 0xffffffff) | \ + ((((u64) _arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff) + + + +/* + * Define a Get/Put value pair (pointers) used with a message queue. + */ +struct xpc_gp { + s64 get; /* Get value */ + s64 put; /* Put value */ +}; + +#define XPC_GP_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS) + + + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 msg_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + u64 local_msgqueue_pa; /* physical address of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) + + + +/* struct xpc_msg flags */ + +#define XPC_M_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ + + +#define XPC_MSG_ADDRESS(_payload) \ + ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) + + + +/* + * Defines notify entry. + * + * This is used to notify a message's sender that their message was received + * and consumed by the intended recipient. + */ +struct xpc_notify { + struct semaphore sema; /* notify semaphore */ + u8 type; /* type of notification */ + + /* the following two fields are only used if type == XPC_N_CALL */ + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* struct xpc_notify type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + + + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. + * + * Each of these structures manages two message queues (circular buffers). + * They are allocated at the time a channel connection is made. One of + * these message queues (local_msgqueue) holds the locally created messages + * that are destined for the remote partition. The other of these message + * queues (remote_msgqueue) is a locally cached copy of the remote partition's + * own local_msgqueue. + * + * The following is a description of the Get/Put pointers used to manage these + * two message queues. Consider the local_msgqueue to be on one partition + * and the remote_msgqueue to be its cached copy on another partition. A + * description of what each of the lettered areas contains is included. + * + * + * local_msgqueue remote_msgqueue + * + * |/////////| |/////////| + * w_remote_GP.get --> +---------+ |/////////| + * | F | |/////////| + * remote_GP.get --> +---------+ +---------+ <-- local_GP->get + * | | | | + * | | | E | + * | | | | + * | | +---------+ <-- w_local_GP.get + * | B | |/////////| + * | | |////D////| + * | | |/////////| + * | | +---------+ <-- w_remote_GP.put + * | | |////C////| + * local_GP->put --> +---------+ +---------+ <-- remote_GP.put + * | | |/////////| + * | A | |/////////| + * | | |/////////| + * w_local_GP.put --> +---------+ |/////////| + * |/////////| |/////////| + * + * + * ( remote_GP.[get|put] are cached copies of the remote + * partition's local_GP->[get|put], and thus their values can + * lag behind their counterparts on the remote partition. ) + * + * + * A - Messages that have been allocated, but have not yet been sent to the + * remote partition. + * + * B - Messages that have been sent, but have not yet been acknowledged by the + * remote partition as having been received. + * + * C - Area that needs to be prepared for the copying of sent messages, by + * the clearing of the message flags of any previously received messages. + * + * D - Area into which sent messages are to be copied from the remote + * partition's local_msgqueue and then delivered to their intended + * recipients. [ To allow for a multi-message copy, another pointer + * (next_msg_to_pull) has been added to keep track of the next message + * number needing to be copied (pulled). It chases after w_remote_GP.put. + * Any messages lying between w_local_GP.get and next_msg_to_pull have + * been copied and are ready to be delivered. ] + * + * E - Messages that have been copied and delivered, but have not yet been + * acknowledged by the recipient as having been received. + * + * F - Messages that have been acknowledged, but XPC has not yet notified the + * sender that the message was received by its intended recipient. + * This is also an area that needs to be prepared for the allocating of + * new messages, by the clearing of the message flags of the acknowledged + * messages. + */ +struct xpc_channel { + partid_t partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + u32 flags; /* general flags */ + + enum xpc_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 msg_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue*/ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */ + /* local message queue */ + u64 remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + /* queue of msg senders who want to be notified when msg received */ + + atomic_t n_to_notify; /* #of msg senders to notify */ + struct xpc_notify *notify_queue;/* notify queue for messages sent */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ + struct semaphore teardown_sema; /* wait for teardown completion */ + + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp *local_GP; /* local Get/Put values */ + struct xpc_gp remote_GP; /* remote Get/Put values */ + struct xpc_gp w_local_GP; /* working local Get/Put values */ + struct xpc_gp w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + /* kthread management related fields */ + +// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps +// >>> allow the assigned limit be unbounded and let the idle limit be dynamic +// >>> dependent on activity over the last interval of time + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + // >>> following field is temporary + u32 kthreads_created; /* total #of kthreads created */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + +} ____cacheline_aligned; + + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ + +#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ +#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ + + + +/* + * Manages channels on a partition basis. There is one of these structures + * for each partition (a partition will never utilize the structure that + * represents itself). + */ +struct xpc_partition { + + /* XPC HB infrastructure */ + + u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ + u64 remote_vars_pa; /* phys addr of partition's vars */ + u64 remote_vars_part_pa; /* phys addr of partition's vars part */ + u64 last_heartbeat; /* HB at last read */ + u64 remote_amos_page_pa; /* phys addr of partition's amos page */ + int remote_act_nasid; /* active part's act/deact nasid */ + int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */ + u32 act_IRQ_rcvd; /* IRQs since activation */ + spinlock_t act_lock; /* protect updating of act_state */ + u8 act_state; /* from XPC HB viewpoint */ + enum xpc_retval reason; /* reason partition is deactivating */ + int reason_line; /* line# deactivation initiated from */ + int reactivate_nasid; /* nasid in partition to reactivate */ + + + /* XPC infrastructure referencing and teardown control */ + + u8 setup_state; /* infrastructure setup state */ + wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ + atomic_t references; /* #of references to infrastructure */ + + + /* + * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN + * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION + * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE + * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.) + */ + + + u8 nchannels; /* #of defined channels supported */ + atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + struct xpc_channel *channels;/* array of channel structures */ + + void *local_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp *local_GPs; /* local Get/Put values */ + void *remote_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */ + /* values */ + u64 remote_GPs_pa; /* phys address of remote partition's local */ + /* Get/Put values */ + + + /* fields used to pass args when opening or closing a channel */ + + void *local_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *local_openclose_args; /* local's args */ + void *remote_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ + /* args */ + u64 remote_openclose_args_pa; /* phys addr of remote's args */ + + + /* IPI sending, receiving and handling related fields */ + + int remote_IPI_nasid; /* nasid of where to send IPIs */ + int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */ + AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */ + + AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */ + u64 local_IPI_amo; /* IPI amo flags yet to be handled */ + char IPI_owner[8]; /* IPI owner's name */ + struct timer_list dropped_IPI_timer; /* dropped IPI timer */ + + spinlock_t IPI_lock; /* IPI handler lock */ + + + /* channel manager related fields */ + + atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ + wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + +} ____cacheline_aligned; + + +/* struct xpc_partition act_state values (for XPC HB) */ + +#define XPC_P_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ + + +#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ + xpc_deactivate_partition(__LINE__, (_p), (_reason)) + + +/* struct xpc_partition setup_state values */ + +#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ + + +/* + * struct xpc_partition IPI_timer #of seconds to wait before checking for + * dropped IPIs. These occur whenever an IPI amo write doesn't complete until + * after the IPI was received. + */ +#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) + + +#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) + + + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + + +/* >>> found in xpc_main.c only */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); +extern void xpc_dropped_IPI_check(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int); +extern void xpc_disconnect_wait(int); + + +/* found in xpc_main.c and efi-xpc.c */ +extern void xpc_activate_partition(struct xpc_partition *); + + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_hb_interval; +extern int xpc_hb_check_interval; +extern struct xpc_vars *xpc_vars; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern struct xpc_vars_part *xpc_vars_part; +extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; +extern char xpc_remote_copy_buffer[]; +extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); +extern void xpc_allow_IPI_ops(void); +extern void xpc_restrict_IPI_ops(void); +extern int xpc_identify_act_IRQ_sender(void); +extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern void xpc_check_remote_hb(void); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xpc_retval); +extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *); + + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **); +extern enum xpc_retval xpc_initiate_send(partid_t, int, void *); +extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *, + xpc_notify_func, void *); +extern void xpc_initiate_received(partid_t, int, void *); +extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *); +extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *); +extern void xpc_process_channel_activity(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_msg(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xpc_retval, unsigned long *); +extern void xpc_disconnected_callout(struct xpc_channel *); +extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval); +extern void xpc_teardown_infrastructure(struct xpc_partition *); + + + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if (atomic_inc_return(&part->channel_mgr_requests) == 1) { + wake_up(&part->channel_mgr_wq); + } +} + + + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. + */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) { + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); + } +} + + + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. + */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) { + wake_up(&part->teardown_wq); + } +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SETUP); + if (!setup) { + xpc_part_deref(part); + } + return setup; +} + + + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + + + +/* + * The following set of macros and inlines are used for the sending and + * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, + * one that is associated with partition activity (SGI_XPC_ACTIVATE) and + * the other that is associated with channel activity (SGI_XPC_NOTIFY). + */ + +static inline u64 +xpc_IPI_receive(AMO_t *amo) +{ + return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR); +} + + +static inline enum xpc_retval +xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector) +{ + int ret = 0; + unsigned long irq_flags; + + + local_irq_save(irq_flags); + + FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag); + sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IPIs and AMOs to it until the heartbeat times out. + */ + ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + return ((ret == 0) ? xpcSuccess : xpcPioReadError); +} + + +/* + * IPIs associated with SGI_XPC_ACTIVATE IRQ. + */ + +/* + * Flag the appropriate AMO variable and send an IPI to the specified node. + */ +static inline void +xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid, + int to_phys_cpuid) +{ + int w_index = XPC_NASID_W_INDEX(from_nasid); + int b_index = XPC_NASID_B_INDEX(from_nasid); + AMO_t *amos = (AMO_t *) __va(amos_page + + (XP_MAX_PARTITIONS * sizeof(AMO_t))); + + + (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, + to_phys_cpuid, SGI_XPC_ACTIVATE); +} + +static inline void +xpc_IPI_send_activate(struct xpc_vars *vars) +{ + xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0), + vars->act_nasid, vars->act_phys_cpuid); +} + +static inline void +xpc_IPI_send_activated(struct xpc_partition *part) +{ + xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), + part->remote_act_nasid, part->remote_act_phys_cpuid); +} + +static inline void +xpc_IPI_send_reactivate(struct xpc_partition *part) +{ + xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid, + xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); +} + + +/* + * IPIs associated with SGI_XPC_NOTIFY IRQ. + */ + +/* + * Send an IPI to the remote partition that is associated with the + * specified channel. + */ +#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \ + xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f) + +static inline void +xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string, + unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xpc_retval ret; + + + if (likely(part->act_state != XPC_P_DEACTIVATING)) { + ret = xpc_IPI_send(part->remote_IPI_amo_va, + (u64) ipi_flag << (ch->number * 8), + part->remote_IPI_nasid, + part->remote_IPI_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + ipi_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpcSuccess)) { + if (irq_flags != NULL) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + } + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) { + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } + } +} + + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us an IPI. This faked IPI will be handled + * by xpc_dropped_IPI_check(). + */ +#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \ + xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f) + +static inline void +xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, + char *ipi_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + + + FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable), + FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8))); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + ipi_flag_string, ch->partid, ch->number); +} + + +/* + * The sending and receiving of IPIs includes the setting of an AMO variable + * to indicate the reason the IPI was sent. The 64-bit variable is divided + * up into eight bytes, ordered from right to left. Byte zero pertains to + * channel 0, byte one to channel 1, and so on. Each byte is described by + * the following IPI flags. + */ + +#define XPC_IPI_CLOSEREQUEST 0x01 +#define XPC_IPI_CLOSEREPLY 0x02 +#define XPC_IPI_OPENREQUEST 0x04 +#define XPC_IPI_OPENREPLY 0x08 +#define XPC_IPI_MSGREQUEST 0x10 + + +/* given an AMO variable and a channel#, get its associated IPI flags */ +#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) + +#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f) +#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) + + +static inline void +xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->reason = ch->reason; + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags); +} + +static inline void +xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags); +} + +static inline void +xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->msg_size = ch->msg_size; + args->local_nentries = ch->local_nentries; + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags); +} + +static inline void +xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = __pa(ch->local_msgqueue); + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags); +} + +static inline void +xpc_IPI_send_msgrequest(struct xpc_channel *ch) +{ + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL); +} + +static inline void +xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) +{ + XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST); +} + + +/* + * Memory for XPC's AMO variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. This will prevent speculative + * reading of cached copies of our lines from being issued which will cause + * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 + * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c) + * and an additional 16 AMO variables for partition activation (xpc_hb.c). + */ +static inline AMO_t * +xpc_IPI_init(partid_t partid) +{ + AMO_t *part_amo = xpc_vars->amos_page + partid; + + + xpc_IPI_receive(part_amo); + return part_amo; +} + + + +static inline enum xpc_retval +xpc_map_bte_errors(bte_result_t error) +{ + switch (error) { + case BTE_SUCCESS: return xpcSuccess; + case BTEFAIL_DIR: return xpcBteDirectoryError; + case BTEFAIL_POISON: return xpcBtePoisonError; + case BTEFAIL_WERR: return xpcBteWriteError; + case BTEFAIL_ACCESS: return xpcBteAccessError; + case BTEFAIL_PWERR: return xpcBtePWriteError; + case BTEFAIL_PRERR: return xpcBtePReadError; + case BTEFAIL_TOUT: return xpcBteTimeOutError; + case BTEFAIL_XTERR: return xpcBteXtalkError; + case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable; + default: return xpcBteUnmappedError; + } +} + + + +static inline void * +xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) { + return NULL; + } + if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { + return *base; + } + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) { + return NULL; + } + return (void *) L1_CACHE_ALIGN((u64) *base); +} + + +/* + * Check to see if there is any channel activity to/from the specified + * partition. + */ +static inline void +xpc_check_for_channel_activity(struct xpc_partition *part) +{ + u64 IPI_amo; + unsigned long irq_flags; + + + IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va); + if (IPI_amo == 0) { + return; + } + + spin_lock_irqsave(&part->IPI_lock, irq_flags); + part->local_IPI_amo |= IPI_amo; + spin_unlock_irqrestore(&part->IPI_lock, irq_flags); + + dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n", + XPC_PARTID(part), IPI_amo); + + xpc_wakeup_channel_mgr(part); +} + + +#endif /* _IA64_SN_KERNEL_XPC_H */ + diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c new file mode 100644 index 0000000..0bf6fbc --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -0,0 +1,2297 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/cache.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/sn/bte.h> +#include <asm/sn/sn_sal.h> +#include "xpc.h" + + +/* + * Set up the initial values for the XPartition Communication channels. + */ +static void +xpc_initialize_channels(struct xpc_partition *part, partid_t partid) +{ + int ch_number; + struct xpc_channel *ch; + + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + ch->local_GP = &part->local_GPs[ch_number]; + ch->local_openclose_args = + &part->local_openclose_args[ch_number]; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + sema_init(&ch->msg_to_pull_sema, 1); /* mutex */ + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } +} + + +/* + * Setup the infrastructure necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +enum xpc_retval +xpc_setup_infrastructure(struct xpc_partition *part) +{ + int ret; + struct timer_list *timer; + partid_t partid = XPC_PARTID(part); + + + /* + * Zero out MOST of the entry for this partition. Only the fields + * starting with `nchannels' will be zeroed. The preceding fields must + * remain `viable' across partition ups and downs, since they may be + * referenced during this memset() operation. + */ + memset(&part->nchannels, 0, sizeof(struct xpc_partition) - + offsetof(struct xpc_partition, nchannels)); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpcNoMemory; + } + memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS); + + part->nchannels = XPC_NCHANNELS; + + + /* allocate all the required GET/PUT values */ + + part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + GFP_KERNEL, &part->local_GPs_base); + if (part->local_GPs == NULL) { + kfree(part->channels); + part->channels = NULL; + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpcNoMemory; + } + memset(part->local_GPs, 0, XPC_GP_SIZE); + + part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + GFP_KERNEL, &part->remote_GPs_base); + if (part->remote_GPs == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + return xpcNoMemory; + } + memset(part->remote_GPs, 0, XPC_GP_SIZE); + + + /* allocate all the required open and close args */ + + part->local_openclose_args = xpc_kmalloc_cacheline_aligned( + XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, + &part->local_openclose_args_base); + if (part->local_openclose_args == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + dev_err(xpc_chan, "can't get memory for local connect args\n"); + return xpcNoMemory; + } + memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); + + part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( + XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, + &part->remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + return xpcNoMemory; + } + memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); + + + xpc_initialize_channels(part, partid); + + atomic_set(&part->nchannels_active, 0); + + + /* local_IPI_amo were set to 0 by an earlier memset() */ + + /* Initialize this partitions AMO_t structure */ + part->local_IPI_amo_va = xpc_IPI_init(partid); + + spin_lock_init(&part->IPI_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + sprintf(part->IPI_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, + part->IPI_owner, (void *) (u64) partid); + if (ret != 0) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + return xpcLackOfResources; + } + + /* Setup a timer to check for dropped IPIs */ + timer = &part->dropped_IPI_timer; + init_timer(timer); + timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check; + timer->data = (unsigned long) part; + timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT; + add_timer(timer); + + /* + * With the setting of the partition setup_state to XPC_P_SETUP, we're + * declaring that this partition is ready to go. + */ + (volatile u8) part->setup_state = XPC_P_SETUP; + + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. + */ + xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs); + xpc_vars_part[partid].openclose_args_pa = + __pa(part->local_openclose_args); + xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va); + xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id()); + xpc_vars_part[partid].IPI_phys_cpuid = + cpu_physical_id(smp_processor_id()); + xpc_vars_part[partid].nchannels = part->nchannels; + (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1; + + return xpcSuccess; +} + + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be an cacheline sized + */ +static enum xpc_retval +xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, + const void *src, size_t cnt) +{ + bte_result_t bte_ret; + + + DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src)); + DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_DEACTIVATING) { + return part->reason; + } + + bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst), + (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL); + if (bte_ret == BTE_SUCCESS) { + return xpcSuccess; + } + + dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n", + XPC_PARTID(part), bte_ret); + + return xpc_map_bte_errors(bte_ret); +} + + +/* + * Pull the remote per partititon specific variables from the specified + * partition. + */ +enum xpc_retval +xpc_pull_remote_vars_part(struct xpc_partition *part) +{ + u8 buffer[L1_CACHE_BYTES * 2]; + struct xpc_vars_part *pulled_entry_cacheline = + (struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer); + struct xpc_vars_part *pulled_entry; + u64 remote_entry_cacheline_pa, remote_entry_pa; + partid_t partid = XPC_PARTID(part); + enum xpc_retval ret; + + + /* pull the cacheline that contains the variables we're interested in */ + + DBUG_ON(part->remote_vars_part_pa != + L1_CACHE_ALIGN(part->remote_vars_part_pa)); + DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2); + + remote_entry_pa = part->remote_vars_part_pa + + sn_partition_id * sizeof(struct xpc_vars_part); + + remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); + + pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline + + (remote_entry_pa & (L1_CACHE_BYTES - 1))); + + ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline, + (void *) remote_entry_cacheline_pa, + L1_CACHE_BYTES); + if (ret != xpcSuccess) { + dev_dbg(xpc_chan, "failed to pull XPC vars_part from " + "partition %d, ret=%d\n", partid, ret); + return ret; + } + + + /* see if they've been set up yet */ + + if (pulled_entry->magic != XPC_VP_MAGIC1 && + pulled_entry->magic != XPC_VP_MAGIC2) { + + if (pulled_entry->magic != 0) { + dev_dbg(xpc_chan, "partition %d's XPC vars_part for " + "partition %d has bad magic value (=0x%lx)\n", + partid, sn_partition_id, pulled_entry->magic); + return xpcBadMagic; + } + + /* they've not been initialized yet */ + return xpcRetry; + } + + if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) { + + /* validate the variables */ + + if (pulled_entry->GPs_pa == 0 || + pulled_entry->openclose_args_pa == 0 || + pulled_entry->IPI_amo_pa == 0) { + + dev_err(xpc_chan, "partition %d's XPC vars_part for " + "partition %d are not valid\n", partid, + sn_partition_id); + return xpcInvalidAddress; + } + + /* the variables we imported look to be valid */ + + part->remote_GPs_pa = pulled_entry->GPs_pa; + part->remote_openclose_args_pa = + pulled_entry->openclose_args_pa; + part->remote_IPI_amo_va = + (AMO_t *) __va(pulled_entry->IPI_amo_pa); + part->remote_IPI_nasid = pulled_entry->IPI_nasid; + part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid; + + if (part->nchannels > pulled_entry->nchannels) { + part->nchannels = pulled_entry->nchannels; + } + + /* let the other side know that we've pulled their variables */ + + (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2; + } + + if (pulled_entry->magic == XPC_VP_MAGIC1) { + return xpcRetry; + } + + return xpcSuccess; +} + + +/* + * Get the IPI flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_IPI_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + u64 IPI_amo; + enum xpc_retval ret; + + + /* + * See if there are any IPI flags to be handled. + */ + + spin_lock_irqsave(&part->IPI_lock, irq_flags); + if ((IPI_amo = part->local_IPI_amo) != 0) { + part->local_IPI_amo = 0; + } + spin_unlock_irqrestore(&part->IPI_lock, irq_flags); + + + if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) { + ret = xpc_pull_remote_cachelines(part, + part->remote_openclose_args, + (void *) part->remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpcSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing IPIs anymore */ + IPI_amo = 0; + } + } + + if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) { + ret = xpc_pull_remote_cachelines(part, part->remote_GPs, + (void *) part->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpcSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing IPIs anymore */ + IPI_amo = 0; + } + } + + return IPI_amo; +} + + +/* + * Allocate the local message queue and the notify queue. + */ +static enum xpc_retval +xpc_allocate_local_msgqueue(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int nentries; + size_t nbytes; + + + // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between + // >>> iterations of the for-loop, bail if set? + + // >>> should we impose a minumum #of entries? like 4 or 8? + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->msg_size; + ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + (GFP_KERNEL | GFP_DMA), + &ch->local_msgqueue_base); + if (ch->local_msgqueue == NULL) { + continue; + } + memset(ch->local_msgqueue, 0, nbytes); + + nbytes = nentries * sizeof(struct xpc_notify); + ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA)); + if (ch->notify_queue == NULL) { + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + continue; + } + memset(ch->notify_queue, 0, nbytes); + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpcNoMemory; +} + + +/* + * Allocate the cached remote message queue. + */ +static enum xpc_retval +xpc_allocate_remote_msgqueue(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int nentries; + size_t nbytes; + + + DBUG_ON(ch->remote_nentries <= 0); + + // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between + // >>> iterations of the for-loop, bail if set? + + // >>> should we impose a minumum #of entries? like 4 or 8? + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->msg_size; + ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + (GFP_KERNEL | GFP_DMA), + &ch->remote_msgqueue_base); + if (ch->remote_msgqueue == NULL) { + continue; + } + memset(ch->remote_msgqueue, 0, nbytes); + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpcNoMemory; +} + + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xpc_retval +xpc_allocate_msgqueues(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int i; + enum xpc_retval ret; + + + DBUG_ON(ch->flags & XPC_C_SETUP); + + if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) { + return ret; + } + + if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) { + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + kfree(ch->notify_queue); + ch->notify_queue = NULL; + return ret; + } + + for (i = 0; i < ch->local_nentries; i++) { + /* use a semaphore as an event wait queue */ + sema_init(&ch->notify_queue[i].sema, 0); + } + + sema_init(&ch->teardown_sema, 0); /* event wait */ + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_SETUP; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpcSuccess; +} + + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. + */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xpc_retval ret; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_allocate_msgqueues(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpcSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + } + if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) { + return; + } + + DBUG_ON(!(ch->flags & XPC_C_SETUP)); + DBUG_ON(ch->local_msgqueue == NULL); + DBUG_ON(ch->remote_msgqueue == NULL); + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_IPI_send_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) { + return; + } + + DBUG_ON(ch->remote_msgqueue_pa == 0); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_create_kthreads(ch, 1); + spin_lock_irqsave(&ch->lock, *irq_flags); +} + + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. + * + * Note: ch->reason and ch->reason_line are left set for debugging purposes, + * they're cleared when XPC_C_DISCONNECTED is cleared. + */ +static void +xpc_free_msgqueues(struct xpc_channel *ch) +{ + DBUG_ON(!spin_is_locked(&ch->lock)); + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + ch->remote_msgqueue_pa = 0; + ch->func = NULL; + ch->key = NULL; + ch->msg_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + ch->local_GP->get = 0; + ch->local_GP->put = 0; + ch->remote_GP.get = 0; + ch->remote_GP.put = 0; + ch->w_local_GP.get = 0; + ch->w_local_GP.put = 0; + ch->w_remote_GP.get = 0; + ch->w_remote_GP.put = 0; + ch->next_msg_to_pull = 0; + + if (ch->flags & XPC_C_SETUP) { + ch->flags &= ~XPC_C_SETUP; + + dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", + ch->flags, ch->partid, ch->number); + + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + kfree(ch->remote_msgqueue_base); + ch->remote_msgqueue = NULL; + kfree(ch->notify_queue); + ch->notify_queue = NULL; + + /* in case someone is waiting for the teardown to complete */ + up(&ch->teardown_sema); + } +} + + +/* + * spin_lock_irqsave() is expected to be held on entry. + */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 ch_flags = ch->flags; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + return; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + + /* it's now safe to free the channel's message queues */ + + xpc_free_msgqueues(ch); + DBUG_ON(ch->flags & XPC_C_SETUP); + + if (part->act_state != XPC_P_DEACTIVATING) { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + return; + } + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_IPI_send_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) { + return; + } + } + + /* both sides are disconnected now */ + + ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */ + + atomic_dec(&part->nchannels_active); + + if (ch_flags & XPC_C_WASCONNECTED) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } +} + + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, + u8 IPI_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xpc_retval reason; + + + + spin_lock_irqsave(&ch->lock, irq_flags); + + + if (IPI_flags & XPC_IPI_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEQREUQEST in the IPI_flags. + */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY)); + IPI_flags &= ~XPC_IPI_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + + if (ch->flags & XPC_C_DISCONNECTED) { + // >>> explain this section + + if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { + DBUG_ON(part->act_state != + XPC_P_DEACTIVATING); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpcSuccess || reason > xpcUnknownReason) { + reason = xpcUnknownReason; + } else if (reason == xpcUnregistering) { + reason = xpcOtherUnregistering; + } + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + } else { + xpc_process_disconnect(ch, &irq_flags); + } + } + + + if (IPI_flags & XPC_IPI_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d," + " channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_DEACTIVATING); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST)); + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + + if (IPI_flags & XPC_IPI_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->msg_size, args->local_nentries, + ch->partid, ch->number); + + if ((ch->flags & XPC_C_DISCONNECTING) || + part->act_state == XPC_P_DEACTIVATING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * msg_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + DBUG_ON(args->msg_size == 0); + DBUG_ON(args->local_nentries == 0); + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + + if (ch->flags & XPC_C_OPENREQUEST) { + if (args->msg_size != ch->msg_size) { + XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + } else { + ch->msg_size = args->msg_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + + if (IPI_flags & XPC_IPI_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, " + "local_nentries=%d, remote_nentries=%d) received from " + "partid=%d, channel=%d\n", args->local_msgqueue_pa, + args->local_nentries, args->remote_nentries, + ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ch->flags |= XPC_C_ROPENREPLY; + ch->remote_msgqueue_pa = args->local_msgqueue_pa; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + spin_unlock_irqrestore(&ch->lock, irq_flags); +} + + +/* + * Attempt to establish a channel connection to a remote partition. + */ +static enum xpc_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + + if (down_interruptible(®istration->sema) != 0) { + return xpcInterrupted; + } + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + up(®istration->sema); + return xpcUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + up(®istration->sema); + return ch->reason; + } + + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->msg_size != ch->msg_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + up(®istration->sema); + XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcUnequalMsgSizes; + } + } else { + ch->msg_size = registration->msg_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + up(®istration->sema); + + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_IPI_send_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpcSuccess; +} + + +/* + * Notify those who wanted to be notified upon delivery of their message. + */ +static void +xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put) +{ + struct xpc_notify *notify; + u8 notify_type; + s64 get = ch->w_remote_GP.get - 1; + + + while (++get < put && atomic_read(&ch->n_to_notify) > 0) { + + notify = &ch->notify_queue[get % ch->local_nentries]; + + /* + * See if the notify entry indicates it was associated with + * a message who's sender wants to be notified. It is possible + * that it is, but someone else is doing or has done the + * notification. + */ + notify_type = notify->type; + if (notify_type == 0 || + cmpxchg(¬ify->type, notify_type, 0) != + notify_type) { + continue; + } + + DBUG_ON(notify_type != XPC_N_CALL); + + atomic_dec(&ch->n_to_notify); + + if (notify->func != NULL) { + dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) notify, get, ch->partid, ch->number); + + notify->func(reason, ch->partid, ch->number, + notify->key); + + dev_dbg(xpc_chan, "notify->func() returned, " + "notify=0x%p, msg_number=%ld, partid=%d, " + "channel=%d\n", (void *) notify, get, + ch->partid, ch->number); + } + } +} + + +/* + * Clear some of the msg flags in the local message queue. + */ +static inline void +xpc_clear_local_msgqueue_flags(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + s64 get; + + + get = ch->w_remote_GP.get; + do { + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (get % ch->local_nentries) * ch->msg_size); + msg->flags = 0; + } while (++get < (volatile s64) ch->remote_GP.get); +} + + +/* + * Clear some of the msg flags in the remote message queue. + */ +static inline void +xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + s64 put; + + + put = ch->w_remote_GP.put; + do { + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + (put % ch->remote_nentries) * ch->msg_size); + msg->flags = 0; + } while (++put < (volatile s64) ch->remote_GP.put); +} + + +static void +xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int nmsgs_sent; + + + ch->remote_GP = part->remote_GPs[ch_number]; + + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch->w_remote_GP.get == ch->remote_GP.get && + ch->w_remote_GP.put == ch->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)){ + xpc_msgqueue_deref(ch); + return; + } + + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch->w_remote_GP.get != ch->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders(ch, xpcMsgDelivered, + ch->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags(ch); + + (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " + "channel=%d\n", ch->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { + wake_up(&ch->msg_allocate_wq); + } + } + + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch->w_remote_GP.put != ch->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_msg(). + */ + xpc_clear_remote_msgqueue_flags(ch); + + (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put; + + dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " + "channel=%d\n", ch->w_remote_GP.put, ch->partid, + ch->number); + + nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get; + if (nmsgs_sent > 0) { + dev_dbg(xpc_chan, "msgs waiting to be copied and " + "delivered=%d, partid=%d, channel=%d\n", + nmsgs_sent, ch->partid, ch->number); + + if (ch->flags & XPC_C_CONNECTCALLOUT) { + xpc_activate_kthreads(ch, nmsgs_sent); + } + } + } + + xpc_msgqueue_deref(ch); +} + + +void +xpc_process_channel_activity(struct xpc_partition *part) +{ + unsigned long irq_flags; + u64 IPI_amo, IPI_flags; + struct xpc_channel *ch; + int ch_number; + + + IPI_amo = xpc_get_IPI_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + + /* + * Process any open or close related IPI flags, and then deal + * with connecting or disconnecting the channel as required. + */ + + IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number); + + if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) { + xpc_process_openclose_IPI(part, ch_number, IPI_flags); + } + + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_DEACTIVATING) { + continue; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + if (!(ch->flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch->flags & XPC_C_SETUP); + (void) xpc_connect_channel(ch); + } else { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_connect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + continue; + } + + + /* + * Process any message related IPI flags, this may involve the + * activation of kthreads to deliver any pending messages sent + * from the other partition. + */ + + if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) { + xpc_process_msg_IPI(part, ch_number); + } + } +} + + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition has + * gone down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. + */ +void +xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + + /* disconnect all channels associated with the downed partition */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + + +/* + * Teardown the infrastructure necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +void +xpc_teardown_infrastructure(struct xpc_partition *part) +{ + partid_t partid = XPC_PARTID(part); + + + /* + * We start off by making this partition inaccessible to local + * processes by marking it as no longer setup. Then we make it + * inaccessible to remote processes by clearing the XPC per partition + * specific variable's magic # (which indicates that these variables + * are no longer valid) and by ignoring all XPC notify IPIs sent to + * this partition. + */ + + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + DBUG_ON(part->setup_state != XPC_P_SETUP); + part->setup_state = XPC_P_WTEARDOWN; + + xpc_vars_part[partid].magic = 0; + + + free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid); + + + /* + * Before proceding with the teardown we have to wait until all + * existing references cease. + */ + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + + /* now we can begin tearing down the infrastructure */ + + part->setup_state = XPC_P_TORNDOWN; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part->dropped_IPI_timer); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; + part->local_IPI_amo_va = NULL; +} + + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_SETUP); + + /* + * Initiate the establishment of a connection + * on the newly registered channel to the + * remote partition. + */ + xpc_wakeup_channel_mgr(part); + } + + xpc_part_deref(part); + } + } +} + + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + unsigned long irq_flags; + + + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpcConnected, ch->partid, ch->number, + (void *) (u64) ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); +} + + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. + * + * Before returning xpc_initiate_disconnect() will wait until all connections + * on the specified channel have been closed/torndown. So the caller can be + * assured that they will not be receiving any more callouts from XPC to the + * function they registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_initiate_disconnect(int ch_number) +{ + unsigned long irq_flags; + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + /* initiate the channel disconnect for every active partition */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, + &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + } + } + + xpc_disconnect_wait(ch_number); +} + + +/* + * To disconnect a channel, and reflect it back to all who may be waiting. + * + * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by + * >>> xpc_free_msgqueues(). + * + * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. + */ +void +xpc_disconnect_channel(const int line, struct xpc_channel *ch, + enum xpc_retval reason, unsigned long *irq_flags) +{ + u32 flags; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + return; + } + DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); + + dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", + reason, line, ch->partid, ch->number); + + XPC_SET_REASON(ch, reason, line); + + flags = ch->flags; + /* some of these may not have been set */ + ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | + XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_CONNECTING | XPC_C_CONNECTED); + + ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); + xpc_IPI_send_closerequest(ch, irq_flags); + + if (flags & XPC_C_CONNECTED) { + ch->flags |= XPC_C_WASCONNECTED; + } + + if (atomic_read(&ch->kthreads_idle) > 0) { + /* wake all idle kthreads so they can exit */ + wake_up_all(&ch->idle_wq); + } + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + + /* wake those waiting to allocate an entry from the local msg queue */ + + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { + wake_up(&ch->msg_allocate_wq); + } + + /* wake those waiting for notify completion */ + + if (atomic_read(&ch->n_to_notify) > 0) { + xpc_notify_senders(ch, reason, ch->w_local_GP.put); + } + + spin_lock_irqsave(&ch->lock, *irq_flags); +} + + +void +xpc_disconnected_callout(struct xpc_channel *ch) +{ + /* + * Let the channel's registerer know that the channel is now + * disconnected. We don't want to do this if the registerer was never + * informed of a connection being made, unless the disconnect was for + * abnormal reasons. + */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", ch->reason, ch->partid, ch->number); + + ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", ch->reason, ch->partid, ch->number); + } +} + + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. + */ +static enum xpc_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xpc_retval ret; + + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? + } else if (ret == 0) { + ret = xpcTimeout; + } else { + ret = xpcInterrupted; + } + + return ret; +} + + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xpc_retval +xpc_allocate_msg(struct xpc_channel *ch, u32 flags, + struct xpc_msg **address_of_msg) +{ + struct xpc_msg *msg; + enum xpc_retval ret; + s64 put; + + + /* this reference will be dropped in xpc_send_msg() */ + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + xpc_msgqueue_deref(ch); + return ch->reason; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return xpcNotConnected; + } + + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpcTimeout; + + while (1) { + + put = (volatile s64) ch->w_local_GP.put; + if (put - (volatile s64) ch->w_remote_GP.get < + ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not + * sending completion IPIs. This lets us fake an IPI + * that will cause the IPI handler to fetch the latest + * GP values as if an IPI was sent by the other side. + */ + if (ret == xpcTimeout) { + xpc_IPI_send_local_msgrequest(ch); + } + + if (flags & XPC_NOWAIT) { + xpc_msgqueue_deref(ch); + return xpcNoWait; + } + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpcInterrupted && ret != xpcTimeout) { + xpc_msgqueue_deref(ch); + return ret; + } + } + + + /* get the message's address and initialize it */ + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (put % ch->local_nentries) * ch->msg_size); + + + DBUG_ON(msg->flags != 0); + msg->number = put; + + dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", put + 1, + (void *) msg, msg->number, ch->partid, ch->number); + + *address_of_msg = msg; + + return xpcSuccess; +} + + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. NOTE that this routine can sleep waiting for a message + * entry to become available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel #. + * flags - see xpc.h for valid flags. + * payload - address of the allocated payload area pointer (filled in on + * return) in which the user-defined message is constructed. + */ +enum xpc_retval +xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xpc_retval ret = xpcUnknownReason; + struct xpc_msg *msg; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + *payload = NULL; + + if (xpc_part_ref(part)) { + ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg); + xpc_part_deref(part); + + if (msg != NULL) { + *payload = &msg->payload; + } + } + + return ret; +} + + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send an IPI to the recipient + * partition. + */ +static void +xpc_send_msgs(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_msg *msg; + s64 put = initial_put + 1; + int send_IPI = 0; + + + while (1) { + + while (1) { + if (put == (volatile s64) ch->w_local_GP.put) { + break; + } + + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (put % ch->local_nentries) * ch->msg_size); + + if (!(msg->flags & XPC_M_READY)) { + break; + } + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON((volatile s64) ch->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_IPI = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_IPI) { + xpc_IPI_send_msgrequest(ch); + } +} + + +/* + * Common code that does the actual sending of the message by advancing the + * local message queue's Put value and sends an IPI to the partition the + * message is being sent to. + */ +static enum xpc_retval +xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, + xpc_notify_func func, void *key) +{ + enum xpc_retval ret = xpcSuccess; + struct xpc_notify *notify = NULL; // >>> to keep the compiler happy!! + s64 put, msg_number = msg->number; + + + DBUG_ON(notify_type == XPC_N_CALL && func == NULL); + DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) != + msg_number % ch->local_nentries); + DBUG_ON(msg->flags & XPC_M_READY); + + if (ch->flags & XPC_C_DISCONNECTING) { + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ch->reason; + } + + if (notify_type != 0) { + /* + * Tell the remote side to send an ACK interrupt when the + * message has been delivered. + */ + msg->flags |= XPC_M_INTERRUPT; + + atomic_inc(&ch->n_to_notify); + + notify = &ch->notify_queue[msg_number % ch->local_nentries]; + notify->func = func; + notify->key = key; + (volatile u8) notify->type = notify_type; + + // >>> is a mb() needed here? + + if (ch->flags & XPC_C_DISCONNECTING) { + /* + * An error occurred between our last error check and + * this one. We will try to clear the type field from + * the notify entry. If we succeed then + * xpc_disconnect_channel() didn't already process + * the notify entry. + */ + if (cmpxchg(¬ify->type, notify_type, 0) == + notify_type) { + atomic_dec(&ch->n_to_notify); + ret = ch->reason; + } + + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ret; + } + } + + msg->flags |= XPC_M_READY; + + /* + * The preceding store of msg->flags must occur before the following + * load of ch->local_GP->put. + */ + mb(); + + /* see if the message is next in line to be sent, if so send it */ + + put = ch->local_GP->put; + if (put == msg_number) { + xpc_send_msgs(ch, put); + } + + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ret; +} + + +/* + * Send a message previously allocated using xpc_initiate_allocate() on the + * specified channel connected to the specified partition. + * + * This routine will not wait for the message to be received, nor will + * notification be given when it does happen. Once this routine has returned + * the message entry allocated via xpc_initiate_allocate() is no longer + * accessable to the caller. + * + * This routine, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + */ +enum xpc_retval +xpc_initiate_send(partid_t partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + enum xpc_retval ret; + + + dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, + partid, ch_number); + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(msg == NULL); + + ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); + + return ret; +} + + +/* + * Send a message previously allocated using xpc_initiate_allocate on the + * specified channel connected to the specified partition. + * + * This routine will not wait for the message to be sent. Once this routine + * has returned the message entry allocated via xpc_initiate_allocate() is no + * longer accessable to the caller. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * This routine, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + * func - function to call with asynchronous notification of message + * receipt. THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. + */ +enum xpc_retval +xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload, + xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + enum xpc_retval ret; + + + dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, + partid, ch_number); + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(msg == NULL); + DBUG_ON(func == NULL); + + ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, + func, key); + return ret; +} + + +static struct xpc_msg * +xpc_pull_remote_msg(struct xpc_channel *ch, s64 get) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_msg *remote_msg, *msg; + u32 msg_index, nmsgs; + u64 msg_offset; + enum xpc_retval ret; + + + if (down_interruptible(&ch->msg_to_pull_sema) != 0) { + /* we were interrupted by a signal */ + return NULL; + } + + while (get >= ch->next_msg_to_pull) { + + /* pull as many messages as are ready and able to be pulled */ + + msg_index = ch->next_msg_to_pull % ch->remote_nentries; + + DBUG_ON(ch->next_msg_to_pull >= + (volatile s64) ch->w_remote_GP.put); + nmsgs = (volatile s64) ch->w_remote_GP.put - + ch->next_msg_to_pull; + if (msg_index + nmsgs > ch->remote_nentries) { + /* ignore the ones that wrap the msg queue for now */ + nmsgs = ch->remote_nentries - msg_index; + } + + msg_offset = msg_index * ch->msg_size; + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + msg_offset); + remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa + + msg_offset); + + if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg, + nmsgs * ch->msg_size)) != xpcSuccess) { + + dev_dbg(xpc_chan, "failed to pull %d msgs starting with" + " msg %ld from partition %d, channel=%d, " + "ret=%d\n", nmsgs, ch->next_msg_to_pull, + ch->partid, ch->number, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + + up(&ch->msg_to_pull_sema); + return NULL; + } + + mb(); /* >>> this may not be needed, we're not sure */ + + ch->next_msg_to_pull += nmsgs; + } + + up(&ch->msg_to_pull_sema); + + /* return the message we were looking for */ + msg_offset = (get % ch->remote_nentries) * ch->msg_size; + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset); + + return msg; +} + + +/* + * Get a message to be delivered. + */ +static struct xpc_msg * +xpc_get_deliverable_msg(struct xpc_channel *ch) +{ + struct xpc_msg *msg = NULL; + s64 get; + + + do { + if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) { + break; + } + + get = (volatile s64) ch->w_local_GP.get; + if (get == (volatile s64) ch->w_remote_GP.put) { + break; + } + + /* There are messages waiting to be pulled and delivered. + * We need to try to secure one for ourselves. We'll do this + * by trying to increment w_local_GP.get and hope that no one + * else beats us to it. If they do, we'll we'll simply have + * to try again for the next one. + */ + + if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg(ch, get); + + DBUG_ON(msg != NULL && msg->number != get); + DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE)); + DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY)); + + break; + } + + } while (1); + + return msg; +} + + +/* + * Deliver a message to its intended recipient. + */ +void +xpc_deliver_msg(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + + + if ((msg = xpc_get_deliverable_msg(ch)) != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg->number, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpcMsgReceived, ch->partid, ch->number, + &msg->payload, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg->number, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + + +/* + * Now we actually acknowledge the messages that have been delivered and ack'd + * by advancing the cached remote message queue's Get value and if requested + * send an IPI to the message sender's partition. + */ +static void +xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_msg *msg; + s64 get = initial_get + 1; + int send_IPI = 0; + + + while (1) { + + while (1) { + if (get == (volatile s64) ch->w_local_GP.get) { + break; + } + + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + (get % ch->remote_nentries) * ch->msg_size); + + if (!(msg->flags & XPC_M_DONE)) { + break; + } + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON((volatile s64) ch->local_GP->get <= + initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_IPI = (msg_flags & XPC_M_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. + */ + initial_get = get; + } + + if (send_IPI) { + xpc_IPI_send_msgrequest(ch); + } +} + + +/* + * Acknowledge receipt of a delivered message. + * + * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition + * that sent the message. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + */ +void +xpc_initiate_received(partid_t partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + s64 get, msg_number = msg->number; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) != + msg_number % ch->remote_nentries); + DBUG_ON(msg->flags & XPC_M_DONE); + + msg->flags |= XPC_M_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of ch->local_GP->get. + */ + mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. + */ + get = ch->local_GP->get; + if (get == msg_number) { + xpc_acknowledge_msgs(ch, get, msg->flags); + } + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */ + xpc_msgqueue_deref(ch); +} + diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c new file mode 100644 index 0000000..177ddb7 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -0,0 +1,1064 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. It provides a heartbeat and monitors + * the heartbeats of other partitions. + * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . We currently have no way to determine which nasid an IPI came + * from. Thus, xpc_IPI_send() does a remote AMO write followed by + * an IPI. The AMO indicates where data is to be pulled from, so + * after the IPI arrives, the remote partition checks the AMO word. + * The IPI can actually arrive before the AMO however, so other code + * must periodically check for this case. Also, remote AMO operations + * do not reliably time out. Thus we do a remote PIO read solely to + * know whether the remote partition is down and whether we should + * stop sending IPIs to it. This remote PIO read operation is set up + * in a special nofault region so SAL knows to ignore (and cleanup) + * any errors due to the remote AMO write, PIO read, and/or PIO + * write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/cache.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/uaccess.h> +#include "xpc.h" + + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +struct device xpc_part_dbg_subname = { + .bus_id = {0}, /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device xpc_chan_dbg_subname = { + .bus_id = {0}, /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_min = 1; +static int xpc_hb_max = 10; + +static int xpc_hb_check_min = 10; +static int xpc_hb_check_max = 120; + +static ctl_table xpc_sys_xpc_hb_dir[] = { + { + 1, + "hb_interval", + &xpc_hb_interval, + sizeof(int), + 0644, + NULL, + &proc_dointvec_minmax, + &sysctl_intvec, + NULL, + &xpc_hb_min, &xpc_hb_max + }, + { + 2, + "hb_check_interval", + &xpc_hb_check_interval, + sizeof(int), + 0644, + NULL, + &proc_dointvec_minmax, + &sysctl_intvec, + NULL, + &xpc_hb_check_min, &xpc_hb_check_max + }, + {0} +}; +static ctl_table xpc_sys_xpc_dir[] = { + { + 1, + "hb", + NULL, + 0, + 0555, + xpc_sys_xpc_hb_dir + }, + {0} +}; +static ctl_table xpc_sys_dir[] = { + { + 1, + "xpc", + NULL, + 0, + 0555, + xpc_sys_xpc_dir + }, + {0} +}; +static struct ctl_table_header *xpc_sysctl; + + +/* #of IRQs received */ +static atomic_t xpc_act_IRQ_rcvd; + +/* IRQ handler notifies this wait queue on receipt of an IRQ */ +static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; + +/* xpc_hb_checker thread exited notification */ +static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); + +/* xpc_discovery thread exited notification */ +static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); + + +static struct timer_list xpc_hb_timer; + + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + + +/* + * Notify the heartbeat check thread that an IRQ has been received. + */ +static irqreturn_t +xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +{ + atomic_inc(&xpc_act_IRQ_rcvd); + wake_up_interruptible(&xpc_act_IRQ_wq); + return IRQ_HANDLED; +} + + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(unsigned long dummy) +{ + xpc_vars->heartbeat++; + + if (jiffies >= xpc_hb_check_timeout) { + wake_up_interruptible(&xpc_act_IRQ_wq); + } + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. + */ +static int +xpc_hb_checker(void *ignore) +{ + int last_IRQ_count = 0; + int new_IRQ_count; + int force_IRQ=0; + + + /* this thread was marked active by xpc_hb_init() */ + + daemonize(XPC_HB_CHECK_THREAD_NAME); + + set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); + + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + + while (!(volatile int) xpc_exiting) { + + /* wait for IRQ or timeout */ + (void) wait_event_interruptible(xpc_act_IRQ_wq, + (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) || + jiffies >= xpc_hb_check_timeout || + (volatile int) xpc_exiting)); + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int) (xpc_hb_check_timeout - jiffies), + atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); + + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (jiffies >= xpc_hb_check_timeout) { + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + + /* + * We need to periodically recheck to ensure no + * IPI/AMO pairs have been missed. That check + * must always reset xpc_hb_check_timeout. + */ + force_IRQ = 1; + } + + + new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); + if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { + force_IRQ = 0; + + dev_dbg(xpc_part, "found an IRQ to process; will be " + "resetting xpc_hb_check_timeout\n"); + + last_IRQ_count += xpc_identify_act_IRQ_sender(); + if (last_IRQ_count < new_IRQ_count) { + /* retry once to help avoid missing AMO */ + (void) xpc_identify_act_IRQ_sender(); + } + last_IRQ_count = new_IRQ_count; + + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + } + } + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + + /* mark this thread as inactive */ + up(&xpc_hb_checker_exited); + return 0; +} + + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + daemonize(XPC_DISCOVERY_THREAD_NAME); + + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as inactive */ + up(&xpc_discovery_exited); + return 0; +} + + +/* + * Establish first contact with the remote partititon. This involves pulling + * the XPC per partition variables from the remote partition and waiting for + * the remote partition to pull ours. + */ +static enum xpc_retval +xpc_make_first_contact(struct xpc_partition *part) +{ + enum xpc_retval ret; + + + while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) { + if (ret != xpcRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_chan, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + set_current_state(TASK_INTERRUPTIBLE); + (void) schedule_timeout(0.25 * HZ); + + if (part->act_state == XPC_P_DEACTIVATING) { + return part->reason; + } + } + + return xpc_mark_partition_active(part); +} + + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. + */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0) { + + xpc_process_channel_activity(part); + + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. + */ + atomic_dec(&part->channel_mgr_requests); + (void) wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + (volatile u64) part->local_IPI_amo != 0 || + ((volatile u8) part->act_state == + XPC_P_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0))); + atomic_set(&part->channel_mgr_requests, 1); + + // >>> Does it need to wakeup periodically as well? In case we + // >>> miscalculated the #of kthreads to wakeup or create? + } +} + + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition until the partition + * goes down. At which time the kthread will teardown the XPC infrastructure + * and then exit. + * + * XPC HB will put the remote partition's XPC per partition specific variables + * physical address into xpc_partitions[partid].remote_vars_part_pa prior to + * calling xpc_partition_up(). + */ +static void +xpc_partition_up(struct xpc_partition *part) +{ + DBUG_ON(part->channels != NULL); + + dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); + + if (xpc_setup_infrastructure(part) != xpcSuccess) { + return; + } + + /* + * The kthread that XPC HB called us with will become the + * channel manager for this partition. It will not return + * back to XPC HB until the partition's XPC infrastructure + * has been dismantled. + */ + + (void) xpc_part_ref(part); /* this will always succeed */ + + if (xpc_make_first_contact(part) == xpcSuccess) { + xpc_channel_mgr(part); + } + + xpc_part_deref(part); + + xpc_teardown_infrastructure(part); +} + + +static int +xpc_activating(void *__partid) +{ + partid_t partid = (u64) __partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + struct sched_param param = { sched_priority: MAX_USER_RT_PRIO - 1 }; + int ret; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_DEACTIVATING) { + part->act_state = XPC_P_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); + part->act_state = XPC_P_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "bringing partition %d up\n", partid); + + daemonize("xpc%02d", partid); + + /* + * This thread needs to run at a realtime priority to prevent a + * significant performance degradation. + */ + ret = sched_setscheduler(current, SCHED_FIFO, ¶m); + if (ret != 0) { + dev_warn(xpc_part, "unable to set pid %d to a realtime " + "priority, ret=%d\n", current->pid, ret); + } + + /* allow this thread and its children to run on any CPU */ + set_cpus_allowed(current, CPU_MASK_ALL); + + /* + * Register the remote partition's AMOs with SAL so it can handle + * and cleanup errors within that address range should the remote + * partition go down. We don't unregister this range because it is + * difficult to tell when outstanding writes to the remote partition + * are finished and thus when it is safe to unregister. This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. + */ + if (sn_register_xp_addr_region(part->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " + "xp_addr region\n", partid); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + XPC_ALLOW_HB(partid, xpc_vars); + xpc_IPI_send_activated(part); + + + /* + * xpc_partition_up() holds this thread and marks this partition as + * XPC_P_ACTIVE by calling xpc_hb_mark_active(). + */ + (void) xpc_partition_up(part); + + xpc_mark_partition_inactive(part); + + if (part->reason == xpcReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_IPI_send_reactivate(part); + } + + return 0; +} + + +void +xpc_activate_partition(struct xpc_partition *part) +{ + partid_t partid = XPC_PARTID(part); + unsigned long irq_flags; + pid_t pid; + + + spin_lock_irqsave(&part->act_lock, irq_flags); + + pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); + + DBUG_ON(part->act_state != XPC_P_INACTIVE); + + if (pid > 0) { + part->act_state = XPC_P_ACTIVATION_REQ; + XPC_SET_REASON(part, xpcCloneKThread, __LINE__); + } else { + XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); + } + + spin_unlock_irqrestore(&part->act_lock, irq_flags); +} + + +/* + * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified + * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an AMO_t structure per partition to indicate + * whether a partition has sent an IPI or not. >>> If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IPI's potential sender. + * + * regs - processor's context before the processor entered + * interrupt code. NOT USED. + */ +irqreturn_t +xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +{ + partid_t partid = (partid_t) (u64) dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + + if (xpc_part_ref(part)) { + xpc_check_for_channel_activity(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + + +/* + * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor + * because the write to their associated IPI amo completed after the IRQ/IPI + * was received. + */ +void +xpc_dropped_IPI_check(struct xpc_partition *part) +{ + if (xpc_part_ref(part)) { + xpc_check_for_channel_activity(part); + + part->dropped_IPI_timer.expires = jiffies + + XPC_P_DROPPED_IPI_WAIT; + add_timer(&part->dropped_IPI_timer); + xpc_part_deref(part); + } +} + + +void +xpc_activate_kthreads(struct xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) { + return; + } + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + // >>>should never be less than 0 + if (needed <= 0) { + return; + } + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed); +} + + +/* + * This function is where XPC's kthreads wait for messages to deliver. + */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + do { + /* deliver messages to their intended recipients */ + + while ((volatile s64) ch->w_local_GP.get < + (volatile s64) ch->w_remote_GP.put && + !((volatile u32) ch->flags & + XPC_C_DISCONNECTING)) { + xpc_deliver_msg(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void) wait_event_interruptible_exclusive(ch->idle_wq, + ((volatile s64) ch->w_local_GP.get < + (volatile s64) ch->w_remote_GP.put || + ((volatile u32) ch->flags & + XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING)); +} + + +static int +xpc_daemonize_kthread(void *args) +{ + partid_t partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + + + daemonize("xpc%02dc%d", partid, ch_number); + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + /* let registerer know that connection has been established */ + + if (atomic_read(&ch->kthreads_assigned) == 1) { + xpc_connected_callout(ch); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. + * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. + */ + n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; + if (n_needed > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_activate_kthreads(ch, n_needed); + } + } + + xpc_kthread_waitmsgs(part, ch); + } + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + ((ch->flags & XPC_C_CONNECTCALLOUT) || + (ch->reason != xpcUnregistering && + ch->reason != xpcOtherUnregistering))) { + xpc_disconnected_callout(ch); + } + + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. + */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed) +{ + unsigned long irq_flags; + pid_t pid; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + + + while (needed-- > 0) { + pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); + if (pid < 0) { + /* the fork failed */ + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. + * + * No xpc_msgqueue_ref() is needed here since + * the channel mgr is doing this. + */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + (void) xpc_part_ref(&xpc_partitions[ch->partid]); + xpc_msgqueue_ref(ch); + atomic_inc(&ch->kthreads_assigned); + ch->kthreads_created++; // >>> temporary debug only!!! + } +} + + +void +xpc_disconnect_wait(int ch_number) +{ + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + +// >>> how do we keep from falling into the window between our check and going +// >>> down and coming back up where sema is re-inited? + if (ch->flags & XPC_C_SETUP) { + (void) down(&ch->teardown_sema); + } + + xpc_part_deref(part); + } + } +} + + +static void +xpc_do_exit(void) +{ + partid_t partid; + int active_part_count; + struct xpc_partition *part; + + + /* now it's time to eliminate our heartbeat */ + del_timer_sync(&xpc_hb_timer); + xpc_vars->heartbeating_to_mask = 0; + + /* indicate to others that our reserved page is uninitialized */ + xpc_rsvd_page->vars_pa = 0; + + /* + * Ignore all incoming interrupts. Without interupts the heartbeat + * checker won't activate any new partitions that may come up. + */ + free_irq(SGI_XPC_ACTIVATE, NULL); + + /* + * Cause the heartbeat checker and the discovery threads to exit. + * We don't want them attempting to activate new partitions as we + * try to deactivate the existing ones. + */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_act_IRQ_wq); + + /* wait for the heartbeat checker thread to mark itself inactive */ + down(&xpc_hb_checker_exited); + + /* wait for the discovery thread to mark itself inactive */ + down(&xpc_discovery_exited); + + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(0.3 * HZ); + set_current_state(TASK_RUNNING); + + + /* wait for all partitions to become inactive */ + + do { + active_part_count = 0; + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + if (part->act_state != XPC_P_INACTIVE) { + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, xpcUnloading); + } + } + + if (active_part_count) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(0.3 * HZ); + set_current_state(TASK_RUNNING); + } + + } while (active_part_count > 0); + + + /* close down protections for IPI operations */ + xpc_restrict_IPI_ops(); + + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } +} + + +int __init +xpc_init(void) +{ + int ret; + partid_t partid; + struct xpc_partition *part; + pid_t pid; + + + /* + * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng + * both a partition's reserved page and its XPC variables. Its size was + * based on the size of a reserved page. So we need to ensure that the + * XPC variables will fit as well. + */ + if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) { + dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n"); + return -EPERM; + } + DBUG_ON((u64) xpc_remote_copy_buffer != + L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer)); + + snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); + snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); + + xpc_sysctl = register_sysctl_table(xpc_sys_dir, 1); + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part)); + + part->act_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, 0, 0); + part->setup_state = XPC_P_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + /* + * Open up protections for IPI operations (and AMO operations on + * Shub 1.1 systems). + */ + xpc_allow_IPI_ops(); + + /* + * Interrupts being processed will increment this atomic variable and + * awaken the heartbeat thread which will process the interrupts. + */ + atomic_set(&xpc_act_IRQ_rcvd, 0); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + xpc_rsvd_page = xpc_rsvd_page_init(); + if (xpc_rsvd_page == NULL) { + dev_err(xpc_part, "could not setup our reserved page\n"); + + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + + /* + * Set the beating to other partitions into motion. This is + * the last requirement for other partitions' discovery to + * initiate communications with us. + */ + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); + + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. + */ + pid = kernel_thread(xpc_hb_checker, NULL, 0); + if (pid < 0) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + + /* indicate to others that our reserved page is uninitialized */ + xpc_rsvd_page->vars_pa = 0; + + del_timer_sync(&xpc_hb_timer); + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. + */ + pid = kernel_thread(xpc_initiate_discovery, NULL, 0); + if (pid < 0) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + up(&xpc_discovery_exited); + + xpc_do_exit(); + return -EBUSY; + } + + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_allocate, xpc_initiate_send, + xpc_initiate_send_notify, xpc_initiate_received, + xpc_initiate_partid_to_nasids); + + return 0; +} +module_init(xpc_init); + + +void __exit +xpc_exit(void) +{ + xpc_do_exit(); +} +module_exit(xpc_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c new file mode 100644 index 0000000..2c3c4a8 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -0,0 +1,984 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + + +#include <linux/kernel.h> +#include <linux/sysctl.h> +#include <linux/cache.h> +#include <linux/mmzone.h> +#include <linux/nodemask.h> +#include <asm/sn/bte.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/addrs.h> +#include "xpc.h" + + +/* XPC is exiting flag */ +int xpc_exiting; + + +/* SH_IPI_ACCESS shub register value on startup */ +static u64 xpc_sh1_IPI_access; +static u64 xpc_sh2_IPI_access0; +static u64 xpc_sh2_IPI_access1; +static u64 xpc_sh2_IPI_access2; +static u64 xpc_sh2_IPI_access3; + + +/* original protection values for each node */ +u64 xpc_prot_vec[MAX_COMPACT_NODES]; + + +/* this partition's reserved page */ +struct xpc_rsvd_page *xpc_rsvd_page; + +/* this partition's XPC variables (within the reserved page) */ +struct xpc_vars *xpc_vars; +struct xpc_vars_part *xpc_vars_part; + + +/* + * For performance reasons, each entry of xpc_partitions[] is cacheline + * aligned. And xpc_partitions[] is padded with an additional entry at the + * end so that the last legitimate entry doesn't share its cacheline with + * another variable. + */ +struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; + + +/* + * Generic buffer used to store a local copy of the remote partitions + * reserved page or XPC variables. + * + * xpc_discovery runs only once and is a seperate thread that is + * very likely going to be processing in parallel with receiving + * interrupts. + */ +char ____cacheline_aligned + xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; + + +/* systune related variables */ +int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT; + + +/* + * Given a nasid, get the physical address of the partition's reserved page + * for that nasid. This function returns 0 on any error. + */ +static u64 +xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) +{ + bte_result_t bte_res; + s64 status; + u64 cookie = 0; + u64 rp_pa = nasid; /* seed with nasid */ + u64 len = 0; + + + while (1) { + + status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, + &len); + + dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" + "0x%016lx, address=0x%016lx, len=0x%016lx\n", + status, cookie, rp_pa, len); + + if (status != SALRET_MORE_PASSES) { + break; + } + + if (len > buf_size) { + dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); + status = SALRET_ERROR; + break; + } + + bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bte_res != BTE_SUCCESS) { + dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); + status = SALRET_ERROR; + break; + } + } + + if (status != SALRET_OK) { + rp_pa = 0; + } + dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); + return rp_pa; +} + + +/* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ +struct xpc_rsvd_page * +xpc_rsvd_page_init(void) +{ + struct xpc_rsvd_page *rp; + AMO_t *amos_page; + u64 rp_pa, next_cl, nasid_array = 0; + int i, ret; + + + /* get the local reserved page's address */ + + rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), + (u64) xpc_remote_copy_buffer, + XPC_RSVD_PAGE_ALIGNED_SIZE); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return NULL; + } + rp = (struct xpc_rsvd_page *) __va(rp_pa); + + if (rp->partid != sn_partition_id) { + dev_err(xpc_part, "the reserved page's partid of %d should be " + "%d\n", rp->partid, sn_partition_id); + return NULL; + } + + rp->version = XPC_RP_VERSION; + + /* + * Place the XPC variables on the cache line following the + * reserved page structure. + */ + next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; + xpc_vars = (struct xpc_vars *) next_cl; + + /* + * Before clearing xpc_vars, see if a page of AMOs had been previously + * allocated. If not we'll need to allocate one and set permissions + * so that cross-partition AMOs are allowed. + * + * The allocated AMO page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This AMO page is never freed, and its + * memory protections are never restricted. + */ + if ((amos_page = xpc_vars->amos_page) == NULL) { + amos_page = (AMO_t *) mspec_kalloc_page(0); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of AMOs\n"); + return NULL; + } + + /* + * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems + * when xpc_allow_IPI_ops() is called via xpc_hb_init(). + */ + if (!enable_shub_wars_1_1()) { + ret = sn_change_memprotect(ia64_tpa((u64) amos_page), + PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xpc_part, "can't change memory " + "protections\n"); + mspec_kfree_page((unsigned long) amos_page); + return NULL; + } + } + } else if (!IS_AMO_ADDRESS((u64) amos_page)) { + /* + * EFI's XPBOOT can also set amos_page in the reserved page, + * but it happens to leave it as an uncached physical address + * and we need it to be an uncached virtual, so we'll have to + * convert it. + */ + if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { + dev_err(xpc_part, "previously used amos_page address " + "is bad = 0x%p\n", (void *) amos_page); + return NULL; + } + amos_page = (AMO_t *) TO_AMO((u64) amos_page); + } + + memset(xpc_vars, 0, sizeof(struct xpc_vars)); + + /* + * Place the XPC per partition specific variables on the cache line + * following the XPC variables structure. + */ + next_cl += XPC_VARS_ALIGNED_SIZE; + memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) * + XP_MAX_PARTITIONS); + xpc_vars_part = (struct xpc_vars_part *) next_cl; + xpc_vars->vars_part_pa = __pa(next_cl); + + xpc_vars->version = XPC_V_VERSION; + xpc_vars->act_nasid = cpuid_to_nasid(0); + xpc_vars->act_phys_cpuid = cpu_physical_id(0); + xpc_vars->amos_page = amos_page; /* save for next load of XPC */ + + + /* + * Initialize the activation related AMO variables. + */ + xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); + for (i = 1; i < XP_NASID_MASK_WORDS; i++) { + xpc_IPI_init(i + XP_MAX_PARTITIONS); + } + /* export AMO page's physical address to other partitions */ + xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); + + /* + * This signifies to the remote partition that our reserved + * page is initialized. + */ + (volatile u64) rp->vars_pa = __pa(xpc_vars); + + return rp; +} + + +/* + * Change protections to allow IPI operations (and AMO operations on + * Shub 1.1 systems). + */ +void +xpc_allow_IPI_ops(void) +{ + int node; + int nasid; + + + // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. + + if (is_shub2()) { + xpc_sh2_IPI_access0 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); + xpc_sh2_IPI_access1 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); + xpc_sh2_IPI_access2 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); + xpc_sh2_IPI_access3 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + -1UL); + } + + } else { + xpc_sh1_IPI_access = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + -1UL); + + /* + * Since the BIST collides with memory operations on + * SHUB 1.1 sn_change_memprotect() cannot be used. + */ + if (enable_shub_wars_1_1()) { + /* open up everything */ + xpc_prot_vec[node] = (u64) HUB_L((u64 *) + GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } + } + } +} + + +/* + * Restrict protections to disallow IPI operations (and AMO operations on + * Shub 1.1 systems). + */ +void +xpc_restrict_IPI_ops(void) +{ + int node; + int nasid; + + + // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. + + if (is_shub2()) { + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + xpc_sh2_IPI_access0); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + xpc_sh2_IPI_access1); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + xpc_sh2_IPI_access2); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + xpc_sh2_IPI_access3); + } + + } else { + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + xpc_sh1_IPI_access); + + if (enable_shub_wars_1_1()) { + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + xpc_prot_vec[node]); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + xpc_prot_vec[node]); + } + } + } +} + + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +void +xpc_check_remote_hb(void) +{ + struct xpc_vars *remote_vars; + struct xpc_partition *part; + partid_t partid; + bte_result_t bres; + + + remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + if (partid == sn_partition_id) { + continue; + } + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_INACTIVE || + part->act_state == XPC_P_DEACTIVATING) { + continue; + } + + /* pull the remote_hb cache line */ + bres = xp_bte_copy(part->remote_vars_pa, + ia64_tpa((u64) remote_vars), + XPC_VARS_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + XPC_DEACTIVATE_PARTITION(part, + xpc_map_bte_errors(bres)); + continue; + } + + dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" + " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->kdb_status, + remote_vars->heartbeating_to_mask); + + if (((remote_vars->heartbeat == part->last_heartbeat) && + (remote_vars->kdb_status == 0)) || + !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + + XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); + continue; + } + + part->last_heartbeat = remote_vars->heartbeat; + } +} + + +/* + * Get a copy of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. + */ +static enum xpc_retval +xpc_get_remote_rp(int nasid, u64 *discovered_nasids, + struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) +{ + int bres, i; + + + /* get the reserved page's physical address */ + + *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, + XPC_RSVD_PAGE_ALIGNED_SIZE); + if (*remote_rsvd_page_pa == 0) { + return xpcNoRsvdPageAddr; + } + + + /* pull over the reserved page structure */ + + bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), + XPC_RSVD_PAGE_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + return xpc_map_bte_errors(bres); + } + + + if (discovered_nasids != NULL) { + for (i = 0; i < XP_NASID_MASK_WORDS; i++) { + discovered_nasids[i] |= remote_rp->part_nasids[i]; + } + } + + + /* check that the partid is for another partition */ + + if (remote_rp->partid < 1 || + remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { + return xpcInvalidPartid; + } + + if (remote_rp->partid == sn_partition_id) { + return xpcLocalPartid; + } + + + if (XPC_VERSION_MAJOR(remote_rp->version) != + XPC_VERSION_MAJOR(XPC_RP_VERSION)) { + return xpcBadVersion; + } + + return xpcSuccess; +} + + +/* + * Get a copy of the remote partition's XPC variables. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_VARS_ALIGNED_SIZE. + */ +static enum xpc_retval +xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) +{ + int bres; + + + if (remote_vars_pa == 0) { + return xpcVarsNotSet; + } + + + /* pull over the cross partition variables */ + + bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), + XPC_VARS_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + return xpc_map_bte_errors(bres); + } + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpcBadVersion; + } + + return xpcSuccess; +} + + +/* + * Prior code has determine the nasid which generated an IPI. Inspect + * that nasid to determine if its partition needs to be activated or + * deactivated. + * + * A partition is consider "awaiting activation" if our partition + * flags indicate it is not active and it has a heartbeat. A + * partition is considered "awaiting deactivation" if our partition + * flags indicate it is active but it has no heartbeat or it is not + * sending its heartbeat to us. + * + * To determine the heartbeat, the remote nasid must have a properly + * initialized reserved page. + */ +static void +xpc_identify_act_IRQ_req(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars *remote_vars; + u64 remote_rsvd_page_pa; + u64 remote_vars_pa; + partid_t partid; + struct xpc_partition *part; + enum xpc_retval ret; + + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); + if (ret != xpcSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->vars_pa; + partid = remote_rp->partid; + part = &xpc_partitions[partid]; + + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; + + ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); + if (ret != xpcSuccess) { + + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + + part->act_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, + remote_vars->heartbeat, remote_vars->heartbeating_to_mask); + + + if (part->act_state == XPC_P_INACTIVE) { + + part->remote_rp_pa = remote_rsvd_page_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", + part->remote_rp_pa); + + part->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat; + dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", + part->last_heartbeat); + + part->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part->remote_vars_part_pa); + + part->remote_act_nasid = remote_vars->act_nasid; + dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", + part->remote_act_nasid); + + part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; + dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", + part->remote_act_phys_cpuid); + + part->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part->remote_amos_page_pa); + + xpc_activate_partition(part); + + } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || + !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + + part->reactivate_nasid = nasid; + XPC_DEACTIVATE_PARTITION(part, xpcReactivating); + } +} + + +/* + * Loop through the activation AMO variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. + */ +int +xpc_identify_act_IRQ_sender(void) +{ + int word, bit; + u64 nasid_mask; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + AMO_t *act_amos; + struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; + + + act_amos = xpc_vars->act_amos; + + + /* scan through act AMO variable looking for non-zero entries */ + for (word = 0; word < XP_NASID_MASK_WORDS; word++) { + + nasid_mask = xpc_IPI_receive(&act_amos[word]); + if (nasid_mask == 0) { + /* no IRQs from nasids in this variable */ + continue; + } + + dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, + nasid_mask); + + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + rp->mach_nasids[word] |= nasid_mask; + + + /* locate the nasid(s) which sent interrupts */ + + for (bit = 0; bit < (8 * sizeof(u64)); bit++) { + if (nasid_mask & (1UL << bit)) { + n_IRQs_detected++; + nasid = XPC_NASID_FROM_W_B(word, bit); + dev_dbg(xpc_part, "interrupt from nasid %ld\n", + nasid); + xpc_identify_act_IRQ_req(nasid); + } + } + } + return n_IRQs_detected; +} + + +/* + * Mark specified partition as active. + */ +enum xpc_retval +xpc_mark_partition_active(struct xpc_partition *part) +{ + unsigned long irq_flags; + enum xpc_retval ret; + + + dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + if (part->act_state == XPC_P_ACTIVATING) { + part->act_state = XPC_P_ACTIVE; + ret = xpcSuccess; + } else { + DBUG_ON(part->reason == xpcSuccess); + ret = part->reason; + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + return ret; +} + + +/* + * Notify XPC that the partition is down. + */ +void +xpc_deactivate_partition(const int line, struct xpc_partition *part, + enum xpc_retval reason) +{ + unsigned long irq_flags; + partid_t partid = XPC_PARTID(part); + + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_INACTIVE) { + XPC_SET_REASON(part, reason, line); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + if (reason == xpcReactivating) { + /* we interrupt ourselves to reactivate partition */ + xpc_IPI_send_reactivate(part); + } + return; + } + if (part->act_state == XPC_P_DEACTIVATING) { + if ((part->reason == xpcUnloading && reason != xpcUnloading) || + reason == xpcReactivating) { + XPC_SET_REASON(part, reason, line); + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + return; + } + + part->act_state = XPC_P_DEACTIVATING; + XPC_SET_REASON(part, reason, line); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + XPC_DISALLOW_HB(partid, xpc_vars); + + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, + reason); + + xpc_partition_down(part, reason); +} + + +/* + * Mark specified partition as active. + */ +void +xpc_mark_partition_inactive(struct xpc_partition *part) +{ + unsigned long irq_flags; + + + dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", + XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; +} + + +/* + * SAL has provided a partition and machine mask. The partition mask + * contains a bit for each even nasid in our partition. The machine + * mask contains a bit for each even nasid in the entire machine. + * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + struct xpc_vars *remote_vars; + u64 remote_rsvd_page_pa; + u64 remote_vars_pa; + int region; + int max_regions; + int nasid; + struct xpc_rsvd_page *rp; + partid_t partid; + struct xpc_partition *part; + u64 *discovered_nasids; + enum xpc_retval ret; + + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) { + return; + } + remote_vars = (struct xpc_vars *) remote_rp; + + + discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); + + rp = (struct xpc_rsvd_page *) xpc_rsvd_page; + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. + */ +//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or +//>>> include/asm-ia64/sn/addrs.h +#define SH1_MAX_REGIONS 64 +#define SH2_MAX_REGIONS 256 + max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS; + + for (region = 0; region < max_regions; region++) { + + if ((volatile int) xpc_exiting) { + break; + } + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * sn_region_size * 2); + nasid < ((region + 1) * sn_region_size * 2); + nasid += 2) { + + if ((volatile int) xpc_exiting) { + break; + } + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + + if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + + /* pull over the reserved page structure */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rsvd_page_pa); + if (ret != xpcSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpcLocalPartid) { + break; + } + continue; + } + + remote_vars_pa = remote_rp->vars_pa; + + partid = remote_rp->partid; + part = &xpc_partitions[partid]; + + + /* pull over the cross partition variables */ + + ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); + if (ret != xpcSuccess) { + dev_dbg(xpc_part, "unable to get XPC variables " + "from nasid %d, reason=%d\n", nasid, + ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + continue; + } + + if (part->act_state != XPC_P_INACTIVE) { + dev_dbg(xpc_part, "partition %d on nasid %d is " + "already activating\n", partid, nasid); + break; + } + + /* + * Register the remote partition's AMOs with SAL so it + * can handle and cleanup errors within that address + * range should the remote partition go down. We don't + * unregister this range because it is difficult to + * tell when outstanding writes to the remote partition + * are finished and thus when it is thus safe to + * unregister. This should not result in wasted space + * in the SAL xp_addr_region table because we should + * get the same page for remote_act_amos_pa after + * module reloads and system reboots. + */ + if (sn_register_xp_addr_region( + remote_vars->amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_dbg(xpc_part, "partition %d failed to " + "register xp_addr region 0x%016lx\n", + partid, remote_vars->amos_page_pa); + + XPC_SET_REASON(part, xpcPhysAddrRegFailed, + __LINE__); + break; + } + + /* + * The remote nasid is valid and available. + * Send an interrupt to that nasid to notify + * it that we are ready to begin activation. + */ + dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " + "nasid %d, phys_cpuid 0x%x\n", + remote_vars->amos_page_pa, + remote_vars->act_nasid, + remote_vars->act_phys_cpuid); + + xpc_IPI_send_activate(remote_vars); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. + */ +enum xpc_retval +xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) +{ + struct xpc_partition *part; + u64 part_nasid_pa; + int bte_res; + + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) { + return xpcPartitionDown; + } + + part_nasid_pa = part->remote_rp_pa + + (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; + + bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), + L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + + return xpc_map_bte_errors(bte_res); +} + diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c new file mode 100644 index 0000000..78c13d6 --- /dev/null +++ b/arch/ia64/sn/kernel/xpnet.c @@ -0,0 +1,715 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + */ + + +/* + * Cross Partition Network Interface (XPNET) support + * + * XPNET provides a virtual network layered on top of the Cross + * Partition communication layer. + * + * XPNET provides direct point-to-point and broadcast-like support + * for an ethernet-like device. The ethernet broadcast medium is + * replaced with a point-to-point message structure which passes + * pointers to a DMA-capable block that a remote partition should + * retrieve and pass to the upper level networking layer. + * + */ + + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <asm/sn/bte.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/types.h> +#include <asm/atomic.h> +#include <asm/sn/xp.h> + + +/* + * The message payload transferred by XPC. + * + * buf_pa is the physical address where the DMA should pull from. + * + * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a + * cacheline boundary. To accomplish this, we record the number of + * bytes from the beginning of the first cacheline to the first useful + * byte of the skb (leadin_ignore) and the number of bytes from the + * last useful byte of the skb to the end of the last cacheline + * (tailout_ignore). + * + * size is the number of bytes to transfer which includes the skb->len + * (useful bytes of the senders skb) plus the leadin and tailout + */ +struct xpnet_message { + u16 version; /* Version for this message */ + u16 embedded_bytes; /* #of bytes embedded in XPC message */ + u32 magic; /* Special number indicating this is xpnet */ + u64 buf_pa; /* phys address of buffer to retrieve */ + u32 size; /* #of bytes in buffer */ + u8 leadin_ignore; /* #of bytes to ignore at the beginning */ + u8 tailout_ignore; /* #of bytes to ignore at the end */ + unsigned char data; /* body of small packets */ +}; + +/* + * Determine the size of our message, the cacheline aligned size, + * and then the number of message will request from XPC. + * + * XPC expects each message to exist in an individual cacheline. + */ +#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) +#define XPNET_MSG_DATA_MAX \ + (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) +#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) +#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE) + + +#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) +#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) + +/* + * Version number of XPNET implementation. XPNET can always talk to versions + * with same major #, and never talk to versions with a different version. + */ +#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) +#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) + +#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */ +#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */ +#define XPNET_MAGIC 0x88786984 /* "XNET" */ + +#define XPNET_VALID_MSG(_m) \ + ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ + && (msg->magic == XPNET_MAGIC)) + +#define XPNET_DEVICE_NAME "xp0" + + +/* + * When messages are queued with xpc_send_notify, a kmalloc'd buffer + * of the following type is passed as a notification cookie. When the + * notification function is called, we use the cookie to decide + * whether all outstanding message sends have completed. The skb can + * then be released. + */ +struct xpnet_pending_msg { + struct list_head free_list; + struct sk_buff *skb; + atomic_t use_count; +}; + +/* driver specific structure pointed to by the device structure */ +struct xpnet_dev_private { + struct net_device_stats stats; +}; + +struct net_device *xpnet_device; + +/* + * When we are notified of other partitions activating, we add them to + * our bitmask of partitions to which we broadcast. + */ +static u64 xpnet_broadcast_partitions; +/* protect above */ +static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED; + +/* + * Since the Block Transfer Engine (BTE) is being used for the transfer + * and it relies upon cache-line size transfers, we need to reserve at + * least one cache-line for head and tail alignment. The BTE is + * limited to 8MB transfers. + * + * Testing has shown that changing MTU to greater than 64KB has no effect + * on TCP as the two sides negotiate a Max Segment Size that is limited + * to 64K. Other protocols May use packets greater than this, but for + * now, the default is 64KB. + */ +#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 32KB has been determined to be the ideal */ +#define XPNET_DEF_MTU (0x8000UL) + + +/* + * The partition id is encapsulated in the MAC address. The following + * define locates the octet the partid is in. + */ +#define XPNET_PARTID_OCTET 1 +#define XPNET_LICENSE_OCTET 2 + + +/* + * Define the XPNET debug device structure that is to be used with dev_dbg(), + * dev_err(), dev_warn(), and dev_info(). + */ +struct device_driver xpnet_dbg_name = { + .name = "xpnet" +}; + +struct device xpnet_dbg_subname = { + .bus_id = {0}, /* set to "" */ + .driver = &xpnet_dbg_name +}; + +struct device *xpnet = &xpnet_dbg_subname; + +/* + * Packet was recevied by XPC and forwarded to us. + */ +static void +xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) +{ + struct sk_buff *skb; + bte_result_t bret; + struct xpnet_dev_private *priv = + (struct xpnet_dev_private *) xpnet_device->priv; + + + if (!XPNET_VALID_MSG(msg)) { + /* + * Packet with a different XPC version. Ignore. + */ + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + + /* reserve an extra cache line */ + skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); + if (!skb) { + dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", + msg->size + L1_CACHE_BYTES); + + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + + /* + * The allocated skb has some reserved space. + * In order to use bte_copy, we need to get the + * skb->data pointer moved forward. + */ + skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & + (L1_CACHE_BYTES - 1)) + + msg->leadin_ignore)); + + /* + * Update the tail pointer to indicate data actually + * transferred. + */ + skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); + + /* + * Move the data over from the the other side. + */ + if ((XPNET_VERSION_MINOR(msg->version) == 1) && + (msg->embedded_bytes != 0)) { + dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " + "%lu)\n", skb->data, &msg->data, + (size_t) msg->embedded_bytes); + + memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes); + } else { + dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" + "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, + (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), + msg->size); + + bret = bte_copy(msg->buf_pa, + __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), + msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + + if (bret != BTE_SUCCESS) { + // >>> Need better way of cleaning skb. Currently skb + // >>> appears in_use and we can't just call + // >>> dev_kfree_skb. + dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " + "error=0x%x\n", (void *)msg->buf_pa, + (void *)__pa((u64)skb->data & + ~(L1_CACHE_BYTES - 1)), + msg->size, bret); + + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + } + + dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *) skb->head, + (void *) skb->data, (void *) skb->tail, (void *) skb->end, + skb->len); + + skb->dev = xpnet_device; + skb->protocol = eth_type_trans(skb, xpnet_device); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p " + "skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n", + (void *) skb->head, (void *) skb->data, (void *) skb->tail, + (void *) skb->end, skb->len); + + + xpnet_device->last_rx = jiffies; + priv->stats.rx_packets++; + priv->stats.rx_bytes += skb->len + ETH_HLEN; + + netif_rx_ni(skb); + xpc_received(partid, channel, (void *) msg); +} + + +/* + * This is the handler which XPC calls during any sort of change in + * state or message reception on a connection. + */ +static void +xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel, + void *data, void *key) +{ + long bp; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch(reason) { + case xpcMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *) data); + break; + + case xpcConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + xpnet_broadcast_partitions |= 1UL << (partid -1 ); + bp = xpnet_broadcast_partitions; + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connection created to partition %d; " + "xpnet_broadcast_partitions=0x%lx\n", + xpnet_device->name, partid, bp); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + xpnet_broadcast_partitions &= ~(1UL << (partid -1 )); + bp = xpnet_broadcast_partitions; + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bp == 0) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d; " + "xpnet_broadcast_partitions=0x%lx\n", + xpnet_device->name, partid, bp); + break; + + } +} + + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xpc_retval ret; + + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, " + "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, + XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpcSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + return 0; +} + + +static int +xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + /* 68 comes from min TCP+IP+MAC header */ + if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { + dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " + "between 68 and %ld\n", dev->name, new_mtu, + XPNET_MAX_MTU); + return -EINVAL; + } + + dev->mtu = new_mtu; + dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); + return 0; +} + + +/* + * Required for the net_device structure. + */ +static int +xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map) +{ + return 0; +} + + +/* + * Return statistics to the caller. + */ +static struct net_device_stats * +xpnet_dev_get_stats(struct net_device *dev) +{ + struct xpnet_dev_private *priv; + + + priv = (struct xpnet_dev_private *) dev->priv; + + return &priv->stats; +} + + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. + */ +static void +xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel, + void *__qm) +{ + struct xpnet_pending_msg *queued_msg = + (struct xpnet_pending_msg *) __qm; + + + DBUG_ON(queued_msg == NULL); + + dev_dbg(xpnet, "message to %d notified with reason %d\n", + partid, reason); + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "all acks for skb->head=-x%p\n", + (void *) queued_msg->skb->head); + + dev_kfree_skb_any(queued_msg->skb); + kfree(queued_msg); + } +} + + +/* + * Network layer has formatted a packet (skb) and is ready to place it + * "on the wire". Prepare and send an xpnet_message to all partitions + * which have connected with us and are targets of this packet. + * + * MAC-NOTE: For the XPNET driver, the MAC address contains the + * destination partition_id. If the destination partition id word + * is 0xff, this packet is to broadcast to all partitions. + */ +static int +xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xpnet_pending_msg *queued_msg; + enum xpc_retval ret; + struct xpnet_message *msg; + u64 start_addr, end_addr; + long dp; + u8 second_mac_octet; + partid_t dest_partid; + struct xpnet_dev_private *priv; + u16 embedded_bytes; + + + priv = (struct xpnet_dev_private *) dev->priv; + + + dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *) skb->head, + (void *) skb->data, (void *) skb->tail, (void *) skb->end, + skb->len); + + + /* + * The xpnet_pending_msg tracks how many outstanding + * xpc_send_notifies are relying on this skb. When none + * remain, release the skb. + */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + priv->stats.tx_errors++; + + return -ENOMEM; + } + + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64) skb->tail); + + /* calculate how many bytes to embed in the XPC message */ + embedded_bytes = 0; + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + + second_mac_octet = skb->data[XPNET_PARTID_OCTET]; + if (second_mac_octet == 0xff) { + /* we are being asked to broadcast to all partitions */ + dp = xpnet_broadcast_partitions; + } else if (second_mac_octet != 0) { + dp = xpnet_broadcast_partitions & + (1UL << (second_mac_octet - 1)); + } else { + /* 0 is an invalid partid. Ignore */ + dp = 0; + } + dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); + + /* + * If we wanted to allow promiscous mode to work like an + * unswitched network, this would be a good point to OR in a + * mask of partitions which should be receiving all packets. + */ + + /* + * Main send loop. + */ + for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; + dest_partid++) { + + + if (!(dp & (1UL << (dest_partid - 1)))) { + /* not destined for this partition */ + continue; + } + + /* remove this partition from the destinations mask */ + dp &= ~(1UL << (dest_partid - 1)); + + + /* found a partition to send to */ + + ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, + XPC_NOWAIT, (void **)&msg); + if (unlikely(ret != xpcSuccess)) { + continue; + } + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t) embedded_bytes); + memcpy(&msg->data, skb->data, (size_t) embedded_bytes); + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore = (u64) skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64) skb->tail; + msg->buf_pa = __pa(start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa=" + "0x%lx, msg->size=%u, msg->leadin_ignore=%u, " + "msg->tailout_ignore=%u\n", dest_partid, + XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, + xpnet_send_completed, queued_msg); + if (unlikely(ret != xpcSuccess)) { + atomic_dec(&queued_msg->use_count); + continue; + } + + } + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "no partitions to receive packet destined for " + "%d\n", dest_partid); + + + dev_kfree_skb(skb); + kfree(queued_msg); + } + + priv->stats.tx_packets++; + priv->stats.tx_bytes += skb->len; + + return 0; +} + + +/* + * Deal with transmit timeouts coming from the network layer. + */ +static void +xpnet_dev_tx_timeout (struct net_device *dev) +{ + struct xpnet_dev_private *priv; + + + priv = (struct xpnet_dev_private *) dev->priv; + + priv->stats.tx_errors++; + return; +} + + +static int __init +xpnet_init(void) +{ + int i; + u32 license_num; + int result = -ENOMEM; + + + dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + + /* + * use ether_setup() to init the majority of our device + * structure and then override the necessary pieces. + */ + xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), + XPNET_DEVICE_NAME, ether_setup); + if (xpnet_device == NULL) { + return -ENOMEM; + } + + netif_carrier_off(xpnet_device); + + xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->change_mtu = xpnet_dev_change_mtu; + xpnet_device->open = xpnet_dev_open; + xpnet_device->get_stats = xpnet_dev_get_stats; + xpnet_device->stop = xpnet_dev_stop; + xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit; + xpnet_device->tx_timeout = xpnet_dev_tx_timeout; + xpnet_device->set_config = xpnet_dev_set_config; + + /* + * Multicast assumes the LSB of the first octet is set for multicast + * MAC addresses. We chose the first octet of the MAC to be unlikely + * to collide with any vendor's officially issued MAC. + */ + xpnet_device->dev_addr[0] = 0xfe; + xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; + license_num = sn_partition_serial_number_val(); + for (i = 3; i >= 0; i--) { + xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = + license_num & 0xff; + license_num = license_num >> 8; + } + + /* + * ether_setup() sets this to a multicast device. We are + * really not supporting multicast at this time. + */ + xpnet_device->flags &= ~IFF_MULTICAST; + + /* + * No need to checksum as it is a DMA transfer. The BTE will + * report an error if the data is not retrievable and the + * packet will be dropped. + */ + xpnet_device->features = NETIF_F_NO_CSUM; + + result = register_netdev(xpnet_device); + if (result != 0) { + free_netdev(xpnet_device); + } + + return result; +} +module_init(xpnet_init); + + +static void __exit +xpnet_exit(void) +{ + dev_info(xpnet, "unregistering network device %s\n", + xpnet_device[0].name); + + unregister_netdev(xpnet_device); + + free_netdev(xpnet_device); +} +module_exit(xpnet_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); +MODULE_LICENSE("GPL"); + diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index c906859..64af2b2 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -301,7 +301,7 @@ void sn_dma_flush(uint64_t addr) spin_lock_irqsave(&((struct sn_flush_device_list *)p)-> sfdl_flush_lock, flags); - p->sfdl_flush_value = 0; + *p->sfdl_flush_addr = 0; /* force an interrupt. */ *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 54a0dd4..8dae9eb 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -431,7 +431,7 @@ tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size) ca_dmamap->cad_dma_addr = bus_addr; ca_dmamap->cad_gart_size = entries; ca_dmamap->cad_gart_entry = entry; - list_add(&ca_dmamap->cad_list, &tioca_kern->ca_list); + list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps); if (xio_addr % ps) { tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 7dbf997..5649fba 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:05:59 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:23 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -135,7 +137,6 @@ CONFIG_PARPORT_1284=y # CONFIG_AMIGA_FLOPPY=y CONFIG_AMIGA_Z2RAM=y -# CONFIG_BLK_DEV_XD is not set # CONFIG_PARIDE is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y @@ -223,17 +224,12 @@ CONFIG_SCSI_CONSTANTS=y # # SCSI low-level drivers # -# CONFIG_SCSI_7000FASST is not set # CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_IN2000 is not set # CONFIG_SCSI_SATA is not set -# CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_GENERIC_NCR5380 is not set # CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set # CONFIG_SCSI_PPA is not set @@ -244,7 +240,6 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_QLOGIC_FAS is not set # CONFIG_SCSI_SYM53C416 is not set # CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_DEBUG is not set CONFIG_A3000_SCSI=y CONFIG_A2091_SCSI=y @@ -492,7 +487,6 @@ CONFIG_HYDRA=m CONFIG_ZORRO8390=m CONFIG_APNE=m # CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set # CONFIG_NET_VENDOR_SMC is not set # CONFIG_NET_VENDOR_RACAL is not set # CONFIG_AT1700 is not set @@ -620,7 +614,6 @@ CONFIG_SERIO_SERPORT=m # CONFIG_SERIO_PARKBD is not set # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 505a296..63024b0 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:00 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:27 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -497,7 +499,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 617aa73..6433da2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:18 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:32 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -531,7 +533,6 @@ CONFIG_SERIO_SERPORT=y CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index b501db5..da2a23a 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:19 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:37 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -496,7 +498,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 2bf6cef..5125188 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:21 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:41 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -498,7 +500,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 7074f85..15b80ab 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:24 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:45 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -540,7 +542,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 61f09bc..f0d5534 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:28 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:50 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -498,7 +500,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 69c0100..1d5c46f 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:31 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:53 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -497,7 +499,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 550ec260..8562386 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:34 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:58 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -125,7 +127,6 @@ CONFIG_FW_LOADER=m # Block devices # # CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m @@ -210,17 +211,12 @@ CONFIG_SCSI_CONSTANTS=y # # SCSI low-level drivers # -# CONFIG_SCSI_7000FASST is not set # CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_IN2000 is not set # CONFIG_SCSI_SATA is not set -# CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_GENERIC_NCR5380 is not set # CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set # CONFIG_SCSI_NCR53C406A is not set @@ -229,7 +225,6 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_QLOGIC_FAS is not set # CONFIG_SCSI_SYM53C416 is not set # CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_DEBUG is not set # @@ -466,7 +461,6 @@ CONFIG_EQUALIZER=m CONFIG_NET_ETHERNET=y CONFIG_MII=m # CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set # CONFIG_NET_VENDOR_SMC is not set # CONFIG_NET_VENDOR_RACAL is not set # CONFIG_AT1700 is not set @@ -570,7 +564,6 @@ CONFIG_SERIO_Q40KBD=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5b5a619..af903b5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:37 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:35:02 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -171,7 +173,6 @@ CONFIG_SCSI_CONSTANTS=y # # CONFIG_SCSI_SATA is not set # CONFIG_SCSI_DEBUG is not set -CONFIG_SUN3_SCSI=y # # Multi-device support (RAID and LVM) @@ -487,7 +488,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 704e423..997143b 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:06:40 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:35:06 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -35,6 +35,8 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -497,7 +499,6 @@ CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=m # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68k/defconfig b/arch/m68k/defconfig index 5b2296e..7d935e4 100644 --- a/arch/m68k/defconfig +++ b/arch/m68k/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc2-m68k -# Tue Apr 5 14:05:31 2005 +# Linux kernel version: 2.6.12-rc6-m68k +# Tue Jun 7 20:34:17 2005 # CONFIG_M68K=y CONFIG_MMU=y @@ -33,6 +33,8 @@ CONFIG_KOBJECT_UEVENT=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -355,7 +357,6 @@ CONFIG_SERIO_SERPORT=y CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index fc4615b..e729bd2 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -534,6 +534,11 @@ endchoice endmenu +config ISA_DMA_API + bool + depends on !M5272 + default y + menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" config PCI diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 2b6c9d3..c4a33f2 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -45,11 +45,13 @@ asmlinkage void ret_from_fork(void); */ void default_idle(void) { - while(1) { - if (need_resched()) - __asm__("stop #0x2000" : : : "cc"); - schedule(); + local_irq_disable(); + while (!need_resched()) { + /* This stop will re-enable interrupts */ + __asm__("stop #0x2000" : : : "cc"); + local_irq_disable(); } + local_irq_enable(); } void (*idle)(void) = default_idle; @@ -63,7 +65,12 @@ void (*idle)(void) = default_idle; void cpu_idle(void) { /* endless idle loop with no priority at all */ - idle(); + while (1) { + idle(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } void machine_restart(char * __unused) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5e666aa..ab99446 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1656,3 +1656,7 @@ config GENERIC_HARDIRQS config GENERIC_IRQ_PROBE bool default y + +config ISA_DMA_API + bool + default y diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index a2f899c..92e70ca 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -301,25 +301,38 @@ out: return ret; } +static inline int audit_arch(void) +{ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#ifdef CONFIG_MIPS64 + if (!(current->thread.mflags & MF_32BIT_REGS)) + return AUDIT_ARCH_MIPSEL64; +#endif /* MIPS64 */ + return AUDIT_ARCH_MIPSEL; + +#else /* big endian... */ +#ifdef CONFIG_MIPS64 + if (!(current->thread.mflags & MF_32BIT_REGS)) + return AUDIT_ARCH_MIPS64; +#endif /* MIPS64 */ + return AUDIT_ARCH_MIPS; + +#endif /* endian */ +} + /* * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) { - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->regs[2], - regs->regs[4], regs->regs[5], - regs->regs[6], regs->regs[7]); - else - audit_syscall_exit(current, regs->regs[2]); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->regs[2]), regs->regs[2]); if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; if (!(current->ptrace & PT_PTRACED)) - return; + goto out; /* The 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ @@ -335,4 +348,9 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, audit_arch(), regs->regs[2], + regs->regs[4], regs->regs[5], + regs->regs[6], regs->regs[7]); } diff --git a/arch/mips/vr41xx/common/pmu.c b/arch/mips/vr41xx/common/pmu.c index c5f1043..53166f3 100644 --- a/arch/mips/vr41xx/common/pmu.c +++ b/arch/mips/vr41xx/common/pmu.c @@ -1,7 +1,7 @@ /* * pmu.c, Power Management Unit routines for NEC VR4100 series. * - * Copyright (C) 2003-2004 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> + * Copyright (C) 2003-2005 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/errno.h> #include <linux/init.h> +#include <linux/ioport.h> #include <linux/kernel.h> #include <linux/smp.h> #include <linux/types.h> @@ -27,20 +29,31 @@ #include <asm/reboot.h> #include <asm/system.h> -#define PMUCNT2REG KSEG1ADDR(0x0f0000c6) +#define PMU_TYPE1_BASE 0x0b0000a0UL +#define PMU_TYPE1_SIZE 0x0eUL + +#define PMU_TYPE2_BASE 0x0f0000c0UL +#define PMU_TYPE2_SIZE 0x10UL + +#define PMUCNT2REG 0x06 #define SOFTRST 0x0010 +static void __iomem *pmu_base; + +#define pmu_read(offset) readw(pmu_base + (offset)) +#define pmu_write(offset, value) writew((value), pmu_base + (offset)) + static inline void software_reset(void) { - uint16_t val; + uint16_t pmucnt2; switch (current_cpu_data.cputype) { case CPU_VR4122: case CPU_VR4131: case CPU_VR4133: - val = readw(PMUCNT2REG); - val |= SOFTRST; - writew(val, PMUCNT2REG); + pmucnt2 = pmu_read(PMUCNT2REG); + pmucnt2 |= SOFTRST; + pmu_write(PMUCNT2REG, pmucnt2); break; default: break; @@ -71,6 +84,34 @@ static void vr41xx_power_off(void) static int __init vr41xx_pmu_init(void) { + unsigned long start, size; + + switch (current_cpu_data.cputype) { + case CPU_VR4111: + case CPU_VR4121: + start = PMU_TYPE1_BASE; + size = PMU_TYPE1_SIZE; + break; + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + start = PMU_TYPE2_BASE; + size = PMU_TYPE2_SIZE; + break; + default: + printk("Unexpected CPU of NEC VR4100 series\n"); + return -ENODEV; + } + + if (request_mem_region(start, size, "PMU") == NULL) + return -EBUSY; + + pmu_base = ioremap(start, size); + if (pmu_base == NULL) { + release_mem_region(start, size); + return -EBUSY; + } + _machine_restart = vr41xx_restart; _machine_halt = vr41xx_halt; _machine_power_off = vr41xx_power_off; @@ -78,4 +119,4 @@ static int __init vr41xx_pmu_init(void) return 0; } -early_initcall(vr41xx_pmu_init); +core_initcall(vr41xx_pmu_init); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5b5cd00..e7e7c56 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -45,6 +45,10 @@ config GENERIC_IRQ_PROBE config PM bool +config ISA_DMA_API + bool + default y + source "init/Kconfig" diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index c3d9413..6e6377a 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -43,6 +43,10 @@ config GENERIC_NVRAM bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + source "init/Kconfig" menu "Processor" @@ -73,9 +77,11 @@ config 44x bool "44x" config POWER3 + select PPC_FPU bool "POWER3" config POWER4 + select PPC_FPU bool "POWER4 and 970 (G5)" config 8xx @@ -1079,6 +1085,10 @@ source kernel/power/Kconfig endmenu +config ISA_DMA_API + bool + default y + menu "Bus options" config ISA @@ -1133,12 +1143,12 @@ config PCI_QSPAN config PCI_8260 bool - depends on PCI && 8260 && !8272 + depends on PCI && 8260 default y config 8260_PCI9 bool " Enable workaround for MPC826x erratum PCI 9" - depends on PCI_8260 + depends on PCI_8260 && !ADS8272 default y choice diff --git a/arch/ppc/boot/images/Makefile b/arch/ppc/boot/images/Makefile index f850fb0..c9ac5f5 100644 --- a/arch/ppc/boot/images/Makefile +++ b/arch/ppc/boot/images/Makefile @@ -22,7 +22,8 @@ targets += uImage $(obj)/uImage: $(obj)/vmlinux.gz $(Q)rm -f $@ $(call if_changed,uimage) - @echo ' Image: $@' $(if $(wildcard $@),'is ready','not made') + @echo -n ' Image: $@ ' + @if [ -f $@ ]; then echo 'is ready' ; else echo 'not made'; fi # Files generated that shall be removed upon make clean clean-files := sImage vmapus vmlinux* miboot* zImage* uImage diff --git a/arch/ppc/configs/mpc8555_cds_defconfig b/arch/ppc/configs/mpc8555_cds_defconfig index 728bd9e..15abebf 100644 --- a/arch/ppc/configs/mpc8555_cds_defconfig +++ b/arch/ppc/configs/mpc8555_cds_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc1 -# Thu Jan 20 01:25:35 2005 +# Linux kernel version: 2.6.12-rc4 +# Tue May 17 11:56:01 2005 # CONFIG_MMU=y CONFIG_GENERIC_HARDIRQS=y @@ -11,6 +11,7 @@ CONFIG_HAVE_DEC_LOCK=y CONFIG_PPC=y CONFIG_PPC32=y CONFIG_GENERIC_NVRAM=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # # Code maturity level options @@ -18,6 +19,7 @@ CONFIG_GENERIC_NVRAM=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -29,12 +31,14 @@ CONFIG_SYSVIPC=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=14 # CONFIG_HOTPLUG is not set CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set CONFIG_EMBEDDED=y # CONFIG_KALLSYMS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y # CONFIG_EPOLL is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -44,6 +48,7 @@ CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -62,10 +67,12 @@ CONFIG_CC_ALIGN_JUMPS=0 CONFIG_E500=y CONFIG_BOOKE=y CONFIG_FSL_BOOKE=y +# CONFIG_PHYS_64BIT is not set CONFIG_SPE=y CONFIG_MATH_EMULATION=y # CONFIG_CPU_FREQ is not set CONFIG_PPC_GEN550=y +# CONFIG_PM is not set CONFIG_85xx=y CONFIG_PPC_INDIRECT_PCI_BE=y @@ -76,6 +83,7 @@ CONFIG_PPC_INDIRECT_PCI_BE=y CONFIG_MPC8555_CDS=y # CONFIG_MPC8560_ADS is not set # CONFIG_SBC8560 is not set +# CONFIG_STX_GP3 is not set CONFIG_MPC8555=y CONFIG_85xx_PCI2=y @@ -90,6 +98,7 @@ CONFIG_CPM2=y CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set # CONFIG_CMDLINE_BOOL is not set +CONFIG_ISA_DMA_API=y # # Bus options @@ -105,10 +114,6 @@ CONFIG_PCI_NAMES=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # Advanced setup # # CONFIG_ADVANCED_OPTIONS is not set @@ -180,7 +185,59 @@ CONFIG_IOSCHED_CFQ=y # # ATA/ATAPI/MFM/RLL support # -# CONFIG_IDE is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +# CONFIG_BLK_DEV_IDECD is not set +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_SL82C105 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set # # SCSI device support @@ -220,7 +277,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -370,14 +426,6 @@ CONFIG_INPUT=y # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -# CONFIG_SERIO is not set -# CONFIG_SERIO_I8042 is not set - -# # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set @@ -387,6 +435,13 @@ CONFIG_SOUND_GAMEPORT=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y + +# # Character devices # # CONFIG_VT is not set @@ -406,6 +461,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_CPM is not set +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -434,6 +490,11 @@ CONFIG_GEN_RTC=y # CONFIG_RAW_DRIVER is not set # +# TPM devices +# +# CONFIG_TCG_TPM is not set + +# # I2C support # CONFIG_I2C=y @@ -456,11 +517,11 @@ CONFIG_I2C_CHARDEV=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set CONFIG_I2C_MPC=y # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set @@ -483,7 +544,9 @@ CONFIG_I2C_MPC=y # CONFIG_SENSORS_ASB100 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set # CONFIG_SENSORS_LM63 is not set # CONFIG_SENSORS_LM75 is not set @@ -494,9 +557,11 @@ CONFIG_I2C_MPC=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_W83781D is not set @@ -506,10 +571,12 @@ CONFIG_I2C_MPC=y # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_M41T00 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -538,7 +605,6 @@ CONFIG_I2C_MPC=y # Graphics support # # CONFIG_FB is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -548,13 +614,9 @@ CONFIG_I2C_MPC=y # # USB support # -# CONFIG_USB is not set CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y - -# -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information -# +# CONFIG_USB is not set # # USB Gadget Support @@ -585,6 +647,10 @@ CONFIG_JBD=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set + +# +# XFS support +# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -646,7 +712,6 @@ CONFIG_NFS_FS=y # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y -# CONFIG_EXPORTFS is not set CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -698,7 +763,9 @@ CONFIG_CRC32=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set # CONFIG_DEBUG_KERNEL is not set +CONFIG_LOG_BUF_SHIFT=14 # CONFIG_KGDB_CONSOLE is not set # CONFIG_SERIAL_TEXT_DEBUG is not set diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 8aa5e8c..d44b7dc 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -838,6 +838,17 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, }, + { /* 405EP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x51210000, + .cpu_name = "405EP", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .icache_bsize = 32, + .dcache_bsize = 32, + }, #endif /* CONFIG_40x */ #ifdef CONFIG_44x diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S index 9b6a8e5..6c7ae60 100644 --- a/arch/ppc/kernel/head_44x.S +++ b/arch/ppc/kernel/head_44x.S @@ -330,8 +330,9 @@ interrupt_base: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 - beq 3f + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -464,8 +465,9 @@ interrupt_base: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 - beq 3f + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -533,8 +535,9 @@ interrupt_base: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 - beq 3f + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S index f22ddce..ce36e88 100644 --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -232,7 +232,8 @@ skpinv: addi r6,r6,1 /* Increment */ tlbwe /* 7. Jump to KERNELBASE mapping */ - li r7,0 + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l bl 1f /* Find our address */ 1: mflr r9 rlwimi r6,r9,0,20,31 @@ -293,6 +294,18 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_HID0, r2 #endif +#if !defined(CONFIG_BDI_SWITCH) + /* + * The Abatron BDI JTAG debugger does not tolerate others + * mucking with the debug registers. + */ + lis r2,DBCR0_IDM@h + mtspr SPRN_DBCR0,r2 + /* clear any residual debug events */ + li r2,-1 + mtspr SPRN_DBSR,r2 +#endif + /* * This is where the main kernel code starts. */ diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index e4f1615..7329ef1 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -619,7 +619,7 @@ _GLOBAL(flush_instruction_cache) _GLOBAL(flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFSET(PPC_FEATURE_UNIFIED_CACHE) +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) li r5,L1_CACHE_LINE_SIZE-1 andc r3,r3,r5 subf r4,r3,r4 @@ -736,7 +736,7 @@ _GLOBAL(flush_dcache_all) _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFSET(PPC_FEATURE_UNIFIED_CACHE) +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) rlwinm r3,r3,0,0,19 /* Get page base address */ li r4,4096/L1_CACHE_LINE_SIZE /* Number of lines in a page */ mtctr r4 @@ -764,7 +764,7 @@ END_FTR_SECTION_IFSET(PPC_FEATURE_UNIFIED_CACHE) _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFSET(PPC_FEATURE_UNIFIED_CACHE) +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) mfmsr r10 rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index e97ce63..5c20266 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -221,27 +221,26 @@ int show_cpuinfo(struct seq_file *m, void *v) return err; } - switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; - case 0x1008: /* 740P/750P ?? */ - maj = ((pvr >> 8) & 0xFF) - 1; - min = pvr & 0xFF; - break; - case 0x8083: /* e300 */ - maj = PVR_MAJ(pvr); - min = PVR_MIN(pvr); - break; - case 0x8020: /* e500 */ + /* If we are a Freescale core do a simple check so + * we dont have to keep adding cases in the future */ + if ((PVR_VER(pvr) & 0x8000) == 0x8000) { maj = PVR_MAJ(pvr); min = PVR_MIN(pvr); - break; - default: - maj = (pvr >> 8) & 0xFF; - min = pvr & 0xFF; - break; + } else { + switch (PVR_VER(pvr)) { + case 0x0020: /* 403 family */ + maj = PVR_MAJ(pvr) + 1; + min = PVR_MIN(pvr); + break; + case 0x1008: /* 740P/750P ?? */ + maj = ((pvr >> 8) & 0xFF) - 1; + min = pvr & 0xFF; + break; + default: + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + break; + } } seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", @@ -500,7 +499,7 @@ static int __init set_preferred_console(void) { struct device_node *prom_stdout; char *name; - int offset; + int offset = 0; if (of_stdout_device == NULL) return -ENODEV; @@ -754,6 +753,8 @@ void __init setup_arch(char **cmdline_p) strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; + parse_early_param(); + /* set up the bootmem stuff with available memory */ do_init_bootmem(); if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index f8e7e32..c65731e 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -408,12 +408,7 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) /* Early out if we are an invalid form of lswx */ if ((instword & INST_STRING_MASK) == INST_LSWX) - if ((rA >= rT) || (NB_RB >= rT) || (rT == rA) || (rT == NB_RB)) - return -EINVAL; - - /* Early out if we are an invalid form of lswi */ - if ((instword & INST_STRING_MASK) == INST_LSWI) - if ((rA >= rT) || (rT == rA)) + if ((rT == rA) || (rT == NB_RB)) return -EINVAL; EA = (rA == 0) ? 0 : regs->gpr[rA]; diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 0c0e714..9353584 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS __init_end = .; . = ALIGN(4096); + _sextratext = .; __pmac_begin = .; .pmac.text : { *(.pmac.text) } .pmac.data : { *(.pmac.data) } @@ -171,6 +172,7 @@ SECTIONS .openfirmware.data : { *(.openfirmware.data) } . = ALIGN(4096); __openfirmware_end = .; + _eextratext = .; __bss_start = .; .bss : diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S index 8d08a2e..36c9b97 100644 --- a/arch/ppc/lib/string.S +++ b/arch/ppc/lib/string.S @@ -446,6 +446,7 @@ _GLOBAL(__copy_tofrom_user) #ifdef CONFIG_8xx /* Don't use prefetch on 8xx */ mtctr r0 + li r0,0 53: COPY_16_BYTES_WITHEX(0) bdnz 53b @@ -564,7 +565,9 @@ _GLOBAL(__copy_tofrom_user) /* or write fault in cacheline loop */ 105: li r9,1 92: li r3,LG_CACHELINE_BYTES - b 99f + mfctr r8 + add r0,r0,r8 + b 106f /* read fault in final word loop */ 108: li r9,0 b 93f @@ -585,7 +588,7 @@ _GLOBAL(__copy_tofrom_user) * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. */ 99: mfctr r0 - slw r3,r0,r3 +106: slw r3,r0,r3 add. r3,r3,r5 beq 120f /* shouldn't happen */ cmpwi 0,r9,0 diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index be02a7f..363c157 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -179,6 +179,7 @@ void free_initmem(void) if (!have_of) FREESEC(openfirmware); printk("\n"); + ppc_md.progress = NULL; #undef FREESEC } diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.c b/arch/ppc/platforms/83xx/mpc834x_sys.c index b3b0f51..e6348b5 100644 --- a/arch/ppc/platforms/83xx/mpc834x_sys.c +++ b/arch/ppc/platforms/83xx/mpc834x_sys.c @@ -127,7 +127,6 @@ mpc834x_sys_map_io(void) { /* we steal the lowest ioremap addr for virt space */ io_block_mapping(VIRT_IMMRBAR, immrbar, 1024*1024, _PAGE_IO); - io_block_mapping(BCSR_VIRT_ADDR, BCSR_PHYS_ADDR, BCSR_SIZE, _PAGE_IO); } int diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.h b/arch/ppc/platforms/83xx/mpc834x_sys.h index f4d055a..a2f6e49 100644 --- a/arch/ppc/platforms/83xx/mpc834x_sys.h +++ b/arch/ppc/platforms/83xx/mpc834x_sys.h @@ -26,9 +26,14 @@ #define VIRT_IMMRBAR ((uint)0xfe000000) #define BCSR_PHYS_ADDR ((uint)0xf8000000) -#define BCSR_VIRT_ADDR ((uint)0xfe100000) #define BCSR_SIZE ((uint)(32 * 1024)) +#define BCSR_MISC_REG2_OFF 0x07 +#define BCSR_MISC_REG2_PORESET 0x01 + +#define BCSR_MISC_REG3_OFF 0x08 +#define BCSR_MISC_REG3_CNFLOCK 0x80 + #ifdef CONFIG_PCI /* PCI interrupt controller */ #define PIRQA MPC83xx_IRQ_IRQ4 diff --git a/arch/ppc/platforms/85xx/mpc8540_ads.c b/arch/ppc/platforms/85xx/mpc8540_ads.c index 4d857d6..583838a 100644 --- a/arch/ppc/platforms/85xx/mpc8540_ads.c +++ b/arch/ppc/platforms/85xx/mpc8540_ads.c @@ -210,6 +210,9 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) + ppc_md.early_serial_map = mpc85xx_early_serial_map; +#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("mpc8540ads_init(): exit", 0); diff --git a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c index 6c020d6..e7cfa49 100644 --- a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c +++ b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c @@ -44,6 +44,7 @@ #include <asm/machdep.h> #include <asm/prom.h> #include <asm/open_pic.h> +#include <asm/i8259.h> #include <asm/bootinfo.h> #include <asm/pci-bridge.h> #include <asm/mpc85xx.h> @@ -181,6 +182,7 @@ void __init mpc85xx_cds_init_IRQ(void) { bd_t *binfo = (bd_t *) __res; + int i; /* Determine the Physical Address of the OpenPIC regs */ phys_addr_t OpenPIC_PAddr = binfo->bi_immr_base + MPC85xx_OPENPIC_OFFSET; @@ -198,6 +200,15 @@ mpc85xx_cds_init_IRQ(void) */ openpic_init(MPC85xx_OPENPIC_IRQ_OFFSET); +#ifdef CONFIG_PCI + openpic_hookup_cascade(PIRQ0A, "82c59 cascade", i8259_irq); + + for (i = 0; i < NUM_8259_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + + i8259_init(0); +#endif + #ifdef CONFIG_CPM2 /* Setup CPM2 PIC */ cpm2_init_IRQ(); @@ -231,7 +242,7 @@ mpc85xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) * interrupt on slot */ { { 0, 1, 2, 3 }, /* 16 - PMC */ - { 3, 0, 0, 0 }, /* 17 P2P (Tsi320) */ + { 0, 1, 2, 3 }, /* 17 P2P (Tsi320) */ { 0, 1, 2, 3 }, /* 18 - Slot 1 */ { 1, 2, 3, 0 }, /* 19 - Slot 2 */ { 2, 3, 0, 1 }, /* 20 - Slot 3 */ @@ -280,13 +291,135 @@ mpc85xx_exclude_device(u_char bus, u_char devfn) return PCIBIOS_DEVICE_NOT_FOUND; #endif /* We explicitly do not go past the Tundra 320 Bridge */ - if (bus == 1) + if ((bus == 1) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL)) return PCIBIOS_DEVICE_NOT_FOUND; if ((bus == 0) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL)) return PCIBIOS_DEVICE_NOT_FOUND; else return PCIBIOS_SUCCESSFUL; } + +void __init +mpc85xx_cds_enable_via(struct pci_controller *hose) +{ + u32 pci_class; + u16 vid, did; + + early_read_config_dword(hose, 0, 0x88, PCI_CLASS_REVISION, &pci_class); + if ((pci_class >> 16) != PCI_CLASS_BRIDGE_PCI) + return; + + /* Configure P2P so that we can reach bus 1 */ + early_write_config_byte(hose, 0, 0x88, PCI_PRIMARY_BUS, 0); + early_write_config_byte(hose, 0, 0x88, PCI_SECONDARY_BUS, 1); + early_write_config_byte(hose, 0, 0x88, PCI_SUBORDINATE_BUS, 0xff); + + early_read_config_word(hose, 1, 0x10, PCI_VENDOR_ID, &vid); + early_read_config_word(hose, 1, 0x10, PCI_DEVICE_ID, &did); + + if ((vid != PCI_VENDOR_ID_VIA) || + (did != PCI_DEVICE_ID_VIA_82C686)) + return; + + /* Enable USB and IDE functions */ + early_write_config_byte(hose, 1, 0x10, 0x48, 0x08); +} + +void __init +mpc85xx_cds_fixup_via(struct pci_controller *hose) +{ + u32 pci_class; + u16 vid, did; + + early_read_config_dword(hose, 0, 0x88, PCI_CLASS_REVISION, &pci_class); + if ((pci_class >> 16) != PCI_CLASS_BRIDGE_PCI) + return; + + /* + * Force the backplane P2P bridge to have a window + * open from 0x00000000-0x00001fff in PCI I/O space. + * This allows legacy I/O (i8259, etc) on the VIA + * southbridge to be accessed. + */ + early_write_config_byte(hose, 0, 0x88, PCI_IO_BASE, 0x00); + early_write_config_word(hose, 0, 0x88, PCI_IO_BASE_UPPER16, 0x0000); + early_write_config_byte(hose, 0, 0x88, PCI_IO_LIMIT, 0x10); + early_write_config_word(hose, 0, 0x88, PCI_IO_LIMIT_UPPER16, 0x0000); + + early_read_config_word(hose, 1, 0x10, PCI_VENDOR_ID, &vid); + early_read_config_word(hose, 1, 0x10, PCI_DEVICE_ID, &did); + if ((vid != PCI_VENDOR_ID_VIA) || + (did != PCI_DEVICE_ID_VIA_82C686)) + return; + + /* + * Since the P2P window was forced to cover the fixed + * legacy I/O addresses, it is necessary to manually + * place the base addresses for the IDE and USB functions + * within this window. + */ + /* Function 1, IDE */ + early_write_config_dword(hose, 1, 0x11, PCI_BASE_ADDRESS_0, 0x1ff8); + early_write_config_dword(hose, 1, 0x11, PCI_BASE_ADDRESS_1, 0x1ff4); + early_write_config_dword(hose, 1, 0x11, PCI_BASE_ADDRESS_2, 0x1fe8); + early_write_config_dword(hose, 1, 0x11, PCI_BASE_ADDRESS_3, 0x1fe4); + early_write_config_dword(hose, 1, 0x11, PCI_BASE_ADDRESS_4, 0x1fd0); + + /* Function 2, USB ports 0-1 */ + early_write_config_dword(hose, 1, 0x12, PCI_BASE_ADDRESS_4, 0x1fa0); + + /* Function 3, USB ports 2-3 */ + early_write_config_dword(hose, 1, 0x13, PCI_BASE_ADDRESS_4, 0x1f80); + + /* Function 5, Power Management */ + early_write_config_dword(hose, 1, 0x15, PCI_BASE_ADDRESS_0, 0x1e00); + early_write_config_dword(hose, 1, 0x15, PCI_BASE_ADDRESS_1, 0x1dfc); + early_write_config_dword(hose, 1, 0x15, PCI_BASE_ADDRESS_2, 0x1df8); + + /* Function 6, AC97 Interface */ + early_write_config_dword(hose, 1, 0x16, PCI_BASE_ADDRESS_0, 0x1c00); +} + +void __init +mpc85xx_cds_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + u_char c; + + if ((dev = pci_find_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_82C586_1, NULL))) { + /* + * U-Boot does not set the enable bits + * for the IDE device. Force them on here. + */ + pci_read_config_byte(dev, 0x40, &c); + c |= 0x03; /* IDE: Chip Enable Bits */ + pci_write_config_byte(dev, 0x40, c); + + /* + * Since only primary interface works, force the + * IDE function to standard primary IDE interrupt + * w/ 8259 offset + */ + dev->irq = 14; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + + /* + * Force legacy USB interrupt routing + */ + if ((dev = pci_find_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_82C586_2, NULL))) { + dev->irq = 10; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 10); + } + + if ((dev = pci_find_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_82C586_2, dev))) { + dev->irq = 11; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); + } +} #endif /* CONFIG_PCI */ TODC_ALLOC(); @@ -328,6 +461,9 @@ mpc85xx_cds_setup_arch(void) loops_per_jiffy = freq / HZ; #ifdef CONFIG_PCI + /* VIA IDE configuration */ + ppc_md.pcibios_fixup = mpc85xx_cds_pcibios_fixup; + /* setup PCI host bridges */ mpc85xx_setup_hose(); #endif @@ -459,6 +595,9 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) + ppc_md.early_serial_map = mpc85xx_early_serial_map; +#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("mpc85xx_cds_init(): exit", 0); diff --git a/arch/ppc/platforms/85xx/mpc85xx_cds_common.h b/arch/ppc/platforms/85xx/mpc85xx_cds_common.h index 7627d77..12b292c 100644 --- a/arch/ppc/platforms/85xx/mpc85xx_cds_common.h +++ b/arch/ppc/platforms/85xx/mpc85xx_cds_common.h @@ -77,4 +77,7 @@ #define MPC85XX_PCI2_IO_SIZE 0x01000000 +#define NR_8259_INTS 16 +#define CPM_IRQ_OFFSET NR_8259_INTS + #endif /* __MACH_MPC85XX_CDS_H__ */ diff --git a/arch/ppc/platforms/85xx/sbc8560.c b/arch/ppc/platforms/85xx/sbc8560.c index 9ab05e5..7b9e154 100644 --- a/arch/ppc/platforms/85xx/sbc8560.c +++ b/arch/ppc/platforms/85xx/sbc8560.c @@ -221,6 +221,9 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) + ppc_md.early_serial_map = sbc8560_early_serial_map; +#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("sbc8560_init(): exit", 0); diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c index f7fb278..5fdd4f6 100644 --- a/arch/ppc/platforms/pmac_cpufreq.c +++ b/arch/ppc/platforms/pmac_cpufreq.c @@ -83,16 +83,13 @@ static u32 frequency_gpio; static u32 slew_done_gpio; static int no_schedule; static int has_cpu_l2lve; - - -#define PMAC_CPU_LOW_SPEED 1 -#define PMAC_CPU_HIGH_SPEED 0 +static int is_pmu_based; /* There are only two frequency states for each processor. Values * are in kHz for the time being. */ -#define CPUFREQ_HIGH PMAC_CPU_HIGH_SPEED -#define CPUFREQ_LOW PMAC_CPU_LOW_SPEED +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 static struct cpufreq_frequency_table pmac_cpu_freqs[] = { {CPUFREQ_HIGH, 0}, @@ -100,6 +97,11 @@ static struct cpufreq_frequency_table pmac_cpu_freqs[] = { {0, CPUFREQ_TABLE_END}, }; +static struct freq_attr* pmac_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + static inline void local_delay(unsigned long ms) { if (no_schedule) @@ -269,6 +271,8 @@ static int __pmac pmu_set_cpu_speed(int low_speed) #ifdef DEBUG_FREQ printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); #endif + pmu_suspend(); + /* Disable all interrupt sources on openpic */ pic_prio = openpic_get_priority(); openpic_set_priority(0xf); @@ -343,6 +347,8 @@ static int __pmac pmu_set_cpu_speed(int low_speed) debug_calc_bogomips(); #endif + pmu_resume(); + preempt_enable(); return 0; @@ -355,7 +361,7 @@ static int __pmac do_set_cpu_speed(int speed_mode, int notify) static unsigned long prev_l3cr; freqs.old = cur_freq; - freqs.new = (speed_mode == PMAC_CPU_HIGH_SPEED) ? hi_freq : low_freq; + freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; freqs.cpu = smp_processor_id(); if (freqs.old == freqs.new) @@ -363,7 +369,7 @@ static int __pmac do_set_cpu_speed(int speed_mode, int notify) if (notify) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (speed_mode == PMAC_CPU_LOW_SPEED && + if (speed_mode == CPUFREQ_LOW && cpu_has_feature(CPU_FTR_L3CR)) { l3cr = _get_L3CR(); if (l3cr & L3CR_L3E) { @@ -371,8 +377,8 @@ static int __pmac do_set_cpu_speed(int speed_mode, int notify) _set_L3CR(0); } } - set_speed_proc(speed_mode == PMAC_CPU_LOW_SPEED); - if (speed_mode == PMAC_CPU_HIGH_SPEED && + set_speed_proc(speed_mode == CPUFREQ_LOW); + if (speed_mode == CPUFREQ_HIGH && cpu_has_feature(CPU_FTR_L3CR)) { l3cr = _get_L3CR(); if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) @@ -380,7 +386,7 @@ static int __pmac do_set_cpu_speed(int speed_mode, int notify) } if (notify) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - cur_freq = (speed_mode == PMAC_CPU_HIGH_SPEED) ? hi_freq : low_freq; + cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; return 0; } @@ -423,7 +429,8 @@ static int __pmac pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = cur_freq; - return cpufreq_frequency_table_cpuinfo(policy, &pmac_cpu_freqs[0]); + cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); + return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); } static u32 __pmac read_gpio(struct device_node *np) @@ -456,8 +463,8 @@ static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state) */ no_schedule = 1; sleep_freq = cur_freq; - if (cur_freq == low_freq) - do_set_cpu_speed(PMAC_CPU_HIGH_SPEED, 0); + if (cur_freq == low_freq && !is_pmu_based) + do_set_cpu_speed(CPUFREQ_HIGH, 0); return 0; } @@ -473,8 +480,8 @@ static int __pmac pmac_cpufreq_resume(struct cpufreq_policy *policy) * is that we force a switch to whatever it was, which is * probably high speed due to our suspend() routine */ - do_set_cpu_speed(sleep_freq == low_freq ? PMAC_CPU_LOW_SPEED - : PMAC_CPU_HIGH_SPEED, 0); + do_set_cpu_speed(sleep_freq == low_freq ? + CPUFREQ_LOW : CPUFREQ_HIGH, 0); no_schedule = 0; return 0; @@ -488,6 +495,7 @@ static struct cpufreq_driver pmac_cpufreq_driver = { .suspend = pmac_cpufreq_suspend, .resume = pmac_cpufreq_resume, .flags = CPUFREQ_PM_NO_WARN, + .attr = pmac_cpu_freqs_attr, .name = "powermac", .owner = THIS_MODULE, }; @@ -580,6 +588,7 @@ static int __pmac pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) return 1; hi_freq = (*value) / 1000; set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; return 0; } @@ -684,6 +693,7 @@ static int __init pmac_cpufreq_setup(void) hi_freq = cur_freq; low_freq = 400000; set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; } /* Else check for TiPb 400 & 500 */ else if (machine_is_compatible("PowerBook3,2")) { @@ -695,6 +705,7 @@ static int __init pmac_cpufreq_setup(void) hi_freq = cur_freq; low_freq = 300000; set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; } /* Else check for 750FX */ else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) diff --git a/arch/ppc/platforms/pq2ads.h b/arch/ppc/platforms/pq2ads.h index cf5e5dd..067d9a5 100644 --- a/arch/ppc/platforms/pq2ads.h +++ b/arch/ppc/platforms/pq2ads.h @@ -49,10 +49,10 @@ /* PCI interrupt controller */ #define PCI_INT_STAT_REG 0xF8200000 #define PCI_INT_MASK_REG 0xF8200004 -#define PIRQA (NR_SIU_INTS + 0) -#define PIRQB (NR_SIU_INTS + 1) -#define PIRQC (NR_SIU_INTS + 2) -#define PIRQD (NR_SIU_INTS + 3) +#define PIRQA (NR_CPM_INTS + 0) +#define PIRQB (NR_CPM_INTS + 1) +#define PIRQC (NR_CPM_INTS + 2) +#define PIRQD (NR_CPM_INTS + 3) /* * PCI memory map definitions for MPC8266ADS-PCI. @@ -68,28 +68,23 @@ * 0x00000000-0x1FFFFFFF 0x00000000-0x1FFFFFFF MPC8266 local memory */ -/* window for a PCI master to access MPC8266 memory */ -#define PCI_SLV_MEM_LOCAL 0x00000000 /* Local base */ -#define PCI_SLV_MEM_BUS 0x00000000 /* PCI base */ +/* All the other PCI memory map definitions reside at syslib/m82xx_pci.h + Here we should redefine what is unique for this board */ +#define M82xx_PCI_SLAVE_MEM_LOCAL 0x00000000 /* Local base */ +#define M82xx_PCI_SLAVE_MEM_BUS 0x00000000 /* PCI base */ +#define M82xx_PCI_SLAVE_MEM_SIZE 0x10000000 /* 256 Mb */ -/* window for the processor to access PCI memory with prefetching */ -#define PCI_MSTR_MEM_LOCAL 0x80000000 /* Local base */ -#define PCI_MSTR_MEM_BUS 0x80000000 /* PCI base */ -#define PCI_MSTR_MEM_SIZE 0x20000000 /* 512MB */ +#define M82xx_PCI_SLAVE_SEC_WND_SIZE ~(0x40000000 - 1U) /* 2 x 512Mb */ +#define M82xx_PCI_SLAVE_SEC_WND_BASE 0x80000000 /* PCI Memory base */ -/* window for the processor to access PCI memory without prefetching */ -#define PCI_MSTR_MEMIO_LOCAL 0xA0000000 /* Local base */ -#define PCI_MSTR_MEMIO_BUS 0xA0000000 /* PCI base */ -#define PCI_MSTR_MEMIO_SIZE 0x20000000 /* 512MB */ +#if defined(CONFIG_ADS8272) +#define PCI_INT_TO_SIU SIU_INT_IRQ2 +#elif defined(CONFIG_PQ2FADS) +#define PCI_INT_TO_SIU SIU_INT_IRQ6 +#else +#warning PCI Bridge will be without interrupts support +#endif -/* window for the processor to access PCI I/O */ -#define PCI_MSTR_IO_LOCAL 0xF4000000 /* Local base */ -#define PCI_MSTR_IO_BUS 0x00000000 /* PCI base */ -#define PCI_MSTR_IO_SIZE 0x04000000 /* 64MB */ - -#define _IO_BASE PCI_MSTR_IO_LOCAL -#define _ISA_MEM_BASE PCI_MSTR_MEMIO_LOCAL -#define PCI_DRAM_OFFSET PCI_SLV_MEM_BUS #endif /* CONFIG_PCI */ #endif /* __MACH_ADS8260_DEFS */ diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index dd418ea..96acf85 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -81,7 +81,7 @@ obj-$(CONFIG_SBC82xx) += todc_time.o obj-$(CONFIG_SPRUCE) += cpc700_pic.o indirect_pci.o pci_auto.o \ todc_time.o obj-$(CONFIG_8260) += m8260_setup.o -obj-$(CONFIG_PCI_8260) += m8260_pci.o indirect_pci.o +obj-$(CONFIG_PCI_8260) += m82xx_pci.o indirect_pci.o pci_auto.o obj-$(CONFIG_8260_PCI9) += m8260_pci_erratum9.o obj-$(CONFIG_CPM2) += cpm2_common.o cpm2_pic.o ifeq ($(CONFIG_PPC_GEN550),y) @@ -97,7 +97,7 @@ obj-$(CONFIG_MPC10X_OPENPIC) += open_pic.o obj-$(CONFIG_40x) += dcr.o obj-$(CONFIG_BOOKE) += dcr.o obj-$(CONFIG_85xx) += open_pic.o ppc85xx_common.o ppc85xx_setup.o \ - ppc_sys.o mpc85xx_sys.o \ + ppc_sys.o i8259.o mpc85xx_sys.o \ mpc85xx_devices.o ifeq ($(CONFIG_85xx),y) obj-$(CONFIG_PCI) += indirect_pci.o pci_auto.o diff --git a/arch/ppc/syslib/ipic.c b/arch/ppc/syslib/ipic.c index acb2cde..580ed65 100644 --- a/arch/ppc/syslib/ipic.c +++ b/arch/ppc/syslib/ipic.c @@ -479,7 +479,7 @@ void __init ipic_init(phys_addr_t phys_addr, temp = 0; for (i = 0 ; i < senses_count ; i++) { if ((senses[i] & IRQ_SENSE_MASK) == IRQ_SENSE_EDGE) { - temp |= 1 << (16 - i); + temp |= 1 << (15 - i); if (i != 0) irq_desc[i + irq_offset + MPC83xx_IRQ_EXT1 - 1].status = 0; else diff --git a/arch/ppc/syslib/m8260_pci.c b/arch/ppc/syslib/m8260_pci.c deleted file mode 100644 index 057cc3f..0000000 --- a/arch/ppc/syslib/m8260_pci.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * (C) Copyright 2003 - * Wolfgang Denk, DENX Software Engineering, wd@denx.de. - * - * (C) Copyright 2004 Red Hat, Inc. - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <linux/delay.h> - -#include <asm/byteorder.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/uaccess.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <asm/immap_cpm2.h> -#include <asm/mpc8260.h> - -#include "m8260_pci.h" - - -/* PCI bus configuration registers. - */ - -static void __init m8260_setup_pci(struct pci_controller *hose) -{ - volatile cpm2_map_t *immap = cpm2_immr; - unsigned long pocmr; - u16 tempShort; - -#ifndef CONFIG_ATC /* already done in U-Boot */ - /* - * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]), - * and local bus for PCI (SIUMCR [LBPC]). - */ - immap->im_siu_conf.siu_82xx.sc_siumcr = 0x00640000; -#endif - - /* Make PCI lowest priority */ - /* Each 4 bits is a device bus request and the MS 4bits - is highest priority */ - /* Bus 4bit value - --- ---------- - CPM high 0b0000 - CPM middle 0b0001 - CPM low 0b0010 - PCI reguest 0b0011 - Reserved 0b0100 - Reserved 0b0101 - Internal Core 0b0110 - External Master 1 0b0111 - External Master 2 0b1000 - External Master 3 0b1001 - The rest are reserved */ - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x61207893; - - /* Park bus on core while modifying PCI Bus accesses */ - immap->im_siu_conf.siu_82xx.sc_ppc_acr = 0x6; - - /* - * Set up master window that allows the CPU to access PCI space. This - * window is set up using the first SIU PCIBR registers. - */ - immap->im_memctl.memc_pcimsk0 = MPC826x_PCI_MASK; - immap->im_memctl.memc_pcibr0 = MPC826x_PCI_BASE | PCIBR_ENABLE; - - /* Disable machine check on no response or target abort */ - immap->im_pci.pci_emr = cpu_to_le32(0x1fe7); - /* Release PCI RST (by default the PCI RST signal is held low) */ - immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN); - - /* give it some time */ - mdelay(1); - - /* - * Set up master window that allows the CPU to access PCI Memory (prefetch) - * space. This window is set up using the first set of Outbound ATU registers. - */ - immap->im_pci.pci_potar0 = cpu_to_le32(MPC826x_PCI_LOWER_MEM >> 12); - immap->im_pci.pci_pobar0 = cpu_to_le32((MPC826x_PCI_LOWER_MEM - MPC826x_PCI_MEM_OFFSET) >> 12); - pocmr = ((MPC826x_PCI_UPPER_MEM - MPC826x_PCI_LOWER_MEM) >> 12) ^ 0xfffff; - immap->im_pci.pci_pocmr0 = cpu_to_le32(pocmr | POCMR_ENABLE | POCMR_PREFETCH_EN); - - /* - * Set up master window that allows the CPU to access PCI Memory (non-prefetch) - * space. This window is set up using the second set of Outbound ATU registers. - */ - immap->im_pci.pci_potar1 = cpu_to_le32(MPC826x_PCI_LOWER_MMIO >> 12); - immap->im_pci.pci_pobar1 = cpu_to_le32((MPC826x_PCI_LOWER_MMIO - MPC826x_PCI_MMIO_OFFSET) >> 12); - pocmr = ((MPC826x_PCI_UPPER_MMIO - MPC826x_PCI_LOWER_MMIO) >> 12) ^ 0xfffff; - immap->im_pci.pci_pocmr1 = cpu_to_le32(pocmr | POCMR_ENABLE); - - /* - * Set up master window that allows the CPU to access PCI IO space. This window - * is set up using the third set of Outbound ATU registers. - */ - immap->im_pci.pci_potar2 = cpu_to_le32(MPC826x_PCI_IO_BASE >> 12); - immap->im_pci.pci_pobar2 = cpu_to_le32(MPC826x_PCI_LOWER_IO >> 12); - pocmr = ((MPC826x_PCI_UPPER_IO - MPC826x_PCI_LOWER_IO) >> 12) ^ 0xfffff; - immap->im_pci.pci_pocmr2 = cpu_to_le32(pocmr | POCMR_ENABLE | POCMR_PCI_IO); - - /* - * Set up slave window that allows PCI masters to access MPC826x local memory. - * This window is set up using the first set of Inbound ATU registers - */ - - immap->im_pci.pci_pitar0 = cpu_to_le32(MPC826x_PCI_SLAVE_MEM_LOCAL >> 12); - immap->im_pci.pci_pibar0 = cpu_to_le32(MPC826x_PCI_SLAVE_MEM_BUS >> 12); - pocmr = ((MPC826x_PCI_SLAVE_MEM_SIZE-1) >> 12) ^ 0xfffff; - immap->im_pci.pci_picmr0 = cpu_to_le32(pocmr | PICMR_ENABLE | PICMR_PREFETCH_EN); - - /* See above for description - puts PCI request as highest priority */ - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567; - - /* Park the bus on the PCI */ - immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI; - - /* Host mode - specify the bridge as a host-PCI bridge */ - early_write_config_word(hose, 0, 0, PCI_CLASS_DEVICE, PCI_CLASS_BRIDGE_HOST); - - /* Enable the host bridge to be a master on the PCI bus, and to act as a PCI memory target */ - early_read_config_word(hose, 0, 0, PCI_COMMAND, &tempShort); - early_write_config_word(hose, 0, 0, PCI_COMMAND, - tempShort | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); -} - -void __init m8260_find_bridges(void) -{ - extern int pci_assign_all_busses; - struct pci_controller * hose; - - pci_assign_all_busses = 1; - - hose = pcibios_alloc_controller(); - - if (!hose) - return; - - ppc_md.pci_swizzle = common_swizzle; - - hose->first_busno = 0; - hose->bus_offset = 0; - hose->last_busno = 0xff; - - setup_m8260_indirect_pci(hose, - (unsigned long)&cpm2_immr->im_pci.pci_cfg_addr, - (unsigned long)&cpm2_immr->im_pci.pci_cfg_data); - - m8260_setup_pci(hose); - hose->pci_mem_offset = MPC826x_PCI_MEM_OFFSET; - - hose->io_base_virt = ioremap(MPC826x_PCI_IO_BASE, - MPC826x_PCI_IO_SIZE); - isa_io_base = (unsigned long) hose->io_base_virt; - - /* setup resources */ - pci_init_resource(&hose->mem_resources[0], - MPC826x_PCI_LOWER_MEM, - MPC826x_PCI_UPPER_MEM, - IORESOURCE_MEM|IORESOURCE_PREFETCH, "PCI prefetchable memory"); - - pci_init_resource(&hose->mem_resources[1], - MPC826x_PCI_LOWER_MMIO, - MPC826x_PCI_UPPER_MMIO, - IORESOURCE_MEM, "PCI memory"); - - pci_init_resource(&hose->io_resource, - MPC826x_PCI_LOWER_IO, - MPC826x_PCI_UPPER_IO, - IORESOURCE_IO, "PCI I/O"); -} diff --git a/arch/ppc/syslib/m8260_pci.h b/arch/ppc/syslib/m8260_pci.h deleted file mode 100644 index d1352120..0000000 --- a/arch/ppc/syslib/m8260_pci.h +++ /dev/null @@ -1,76 +0,0 @@ - -#ifndef _PPC_KERNEL_M8260_PCI_H -#define _PPC_KERNEL_M8260_PCI_H - -#include <asm/m8260_pci.h> - -/* - * Local->PCI map (from CPU) controlled by - * MPC826x master window - * - * 0x80000000 - 0xBFFFFFFF Total CPU2PCI space PCIBR0 - * - * 0x80000000 - 0x9FFFFFFF PCI Mem with prefetch (Outbound ATU #1) - * 0xA0000000 - 0xAFFFFFFF PCI Mem w/o prefetch (Outbound ATU #2) - * 0xB0000000 - 0xB0FFFFFF 32-bit PCI IO (Outbound ATU #3) - * - * PCI->Local map (from PCI) - * MPC826x slave window controlled by - * - * 0x00000000 - 0x07FFFFFF MPC826x local memory (Inbound ATU #1) - */ - -/* - * Slave window that allows PCI masters to access MPC826x local memory. - * This window is set up using the first set of Inbound ATU registers - */ - -#ifndef MPC826x_PCI_SLAVE_MEM_LOCAL -#define MPC826x_PCI_SLAVE_MEM_LOCAL (((struct bd_info *)__res)->bi_memstart) -#define MPC826x_PCI_SLAVE_MEM_BUS (((struct bd_info *)__res)->bi_memstart) -#define MPC826x_PCI_SLAVE_MEM_SIZE (((struct bd_info *)__res)->bi_memsize) -#endif - -/* - * This is the window that allows the CPU to access PCI address space. - * It will be setup with the SIU PCIBR0 register. All three PCI master - * windows, which allow the CPU to access PCI prefetch, non prefetch, - * and IO space (see below), must all fit within this window. - */ -#ifndef MPC826x_PCI_BASE -#define MPC826x_PCI_BASE 0x80000000 -#define MPC826x_PCI_MASK 0xc0000000 -#endif - -#ifndef MPC826x_PCI_LOWER_MEM -#define MPC826x_PCI_LOWER_MEM 0x80000000 -#define MPC826x_PCI_UPPER_MEM 0x9fffffff -#define MPC826x_PCI_MEM_OFFSET 0x00000000 -#endif - -#ifndef MPC826x_PCI_LOWER_MMIO -#define MPC826x_PCI_LOWER_MMIO 0xa0000000 -#define MPC826x_PCI_UPPER_MMIO 0xafffffff -#define MPC826x_PCI_MMIO_OFFSET 0x00000000 -#endif - -#ifndef MPC826x_PCI_LOWER_IO -#define MPC826x_PCI_LOWER_IO 0x00000000 -#define MPC826x_PCI_UPPER_IO 0x00ffffff -#define MPC826x_PCI_IO_BASE 0xb0000000 -#define MPC826x_PCI_IO_SIZE 0x01000000 -#endif - -#ifndef _IO_BASE -#define _IO_BASE isa_io_base -#endif - -#ifdef CONFIG_8260_PCI9 -struct pci_controller; -extern void setup_m8260_indirect_pci(struct pci_controller* hose, - u32 cfg_addr, u32 cfg_data); -#else -#define setup_m8260_indirect_pci setup_indirect_pci -#endif - -#endif /* _PPC_KERNEL_M8260_PCI_H */ diff --git a/arch/ppc/syslib/m8260_pci_erratum9.c b/arch/ppc/syslib/m8260_pci_erratum9.c index 9c0582d..1dc7e4e 100644 --- a/arch/ppc/syslib/m8260_pci_erratum9.c +++ b/arch/ppc/syslib/m8260_pci_erratum9.c @@ -31,7 +31,7 @@ #include <asm/immap_cpm2.h> #include <asm/cpm2.h> -#include "m8260_pci.h" +#include "m82xx_pci.h" #ifdef CONFIG_8260_PCI9 /*#include <asm/mpc8260_pci9.h>*/ /* included in asm/io.h */ @@ -248,11 +248,11 @@ EXPORT_SYMBOL(idma_pci9_read_le); static inline int is_pci_mem(unsigned long addr) { - if (addr >= MPC826x_PCI_LOWER_MMIO && - addr <= MPC826x_PCI_UPPER_MMIO) + if (addr >= M82xx_PCI_LOWER_MMIO && + addr <= M82xx_PCI_UPPER_MMIO) return 1; - if (addr >= MPC826x_PCI_LOWER_MEM && - addr <= MPC826x_PCI_UPPER_MEM) + if (addr >= M82xx_PCI_LOWER_MEM && + addr <= M82xx_PCI_UPPER_MEM) return 1; return 0; } diff --git a/arch/ppc/syslib/m8260_setup.c b/arch/ppc/syslib/m8260_setup.c index 23ea3f6..fda75d7 100644 --- a/arch/ppc/syslib/m8260_setup.c +++ b/arch/ppc/syslib/m8260_setup.c @@ -34,7 +34,8 @@ unsigned char __res[sizeof(bd_t)]; extern void cpm2_reset(void); -extern void m8260_find_bridges(void); +extern void pq2_find_bridges(void); +extern void pq2pci_init_irq(void); extern void idma_pci9_init(void); /* Place-holder for board-specific init */ @@ -56,7 +57,7 @@ m8260_setup_arch(void) idma_pci9_init(); #endif #ifdef CONFIG_PCI_8260 - m8260_find_bridges(); + pq2_find_bridges(); #endif #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) @@ -173,6 +174,12 @@ m8260_init_IRQ(void) * in case the boot rom changed something on us. */ cpm2_immr->im_intctl.ic_siprr = 0x05309770; + +#if defined(CONFIG_PCI) && (defined(CONFIG_ADS8272) || defined(CONFIG_PQ2FADS)) + /* Initialize stuff for the 82xx CPLD IC and install demux */ + pq2pci_init_irq(); +#endif + } /* diff --git a/arch/ppc/syslib/m82xx_pci.c b/arch/ppc/syslib/m82xx_pci.c new file mode 100644 index 0000000..5e7a7ed --- /dev/null +++ b/arch/ppc/syslib/m82xx_pci.c @@ -0,0 +1,383 @@ +/* + * + * (C) Copyright 2003 + * Wolfgang Denk, DENX Software Engineering, wd@denx.de. + * + * (C) Copyright 2004 Red Hat, Inc. + * + * 2005 (c) MontaVista Software, Inc. + * Vitaly Bordug <vbordug@ru.mvista.com> + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/interrupt.h> + +#include <asm/byteorder.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/uaccess.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/immap_cpm2.h> +#include <asm/mpc8260.h> +#include <asm/cpm2.h> + +#include "m82xx_pci.h" + +/* + * Interrupt routing + */ + +static inline int +pq2pci_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) +{ + static char pci_irq_table[][4] = + /* + * PCI IDSEL/INTPIN->INTLINE + * A B C D + */ + { + { PIRQA, PIRQB, PIRQC, PIRQD }, /* IDSEL 22 - PCI slot 0 */ + { PIRQD, PIRQA, PIRQB, PIRQC }, /* IDSEL 23 - PCI slot 1 */ + { PIRQC, PIRQD, PIRQA, PIRQB }, /* IDSEL 24 - PCI slot 2 */ + }; + + const long min_idsel = 22, max_idsel = 24, irqs_per_slot = 4; + return PCI_IRQ_TABLE_LOOKUP; +} + +static void +pq2pci_mask_irq(unsigned int irq) +{ + int bit = irq - NR_CPM_INTS; + + *(volatile unsigned long *) PCI_INT_MASK_REG |= (1 << (31 - bit)); + return; +} + +static void +pq2pci_unmask_irq(unsigned int irq) +{ + int bit = irq - NR_CPM_INTS; + + *(volatile unsigned long *) PCI_INT_MASK_REG &= ~(1 << (31 - bit)); + return; +} + +static void +pq2pci_mask_and_ack(unsigned int irq) +{ + int bit = irq - NR_CPM_INTS; + + *(volatile unsigned long *) PCI_INT_MASK_REG |= (1 << (31 - bit)); + return; +} + +static void +pq2pci_end_irq(unsigned int irq) +{ + int bit = irq - NR_CPM_INTS; + + *(volatile unsigned long *) PCI_INT_MASK_REG &= ~(1 << (31 - bit)); + return; +} + +struct hw_interrupt_type pq2pci_ic = { + "PQ2 PCI", + NULL, + NULL, + pq2pci_unmask_irq, + pq2pci_mask_irq, + pq2pci_mask_and_ack, + pq2pci_end_irq, + 0 +}; + +static irqreturn_t +pq2pci_irq_demux(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long stat, mask, pend; + int bit; + + for(;;) { + stat = *(volatile unsigned long *) PCI_INT_STAT_REG; + mask = *(volatile unsigned long *) PCI_INT_MASK_REG; + pend = stat & ~mask & 0xf0000000; + if (!pend) + break; + for (bit = 0; pend != 0; ++bit, pend <<= 1) { + if (pend & 0x80000000) + __do_IRQ(NR_CPM_INTS + bit, regs); + } + } + + return IRQ_HANDLED; +} + +static struct irqaction pq2pci_irqaction = { + .handler = pq2pci_irq_demux, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "PQ2 PCI cascade", +}; + + +void +pq2pci_init_irq(void) +{ + int irq; + volatile cpm2_map_t *immap = cpm2_immr; +#if defined CONFIG_ADS8272 + /* configure chip select for PCI interrupt controller */ + immap->im_memctl.memc_br3 = PCI_INT_STAT_REG | 0x00001801; + immap->im_memctl.memc_or3 = 0xffff8010; +#elif defined CONFIG_PQ2FADS + immap->im_memctl.memc_br8 = PCI_INT_STAT_REG | 0x00001801; + immap->im_memctl.memc_or8 = 0xffff8010; +#endif + for (irq = NR_CPM_INTS; irq < NR_CPM_INTS + 4; irq++) + irq_desc[irq].handler = &pq2pci_ic; + + /* make PCI IRQ level sensitive */ + immap->im_intctl.ic_siexr &= + ~(1 << (14 - (PCI_INT_TO_SIU - SIU_INT_IRQ1))); + + /* mask all PCI interrupts */ + *(volatile unsigned long *) PCI_INT_MASK_REG |= 0xfff00000; + + /* install the demultiplexer for the PCI cascade interrupt */ + setup_irq(PCI_INT_TO_SIU, &pq2pci_irqaction); + return; +} + +static int +pq2pci_exclude_device(u_char bus, u_char devfn) +{ + return PCIBIOS_SUCCESSFUL; +} + +/* PCI bus configuration registers. + */ +static void +pq2ads_setup_pci(struct pci_controller *hose) +{ + __u32 val; + volatile cpm2_map_t *immap = cpm2_immr; + bd_t* binfo = (bd_t*) __res; + u32 sccr = immap->im_clkrst.car_sccr; + uint pci_div,freq,time; + /* PCI int lowest prio */ + /* Each 4 bits is a device bus request and the MS 4bits + is highest priority */ + /* Bus 4bit value + --- ---------- + CPM high 0b0000 + CPM middle 0b0001 + CPM low 0b0010 + PCI reguest 0b0011 + Reserved 0b0100 + Reserved 0b0101 + Internal Core 0b0110 + External Master 1 0b0111 + External Master 2 0b1000 + External Master 3 0b1001 + The rest are reserved + */ + immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x61207893; + /* park bus on core */ + immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_CORE; + /* + * Set up master windows that allow the CPU to access PCI space. These + * windows are set up using the two SIU PCIBR registers. + */ + + immap->im_memctl.memc_pcimsk0 = M82xx_PCI_PRIM_WND_SIZE; + immap->im_memctl.memc_pcibr0 = M82xx_PCI_PRIM_WND_BASE | PCIBR_ENABLE; + +#ifdef M82xx_PCI_SEC_WND_SIZE + immap->im_memctl.memc_pcimsk1 = M82xx_PCI_SEC_WND_SIZE; + immap->im_memctl.memc_pcibr1 = M82xx_PCI_SEC_WND_BASE | PCIBR_ENABLE; +#endif + +#if defined CONFIG_ADS8272 + immap->im_siu_conf.siu_82xx.sc_siumcr = + (immap->im_siu_conf.siu_82xx.sc_siumcr & + ~(SIUMCR_BBD | SIUMCR_ESE | SIUMCR_PBSE | + SIUMCR_CDIS | SIUMCR_DPPC11 | SIUMCR_L2CPC11 | + SIUMCR_LBPC11 | SIUMCR_APPC11 | + SIUMCR_CS10PC11 | SIUMCR_BCTLC11 | SIUMCR_MMR11)) | + SIUMCR_DPPC11 | SIUMCR_L2CPC01 | SIUMCR_LBPC00 | + SIUMCR_APPC10 | SIUMCR_CS10PC00 | + SIUMCR_BCTLC00 | SIUMCR_MMR11 ; + +#elif defined CONFIG_PQ2FADS + /* + * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]), + * and local bus for PCI (SIUMCR [LBPC]). + */ + immap->im_siu_conf.siu_82xx.sc_siumcr = (immap->im_siu_conf.sc_siumcr & + ~(SIUMCR_L2PC11 | SIUMCR_LBPC11 | SIUMCR_CS10PC11 | SIUMCR_APPC11) | + SIUMCR_BBD | SIUMCR_LBPC01 | SIUMCR_DPPC11 | SIUMCR_APPC10; +#endif + /* Enable PCI */ + immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN); + + pci_div = ( (sccr & SCCR_PCI_MODCK) ? 2 : 1) * + ( ( (sccr & SCCR_PCIDF_MSK) >> SCCR_PCIDF_SHIFT) + 1); + freq = (uint)((2*binfo->bi_cpmfreq)/(pci_div)); + time = (int)666666/freq; + /* due to PCI Local Bus spec, some devices needs to wait such a long + time after RST deassertion. More specifically, 0.508s for 66MHz & twice more for 33 */ + printk("%s: The PCI bus is %d Mhz.\nWaiting %s after deasserting RST...\n",__FILE__,freq, + (time==1) ? "0.5 seconds":"1 second" ); + + { + int i; + for(i=0;i<(500*time);i++) + udelay(1000); + } + + /* setup ATU registers */ + immap->im_pci.pci_pocmr0 = cpu_to_le32(POCMR_ENABLE | POCMR_PCI_IO | + ((~(M82xx_PCI_IO_SIZE - 1U)) >> POTA_ADDR_SHIFT)); + immap->im_pci.pci_potar0 = cpu_to_le32(M82xx_PCI_LOWER_IO >> POTA_ADDR_SHIFT); + immap->im_pci.pci_pobar0 = cpu_to_le32(M82xx_PCI_IO_BASE >> POTA_ADDR_SHIFT); + + /* Set-up non-prefetchable window */ + immap->im_pci.pci_pocmr1 = cpu_to_le32(POCMR_ENABLE | ((~(M82xx_PCI_MMIO_SIZE-1U)) >> POTA_ADDR_SHIFT)); + immap->im_pci.pci_potar1 = cpu_to_le32(M82xx_PCI_LOWER_MMIO >> POTA_ADDR_SHIFT); + immap->im_pci.pci_pobar1 = cpu_to_le32((M82xx_PCI_LOWER_MMIO - M82xx_PCI_MMIO_OFFSET) >> POTA_ADDR_SHIFT); + + /* Set-up prefetchable window */ + immap->im_pci.pci_pocmr2 = cpu_to_le32(POCMR_ENABLE |POCMR_PREFETCH_EN | + (~(M82xx_PCI_MEM_SIZE-1U) >> POTA_ADDR_SHIFT)); + immap->im_pci.pci_potar2 = cpu_to_le32(M82xx_PCI_LOWER_MEM >> POTA_ADDR_SHIFT); + immap->im_pci.pci_pobar2 = cpu_to_le32((M82xx_PCI_LOWER_MEM - M82xx_PCI_MEM_OFFSET) >> POTA_ADDR_SHIFT); + + /* Inbound transactions from PCI memory space */ + immap->im_pci.pci_picmr0 = cpu_to_le32(PICMR_ENABLE | PICMR_PREFETCH_EN | + ((~(M82xx_PCI_SLAVE_MEM_SIZE-1U)) >> PITA_ADDR_SHIFT)); + immap->im_pci.pci_pibar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_BUS >> PITA_ADDR_SHIFT); + immap->im_pci.pci_pitar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_LOCAL>> PITA_ADDR_SHIFT); + +#if defined CONFIG_ADS8272 + /* PCI int highest prio */ + immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x01236745; +#elif defined CONFIG_PQ2FADS + immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567; +#endif + /* park bus on PCI */ + immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI; + + /* Enable bus mastering and inbound memory transactions */ + early_read_config_dword(hose, hose->first_busno, 0, PCI_COMMAND, &val); + val &= 0xffff0000; + val |= PCI_COMMAND_MEMORY|PCI_COMMAND_MASTER; + early_write_config_dword(hose, hose->first_busno, 0, PCI_COMMAND, val); + +} + +void __init pq2_find_bridges(void) +{ + extern int pci_assign_all_busses; + struct pci_controller * hose; + int host_bridge; + + pci_assign_all_busses = 1; + + hose = pcibios_alloc_controller(); + + if (!hose) + return; + + ppc_md.pci_swizzle = common_swizzle; + + hose->first_busno = 0; + hose->bus_offset = 0; + hose->last_busno = 0xff; + +#ifdef CONFIG_ADS8272 + hose->set_cfg_type = 1; +#endif + + setup_m8260_indirect_pci(hose, + (unsigned long)&cpm2_immr->im_pci.pci_cfg_addr, + (unsigned long)&cpm2_immr->im_pci.pci_cfg_data); + + /* Make sure it is a supported bridge */ + early_read_config_dword(hose, + 0, + PCI_DEVFN(0,0), + PCI_VENDOR_ID, + &host_bridge); + switch (host_bridge) { + case PCI_DEVICE_ID_MPC8265: + break; + case PCI_DEVICE_ID_MPC8272: + break; + default: + printk("Attempting to use unrecognized host bridge ID" + " 0x%08x.\n", host_bridge); + break; + } + + pq2ads_setup_pci(hose); + + hose->io_space.start = M82xx_PCI_LOWER_IO; + hose->io_space.end = M82xx_PCI_UPPER_IO; + hose->mem_space.start = M82xx_PCI_LOWER_MEM; + hose->mem_space.end = M82xx_PCI_UPPER_MMIO; + hose->pci_mem_offset = M82xx_PCI_MEM_OFFSET; + + isa_io_base = + (unsigned long) ioremap(M82xx_PCI_IO_BASE, + M82xx_PCI_IO_SIZE); + hose->io_base_virt = (void *) isa_io_base; + + /* setup resources */ + pci_init_resource(&hose->mem_resources[0], + M82xx_PCI_LOWER_MEM, + M82xx_PCI_UPPER_MEM, + IORESOURCE_MEM|IORESOURCE_PREFETCH, "PCI prefetchable memory"); + + pci_init_resource(&hose->mem_resources[1], + M82xx_PCI_LOWER_MMIO, + M82xx_PCI_UPPER_MMIO, + IORESOURCE_MEM, "PCI memory"); + + pci_init_resource(&hose->io_resource, + M82xx_PCI_LOWER_IO, + M82xx_PCI_UPPER_IO, + IORESOURCE_IO | 1, "PCI I/O"); + + ppc_md.pci_exclude_device = pq2pci_exclude_device; + hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); + + ppc_md.pci_map_irq = pq2pci_map_irq; + ppc_md.pcibios_fixup = NULL; + ppc_md.pcibios_fixup_bus = NULL; + +} diff --git a/arch/ppc/syslib/m82xx_pci.h b/arch/ppc/syslib/m82xx_pci.h new file mode 100644 index 0000000..924f73f --- /dev/null +++ b/arch/ppc/syslib/m82xx_pci.h @@ -0,0 +1,92 @@ + +#ifndef _PPC_KERNEL_M82XX_PCI_H +#define _PPC_KERNEL_M82XX_PCI_H + +#include <asm/m8260_pci.h> +/* + * Local->PCI map (from CPU) controlled by + * MPC826x master window + * + * 0xF6000000 - 0xF7FFFFFF IO space + * 0x80000000 - 0xBFFFFFFF CPU2PCI memory space PCIBR0 + * + * 0x80000000 - 0x9FFFFFFF PCI Mem with prefetch (Outbound ATU #1) + * 0xA0000000 - 0xBFFFFFFF PCI Mem w/o prefetch (Outbound ATU #2) + * 0xF6000000 - 0xF7FFFFFF 32-bit PCI IO (Outbound ATU #3) + * + * PCI->Local map (from PCI) + * MPC826x slave window controlled by + * + * 0x00000000 - 0x07FFFFFF MPC826x local memory (Inbound ATU #1) + */ + +/* + * Slave window that allows PCI masters to access MPC826x local memory. + * This window is set up using the first set of Inbound ATU registers + */ + +#ifndef M82xx_PCI_SLAVE_MEM_LOCAL +#define M82xx_PCI_SLAVE_MEM_LOCAL (((struct bd_info *)__res)->bi_memstart) +#define M82xx_PCI_SLAVE_MEM_BUS (((struct bd_info *)__res)->bi_memstart) +#define M82xx_PCI_SLAVE_MEM_SIZE (((struct bd_info *)__res)->bi_memsize) +#endif + +/* + * This is the window that allows the CPU to access PCI address space. + * It will be setup with the SIU PCIBR0 register. All three PCI master + * windows, which allow the CPU to access PCI prefetch, non prefetch, + * and IO space (see below), must all fit within this window. + */ + +#ifndef M82xx_PCI_LOWER_MEM +#define M82xx_PCI_LOWER_MEM 0x80000000 +#define M82xx_PCI_UPPER_MEM 0x9fffffff +#define M82xx_PCI_MEM_OFFSET 0x00000000 +#define M82xx_PCI_MEM_SIZE 0x20000000 +#endif + +#ifndef M82xx_PCI_LOWER_MMIO +#define M82xx_PCI_LOWER_MMIO 0xa0000000 +#define M82xx_PCI_UPPER_MMIO 0xafffffff +#define M82xx_PCI_MMIO_OFFSET 0x00000000 +#define M82xx_PCI_MMIO_SIZE 0x20000000 +#endif + +#ifndef M82xx_PCI_LOWER_IO +#define M82xx_PCI_LOWER_IO 0x00000000 +#define M82xx_PCI_UPPER_IO 0x01ffffff +#define M82xx_PCI_IO_BASE 0xf6000000 +#define M82xx_PCI_IO_SIZE 0x02000000 +#endif + +#ifndef M82xx_PCI_PRIM_WND_SIZE +#define M82xx_PCI_PRIM_WND_SIZE ~(M82xx_PCI_IO_SIZE - 1U) +#define M82xx_PCI_PRIM_WND_BASE (M82xx_PCI_IO_BASE) +#endif + +#ifndef M82xx_PCI_SEC_WND_SIZE +#define M82xx_PCI_SEC_WND_SIZE ~(M82xx_PCI_MEM_SIZE + M82xx_PCI_MMIO_SIZE - 1U) +#define M82xx_PCI_SEC_WND_BASE (M82xx_PCI_LOWER_MEM) +#endif + +#ifndef POTA_ADDR_SHIFT +#define POTA_ADDR_SHIFT 12 +#endif + +#ifndef PITA_ADDR_SHIFT +#define PITA_ADDR_SHIFT 12 +#endif + +#ifndef _IO_BASE +#define _IO_BASE isa_io_base +#endif + +#ifdef CONFIG_8260_PCI9 +struct pci_controller; +extern void setup_m8260_indirect_pci(struct pci_controller* hose, + u32 cfg_addr, u32 cfg_data); +#else +#define setup_m8260_indirect_pci setup_indirect_pci +#endif + +#endif /* _PPC_KERNEL_M8260_PCI_H */ diff --git a/arch/ppc/syslib/mpc83xx_devices.c b/arch/ppc/syslib/mpc83xx_devices.c index 5c1a919e..75c8e98 100644 --- a/arch/ppc/syslib/mpc83xx_devices.c +++ b/arch/ppc/syslib/mpc83xx_devices.c @@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = { .iotype = UPIO_MEM, .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, }, + { }, }; struct platform_device ppc_sys_platform_devices[] = { diff --git a/arch/ppc/syslib/mpc85xx_devices.c b/arch/ppc/syslib/mpc85xx_devices.c index a231795..1e658ef 100644 --- a/arch/ppc/syslib/mpc85xx_devices.c +++ b/arch/ppc/syslib/mpc85xx_devices.c @@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = { .iotype = UPIO_MEM, .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ, }, + { }, }; struct platform_device ppc_sys_platform_devices[] = { diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c index 7619e16..000ba47 100644 --- a/arch/ppc/syslib/open_pic.c +++ b/arch/ppc/syslib/open_pic.c @@ -275,7 +275,7 @@ static void __init openpic_enable_sie(void) } #endif -#if defined(CONFIG_EPIC_SERIAL_MODE) || defined(CONFIG_PM) +#if defined(CONFIG_EPIC_SERIAL_MODE) static void openpic_reset(void) { openpic_setfield(&OpenPIC->Global.Global_Configuration0, @@ -557,12 +557,10 @@ static void __init openpic_initipi(u_int ipi, u_int pri, u_int vec) */ void openpic_cause_IPI(u_int ipi, cpumask_t cpumask) { - cpumask_t phys; DECL_THIS_CPU; CHECK_THIS_CPU; check_arg_ipi(ipi); - phys = physmask(cpumask); openpic_write(&OpenPIC->THIS_CPU.IPI_Dispatch(ipi), cpus_addr(physmask(cpumask))[0]); } @@ -995,8 +993,6 @@ int openpic_resume(struct sys_device *sysdev) return 0; } - openpic_reset(); - /* OpenPIC sometimes seem to need some time to be fully back up... */ do { openpic_set_spurious(OPENPIC_VEC_SPURIOUS); diff --git a/arch/ppc/syslib/ppc83xx_setup.c b/arch/ppc/syslib/ppc83xx_setup.c index c28f9d6..843cf88 100644 --- a/arch/ppc/syslib/ppc83xx_setup.c +++ b/arch/ppc/syslib/ppc83xx_setup.c @@ -29,6 +29,7 @@ #include <asm/mmu.h> #include <asm/ppc_sys.h> #include <asm/kgdb.h> +#include <asm/delay.h> #include <syslib/ppc83xx_setup.h> @@ -117,7 +118,34 @@ mpc83xx_early_serial_map(void) void mpc83xx_restart(char *cmd) { + volatile unsigned char __iomem *reg; + unsigned char tmp; + + reg = ioremap(BCSR_PHYS_ADDR, BCSR_SIZE); + local_irq_disable(); + + /* + * Unlock the BCSR bits so a PRST will update the contents. + * Otherwise the reset asserts but doesn't clear. + */ + tmp = in_8(reg + BCSR_MISC_REG3_OFF); + tmp |= BCSR_MISC_REG3_CNFLOCK; /* low true, high false */ + out_8(reg + BCSR_MISC_REG3_OFF, tmp); + + /* + * Trigger a reset via a low->high transition of the + * PORESET bit. + */ + tmp = in_8(reg + BCSR_MISC_REG2_OFF); + tmp &= ~BCSR_MISC_REG2_PORESET; + out_8(reg + BCSR_MISC_REG2_OFF, tmp); + + udelay(1); + + tmp |= BCSR_MISC_REG2_PORESET; + out_8(reg + BCSR_MISC_REG2_OFF, tmp); + for(;;); } diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c index 152c3ef..f3277f4 100644 --- a/arch/ppc/syslib/ppc85xx_setup.c +++ b/arch/ppc/syslib/ppc85xx_setup.c @@ -132,6 +132,12 @@ mpc85xx_halt(void) } #ifdef CONFIG_PCI + +#if defined(CONFIG_MPC8555_CDS) +extern void mpc85xx_cds_enable_via(struct pci_controller *hose); +extern void mpc85xx_cds_fixup_via(struct pci_controller *hose); +#endif + static void __init mpc85xx_setup_pci1(struct pci_controller *hose) { @@ -302,8 +308,18 @@ mpc85xx_setup_hose(void) ppc_md.pci_exclude_device = mpc85xx_exclude_device; +#if defined(CONFIG_MPC8555_CDS) + /* Pre pciauto_bus_scan VIA init */ + mpc85xx_cds_enable_via(hose_a); +#endif + hose_a->last_busno = pciauto_bus_scan(hose_a, hose_a->first_busno); +#if defined(CONFIG_MPC8555_CDS) + /* Post pciauto_bus_scan VIA fixup */ + mpc85xx_cds_fixup_via(hose_a); +#endif + #ifdef CONFIG_85xx_PCI2 hose_b = pcibios_alloc_controller(); diff --git a/arch/ppc/syslib/prom_init.c b/arch/ppc/syslib/prom_init.c index 2cee871..7f15136 100644 --- a/arch/ppc/syslib/prom_init.c +++ b/arch/ppc/syslib/prom_init.c @@ -626,8 +626,18 @@ inspect_node(phandle node, struct device_node *dad, l = call_prom("package-to-path", 3, 1, node, mem_start, mem_end - mem_start); if (l >= 0) { + char *p, *ep; + np->full_name = PTRUNRELOC((char *) mem_start); *(char *)(mem_start + l) = 0; + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties + */ + for (p = (char *)mem_start, ep = p + l; p < ep; p++) + if ((*p) == '\0') { + memmove(p, p+1, ep - p); + ep--; + } mem_start = ALIGNUL(mem_start + l + 1); } diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index ef1f05e..5cb3438 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -40,6 +40,10 @@ config COMPAT bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + # We optimistically allocate largepages from the VM, so make the limit # large enough (16MB). This badly named config option is actually # max order + 1 @@ -258,6 +262,7 @@ config PPC_RTAS config RTAS_PROC bool "Proc interface to RTAS" depends on PPC_RTAS + default y config RTAS_FLASH tristate "Firmware flash interface" @@ -293,6 +298,9 @@ config SECCOMP endmenu +config ISA_DMA_API + bool + default y menu "General setup" diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug index e341a12..46b1ce5 100644 --- a/arch/ppc64/Kconfig.debug +++ b/arch/ppc64/Kconfig.debug @@ -5,6 +5,9 @@ source "lib/Kconfig.debug" config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + help + This option will cause messages to be printed if free stack space + drops below a certain limit. config KPROBES bool "Kprobes" diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index b0fa86a..da12ea2 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <asm/processor.h> #include <asm/page.h> -#include <asm/bootinfo.h> extern void *finddevice(const char *); extern int getprop(void *, const char *, void *, int); diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c index 7b607d1..d5218b1 100644 --- a/arch/ppc64/boot/prom.c +++ b/arch/ppc64/boot/prom.c @@ -11,6 +11,23 @@ #include <linux/string.h> #include <linux/ctype.h> +extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor); + +/* The unnecessary pointer compare is there + * to check for type safety (n must be 64bit) + */ +# define do_div(n,base) ({ \ + __u32 __base = (base); \ + __u32 __rem; \ + (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ + if (((n) >> 32) == 0) { \ + __rem = (__u32)(n) % __base; \ + (n) = (__u32)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) + int (*prom)(void *); void *chosen_handle; @@ -352,7 +369,7 @@ static int skip_atoi(const char **s) #define SPECIAL 32 /* 0x */ #define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ -static char * number(char * str, long num, int base, int size, int precision, int type) +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) { char c,sign,tmp[66]; const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; @@ -367,9 +384,9 @@ static char * number(char * str, long num, int base, int size, int precision, in c = (type & ZEROPAD) ? '0' : ' '; sign = 0; if (type & SIGN) { - if (num < 0) { + if ((signed long long)num < 0) { sign = '-'; - num = -num; + num = - (signed long long)num; size--; } else if (type & PLUS) { sign = '+'; @@ -389,8 +406,7 @@ static char * number(char * str, long num, int base, int size, int precision, in if (num == 0) tmp[i++]='0'; else while (num != 0) { - tmp[i++] = digits[num % base]; - num /= base; + tmp[i++] = digits[do_div(num, base)]; } if (i > precision) precision = i; @@ -426,7 +442,7 @@ int sprintf(char * buf, const char *fmt, ...); int vsprintf(char *buf, const char *fmt, va_list args) { int len; - unsigned long num; + unsigned long long num; int i, base; char * str; const char *s; diff --git a/arch/ppc64/boot/start.c b/arch/ppc64/boot/start.c deleted file mode 100644 index ea247e7..0000000 --- a/arch/ppc64/boot/start.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (C) Paul Mackerras 1997. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <stdarg.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/ctype.h> - -#include <asm/div64.h> - -int (*prom)(void *); - -void *chosen_handle; -void *stdin; -void *stdout; -void *stderr; - -void exit(void); -void *finddevice(const char *name); -int getprop(void *phandle, const char *name, void *buf, int buflen); -void chrpboot(int a1, int a2, void *prom); /* in main.c */ - -void printk(char *fmt, ...); - -void -start(int a1, int a2, void *promptr) -{ - prom = (int (*)(void *)) promptr; - chosen_handle = finddevice("/chosen"); - if (chosen_handle == (void *) -1) - exit(); - if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) - exit(); - stderr = stdout; - if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) - exit(); - - chrpboot(a1, a2, promptr); - for (;;) - exit(); -} - -int -write(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "write"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -int -read(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "read"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -void -exit() -{ - struct prom_args { - char *service; - } args; - - for (;;) { - args.service = "exit"; - (*prom)(&args); - } -} - -void -pause(void) -{ - struct prom_args { - char *service; - } args; - - args.service = "enter"; - (*prom)(&args); -} - -void * -finddevice(const char *name) -{ - struct prom_args { - char *service; - int nargs; - int nret; - const char *devspec; - void *phandle; - } args; - - args.service = "finddevice"; - args.nargs = 1; - args.nret = 1; - args.devspec = name; - args.phandle = (void *) -1; - (*prom)(&args); - return args.phandle; -} - -void * -claim(unsigned long virt, unsigned long size, unsigned long align) -{ - struct prom_args { - char *service; - int nargs; - int nret; - unsigned int virt; - unsigned int size; - unsigned int align; - void *ret; - } args; - - args.service = "claim"; - args.nargs = 3; - args.nret = 1; - args.virt = virt; - args.size = size; - args.align = align; - (*prom)(&args); - return args.ret; -} - -int -getprop(void *phandle, const char *name, void *buf, int buflen) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *phandle; - const char *name; - void *buf; - int buflen; - int size; - } args; - - args.service = "getprop"; - args.nargs = 4; - args.nret = 1; - args.phandle = phandle; - args.name = name; - args.buf = buf; - args.buflen = buflen; - args.size = -1; - (*prom)(&args); - return args.size; -} - -int -putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - putc('\r', f); - return write(f, &ch, 1) == 1? c: -1; -} - -int -putchar(int c) -{ - return putc(c, stdout); -} - -int -fputs(char *str, void *f) -{ - int n = strlen(str); - - return write(f, str, n) == n? 0: -1; -} - -int -readchar(void) -{ - char ch; - - for (;;) { - switch (read(stdin, &ch, 1)) { - case 1: - return ch; - case -1: - printk("read(stdin) returned -1\r\n"); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - putchar('\a'); - else { - putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - - - -/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ - -size_t strnlen(const char * s, size_t count) -{ - const char *sc; - - for (sc = s; count-- && *sc != '\0'; ++sc) - /* nothing */; - return sc - s; -} - -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0,value; - - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -long simple_strtol(const char *cp,char **endp,unsigned int base) -{ - if(*cp=='-') - return -simple_strtoul(cp+1,endp,base); - return simple_strtoul(cp,endp,base); -} - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ -#define PLUS 4 /* show plus */ -#define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ -#define SPECIAL 32 /* 0x */ -#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * str, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; - int i; - - if (type & LARGE) - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - if (type & LEFT) - type &= ~ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & ZEROPAD) ? '0' : ' '; - sign = 0; - if (type & SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & PLUS) { - sign = '+'; - size--; - } else if (type & SPACE) { - sign = ' '; - size--; - } - } - if (type & SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(ZEROPAD+LEFT))) - while(size-->0) - *str++ = ' '; - if (sign) - *str++ = sign; - if (type & SPECIAL) { - if (base==8) - *str++ = '0'; - else if (base==16) { - *str++ = '0'; - *str++ = digits[33]; - } - } - if (!(type & LEFT)) - while (size-- > 0) - *str++ = c; - while (i < precision--) - *str++ = '0'; - while (i-- > 0) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - return str; -} - -/* Forward decl. needed for IP address printing stuff... */ -int sprintf(char * buf, const char *fmt, ...); - -int vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char * str; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - - for (str=buf ; *fmt ; ++fmt) { - if (*fmt != '%') { - *str++ = *fmt; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= LEFT; goto repeat; - case '+': flags |= PLUS; goto repeat; - case ' ': flags |= SPACE; goto repeat; - case '#': flags |= SPECIAL; goto repeat; - case '0': flags |= ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) - while (--field_width > 0) - *str++ = ' '; - *str++ = (unsigned char) va_arg(args, int); - while (--field_width > 0) - *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = "<NULL>"; - - len = strnlen(s, precision); - - if (!(flags & LEFT)) - while (len < field_width--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = *s++; - while (len < field_width--) - *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, - (unsigned long) va_arg(args, void *), 16, - field_width, precision, flags); - continue; - - - case 'n': - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - *str++ = '%'; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; - - default: - *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & SIGN) - num = (signed int) num; - } - str = number(str, num, base, field_width, precision, flags); - } - *str = '\0'; - return str-buf; -} - -int sprintf(char * buf, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i=vsprintf(buf,fmt,args); - va_end(args); - return i; -} - -static char sprint_buf[1024]; - -void -printk(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); -} - -int -printf(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); - return n; -} diff --git a/arch/ppc64/configs/g5_defconfig b/arch/ppc64/configs/g5_defconfig index 0f90df0..1eb3339 100644 --- a/arch/ppc64/configs/g5_defconfig +++ b/arch/ppc64/configs/g5_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11 -# Thu Mar 10 16:47:04 2005 +# Linux kernel version: 2.6.12-rc6 +# Tue Jun 14 16:59:20 2005 # CONFIG_64BIT=y CONFIG_MMU=y @@ -11,7 +11,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y -CONFIG_FRAME_POINTER=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -20,6 +20,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -31,19 +32,20 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=17 CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 @@ -87,6 +89,8 @@ CONFIG_NR_CPUS=2 # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_GENERIC_HARDIRQS=y +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y # # General setup @@ -97,6 +101,7 @@ CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # CONFIG_HOTPLUG_CPU is not set # @@ -105,10 +110,6 @@ CONFIG_PCI_NAMES=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set @@ -293,7 +294,6 @@ CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -301,7 +301,6 @@ CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y @@ -310,6 +309,7 @@ CONFIG_SCSI_QLA2XXX=y # CONFIG_SCSI_QLA2300 is not set # CONFIG_SCSI_QLA2322 is not set # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -332,6 +332,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support @@ -394,7 +395,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y CONFIG_NET_KEY=m CONFIG_INET=y @@ -564,6 +564,8 @@ CONFIG_E1000=y # CONFIG_R8169 is not set # CONFIG_SK98LIN is not set CONFIG_TIGON3=m +# CONFIG_BNX2 is not set +# CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) @@ -631,18 +633,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -CONFIG_SERIO=y -# CONFIG_SERIO_I8042 is not set -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set -# CONFIG_SERIO_RAW is not set - -# # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y @@ -660,6 +650,16 @@ CONFIG_INPUT_MOUSE=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +CONFIG_SERIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PCIPS2 is not set +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# # Character devices # CONFIG_VT=y @@ -676,6 +676,7 @@ CONFIG_HW_CONSOLE=y # Non-8250 serial port support # # CONFIG_SERIAL_PMACZILOG is not set +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -698,9 +699,12 @@ CONFIG_LEGACY_PTY_COUNT=256 # # Ftape, the floppy tape device driver # +CONFIG_AGP=m +CONFIG_AGP_UNINORTH=m # CONFIG_DRM is not set CONFIG_RAW_DRIVER=y CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set # # TPM devices @@ -730,12 +734,11 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y -# CONFIG_I2C_MPC is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set @@ -772,6 +775,7 @@ CONFIG_I2C_KEYWEST=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -785,6 +789,7 @@ CONFIG_I2C_KEYWEST=y # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -817,6 +822,11 @@ CONFIG_I2C_KEYWEST=y # Graphics support # CONFIG_FB=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_SOFT_CURSOR=y +CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y # CONFIG_FB_CIRRUS is not set @@ -830,6 +840,7 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set +# CONFIG_FB_NVIDIA is not set CONFIG_FB_RIVA=y # CONFIG_FB_RIVA_I2C is not set # CONFIG_FB_RIVA_DEBUG is not set @@ -847,6 +858,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_TRIDENT is not set +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -880,6 +892,8 @@ CONFIG_LCD_DEVICE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -890,8 +904,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -917,7 +929,6 @@ CONFIG_USB_PRINTER=y # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_RW_DETECT=y CONFIG_USB_STORAGE_DATAFAB=y CONFIG_USB_STORAGE_FREECOM=y CONFIG_USB_STORAGE_ISD200=y @@ -1004,8 +1015,10 @@ CONFIG_USB_MON=y # CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y +# CONFIG_USB_SERIAL_AIRPRIME is not set CONFIG_USB_SERIAL_BELKIN=m CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +# CONFIG_USB_SERIAL_CP2101 is not set CONFIG_USB_SERIAL_CYPRESS_M8=m CONFIG_USB_SERIAL_EMPEG=m CONFIG_USB_SERIAL_FTDI_SIO=m @@ -1034,6 +1047,7 @@ CONFIG_USB_SERIAL_KLSI=m CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m CONFIG_USB_SERIAL_PL2303=m +# CONFIG_USB_SERIAL_HP4X is not set CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y CONFIG_USB_SERIAL_TI=m @@ -1270,11 +1284,13 @@ CONFIG_OPROFILE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y -# CONFIG_PRINTK_TIME is not set +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig index a39e9d2..f6a2b99 100644 --- a/arch/ppc64/configs/iSeries_defconfig +++ b/arch/ppc64/configs/iSeries_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc3-bk6 -# Wed Feb 9 23:34:52 2005 +# Linux kernel version: 2.6.12-rc6 +# Tue Jun 14 17:01:28 2005 # CONFIG_64BIT=y CONFIG_MMU=y @@ -11,7 +11,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y -CONFIG_FRAME_POINTER=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -20,6 +20,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -30,24 +31,29 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=17 +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -79,6 +85,8 @@ CONFIG_NR_CPUS=32 CONFIG_GENERIC_HARDIRQS=y CONFIG_MSCHUNKS=y CONFIG_LPARCFG=y +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y # # General setup @@ -89,6 +97,7 @@ CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -96,10 +105,6 @@ CONFIG_PCI_NAMES=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set @@ -210,7 +215,6 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -219,7 +223,6 @@ CONFIG_SCSI_IBMVSCSI=m # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y @@ -228,6 +231,7 @@ CONFIG_SCSI_QLA2XXX=y # CONFIG_SCSI_QLA2300 is not set # CONFIG_SCSI_QLA2322 is not set # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -250,6 +254,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support @@ -280,7 +285,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y CONFIG_NET_KEY=m CONFIG_INET=y @@ -445,7 +449,6 @@ CONFIG_PCNET32=y # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y -# CONFIG_E100_NAPI is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -471,6 +474,7 @@ CONFIG_E1000=m # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -539,14 +543,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -# CONFIG_SERIO is not set -# CONFIG_SERIO_I8042 is not set - -# # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set @@ -556,6 +552,12 @@ CONFIG_SOUND_GAMEPORT=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# # Character devices # # CONFIG_SERIAL_NONSTANDARD is not set @@ -570,6 +572,7 @@ CONFIG_SOUND_GAMEPORT=y # CONFIG_SERIAL_CORE=m CONFIG_SERIAL_ICOM=m +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -592,9 +595,16 @@ CONFIG_LEGACY_PTY_COUNT=256 # # Ftape, the floppy tape device driver # +# CONFIG_AGP is not set # CONFIG_DRM is not set CONFIG_RAW_DRIVER=y CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -633,13 +643,9 @@ CONFIG_MAX_RAW_DEVS=256 # # USB support # -# CONFIG_USB is not set CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y - -# -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information -# +# CONFIG_USB is not set # # USB Gadget Support @@ -848,10 +854,13 @@ CONFIG_OPROFILE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set @@ -881,6 +890,7 @@ CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/ppc64/configs/maple_defconfig b/arch/ppc64/configs/maple_defconfig index cf52750..8051b0f 100644 --- a/arch/ppc64/configs/maple_defconfig +++ b/arch/ppc64/configs/maple_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc3-bk6 -# Wed Feb 9 23:34:53 2005 +# Linux kernel version: 2.6.12-rc6 +# Tue Jun 14 17:12:48 2005 # CONFIG_64BIT=y CONFIG_MMU=y @@ -11,7 +11,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y -CONFIG_FRAME_POINTER=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -20,6 +20,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -30,24 +31,28 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_AUDIT is not set # CONFIG_HOTPLUG is not set CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -84,6 +89,8 @@ CONFIG_NR_CPUS=2 # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_GENERIC_HARDIRQS=y +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y # # General setup @@ -94,6 +101,7 @@ CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -101,10 +109,6 @@ CONFIG_PCI_NAMES=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set @@ -261,7 +265,6 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -376,6 +379,8 @@ CONFIG_E1000=y # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) @@ -432,14 +437,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200 # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -# CONFIG_SERIO is not set -# CONFIG_SERIO_I8042 is not set - -# # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set @@ -449,6 +446,12 @@ CONFIG_SOUND_GAMEPORT=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# # Character devices # CONFIG_VT=y @@ -469,7 +472,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y -# CONFIG_SERIAL_PMACZILOG is not set +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -492,8 +495,15 @@ CONFIG_LEGACY_PTY_COUNT=256 # # Ftape, the floppy tape device driver # +# CONFIG_AGP is not set # CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -518,8 +528,8 @@ CONFIG_I2C_ALGOBIT=y CONFIG_I2C_AMD8111=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set -# CONFIG_I2C_MPC is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -545,7 +555,9 @@ CONFIG_I2C_AMD8111=y # CONFIG_SENSORS_ASB100 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set # CONFIG_SENSORS_LM63 is not set # CONFIG_SENSORS_LM75 is not set @@ -556,9 +568,11 @@ CONFIG_I2C_AMD8111=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_W83781D is not set @@ -568,6 +582,7 @@ CONFIG_I2C_AMD8111=y # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -615,6 +630,8 @@ CONFIG_DUMMY_CONSOLE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -625,8 +642,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -635,6 +650,8 @@ CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_SPLIT_ISO=y CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set @@ -688,6 +705,7 @@ CONFIG_USB_HIDINPUT=y CONFIG_USB_PEGASUS=y # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y # # USB port drivers @@ -699,8 +717,10 @@ CONFIG_USB_PEGASUS=y CONFIG_USB_SERIAL=y # CONFIG_USB_SERIAL_CONSOLE is not set CONFIG_USB_SERIAL_GENERIC=y +# CONFIG_USB_SERIAL_AIRPRIME is not set # CONFIG_USB_SERIAL_BELKIN is not set # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# CONFIG_USB_SERIAL_CP2101 is not set CONFIG_USB_SERIAL_CYPRESS_M8=m # CONFIG_USB_SERIAL_EMPEG is not set # CONFIG_USB_SERIAL_FTDI_SIO is not set @@ -729,6 +749,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_SERIAL_KOBIL_SCT is not set # CONFIG_USB_SERIAL_MCT_U232 is not set # CONFIG_USB_SERIAL_PL2303 is not set +# CONFIG_USB_SERIAL_HP4X is not set # CONFIG_USB_SERIAL_SAFE is not set CONFIG_USB_SERIAL_TI=m # CONFIG_USB_SERIAL_CYBERJACK is not set @@ -750,6 +771,7 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_TEST is not set # @@ -936,10 +958,13 @@ CONFIG_NLS_UTF8=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set CONFIG_DEBUG_SLAB=y +# CONFIG_DEBUG_SPINLOCK is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set @@ -971,6 +996,7 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_TWOFISH is not set diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 4fecf23..3eb5ef2 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc3-bk6 -# Wed Feb 9 23:34:54 2005 +# Linux kernel version: 2.6.12-rc6 +# Tue Jun 14 17:13:47 2005 # CONFIG_64BIT=y CONFIG_MMU=y @@ -11,7 +11,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y -CONFIG_FRAME_POINTER=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -20,6 +20,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -30,24 +31,29 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=17 +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_CPUSETS=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -89,9 +95,12 @@ CONFIG_SCHED_SMT=y CONFIG_EEH=y CONFIG_GENERIC_HARDIRQS=y CONFIG_PPC_RTAS=y +CONFIG_RTAS_PROC=y CONFIG_RTAS_FLASH=m CONFIG_SCANLOG=m CONFIG_LPARCFG=y +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y # # General setup @@ -102,6 +111,7 @@ CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set CONFIG_HOTPLUG_CPU=y # @@ -110,10 +120,6 @@ CONFIG_HOTPLUG_CPU=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # CONFIG_HOTPLUG_PCI=m @@ -147,11 +153,10 @@ CONFIG_FW_LOADER=y # CONFIG_PARPORT=m CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m # CONFIG_PARPORT_SERIAL is not set # CONFIG_PARPORT_PC_FIFO is not set # CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_OTHER is not set +# CONFIG_PARPORT_GSC is not set # CONFIG_PARPORT_1284 is not set # @@ -293,7 +298,6 @@ CONFIG_SCSI_ISCSI_ATTRS=m # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -310,7 +314,6 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 CONFIG_SCSI_IPR=y CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y -# CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y @@ -319,6 +322,7 @@ CONFIG_SCSI_QLA22XX=m CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m +CONFIG_SCSI_LPFC=m # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -341,6 +345,8 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_EMC=m # # Fusion MPT device support @@ -371,7 +377,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y CONFIG_NET_KEY=m CONFIG_INET=y @@ -539,7 +544,6 @@ CONFIG_PCNET32=y # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y -# CONFIG_E100_NAPI is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -565,6 +569,8 @@ CONFIG_E1000=y # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y +# CONFIG_BNX2 is not set +# CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) @@ -636,20 +642,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set - -# # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y @@ -669,6 +661,18 @@ CONFIG_INPUT_PCSPKR=m # CONFIG_INPUT_UINPUT is not set # +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# # Character devices # CONFIG_VT=y @@ -689,8 +693,8 @@ CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y -# CONFIG_SERIAL_PMACZILOG is not set CONFIG_SERIAL_ICOM=m +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -718,9 +722,16 @@ CONFIG_HVCS=m # # Ftape, the floppy tape device driver # +# CONFIG_AGP is not set # CONFIG_DRM is not set CONFIG_RAW_DRIVER=y CONFIG_MAX_RAW_DEVS=1024 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -745,8 +756,8 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set -# CONFIG_I2C_MPC is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -773,7 +784,9 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_ASB100 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set # CONFIG_SENSORS_LM63 is not set # CONFIG_SENSORS_LM75 is not set @@ -784,9 +797,11 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_W83781D is not set @@ -796,6 +811,7 @@ CONFIG_I2C_ALGOBIT=y # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -828,8 +844,13 @@ CONFIG_I2C_ALGOBIT=y # Graphics support # CONFIG_FB=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_SOFT_CURSOR=y +CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y -# CONFIG_FB_TILEBLITTING is not set +CONFIG_FB_TILEBLITTING=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_PM2 is not set # CONFIG_FB_CYBER2000 is not set @@ -838,6 +859,7 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set +# CONFIG_FB_NVIDIA is not set # CONFIG_FB_RIVA is not set CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y @@ -858,6 +880,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_TRIDENT is not set +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -891,6 +914,8 @@ CONFIG_LCD_DEVICE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -901,8 +926,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -911,6 +934,8 @@ CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y # CONFIG_USB_UHCI_HCD is not set # CONFIG_USB_SL811_HCD is not set @@ -926,12 +951,11 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_RW_DETECT is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set # CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set @@ -975,6 +999,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y # # USB port drivers @@ -1000,6 +1025,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_TEST is not set # @@ -1208,10 +1234,13 @@ CONFIG_OPROFILE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set @@ -1243,6 +1272,7 @@ CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index 537b1cc..2f31bf3 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc3-bk6 -# Wed Feb 9 23:34:51 2005 +# Linux kernel version: 2.6.12-rc5-git9 +# Sun Jun 5 09:26:47 2005 # CONFIG_64BIT=y CONFIG_MMU=y @@ -11,7 +11,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y -CONFIG_FRAME_POINTER=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -20,6 +20,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -30,24 +31,28 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_AUDIT is not set CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_CPUSETS=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -91,9 +96,12 @@ CONFIG_DISCONTIGMEM=y CONFIG_EEH=y CONFIG_GENERIC_HARDIRQS=y CONFIG_PPC_RTAS=y +CONFIG_RTAS_PROC=y CONFIG_RTAS_FLASH=m CONFIG_SCANLOG=m CONFIG_LPARCFG=y +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y # # General setup @@ -104,6 +112,7 @@ CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m # CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_NAMES is not set +# CONFIG_PCI_DEBUG is not set CONFIG_HOTPLUG_CPU=y # @@ -112,10 +121,6 @@ CONFIG_HOTPLUG_CPU=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # CONFIG_HOTPLUG_PCI=m @@ -149,11 +154,10 @@ CONFIG_FW_LOADER=y # CONFIG_PARPORT=m CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m # CONFIG_PARPORT_SERIAL is not set # CONFIG_PARPORT_PC_FIFO is not set # CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_OTHER is not set +# CONFIG_PARPORT_GSC is not set # CONFIG_PARPORT_1284 is not set # @@ -301,6 +305,7 @@ CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_QSTOR is not set # CONFIG_SCSI_SATA_SX4 is not set # CONFIG_SCSI_SATA_SIL is not set # CONFIG_SCSI_SATA_SIS is not set @@ -310,7 +315,6 @@ CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -327,7 +331,6 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 CONFIG_SCSI_IPR=y CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y -# CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y @@ -336,6 +339,7 @@ CONFIG_SCSI_QLA22XX=m CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m +CONFIG_SCSI_LPFC=m # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set CONFIG_SCSI_DEBUG=m @@ -358,6 +362,8 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_EMC=m # # Fusion MPT device support @@ -405,6 +411,7 @@ CONFIG_IEEE1394_AMDTP=m # CONFIG_ADB=y CONFIG_ADB_PMU=y +CONFIG_PMAC_SMU=y # CONFIG_PMAC_PBOOK is not set # CONFIG_PMAC_BACKLIGHT is not set # CONFIG_INPUT_ADBHID is not set @@ -420,7 +427,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y CONFIG_NET_KEY=m CONFIG_INET=y @@ -588,7 +594,6 @@ CONFIG_PCNET32=y # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y -# CONFIG_E100_NAPI is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -614,6 +619,8 @@ CONFIG_E1000=y # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y +# CONFIG_BNX2 is not set +# CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) @@ -683,20 +690,6 @@ CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set - -# # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y @@ -716,6 +709,18 @@ CONFIG_INPUT_PCSPKR=m # CONFIG_INPUT_UINPUT is not set # +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# # Character devices # CONFIG_VT=y @@ -738,6 +743,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_PMACZILOG is not set CONFIG_SERIAL_ICOM=m +CONFIG_SERIAL_JSM=m CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -766,9 +772,16 @@ CONFIG_HVCS=m # # Ftape, the floppy tape device driver # +# CONFIG_AGP is not set # CONFIG_DRM is not set CONFIG_RAW_DRIVER=y CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -793,9 +806,9 @@ CONFIG_I2C_ALGOBIT=y CONFIG_I2C_AMD8111=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y -# CONFIG_I2C_MPC is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -822,7 +835,9 @@ CONFIG_I2C_KEYWEST=y # CONFIG_SENSORS_ASB100 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set # CONFIG_SENSORS_LM63 is not set # CONFIG_SENSORS_LM75 is not set @@ -833,9 +848,11 @@ CONFIG_I2C_KEYWEST=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_W83781D is not set @@ -845,6 +862,7 @@ CONFIG_I2C_KEYWEST=y # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -877,6 +895,11 @@ CONFIG_I2C_KEYWEST=y # Graphics support # CONFIG_FB=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_SOFT_CURSOR=y +CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y # CONFIG_FB_CIRRUS is not set @@ -890,9 +913,8 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set -CONFIG_FB_RIVA=y -CONFIG_FB_RIVA_I2C=y -# CONFIG_FB_RIVA_DEBUG is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y CONFIG_FB_MATROX_MYSTIQUE=y @@ -913,6 +935,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_TRIDENT is not set +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -946,6 +969,8 @@ CONFIG_LCD_DEVICE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -956,8 +981,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -966,6 +989,8 @@ CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y # CONFIG_USB_UHCI_HCD is not set # CONFIG_USB_SL811_HCD is not set @@ -981,12 +1006,11 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_RW_DETECT=y # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set # CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set @@ -1030,6 +1054,7 @@ CONFIG_USB_HIDDEV=y CONFIG_USB_PEGASUS=y # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_MON is not set # # USB port drivers @@ -1055,6 +1080,7 @@ CONFIG_USB_PEGASUS=y # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_TEST is not set # @@ -1276,10 +1302,13 @@ CONFIG_OPROFILE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set @@ -1311,6 +1340,7 @@ CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/ppc64/kernel/entry.S b/arch/ppc64/kernel/entry.S index d360405..b61572e 100644 --- a/arch/ppc64/kernel/entry.S +++ b/arch/ppc64/kernel/entry.S @@ -436,15 +436,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) REST_8GPRS(14, r1) REST_10GPRS(22, r1) -#ifdef CONFIG_PPC_ISERIES - clrrdi r7,r1,THREAD_SHIFT /* get current_thread_info() */ - ld r7,TI_FLAGS(r7) /* Get run light flag */ - mfspr r9,CTRLF - srdi r7,r7,TIF_RUN_LIGHT - insrdi r9,r7,1,63 /* Insert run light into CTRL */ - mtspr CTRLT,r9 -#endif - /* convert old thread to its task_struct for return value */ addi r3,r3,-THREAD ld r7,_NIP(r1) /* Return to _switch caller in new task */ diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 92a744c..346dbf6 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -626,10 +626,10 @@ system_reset_iSeries: lhz r24,PACAPACAINDEX(r13) /* Get processor # */ cmpwi 0,r24,0 /* Are we processor 0? */ beq .__start_initialization_iSeries /* Start up the first processor */ - mfspr r4,CTRLF - li r5,RUNLATCH /* Turn off the run light */ + mfspr r4,SPRN_CTRLF + li r5,CTRL_RUNLATCH /* Turn off the run light */ andc r4,r4,r5 - mtspr CTRLT,r4 + mtspr SPRN_CTRLT,r4 1: HMT_LOW @@ -2082,9 +2082,9 @@ _GLOBAL(hmt_start_secondary) mfspr r4, HID0 ori r4, r4, 0x1 mtspr HID0, r4 - mfspr r4, CTRLF + mfspr r4, SPRN_CTRLF oris r4, r4, 0x40 - mtspr CTRLT, r4 + mtspr SPRN_CTRLT, r4 blr #endif diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index da20120..6d06eb5 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -852,6 +852,28 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); +static int set_spread_lpevents(char *str) +{ + unsigned long i; + unsigned long val = simple_strtoul(str, NULL, 0); + + /* + * The parameter is the number of processors to share in processing + * lp events. + */ + if (( val > 0) && (val <= NR_CPUS)) { + for (i = 1; i < val; ++i) + paca[i].lpqueue_ptr = paca[0].lpqueue_ptr; + + printk("lpevent processing spread over %ld processors\n", val); + } else { + printk("invalid spread_lpevents %ld\n", val); + } + + return 1; +} +__setup("spread_lpevents=", set_spread_lpevents); + void __init iSeries_early_setup(void) { iSeries_fixup_klimit(); diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 6abc621..f24ce2b 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -75,13 +75,9 @@ static int iSeries_idle(void) { struct paca_struct *lpaca; long oldval; - unsigned long CTRL; /* ensure iSeries run light will be out when idle */ - clear_thread_flag(TIF_RUN_LIGHT); - CTRL = mfspr(CTRLF); - CTRL &= ~RUNLATCH; - mtspr(CTRLT, CTRL); + ppc64_runlatch_off(); lpaca = get_paca(); @@ -111,7 +107,9 @@ static int iSeries_idle(void) } } + ppc64_runlatch_on(); schedule(); + ppc64_runlatch_off(); } return 0; diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 103daaf..e950a20 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -45,12 +45,17 @@ static struct pt_regs jprobe_saved_regs; int arch_prepare_kprobe(struct kprobe *p) { + int ret = 0; kprobe_opcode_t insn = *p->addr; - if (IS_MTMSRD(insn) || IS_RFID(insn)) - /* cannot put bp on RFID/MTMSRD */ - return 1; - return 0; + if ((unsigned long)p->addr & 0x03) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { + printk("Cannot register a kprobe on rfid or mtmsrd\n"); + ret = -EINVAL; + } + return ret; } void arch_copy_kprobe(struct kprobe *p) @@ -172,8 +177,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) ret = emulate_step(regs, p->ainsn.insn[0]); if (ret == 0) regs->nip = (unsigned long)p->addr + 4; - - regs->msr &= ~MSR_SE; } static inline int post_kprobe_handler(struct pt_regs *regs) @@ -210,6 +213,7 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) if (kprobe_status & KPROBE_HIT_SS) { resume_execution(current_kprobe, regs); + regs->msr &= ~MSR_SE; regs->msr |= kprobe_saved_msr; unlock_kprobes(); @@ -233,8 +237,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, */ preempt_disable(); switch (val) { - case DIE_IABR_MATCH: - case DIE_DABR_MATCH: case DIE_BPT: if (kprobe_handler(args->regs)) ret = NOTIFY_STOP; diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c index 1bd52ec..5aca7e8 100644 --- a/arch/ppc64/kernel/mf.c +++ b/arch/ppc64/kernel/mf.c @@ -1,7 +1,7 @@ /* * mf.c * Copyright (C) 2001 Troy D. Armstrong IBM Corporation - * Copyright (C) 2004 Stephen Rothwell IBM Corporation + * Copyright (C) 2004-2005 Stephen Rothwell IBM Corporation * * This modules exists as an interface between a Linux secondary partition * running on an iSeries and the primary partition's Virtual Service @@ -36,10 +36,12 @@ #include <asm/time.h> #include <asm/uaccess.h> +#include <asm/paca.h> #include <asm/iSeries/vio.h> #include <asm/iSeries/mf.h> #include <asm/iSeries/HvLpConfig.h> #include <asm/iSeries/ItSpCommArea.h> +#include <asm/iSeries/ItLpQueue.h> /* * This is the structure layout for the Machine Facilites LPAR event @@ -696,36 +698,23 @@ static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) complete(&rtc->com); } -int mf_get_rtc(struct rtc_time *tm) +static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm) { - struct ce_msg_comp_data ce_complete; - struct rtc_time_data rtc_data; - int rc; - - memset(&ce_complete, 0, sizeof(ce_complete)); - memset(&rtc_data, 0, sizeof(rtc_data)); - init_completion(&rtc_data.com); - ce_complete.handler = &get_rtc_time_complete; - ce_complete.token = &rtc_data; - rc = signal_ce_msg_simple(0x40, &ce_complete); - if (rc) - return rc; - wait_for_completion(&rtc_data.com); tm->tm_wday = 0; tm->tm_yday = 0; tm->tm_isdst = 0; - if (rtc_data.rc) { + if (rc) { tm->tm_sec = 0; tm->tm_min = 0; tm->tm_hour = 0; tm->tm_mday = 15; tm->tm_mon = 5; tm->tm_year = 52; - return rtc_data.rc; + return rc; } - if ((rtc_data.ce_msg.ce_msg[2] == 0xa9) || - (rtc_data.ce_msg.ce_msg[2] == 0xaf)) { + if ((ce_msg[2] == 0xa9) || + (ce_msg[2] == 0xaf)) { /* TOD clock is not set */ tm->tm_sec = 1; tm->tm_min = 1; @@ -736,7 +725,6 @@ int mf_get_rtc(struct rtc_time *tm) mf_set_rtc(tm); } { - u8 *ce_msg = rtc_data.ce_msg.ce_msg; u8 year = ce_msg[5]; u8 sec = ce_msg[6]; u8 min = ce_msg[7]; @@ -765,6 +753,63 @@ int mf_get_rtc(struct rtc_time *tm) return 0; } +int mf_get_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + init_completion(&rtc_data.com); + ce_complete.handler = &get_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + wait_for_completion(&rtc_data.com); + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + +struct boot_rtc_time_data { + int busy; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct boot_rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + rtc->busy = 0; +} + +int mf_get_boot_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct boot_rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + rtc_data.busy = 1; + ce_complete.handler = &get_boot_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + /* We need to poll here as we are not yet taking interrupts */ + while (rtc_data.busy) { + extern unsigned long lpevent_count; + struct ItLpQueue *lpq = get_paca()->lpqueue_ptr; + if (lpq && ItLpQueue_isLpIntPending(lpq)) + lpevent_count += ItLpQueue_process(lpq, NULL); + } + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + int mf_set_rtc(struct rtc_time *tm) { char ce_time[12]; diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 90b41f4..e3c73b3 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -32,7 +32,7 @@ .text /* - * Returns (address we're running at) - (address we were linked at) + * Returns (address we were linked at) - (address we are running at) * for use before the text and data are mapped to KERNELBASE. */ @@ -792,7 +792,7 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_newstat .llong .compat_sys_newlstat .llong .compat_sys_newfstat - .llong .sys_uname + .llong .sys32_uname .llong .sys_ni_syscall /* 110 old iopl syscall */ .llong .sys_vhangup .llong .sys_ni_syscall /* old idle syscall */ diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c index cb5443f..dc2a69d 100644 --- a/arch/ppc64/kernel/pSeries_reconfig.c +++ b/arch/ppc64/kernel/pSeries_reconfig.c @@ -47,14 +47,6 @@ static void remove_node_proc_entries(struct device_node *np) remove_proc_entry(pp->name, np->pde); pp = pp->next; } - - /* Assuming that symlinks have the same parent directory as - * np->pde. - */ - if (np->name_link) - remove_proc_entry(np->name_link->name, parent->pde); - if (np->addr_link) - remove_proc_entry(np->addr_link->name, parent->pde); if (np->pde) remove_proc_entry(np->pde->name, parent->pde); } diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index c60d8cb..fbad349 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -326,13 +326,6 @@ static void __devinit smp_xics_setup_cpu(int cpu) cpu_clear(cpu, of_spin_map); - /* - * Put the calling processor into the GIQ. This is really only - * necessary from a secondary thread as the OF start-cpu interface - * performs this function for us on primary threads. - */ - rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); } static DEFINE_SPINLOCK(timebase_lock); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index be3cc38..d786d4b 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -438,7 +438,7 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, int i; if (page_is_ram(offset >> PAGE_SHIFT)) - return prot; + return __pgprot(prot); prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff --git a/arch/ppc64/kernel/pmac_smp.c b/arch/ppc64/kernel/pmac_smp.c index c27588e..a23de37 100644 --- a/arch/ppc64/kernel/pmac_smp.c +++ b/arch/ppc64/kernel/pmac_smp.c @@ -68,6 +68,7 @@ extern struct smp_ops_t *smp_ops; static void (*pmac_tb_freeze)(int freeze); static struct device_node *pmac_tb_clock_chip_host; +static u8 pmac_tb_pulsar_addr; static DEFINE_SPINLOCK(timebase_lock); static unsigned long timebase; @@ -106,12 +107,9 @@ static void smp_core99_pulsar_tb_freeze(int freeze) u8 data; int rc; - /* Strangely, the device-tree says address is 0xd2, but darwin - * accesses 0xd0 ... - */ pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - 0xd4 | pmac_low_i2c_read, + pmac_tb_pulsar_addr | pmac_low_i2c_read, 0x2e, &data, 1); if (rc != 0) goto bail; @@ -120,7 +118,7 @@ static void smp_core99_pulsar_tb_freeze(int freeze) pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - 0xd4 | pmac_low_i2c_write, + pmac_tb_pulsar_addr | pmac_low_i2c_write, 0x2e, &data, 1); bail: if (rc != 0) { @@ -185,6 +183,12 @@ static int __init smp_core99_probe(void) if (ncpus <= 1) return 1; + /* HW sync only on these platforms */ + if (!machine_is_compatible("PowerMac7,2") && + !machine_is_compatible("PowerMac7,3") && + !machine_is_compatible("RackMac3,1")) + goto nohwsync; + /* Look for the clock chip */ for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) { struct device_node *p = of_get_parent(cc); @@ -198,11 +202,18 @@ static int __init smp_core99_probe(void) goto next; switch (*reg) { case 0xd2: - pmac_tb_freeze = smp_core99_cypress_tb_freeze; - printk(KERN_INFO "Timebase clock is Cypress chip\n"); + if (device_is_compatible(cc, "pulsar-legacy-slewing")) { + pmac_tb_freeze = smp_core99_pulsar_tb_freeze; + pmac_tb_pulsar_addr = 0xd2; + printk(KERN_INFO "Timebase clock is Pulsar chip\n"); + } else if (device_is_compatible(cc, "cy28508")) { + pmac_tb_freeze = smp_core99_cypress_tb_freeze; + printk(KERN_INFO "Timebase clock is Cypress chip\n"); + } break; case 0xd4: pmac_tb_freeze = smp_core99_pulsar_tb_freeze; + pmac_tb_pulsar_addr = 0xd4; printk(KERN_INFO "Timebase clock is Pulsar chip\n"); break; } @@ -210,12 +221,15 @@ static int __init smp_core99_probe(void) pmac_tb_clock_chip_host = p; smp_ops->give_timebase = smp_core99_give_timebase; smp_ops->take_timebase = smp_core99_take_timebase; + of_node_put(cc); + of_node_put(p); break; } next: of_node_put(p); } + nohwsync: mpic_request_ipis(); return ncpus; diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index 8b06861..cdfecbe 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -378,9 +378,6 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp, childregs->gpr[1] = sp + sizeof(struct pt_regs); p->thread.regs = NULL; /* no user register state */ clear_ti_thread_flag(p->thread_info, TIF_32BIT); -#ifdef CONFIG_PPC_ISERIES - set_ti_thread_flag(p->thread_info, TIF_RUN_LIGHT); -#endif } else { childregs->gpr[1] = usp; p->thread.regs = childregs; diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index fe2946c..eb6538b 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -834,7 +834,7 @@ void __init unflatten_device_tree(void) { unsigned long start, mem, size; struct device_node **allnextp = &allnodes; - char *p; + char *p = NULL; int l = 0; DBG(" -> unflatten_device_tree()\n"); diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index 35ec42d..b7683ab 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -211,13 +211,23 @@ struct { */ #define ADDR(x) (u32) ((unsigned long)(x) - offset) +/* + * Error results ... some OF calls will return "-1" on error, some + * will return 0, some will return either. To simplify, here are + * macros to use with any ihandle or phandle return value to check if + * it is valid + */ + +#define PROM_ERROR (-1u) +#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) +#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) + + /* This is the one and *ONLY* place where we actually call open * firmware from, since we need to make sure we're running in 32b * mode when we do. We switch back to 64b mode upon return. */ -#define PROM_ERROR (-1) - static int __init call_prom(const char *service, int nargs, int nret, ...) { int i; @@ -587,14 +597,13 @@ static void __init prom_send_capabilities(void) { unsigned long offset = reloc_offset(); ihandle elfloader; - int ret; elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader")); if (elfloader == 0) { prom_printf("couldn't open /packages/elf-loader\n"); return; } - ret = call_prom("call-method", 3, 1, ADDR("process-elf-header"), + call_prom("call-method", 3, 1, ADDR("process-elf-header"), elfloader, ADDR(&fake_elf)); call_prom("close", 1, 0, elfloader); } @@ -646,7 +655,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) base = _ALIGN_UP(base + 0x100000, align)) { prom_debug(" trying: 0x%x\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); - if ((int)addr != PROM_ERROR) + if (addr != PROM_ERROR) break; addr = 0; if (align == 0) @@ -708,7 +717,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, for(; base > RELOC(alloc_bottom); base = _ALIGN_DOWN(base - 0x100000, align)) { prom_debug(" trying: 0x%x\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); - if ((int)addr != PROM_ERROR) + if (addr != PROM_ERROR) break; addr = 0; } @@ -902,18 +911,19 @@ static void __init prom_instantiate_rtas(void) { unsigned long offset = reloc_offset(); struct prom_t *_prom = PTRRELOC(&prom); - phandle prom_rtas, rtas_node; + phandle rtas_node; + ihandle rtas_inst; u32 base, entry = 0; u32 size = 0; prom_debug("prom_instantiate_rtas: start...\n"); - prom_rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - prom_debug("prom_rtas: %x\n", prom_rtas); - if (prom_rtas == (phandle) -1) + rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas")); + prom_debug("rtas_node: %x\n", rtas_node); + if (!PHANDLE_VALID(rtas_node)) return; - prom_getprop(prom_rtas, "rtas-size", &size, sizeof(size)); + prom_getprop(rtas_node, "rtas-size", &size, sizeof(size)); if (size == 0) return; @@ -922,14 +932,18 @@ static void __init prom_instantiate_rtas(void) prom_printf("RTAS allocation failed !\n"); return; } - prom_printf("instantiating rtas at 0x%x", base); - rtas_node = call_prom("open", 1, 1, ADDR("/rtas")); - prom_printf("..."); + rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); + if (!IHANDLE_VALID(rtas_inst)) { + prom_printf("opening rtas package failed"); + return; + } + + prom_printf("instantiating rtas at 0x%x ...", base); if (call_prom("call-method", 3, 2, ADDR("instantiate-rtas"), - rtas_node, base) != PROM_ERROR) { + rtas_inst, base) != PROM_ERROR) { entry = (long)_prom->args.rets[1]; } if (entry == 0) { @@ -940,8 +954,8 @@ static void __init prom_instantiate_rtas(void) reserve_mem(base, size); - prom_setprop(prom_rtas, "linux,rtas-base", &base, sizeof(base)); - prom_setprop(prom_rtas, "linux,rtas-entry", &entry, sizeof(entry)); + prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base)); + prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry)); prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); @@ -1062,7 +1076,7 @@ static void __init prom_initialize_tce_table(void) prom_printf("opening PHB %s", path); phb_node = call_prom("open", 1, 1, path); - if ( (long)phb_node <= 0) + if (phb_node == 0) prom_printf("... failed\n"); else prom_printf("... done\n"); @@ -1279,12 +1293,12 @@ static void __init prom_init_client_services(unsigned long pp) /* get a handle for the stdout device */ _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); - if ((long)_prom->chosen <= 0) + if (!PHANDLE_VALID(_prom->chosen)) prom_panic("cannot find chosen"); /* msg won't be printed :( */ /* get device tree root */ _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); - if ((long)_prom->root <= 0) + if (!PHANDLE_VALID(_prom->root)) prom_panic("cannot find device tree root"); /* msg won't be printed :( */ } @@ -1356,9 +1370,8 @@ static int __init prom_find_machine_type(void) } /* Default to pSeries. We need to know if we are running LPAR */ rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - if (rtas != (phandle) -1) { - unsigned long x; - x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (PHANDLE_VALID(rtas)) { + int x = prom_getproplen(rtas, "ibm,hypertas-functions"); if (x != PROM_ERROR) { prom_printf("Hypertas detected, assuming LPAR !\n"); return PLATFORM_PSERIES_LPAR; @@ -1426,12 +1439,13 @@ static void __init prom_check_displays(void) * leave some room at the end of the path for appending extra * arguments */ - if (call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-10) < 0) + if (call_prom("package-to-path", 3, 1, node, path, + PROM_SCRATCH_SIZE-10) == PROM_ERROR) continue; prom_printf("found display : %s, opening ... ", path); ih = call_prom("open", 1, 1, path); - if (ih == (ihandle)0 || ih == (ihandle)-1) { + if (ih == 0) { prom_printf("failed\n"); continue; } @@ -1514,6 +1528,12 @@ static unsigned long __init dt_find_string(char *str) return 0; } +/* + * The Open Firmware 1275 specification states properties must be 31 bytes or + * less, however not all firmwares obey this. Make it 64 bytes to be safe. + */ +#define MAX_PROPERTY_NAME 64 + static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, unsigned long *mem_end) { @@ -1527,10 +1547,12 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, /* get and store all property names */ prev_name = RELOC(""); for (;;) { - - /* 32 is max len of name including nul. */ - namep = make_room(mem_start, mem_end, 32, 1); - if (call_prom("nextprop", 3, 1, node, prev_name, namep) <= 0) { + int rc; + + /* 64 is max len of name including nul. */ + namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); + rc = call_prom("nextprop", 3, 1, node, prev_name, namep); + if (rc != 1) { /* No more nodes: unwind alloc */ *mem_start = (unsigned long)namep; break; @@ -1555,18 +1577,12 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, } } -/* - * The Open Firmware 1275 specification states properties must be 31 bytes or - * less, however not all firmwares obey this. Make it 64 bytes to be safe. - */ -#define MAX_PROPERTY_NAME 64 - static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long *mem_end) { int l, align; phandle child; - char *namep, *prev_name, *sstart; + char *namep, *prev_name, *sstart, *p, *ep; unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); @@ -1588,6 +1604,14 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, call_prom("package-to-path", 3, 1, node, namep, l); } namep[l] = '\0'; + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties + */ + for (p = namep, ep = namep + l; p < ep; p++) + if (*p == '\0') { + memmove(p, p+1, ep - p); + ep--; l--; + } *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); } @@ -1599,7 +1623,10 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prev_name = RELOC(""); sstart = (char *)RELOC(dt_string_start); for (;;) { - if (call_prom("nextprop", 3, 1, node, prev_name, pname) <= 0) + int rc; + + rc = call_prom("nextprop", 3, 1, node, prev_name, pname); + if (rc != 1) break; /* find string offset */ @@ -1615,7 +1642,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, l = call_prom("getproplen", 2, 1, node, pname); /* sanity checks */ - if (l < 0) + if (l == PROM_ERROR) continue; if (l > MAX_PROPERTY_LENGTH) { prom_printf("WARNING: ignoring large property "); @@ -1750,7 +1777,45 @@ static void __init flatten_device_tree(void) prom_printf("Device tree struct 0x%x -> 0x%x\n", RELOC(dt_struct_start), RELOC(dt_struct_end)); - } +} + + +static void __init fixup_device_tree(void) +{ + unsigned long offset = reloc_offset(); + phandle u3, i2c, mpic; + u32 u3_rev; + u32 interrupts[2]; + u32 parent; + + /* Some G5s have a missing interrupt definition, fix it up here */ + u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000")); + if (!PHANDLE_VALID(u3)) + return; + i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000")); + if (!PHANDLE_VALID(i2c)) + return; + mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000")); + if (!PHANDLE_VALID(mpic)) + return; + + /* check if proper rev of u3 */ + if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) + == PROM_ERROR) + return; + if (u3_rev != 0x35) + return; + /* does it need fixup ? */ + if (prom_getproplen(i2c, "interrupts") > 0) + return; + /* interrupt on this revision of u3 is number 0 and level */ + interrupts[0] = 0; + interrupts[1] = 1; + prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts)); + parent = (u32)mpic; + prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent)); +} + static void __init prom_find_boot_cpu(void) { @@ -1844,6 +1909,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long &getprop_rval, sizeof(getprop_rval)); /* + * On pSeries, inform the firmware about our capabilities + */ + if (RELOC(of_platform) & PLATFORM_PSERIES) + prom_send_capabilities(); + + /* * On pSeries, copy the CPU hold code */ if (RELOC(of_platform) & PLATFORM_PSERIES) @@ -1920,6 +1991,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long } /* + * Fixup any known bugs in the device-tree + */ + fixup_device_tree(); + + /* * Now finally create the flattened device-tree */ prom_printf("copying OF device tree ...\n"); diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 5a84632..9f8c608 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -305,14 +305,17 @@ static void do_syscall_trace(void) void do_syscall_trace_enter(struct pt_regs *regs) { + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); + if (unlikely(current->audit_context)) - audit_syscall_entry(current, regs->gpr[0], + audit_syscall_entry(current, + test_thread_flag(TIF_32BIT)?AUDIT_ARCH_PPC:AUDIT_ARCH_PPC64, + regs->gpr[0], regs->gpr[3], regs->gpr[4], regs->gpr[5], regs->gpr[6]); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - do_syscall_trace(); } void do_syscall_trace_leave(struct pt_regs *regs) @@ -320,7 +323,9 @@ void do_syscall_trace_leave(struct pt_regs *regs) secure_computing(regs->gpr[0]); if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs->result); + audit_syscall_exit(current, + (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, + regs->result); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c index 3e70b91..6798905 100644 --- a/arch/ppc64/kernel/rtc.c +++ b/arch/ppc64/kernel/rtc.c @@ -292,47 +292,10 @@ int iSeries_set_rtc_time(struct rtc_time *tm) void iSeries_get_boot_time(struct rtc_time *tm) { - unsigned long time; - static unsigned long lastsec = 1; - - u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart)); - u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1); - int year = 1970; - int year1 = ( dataWord1 >> 24 ) & 0x000000FF; - int year2 = ( dataWord1 >> 16 ) & 0x000000FF; - int sec = ( dataWord1 >> 8 ) & 0x000000FF; - int min = dataWord1 & 0x000000FF; - int hour = ( dataWord2 >> 24 ) & 0x000000FF; - int day = ( dataWord2 >> 8 ) & 0x000000FF; - int mon = dataWord2 & 0x000000FF; - if ( piranha_simulator ) return; - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year1); - BCD_TO_BIN(year2); - year = year1 * 100 + year2; - - time = mktime(year, mon, day, hour, min, sec); - time += ( jiffies / HZ ); - - /* Now THIS is a nasty hack! - * It ensures that the first two calls get different answers. - * That way the loop in init_time (time.c) will not think - * the clock is stuck. - */ - if ( lastsec ) { - time -= lastsec; - --lastsec; - } - - to_tm(time, tm); - tm->tm_year -= 1900; + mf_get_boot_rtc(tm); tm->tm_mon -= 1; } #endif diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 21c57f5..dce198d 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -103,11 +103,6 @@ extern void unflatten_device_tree(void); extern void smp_release_cpus(void); -unsigned long decr_overclock = 1; -unsigned long decr_overclock_proc0 = 1; -unsigned long decr_overclock_set = 0; -unsigned long decr_overclock_proc0_set = 0; - int have_of = 1; int boot_cpuid = 0; int boot_cpuid_phys = 0; @@ -1120,64 +1115,15 @@ void ppc64_dump_msg(unsigned int src, const char *msg) printk("[dump]%04x %s\n", src, msg); } -int set_spread_lpevents( char * str ) -{ - /* The parameter is the number of processors to share in processing lp events */ - unsigned long i; - unsigned long val = simple_strtoul( str, NULL, 0 ); - if ( ( val > 0 ) && ( val <= NR_CPUS ) ) { - for ( i=1; i<val; ++i ) - paca[i].lpqueue_ptr = paca[0].lpqueue_ptr; - printk("lpevent processing spread over %ld processors\n", val); - } - else - printk("invalid spreaqd_lpevents %ld\n", val); - return 1; -} - /* This should only be called on processor 0 during calibrate decr */ void setup_default_decr(void) { struct paca_struct *lpaca = get_paca(); - if ( decr_overclock_set && !decr_overclock_proc0_set ) - decr_overclock_proc0 = decr_overclock; - - lpaca->default_decr = tb_ticks_per_jiffy / decr_overclock_proc0; + lpaca->default_decr = tb_ticks_per_jiffy; lpaca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; } -int set_decr_overclock_proc0( char * str ) -{ - unsigned long val = simple_strtoul( str, NULL, 0 ); - if ( ( val >= 1 ) && ( val <= 48 ) ) { - decr_overclock_proc0_set = 1; - decr_overclock_proc0 = val; - printk("proc 0 decrementer overclock factor of %ld\n", val); - } - else - printk("invalid proc 0 decrementer overclock factor of %ld\n", val); - return 1; -} - -int set_decr_overclock( char * str ) -{ - unsigned long val = simple_strtoul( str, NULL, 0 ); - if ( ( val >= 1 ) && ( val <= 48 ) ) { - decr_overclock_set = 1; - decr_overclock = val; - printk("decrementer overclock factor of %ld\n", val); - } - else - printk("invalid decrementer overclock factor of %ld\n", val); - return 1; - -} - -__setup("spread_lpevents=", set_spread_lpevents ); -__setup("decr_overclock_proc0=", set_decr_overclock_proc0 ); -__setup("decr_overclock=", set_decr_overclock ); - #ifndef CONFIG_PPC_ISERIES /* * This function can be used by platforms to "find" legacy serial ports. diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c index a95a2b4..bf78227 100644 --- a/arch/ppc64/kernel/signal.c +++ b/arch/ppc64/kernel/signal.c @@ -42,11 +42,7 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -#ifndef MIN -#define MIN(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#define GP_REGS_SIZE MIN(sizeof(elf_gregset_t), sizeof(struct pt_regs)) +#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) #define TRAMP_TRACEBACK 3 diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c index 3b906cd..9ef5d36 100644 --- a/arch/ppc64/kernel/smp.c +++ b/arch/ppc64/kernel/smp.c @@ -334,7 +334,6 @@ void smp_call_function_interrupt(void) } } -extern unsigned long decr_overclock; extern struct gettimeofday_struct do_gtod; struct thread_info *current_set[NR_CPUS]; @@ -491,7 +490,7 @@ int __devinit __cpu_up(unsigned int cpu) if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)) return -EINVAL; - paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock; + paca[cpu].default_decr = tb_ticks_per_jiffy; if (!cpu_has_feature(CPU_FTR_SLB)) { void *tmp; diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 7cf7a96..9c8e317 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -791,31 +791,6 @@ asmlinkage int sys32_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn) } -asmlinkage int ppc64_newuname(struct new_utsname __user * name) -{ - int errno = sys_newuname(name); - - if (current->personality == PER_LINUX32 && !errno) { - if(copy_to_user(name->machine, "ppc\0\0", 8)) { - errno = -EFAULT; - } - } - return errno; -} - -asmlinkage int ppc64_personality(unsigned long personality) -{ - int ret; - if (current->personality == PER_LINUX32 && personality == PER_LINUX) - personality = PER_LINUX32; - ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; - return ret; -} - - - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1158,26 +1133,47 @@ asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) } #endif +asmlinkage int sys32_uname(struct old_utsname __user * name) +{ + int err = 0; + + down_read(&uts_sem); + if (copy_to_user(name, &system_utsname, sizeof(*name))) + err = -EFAULT; + up_read(&uts_sem); + if (!err && personality(current->personality) == PER_LINUX32) { + /* change "ppc64" to "ppc" */ + if (__put_user(0, name->machine + 3) + || __put_user(0, name->machine + 4)) + err = -EFAULT; + } + return err; +} + asmlinkage int sys32_olduname(struct oldold_utsname __user * name) { int error; - - if (!name) - return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; down_read(&uts_sem); error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); - error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); - error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); - error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); - error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); - error = __put_user(0,name->machine+__OLD_UTS_LEN); + error |= __put_user(0,name->sysname+__OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __put_user(0,name->nodename+__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __put_user(0,name->release+__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __put_user(0,name->version+__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __put_user(0,name->machine+__OLD_UTS_LEN); + if (personality(current->personality) == PER_LINUX32) { + /* change "ppc64" to "ppc" */ + error |= __put_user(0, name->machine + 3); + error |= __put_user(0, name->machine + 4); + } + up_read(&uts_sem); error = error ? -EFAULT : 0; diff --git a/arch/ppc64/kernel/syscalls.c b/arch/ppc64/kernel/syscalls.c index f2865ff..a8cbb20 100644 --- a/arch/ppc64/kernel/syscalls.c +++ b/arch/ppc64/kernel/syscalls.c @@ -199,24 +199,33 @@ out: return ret; } -static int __init set_fakeppc(char *str) +long ppc64_personality(unsigned long personality) { - if (*str) - return 0; - init_task.personality = PER_LINUX32; - return 1; + long ret; + + if (personality(current->personality) == PER_LINUX32 + && personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; } -__setup("fakeppc", set_fakeppc); -asmlinkage int sys_uname(struct old_utsname __user * name) +long ppc64_newuname(struct new_utsname __user * name) { - int err = -EFAULT; - + int err = 0; + down_read(&uts_sem); - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) - err = 0; + if (copy_to_user(name, &system_utsname, sizeof(*name))) + err = -EFAULT; up_read(&uts_sem); - + if (!err && personality(current->personality) == PER_LINUX32) { + /* change ppc64 to ppc */ + if (__put_user(0, name->machine + 3) + || __put_user(0, name->machine + 4)) + err = -EFAULT; + } return err; } diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 0925694..c8fa656 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -113,7 +113,6 @@ void ppc64_enable_pmcs(void) #ifdef CONFIG_PPC_PSERIES unsigned long set, reset; int ret; - unsigned int ctrl; #endif /* CONFIG_PPC_PSERIES */ /* Only need to enable them once */ @@ -167,11 +166,8 @@ void ppc64_enable_pmcs(void) * On SMT machines we have to set the run latch in the ctrl register * in order to make PMC6 spin. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - ctrl = mfspr(CTRLF); - ctrl |= RUNLATCH; - mtspr(CTRLT, ctrl); - } + if (cpu_has_feature(CPU_FTR_SMT)) + ppc64_runlatch_on(); #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 772a465..33364a7 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -325,9 +325,7 @@ int timer_interrupt(struct pt_regs * regs) irq_enter(); -#ifndef CONFIG_PPC_ISERIES profile_tick(CPU_PROFILING, regs); -#endif lpaca->lppaca.int_dword.fields.decr_int = 0; @@ -515,6 +513,7 @@ void __init time_init(void) do_gtod.varp = &do_gtod.vars[0]; do_gtod.var_idx = 0; do_gtod.varp->tb_orig_stamp = tb_last_stamp; + get_paca()->next_jiffy_update_tb = tb_last_stamp + tb_ticks_per_jiffy; do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; do_gtod.varp->tb_to_xs = tb_to_xs; diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c index eedd1d3..879f39b 100644 --- a/arch/ppc64/kernel/xics.c +++ b/arch/ppc64/kernel/xics.c @@ -432,6 +432,7 @@ void xics_cause_IPI(int cpu) { ops->qirr_info(cpu, IPI_PRIORITY); } +#endif /* CONFIG_SMP */ void xics_setup_cpu(void) { @@ -439,9 +440,17 @@ void xics_setup_cpu(void) ops->cppr_info(cpu, 0xff); iosync(); -} -#endif /* CONFIG_SMP */ + /* + * Put the calling processor into the GIQ. This is really only + * necessary from a secondary thread as the OF start-cpu interface + * performs this function for us on primary threads. + * + * XXX: undo of teardown on kexec needs this too, as may hotplug + */ + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); +} void xics_init_IRQ(void) { @@ -563,8 +572,7 @@ nextnode: for (; i < NR_IRQS; ++i) get_irq_desc(i)->handler = &xics_pic; - ops->cppr_info(boot_cpuid, 0xff); - iosync(); + xics_setup_cpu(); ppc64_boot_msg(0x21, "XICS Done"); } diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 144657e..52b6b93 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -320,8 +320,7 @@ static void native_flush_hash_range(unsigned long context, j = 0; for (i = 0; i < number; i++) { - if ((batch->addr[i] >= USER_START) && - (batch->addr[i] <= USER_END)) + if (batch->addr[i] < KERNELBASE) vsid = get_vsid(context, batch->addr[i]); else vsid = get_kernel_vsid(batch->addr[i]); diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index e48be12..0a0f970 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -298,24 +298,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int local = 0; cpumask_t tmp; + if ((ea & ~REGION_MASK) > EADDR_MASK) + return 1; + switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; mm = current->mm; - if ((ea > USER_END) || (! mm)) + if (! mm) return 1; vsid = get_vsid(mm->context.id, ea); break; case IO_REGION_ID: - if (ea > IMALLOC_END) - return 1; mm = &ioremap_mm; vsid = get_kernel_vsid(ea); break; case VMALLOC_REGION_ID: - if (ea > VMALLOC_END) - return 1; mm = &init_mm; vsid = get_kernel_vsid(ea); break; @@ -362,7 +361,7 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte, unsigned long vsid, vpn, va, hash, secondary, slot; unsigned long huge = pte_huge(pte); - if ((ea >= USER_START) && (ea <= USER_END)) + if (ea < KERNELBASE) vsid = get_vsid(context, ea); else vsid = get_kernel_vsid(ea); diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index 9d92b0d..cb8727f 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -14,6 +14,7 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/semaphore.h> +#include <asm/imalloc.h> static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; @@ -23,11 +24,11 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) unsigned long addr; struct vm_struct **p, *tmp; - addr = IMALLOC_START; + addr = ioremap_bot; for (p = &imlist; (tmp = *p) ; p = &tmp->next) { if (size + addr < (unsigned long) tmp->addr) break; - if ((unsigned long)tmp->addr >= IMALLOC_START) + if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; if (addr > IMALLOC_END-size) return 1; diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index cf33d7e..4b42aff 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -64,6 +64,7 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> +#include <asm/imalloc.h> int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; @@ -668,7 +669,7 @@ void __init paging_init(void) zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; - free_area_init_node(0, &contig_page_data, zones_size, + free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c index 3149113..df4bbe1 100644 --- a/arch/ppc64/mm/stab.c +++ b/arch/ppc64/mm/stab.c @@ -19,6 +19,11 @@ #include <asm/paca.h> #include <asm/cputable.h> +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; + /* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 01ae196..c067435 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -28,6 +28,7 @@ //#include <linux/kernel_stat.h> #include <linux/notifier.h> #include <linux/cpu.h> +#include <linux/workqueue.h> #include "appldata.h" @@ -133,9 +134,12 @@ static int appldata_interval = APPLDATA_CPU_INTERVAL; static int appldata_timer_active; /* - * Tasklet + * Work queue */ -static struct tasklet_struct appldata_tasklet_struct; +static struct workqueue_struct *appldata_wq; +static void appldata_work_fn(void *data); +static DECLARE_WORK(appldata_work, appldata_work_fn, NULL); + /* * Ops list @@ -144,11 +148,11 @@ static DEFINE_SPINLOCK(appldata_ops_lock); static LIST_HEAD(appldata_ops_list); -/************************* timer, tasklet, DIAG ******************************/ +/*************************** timer, work, DIAG *******************************/ /* * appldata_timer_function() * - * schedule tasklet and reschedule timer + * schedule work and reschedule timer */ static void appldata_timer_function(unsigned long data, struct pt_regs *regs) { @@ -157,22 +161,22 @@ static void appldata_timer_function(unsigned long data, struct pt_regs *regs) atomic_read(&appldata_expire_count)); if (atomic_dec_and_test(&appldata_expire_count)) { atomic_set(&appldata_expire_count, num_online_cpus()); - tasklet_schedule((struct tasklet_struct *) data); + queue_work(appldata_wq, (struct work_struct *) data); } } /* - * appldata_tasklet_function() + * appldata_work_fn() * * call data gathering function for each (active) module */ -static void appldata_tasklet_function(unsigned long data) +static void appldata_work_fn(void *data) { struct list_head *lh; struct appldata_ops *ops; int i; - P_DEBUG(" -= Tasklet =-\n"); + P_DEBUG(" -= Work Queue =-\n"); i = 0; spin_lock(&appldata_ops_lock); list_for_each(lh, &appldata_ops_list) { @@ -231,7 +235,7 @@ static int appldata_diag(char record_nr, u16 function, unsigned long buffer, : "=d" (ry) : "d" (&(appldata_parameter_list)) : "cc"); return (int) ry; } -/********************** timer, tasklet, DIAG <END> ***************************/ +/************************ timer, work, DIAG <END> ****************************/ /****************************** /proc stuff **********************************/ @@ -411,7 +415,7 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, struct list_head *lh; found = 0; - spin_lock_bh(&appldata_ops_lock); + spin_lock(&appldata_ops_lock); list_for_each(lh, &appldata_ops_list) { tmp_ops = list_entry(lh, struct appldata_ops, list); if (&tmp_ops->ctl_table[2] == ctl) { @@ -419,15 +423,15 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, } } if (!found) { - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); return -ENODEV; } ops = ctl->data; if (!try_module_get(ops->owner)) { // protect this function - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); return -ENODEV; } - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); if (!*lenp || *ppos) { *lenp = 0; @@ -451,10 +455,11 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; } - spin_lock_bh(&appldata_ops_lock); + spin_lock(&appldata_ops_lock); if ((buf[0] == '1') && (ops->active == 0)) { - if (!try_module_get(ops->owner)) { // protect tasklet - spin_unlock_bh(&appldata_ops_lock); + // protect work queue callback + if (!try_module_get(ops->owner)) { + spin_unlock(&appldata_ops_lock); module_put(ops->owner); return -ENODEV; } @@ -485,7 +490,7 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, } module_put(ops->owner); } - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); out: *lenp = len; *ppos += len; @@ -529,7 +534,7 @@ int appldata_register_ops(struct appldata_ops *ops) } memset(ops->ctl_table, 0, 4*sizeof(struct ctl_table)); - spin_lock_bh(&appldata_ops_lock); + spin_lock(&appldata_ops_lock); list_for_each(lh, &appldata_ops_list) { tmp_ops = list_entry(lh, struct appldata_ops, list); P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", @@ -541,18 +546,18 @@ int appldata_register_ops(struct appldata_ops *ops) APPLDATA_PROC_NAME_LENGTH) == 0) { P_ERROR("Name \"%s\" already registered!\n", ops->name); kfree(ops->ctl_table); - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); return -EBUSY; } if (tmp_ops->ctl_nr == ops->ctl_nr) { P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); kfree(ops->ctl_table); - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); return -EBUSY; } } list_add(&ops->list, &appldata_ops_list); - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); ops->ctl_table[0].ctl_name = CTL_APPLDATA; ops->ctl_table[0].procname = appldata_proc_name; @@ -583,12 +588,12 @@ int appldata_register_ops(struct appldata_ops *ops) */ void appldata_unregister_ops(struct appldata_ops *ops) { - spin_lock_bh(&appldata_ops_lock); + spin_lock(&appldata_ops_lock); unregister_sysctl_table(ops->sysctl_header); list_del(&ops->list); kfree(ops->ctl_table); ops->ctl_table = NULL; - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); P_INFO("%s-ops unregistered!\n", ops->name); } /********************** module-ops management <END> **************************/ @@ -602,7 +607,7 @@ appldata_online_cpu(int cpu) init_virt_timer(&per_cpu(appldata_timer, cpu)); per_cpu(appldata_timer, cpu).function = appldata_timer_function; per_cpu(appldata_timer, cpu).data = (unsigned long) - &appldata_tasklet_struct; + &appldata_work; atomic_inc(&appldata_expire_count); spin_lock(&appldata_timer_lock); __appldata_vtimer_setup(APPLDATA_MOD_TIMER); @@ -615,7 +620,7 @@ appldata_offline_cpu(int cpu) del_virt_timer(&per_cpu(appldata_timer, cpu)); if (atomic_dec_and_test(&appldata_expire_count)) { atomic_set(&appldata_expire_count, num_online_cpus()); - tasklet_schedule(&appldata_tasklet_struct); + queue_work(appldata_wq, &appldata_work); } spin_lock(&appldata_timer_lock); __appldata_vtimer_setup(APPLDATA_MOD_TIMER); @@ -648,7 +653,7 @@ static struct notifier_block __devinitdata appldata_nb = { /* * appldata_init() * - * init timer and tasklet, register /proc entries + * init timer, register /proc entries */ static int __init appldata_init(void) { @@ -657,6 +662,12 @@ static int __init appldata_init(void) P_DEBUG("sizeof(parameter_list) = %lu\n", sizeof(struct appldata_parameter_list)); + appldata_wq = create_singlethread_workqueue("appldata"); + if (!appldata_wq) { + P_ERROR("Could not create work queue\n"); + return -ENOMEM; + } + for_each_online_cpu(i) appldata_online_cpu(i); @@ -670,7 +681,6 @@ static int __init appldata_init(void) appldata_table[1].de->owner = THIS_MODULE; #endif - tasklet_init(&appldata_tasklet_struct, appldata_tasklet_function, 0); P_DEBUG("Base interface initialized.\n"); return 0; } @@ -678,7 +688,7 @@ static int __init appldata_init(void) /* * appldata_exit() * - * stop timer and tasklet, unregister /proc entries + * stop timer, unregister /proc entries */ static void __exit appldata_exit(void) { @@ -690,7 +700,7 @@ static void __exit appldata_exit(void) /* * ops list should be empty, but just in case something went wrong... */ - spin_lock_bh(&appldata_ops_lock); + spin_lock(&appldata_ops_lock); list_for_each(lh, &appldata_ops_list) { ops = list_entry(lh, struct appldata_ops, list); rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, @@ -700,7 +710,7 @@ static void __exit appldata_exit(void) "return code: %d\n", ops->name, rc); } } - spin_unlock_bh(&appldata_ops_lock); + spin_unlock(&appldata_ops_lock); for_each_online_cpu(i) appldata_offline_cpu(i); @@ -709,7 +719,7 @@ static void __exit appldata_exit(void) unregister_sysctl_table(appldata_sysctl_header); - tasklet_kill(&appldata_tasklet_struct); + destroy_workqueue(appldata_wq); P_DEBUG("... module unloaded!\n"); } /**************************** init / exit <END> ******************************/ diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 462ee9a..f0e2fbe 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -68,7 +68,7 @@ struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} appldata_mem_data; +} __attribute__((packed)) appldata_mem_data; static inline void appldata_debug_print(struct appldata_mem_data *mem_data) diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index dd61638..2a4c743 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -57,7 +57,7 @@ struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} appldata_net_sum_data; +} __attribute__((packed)) appldata_net_sum_data; static inline void appldata_print_debug(struct appldata_net_sum_data *net_data) diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index b83f074..e0a476b 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -49,7 +49,7 @@ struct appldata_os_per_cpu { u32 per_cpu_softirq; /* ... spent in softirqs */ u32 per_cpu_iowait; /* ... spent while waiting for I/O */ // <-- New in 2.6 -}; +} __attribute__((packed)); struct appldata_os_data { u64 timestamp; @@ -75,7 +75,7 @@ struct appldata_os_data { /* per cpu data */ struct appldata_os_per_cpu os_cpu[0]; -}; +} __attribute__((packed)); static struct appldata_os_data *appldata_os_data; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9f0d73e..06afa31 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -40,6 +40,7 @@ #include <asm/pgalloc.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/unistd.h> #ifdef CONFIG_S390_SUPPORT #include "compat_ptrace.h" @@ -130,13 +131,19 @@ static int peek_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset, tmp; + addr_t offset, tmp, mask; /* * Stupid gdb peeks/pokes the access registers in 64 bit with * an alignment of 4. Programmers from hell... */ - if ((addr & 3) || addr > sizeof(struct user) - __ADDR_MASK) + mask = __ADDR_MASK; +#ifdef CONFIG_ARCH_S390X + if (addr >= (addr_t) &dummy->regs.acrs && + addr < (addr_t) &dummy->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; if (addr < (addr_t) &dummy->regs.acrs) { @@ -153,6 +160,16 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * access registers are stored in the thread structure */ offset = addr - (addr_t) &dummy->regs.acrs; +#ifdef CONFIG_ARCH_S390X + /* + * Very special case: old & broken 64 bit gdb reading + * from acrs[15]. Result is a 64 bit value. Read the + * 32 bit acrs[15] value and shift it by 32. Sick... + */ + if (addr == (addr_t) &dummy->regs.acrs[15]) + tmp = ((unsigned long) child->thread.acrs[15]) << 32; + else +#endif tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { @@ -167,6 +184,9 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) */ offset = addr - (addr_t) &dummy->regs.fp_regs; tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); + if (addr == (addr_t) &dummy->regs.fp_regs.fpc) + tmp &= (unsigned long) FPC_VALID_MASK + << (BITS_PER_LONG - 32); } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -191,13 +211,19 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset; + addr_t offset, mask; /* * Stupid gdb peeks/pokes the access registers in 64 bit with * an alignment of 4. Programmers from hell indeed... */ - if ((addr & 3) || addr > sizeof(struct user) - __ADDR_MASK) + mask = __ADDR_MASK; +#ifdef CONFIG_ARCH_S390X + if (addr >= (addr_t) &dummy->regs.acrs && + addr < (addr_t) &dummy->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; if (addr < (addr_t) &dummy->regs.acrs) { @@ -224,6 +250,17 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) * access registers are stored in the thread structure */ offset = addr - (addr_t) &dummy->regs.acrs; +#ifdef CONFIG_ARCH_S390X + /* + * Very special case: old & broken 64 bit gdb writing + * to acrs[15] with a 64 bit value. Ignore the lower + * half of the value and write the upper 32 bit to + * acrs[15]. Sick... + */ + if (addr == (addr_t) &dummy->regs.acrs[15]) + child->thread.acrs[15] = (unsigned int) (data >> 32); + else +#endif *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { @@ -237,7 +274,8 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) * floating point regs. are stored in the thread structure */ if (addr == (addr_t) &dummy->regs.fp_regs.fpc && - (data & ~FPC_VALID_MASK) != 0) + (data & ~((unsigned long) FPC_VALID_MASK + << (BITS_PER_LONG - 32))) != 0) return -EINVAL; offset = addr - (addr_t) &dummy->regs.fp_regs; *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; @@ -712,22 +750,24 @@ out: asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->gprs[2], - regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); - else - audit_syscall_exit(current, regs->gprs[2]); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; if (!(current->ptrace & PT_PTRACED)) - return; + goto out; ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); /* + * If the debuffer has set an invalid system call number, + * we prepare to skip the system call restart handling. + */ + if (!entryexit && regs->gprs[2] >= NR_syscalls) + regs->trap = -1; + + /* * this isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. -brl @@ -736,4 +776,10 @@ syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, + test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5]); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 80306bc..75fde94 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -207,7 +207,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) * we are not in an interrupt and that there is a * user context. */ - if (user_address == 0 || in_interrupt() || !mm) + if (user_address == 0 || in_atomic() || !mm) goto no_context; /* diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 722ea1d..3468d51 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -693,6 +693,10 @@ config RTC_9701JE endmenu +config ISA_DMA_API + bool + depends on MPC1211 + default y menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 066e253..2c216ff 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -83,9 +83,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - goto out; - /* endless idle loop with no priority at all */ for (;;) { if (ARCH_SUN4C_SUN4) { @@ -126,8 +123,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } -out: - return; } #else diff --git a/arch/sparc/prom/memory.c b/arch/sparc/prom/memory.c index 46aa51a..c20e530 100644 --- a/arch/sparc/prom/memory.c +++ b/arch/sparc/prom/memory.c @@ -47,9 +47,9 @@ prom_sortmemlist(struct linux_mlist_v0 *thislist) char *tmpaddr; char *lowest; - for(i=0; thislist[i].theres_more != 0; i++) { + for(i=0; thislist[i].theres_more; i++) { lowest = thislist[i].start_adr; - for(mitr = i+1; thislist[mitr-1].theres_more != 0; mitr++) + for(mitr = i+1; thislist[mitr-1].theres_more; mitr++) if(thislist[mitr].start_adr < lowest) { lowest = thislist[mitr].start_adr; swapi = mitr; @@ -85,7 +85,7 @@ void __init prom_meminit(void) prom_phys_total[iter].num_bytes = mptr->num_bytes; prom_phys_total[iter].theres_more = &prom_phys_total[iter+1]; } - prom_phys_total[iter-1].theres_more = 0x0; + prom_phys_total[iter-1].theres_more = NULL; /* Second, the total prom taken descriptors. */ for(mptr = (*(romvec->pv_v0mem.v0_prommap)), iter=0; mptr; mptr=mptr->theres_more, iter++) { @@ -93,7 +93,7 @@ void __init prom_meminit(void) prom_prom_taken[iter].num_bytes = mptr->num_bytes; prom_prom_taken[iter].theres_more = &prom_prom_taken[iter+1]; } - prom_prom_taken[iter-1].theres_more = 0x0; + prom_prom_taken[iter-1].theres_more = NULL; /* Last, the available physical descriptors. */ for(mptr = (*(romvec->pv_v0mem.v0_available)), iter=0; mptr; mptr=mptr->theres_more, iter++) { @@ -101,7 +101,7 @@ void __init prom_meminit(void) prom_phys_avail[iter].num_bytes = mptr->num_bytes; prom_phys_avail[iter].theres_more = &prom_phys_avail[iter+1]; } - prom_phys_avail[iter-1].theres_more = 0x0; + prom_phys_avail[iter-1].theres_more = NULL; /* Sort all the lists. */ prom_sortmemlist(prom_phys_total); prom_sortmemlist(prom_prom_taken); @@ -124,7 +124,7 @@ void __init prom_meminit(void) prom_phys_avail[iter].theres_more = &prom_phys_avail[iter+1]; } - prom_phys_avail[iter-1].theres_more = 0x0; + prom_phys_avail[iter-1].theres_more = NULL; num_regs = prom_getproperty(node, "reg", (char *) prom_reg_memlist, @@ -138,7 +138,7 @@ void __init prom_meminit(void) prom_phys_total[iter].theres_more = &prom_phys_total[iter+1]; } - prom_phys_total[iter-1].theres_more = 0x0; + prom_phys_total[iter-1].theres_more = NULL; node = prom_getchild(prom_root_node); node = prom_searchsiblings(node, "virtual-memory"); @@ -158,7 +158,7 @@ void __init prom_meminit(void) prom_prom_taken[iter].theres_more = &prom_prom_taken[iter+1]; } - prom_prom_taken[iter-1].theres_more = 0x0; + prom_prom_taken[iter-1].theres_more = NULL; prom_sortmemlist(prom_prom_taken); @@ -182,15 +182,15 @@ void __init prom_meminit(void) case PROM_SUN4: #ifdef CONFIG_SUN4 /* how simple :) */ - prom_phys_total[0].start_adr = 0x0; + prom_phys_total[0].start_adr = NULL; prom_phys_total[0].num_bytes = *(sun4_romvec->memorysize); - prom_phys_total[0].theres_more = 0x0; - prom_prom_taken[0].start_adr = 0x0; + prom_phys_total[0].theres_more = NULL; + prom_prom_taken[0].start_adr = NULL; prom_prom_taken[0].num_bytes = 0x0; - prom_prom_taken[0].theres_more = 0x0; - prom_phys_avail[0].start_adr = 0x0; + prom_prom_taken[0].theres_more = NULL; + prom_phys_avail[0].start_adr = NULL; prom_phys_avail[0].num_bytes = *(sun4_romvec->memoryavail); - prom_phys_avail[0].theres_more = 0x0; + prom_phys_avail[0].theres_more = NULL; #endif break; diff --git a/arch/sparc/prom/sun4prom.c b/arch/sparc/prom/sun4prom.c index 69ca735..00390a2 100644 --- a/arch/sparc/prom/sun4prom.c +++ b/arch/sparc/prom/sun4prom.c @@ -151,7 +151,7 @@ struct linux_romvec * __init sun4_prom_init(void) * have more time, we can teach the penguin to say "By your * command" or "Activating turbo boost, Michael". :-) */ - sun4_romvec->setLEDs(0x0); + sun4_romvec->setLEDs(NULL); printk("PROMLIB: Old Sun4 boot PROM monitor %s, romvec version %d\n", sun4_romvec->monid, diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index a38cb50..4dcb8af 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -756,7 +756,7 @@ void handler_irq(int irq, struct pt_regs *regs) clear_softint(clr_mask); } #else - int should_forward = 1; + int should_forward = 0; clear_softint(1 << irq); #endif @@ -1007,10 +1007,10 @@ static int retarget_one_irq(struct irqaction *p, int goal_cpu) } upa_writel(tid | IMAP_VALID, imap); - while (!cpu_online(goal_cpu)) { + do { if (++goal_cpu >= NR_CPUS) goal_cpu = 0; - } + } while (!cpu_online(goal_cpu)); return goal_cpu; } diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 2929834..2803bc7 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/delay.h> #include <asm/pbm.h> @@ -195,6 +196,34 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long return NULL; } +static int iommu_alloc_ctx(struct pci_iommu *iommu) +{ + int lowest = iommu->ctx_lowest_free; + int sz = IOMMU_NUM_CTXS - lowest; + int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest); + + if (unlikely(n == sz)) { + n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1); + if (unlikely(n == lowest)) { + printk(KERN_WARNING "IOMMU: Ran out of contexts.\n"); + n = 0; + } + } + if (n) + __set_bit(n, iommu->ctx_bitmap); + + return n; +} + +static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx) +{ + if (likely(ctx)) { + __clear_bit(ctx, iommu->ctx_bitmap); + if (ctx < iommu->ctx_lowest_free) + iommu->ctx_lowest_free = ctx; + } +} + /* Allocate and map kernel buffer of size SIZE using consistent mode * DMA for PCI device PDEV. Return non-NULL cpu-side address if * successful and set *DMA_ADDRP to the PCI side dma address. @@ -235,7 +264,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad npages = size >> IO_PAGE_SHIFT; ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | @@ -316,6 +345,8 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ } } + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); order = get_order(size); @@ -359,7 +390,7 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct base_paddr = __pa(oaddr & IO_PAGE_MASK); ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -379,6 +410,70 @@ bad: return PCI_DMA_ERROR_CODE; } +static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction) +{ + int limit; + + if (strbuf->strbuf_ctxflush && + iommu->iommu_ctxflush) { + unsigned long matchreg, flushreg; + u64 val; + + flushreg = strbuf->strbuf_ctxflush; + matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); + + pci_iommu_write(flushreg, ctx); + val = pci_iommu_read(matchreg); + val &= 0xffff; + if (!val) + goto do_flush_sync; + + while (val) { + if (val & 0x1) + pci_iommu_write(flushreg, ctx); + val >>= 1; + } + val = pci_iommu_read(matchreg); + if (unlikely(val)) { + printk(KERN_WARNING "pci_strbuf_flush: ctx flush " + "timeout matchreg[%lx] ctx[%lx]\n", + val, ctx); + goto do_page_flush; + } + } else { + unsigned long i; + + do_page_flush: + for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) + pci_iommu_write(strbuf->strbuf_pflush, vaddr); + } + +do_flush_sync: + /* If the device could not have possibly put dirty data into + * the streaming cache, no flush-flag synchronization needs + * to be performed. + */ + if (direction == PCI_DMA_TODEVICE) + return; + + PCI_STC_FLUSHFLAG_INIT(strbuf); + pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); + (void) pci_iommu_read(iommu->write_complete_reg); + + limit = 100000; + while (!PCI_STC_FLUSHFLAG_SET(strbuf)) { + limit--; + if (!limit) + break; + udelay(1); + membar("#LoadLoad"); + } + if (!limit) + printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout " + "vaddr[%08x] ctx[%lx] npages[%ld]\n", + vaddr, ctx, npages); +} + /* Unmap a single streaming mode DMA translation. */ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { @@ -386,7 +481,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, i, ctx; + unsigned long flags, npages, ctx; if (direction == PCI_DMA_NONE) BUG(); @@ -414,29 +509,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; /* Step 1: Kick data out of streaming buffers if necessary. */ - if (strbuf->strbuf_enabled) { - u32 vaddr = bus_addr; - - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (strbuf->strbuf_ctxflush && - iommu->iommu_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, vaddr); - } - - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); - } + if (strbuf->strbuf_enabled) + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -444,6 +518,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); } @@ -583,7 +659,7 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int /* Step 4: Choose a context if necessary. */ ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); /* Step 5: Create the mappings. */ if (strbuf->strbuf_enabled) @@ -647,29 +723,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; /* Step 1: Kick data out of streaming buffers if necessary. */ - if (strbuf->strbuf_enabled) { - u32 vaddr = (u32) bus_addr; - - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (strbuf->strbuf_ctxflush && - iommu->iommu_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, vaddr); - } - - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); - } + if (strbuf->strbuf_enabled) + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -677,6 +732,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); } @@ -715,28 +772,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size } /* Step 2: Kick data out of streaming buffers. */ - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (iommu->iommu_ctxflush && - strbuf->strbuf_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - unsigned long i; - - for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, bus_addr); - } - - /* Step 3: Perform flush synchronization sequence. */ - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -749,7 +785,8 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i struct pcidev_cookie *pcp; struct pci_iommu *iommu; struct pci_strbuf *strbuf; - unsigned long flags, ctx; + unsigned long flags, ctx, npages, i; + u32 bus_addr; pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -772,36 +809,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i } /* Step 2: Kick data out of streaming buffers. */ - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (iommu->iommu_ctxflush && - strbuf->strbuf_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while (((long)pci_iommu_read(matchreg)) < 0L); - } else { - unsigned long i, npages; - u32 bus_addr; - - bus_addr = sglist[0].dma_address & IO_PAGE_MASK; - - for(i = 1; i < nelems; i++) - if (!sglist[i].dma_length) - break; - i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, bus_addr); - } - - /* Step 3: Perform flush synchronization sequence. */ - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); + bus_addr = sglist[0].dma_address & IO_PAGE_MASK; + for(i = 1; i < nelems; i++) + if (!sglist[i].dma_length) + break; + i--; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) + - bus_addr) >> IO_PAGE_SHIFT; + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 3567fa8..534320e 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1212,7 +1212,7 @@ static void __init psycho_iommu_init(struct pci_controller_info *p) /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 5525d1e..53d333b 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1265,7 +1265,7 @@ static void __init sabre_iommu_init(struct pci_controller_info *p, /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index e93fcad..5753175 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1753,7 +1753,7 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses, SCHIZO has iommu ctx flushing. */ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 26d3ec4..a0cd2b2 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -62,9 +62,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - return; - /* endless idle loop with no priority at all */ for (;;) { /* If current->work.need_resched is zero we should really @@ -80,7 +77,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } - return; } #else diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 14d9c3a..89f5e01 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -117,19 +117,42 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages #define STRBUF_TAG_VALID 0x02UL -static void strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) +static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages, int direction) { - iommu->strbuf_flushflag = 0UL; - while (npages--) - upa_writeq(base + (npages << IO_PAGE_SHIFT), + unsigned long n; + int limit; + + n = npages; + while (n--) + upa_writeq(base + (n << IO_PAGE_SHIFT), iommu->strbuf_regs + STRBUF_PFLUSH); + /* If the device could not have possibly put dirty data into + * the streaming cache, no flush-flag synchronization needs + * to be performed. + */ + if (direction == SBUS_DMA_TODEVICE) + return; + + iommu->strbuf_flushflag = 0UL; + /* Whoopee cushion! */ upa_writeq(__pa(&iommu->strbuf_flushflag), iommu->strbuf_regs + STRBUF_FSYNC); upa_readq(iommu->sbus_control_reg); - while (iommu->strbuf_flushflag == 0UL) + + limit = 100000; + while (iommu->strbuf_flushflag == 0UL) { + limit--; + if (!limit) + break; + udelay(1); membar("#LoadLoad"); + } + if (!limit) + printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout " + "vaddr[%08x] npages[%ld]\n", + base, npages); } static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) @@ -406,7 +429,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); - strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -569,7 +592,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int iommu = sdev->bus->iommu; spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); - strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -581,7 +604,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); spin_lock_irqsave(&iommu->lock, flags); - strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -605,7 +628,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; spin_lock_irqsave(&iommu->lock, flags); - strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 12c3d84b..b7e6a91 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -383,6 +383,17 @@ static void __init process_switch(char c) /* Use PROM debug console. */ register_console(&prom_debug_console); break; + case 'P': + /* Force UltraSPARC-III P-Cache on. */ + if (tlb_type != cheetah) { + printk("BOOT: Ignoring P-Cache force option.\n"); + break; + } + cheetah_pcache_forced_on = 1; + add_taint(TAINT_MACHINE_CHECK); + cheetah_enable_pcache(); + break; + default: printk("Unknown boot switch (-%c)\n", c); break; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 6dff06a..e5b9c7a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -123,6 +123,9 @@ void __init smp_callin(void) smp_setup_percpu_timer(); + if (cheetah_pcache_forced_on) + cheetah_enable_pcache(); + local_irq_enable(); calibrate_delay(); diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index cad5a11..e78cc53 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -278,7 +278,7 @@ EXPORT_SYMBOL(verify_compat_iovec); EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(__pte_alloc_one_kernel); +EXPORT_SYMBOL(pte_alloc_one_kernel); #ifndef CONFIG_SMP EXPORT_SYMBOL(pgt_quicklists); #endif diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 56b203a..a9f4596 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -421,6 +421,25 @@ asmlinkage void cee_log(unsigned long ce_status, } } +int cheetah_pcache_forced_on; + +void cheetah_enable_pcache(void) +{ + unsigned long dcr; + + printk("CHEETAH: Enabling P-Cache on cpu %d.\n", + smp_processor_id()); + + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dcr) + : "i" (ASI_DCU_CONTROL_REG)); + dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL); + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (dcr), "i" (ASI_DCU_CONTROL_REG)); +} + /* Cheetah error trap handling. */ static unsigned long ecache_flush_physbase; static unsigned long ecache_flush_linesize; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index db6fa77..9c52220 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1114,7 +1114,7 @@ struct pgtable_cache_struct pgt_quicklists; #else #define DC_ALIAS_SHIFT 0 #endif -pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { struct page *page; unsigned long color; diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index b89989d..bd41e42 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -2,10 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config FRAME_POINTER - bool - default y if DEBUG_INFO - config PT_PROXY bool "Enable ptrace proxy" depends on XTERM_CHAN && DEBUG_INFO && MODE_TT diff --git a/arch/um/Kconfig_char b/arch/um/Kconfig_char index 3e50fdb..62d87b7 100644 --- a/arch/um/Kconfig_char +++ b/arch/um/Kconfig_char @@ -204,5 +204,11 @@ config UML_RANDOM http://sourceforge.net/projects/gkernel/). rngd periodically reads /dev/hwrng and injects the entropy into /dev/random. +config MMAPPER + tristate "iomem emulation driver" + help + This driver allows a host file to be used as emulated IO memory inside + UML. + endmenu diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index fd8d7e8..f162f50 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -6,6 +6,10 @@ config 64BIT bool default y +config TOP_ADDR + hex + default 0x80000000 + config 3_LEVEL_PGTABLES bool default y diff --git a/arch/um/Makefile b/arch/um/Makefile index 97bca6b..f2a0c40 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -17,7 +17,7 @@ core-y += $(ARCH_DIR)/kernel/ \ # Have to precede the include because the included Makefiles reference them. SYMLINK_HEADERS := archparam.h system.h sigcontext.h processor.h ptrace.h \ - arch-signal.h module.h vm-flags.h + module.h vm-flags.h elf.h SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) # XXX: The "os" symlink is only used by arch/um/include/os.h, which includes @@ -44,6 +44,11 @@ ifneq ($(MAKEFILES-INCL),) endif ARCH_INCLUDE := -I$(ARCH_DIR)/include +ifneq ($(KBUILD_SRC),) +ARCH_INCLUDE += -I$(ARCH_DIR)/include2 +ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include +MRPROPER_DIRS += $(ARCH_DIR)/include2 +endif SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH) include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH) @@ -94,17 +99,18 @@ define archhelp echo ' find in the kernel root.' endef +ifneq ($(KBUILD_SRC),) +$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig_$(SUBARCH) $(ARCH_DIR)/Kconfig_arch) +CLEAN_FILES += $(ARCH_DIR)/Kconfig_arch +else $(shell cd $(ARCH_DIR) && ln -sf Kconfig_$(SUBARCH) Kconfig_arch) +endif -prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) \ - $(ARCH_DIR)/kernel/vmlinux.lds.S +prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib -LD_SCRIPT-$(CONFIG_LD_SCRIPT_STATIC) := uml.lds.S -LD_SCRIPT-$(CONFIG_LD_SCRIPT_DYN) := dyn.lds.S - CPP_MODE-$(CONFIG_MODE_TT) := -DMODE_TT CONFIG_KERNEL_STACK_ORDER ?= 2 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) @@ -126,7 +132,7 @@ define cmd_vmlinux__ $(CC) $(CFLAGS_vmlinux) -o $@ \ -Wl,-T,$(vmlinux-lds) $(vmlinux-init) \ -Wl,--start-group $(vmlinux-main) -Wl,--end-group \ - -L/usr/lib -lutil \ + -lutil \ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) \ FORCE ,$^) ; rm -f linux endef @@ -145,31 +151,42 @@ archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f -#We need to re-preprocess this when the symlink dest changes. -#So we touch it when needed. -$(ARCH_DIR)/kernel/vmlinux.lds.S: FORCE - $(Q)if [ "$(shell readlink $@)" != "$(LD_SCRIPT-y)" ]; then \ - echo ' SYMLINK $@'; \ - ln -sf $(LD_SCRIPT-y) $@; \ - touch $@; \ - fi; - $(SYMLINK_HEADERS): @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + ln -fsn $(srctree)/include/asm-um/$(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $@ +else $(Q)cd $(TOPDIR)/$(dir $@) ; \ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) +endif include/asm-um/arch: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p include/asm-um + $(Q)ln -fsn $(srctree)/include/asm-$(SUBARCH) include/asm-um/arch +else $(Q)cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch +endif $(ARCH_DIR)/include/sysdep: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p $(ARCH_DIR)/include + $(Q)mkdir -p $(ARCH_DIR)/include2 + $(Q)ln -fsn sysdep-$(SUBARCH) $(ARCH_DIR)/include/sysdep + $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/include/sysdep-$(SUBARCH) $(ARCH_DIR)/include2/sysdep +else $(Q)cd $(ARCH_DIR)/include && ln -sf sysdep-$(SUBARCH) sysdep +endif $(ARCH_DIR)/os: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/os-$(OS) $(ARCH_DIR)/os +else $(Q)cd $(ARCH_DIR) && ln -sf os-$(OS) os +endif # Generated files define filechk_umlconfig @@ -179,10 +196,31 @@ endef $(ARCH_DIR)/include/uml-config.h : include/linux/autoconf.h $(call filechk,umlconfig) +$(ARCH_DIR)/user-offsets.s: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.c + $(CC) $(USER_CFLAGS) -S -o $@ $< + +$(ARCH_DIR)/user-offsets.h: $(ARCH_DIR)/user-offsets.s + $(call filechk,gen-asm-offsets) + +CLEAN_FILES += $(ARCH_DIR)/user-offsets.s $(ARCH_DIR)/user-offsets.h + +$(ARCH_DIR)/kernel-offsets.s: $(ARCH_DIR)/sys-$(SUBARCH)/kernel-offsets.c \ + $(ARCH_SYMLINKS) \ + $(SYS_DIR)/sc.h \ + include/asm include/linux/version.h \ + include/config/MARKER \ + $(ARCH_DIR)/include/user_constants.h + $(CC) $(CFLAGS) $(NOSTDINC_FLAGS) $(CPPFLAGS) -S -o $@ $< + +$(ARCH_DIR)/kernel-offsets.h: $(ARCH_DIR)/kernel-offsets.s + $(call filechk,gen-asm-offsets) + +CLEAN_FILES += $(ARCH_DIR)/kernel-offsets.s $(ARCH_DIR)/kernel-offsets.h + $(ARCH_DIR)/include/task.h: $(ARCH_DIR)/util/mk_task $(call filechk,gen_header) -$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/os/util/mk_user_constants +$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/os-$(OS)/util/mk_user_constants $(call filechk,gen_header) $(ARCH_DIR)/include/kern_constants.h: $(ARCH_DIR)/util/mk_constants @@ -191,20 +229,20 @@ $(ARCH_DIR)/include/kern_constants.h: $(ARCH_DIR)/util/mk_constants $(ARCH_DIR)/include/skas_ptregs.h: $(ARCH_DIR)/kernel/skas/util/mk_ptregs $(call filechk,gen_header) -$(ARCH_DIR)/os/util/mk_user_constants: $(ARCH_DIR)/os/util FORCE ; +$(ARCH_DIR)/os-$(OS)/util/mk_user_constants: $(ARCH_DIR)/os-$(OS)/util FORCE ; $(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants: $(ARCH_DIR)/include/user_constants.h $(ARCH_DIR)/util \ FORCE ; $(ARCH_DIR)/kernel/skas/util/mk_ptregs: $(ARCH_DIR)/kernel/skas/util FORCE ; -$(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h FORCE +$(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$@ -$(ARCH_DIR)/kernel/skas/util: scripts_basic FORCE +$(ARCH_DIR)/kernel/skas/util: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$@ -$(ARCH_DIR)/os/util: scripts_basic FORCE +$(ARCH_DIR)/os-$(OS)/util: scripts_basic FORCE $(Q)$(MAKE) $(build)=$@ export SUBARCH USER_CFLAGS OS diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index f9e3c0f..29e182d 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -32,10 +32,10 @@ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread $(call filechk,gen_header) -$(SYS_UTIL_DIR)/mk_sc: scripts_basic FORCE +$(SYS_UTIL_DIR)/mk_sc: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ -$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE +$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ $(SYS_UTIL_DIR): scripts_basic include/asm FORCE diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index a779711..3214456 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -23,10 +23,10 @@ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread $(call filechk,gen_header) -$(SYS_UTIL_DIR)/mk_sc: scripts_basic FORCE +$(SYS_UTIL_DIR)/mk_sc: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ -$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE +$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(GEN_HEADERS) $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ CLEAN_FILES += $(SYS_HEADERS) diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 323f72c..b2de991 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -22,8 +22,8 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o obj-$(CONFIG_SSL) += ssl.o obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o -obj-$(CONFIG_UML_NET_SLIP) += slip.o -obj-$(CONFIG_UML_NET_SLIRP) += slirp.o +obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o +obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_MCAST) += mcast.o #obj-$(CONFIG_UML_NET_PCAP) += pcap.o $(PCAP) @@ -41,6 +41,6 @@ obj-$(CONFIG_UML_WATCHDOG) += harddog.o obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o -USER_OBJS := fd.o null.o pty.o tty.o xterm.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 0150038..14a12d6 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -20,9 +20,17 @@ #include "os.h" #ifdef CONFIG_NOCONFIG_CHAN + +/* The printk's here are wrong because we are complaining that there is no + * output device, but printk is printing to that output device. The user will + * never see the error. printf would be better, except it can't run on a + * kernel stack because it will overflow it. + * Use printk for now since that will avoid crashing. + */ + static void *not_configged_init(char *str, int device, struct chan_opts *opts) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(NULL); } @@ -30,27 +38,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts) static int not_configged_open(int input, int output, int primary, void *data, char **dev_out) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } static void not_configged_close(int fd, void *data) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); } static int not_configged_read(int fd, char *c_out, void *data) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } static int not_configged_write(int fd, const char *buf, int len, void *data) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -58,7 +66,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data) static int not_configged_console_write(int fd, const char *buf, int len, void *data) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -66,7 +74,7 @@ static int not_configged_console_write(int fd, const char *buf, int len, static int not_configged_window_size(int fd, void *data, unsigned short *rows, unsigned short *cols) { - printf(KERN_ERR "Using a channel type which is configured out of " + printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 583b8e1..5d37681 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -143,22 +143,22 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out) { struct winch_data data; unsigned long stack; - int fds[2], pid, n, err; + int fds[2], n, err; char c; err = os_pipe(fds, 1, 1); if(err < 0){ printk("winch_tramp : os_pipe failed, err = %d\n", -err); - return(err); + goto out; } data = ((struct winch_data) { .pty_fd = fd, .pipe_fd = fds[1], .close_me = fds[0] } ); - pid = run_helper_thread(winch_thread, &data, 0, &stack, 0); - if(pid < 0){ + err = run_helper_thread(winch_thread, &data, 0, &stack, 0); + if(err < 0){ printk("fork of winch_thread failed - errno = %d\n", errno); - return(pid); + goto out_close; } os_close_file(fds[1]); @@ -168,14 +168,22 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out) printk("winch_tramp : failed to read synchronization byte\n"); printk("read failed, err = %d\n", -n); printk("fd %d will not support SIGWINCH\n", fd); - *fd_out = -1; + err = -EINVAL; + goto out_close1; } - return(pid); + return err ; + + out_close: + os_close_file(fds[1]); + out_close1: + os_close_file(fds[0]); + out: + return err; } void register_winch(int fd, struct tty_struct *tty) { - int pid, thread, thread_fd; + int pid, thread, thread_fd = -1; int count; char c = 1; @@ -186,7 +194,7 @@ void register_winch(int fd, struct tty_struct *tty) if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, tty) && (pid == -1)){ thread = winch_tramp(fd, tty, &thread_fd); - if(fd != -1){ + if(thread > 0){ register_winch_irq(thread_fd, fd, thread, tty); count = os_write_file(thread_fd, &c, sizeof(c)); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index d0f9712..025d3be 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -462,12 +462,15 @@ out: return err; } +static void unregister_winch(struct tty_struct *tty); + void line_close(struct tty_struct *tty, struct file * filp) { struct line *line = tty->driver_data; - /* XXX: I assume this should be called in process context, not with interrupt - * disabled!*/ + /* XXX: I assume this should be called in process context, not with + * interrupts disabled! + */ spin_lock_irq(&line->lock); /* We ignore the error anyway! */ @@ -478,6 +481,12 @@ void line_close(struct tty_struct *tty, struct file * filp) line_disable(tty, -1); tty->driver_data = NULL; } + + if((line->count == 0) && line->sigio){ + unregister_winch(tty); + line->sigio = 0; + } + spin_unlock_irq(&line->lock); } @@ -729,6 +738,34 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty) up(&winch_handler_sem); } +static void unregister_winch(struct tty_struct *tty) +{ + struct list_head *ele; + struct winch *winch, *found = NULL; + + down(&winch_handler_sem); + list_for_each(ele, &winch_handlers){ + winch = list_entry(ele, struct winch, list); + if(winch->tty == tty){ + found = winch; + break; + } + } + + if(found == NULL) + goto out; + + if(winch->pid != -1) + os_kill_process(winch->pid, 1); + + free_irq_by_irq_and_dev(WINCH_IRQ, winch); + free_irq(WINCH_IRQ, winch); + list_del(&winch->list); + kfree(winch); + out: + up(&winch_handler_sem); +} + static void winch_cleanup(void) { struct list_head *ele; diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c index faf714e..217438c 100644 --- a/arch/um/drivers/mcast_kern.c +++ b/arch/um/drivers/mcast_kern.c @@ -73,7 +73,6 @@ int mcast_setup(char *str, char **mac_out, void *data) struct mcast_init *init = data; char *port_str = NULL, *ttl_str = NULL, *remain; char *last; - int n; *init = ((struct mcast_init) { .addr = "239.192.168.1", @@ -89,13 +88,12 @@ int mcast_setup(char *str, char **mac_out, void *data) } if(port_str != NULL){ - n = simple_strtoul(port_str, &last, 10); + init->port = simple_strtoul(port_str, &last, 10); if((*last != '\0') || (last == port_str)){ printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", port_str); return(0); } - init->port = htons(n); } if(ttl_str != NULL){ diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c index 0fe1d9f..7a0d115 100644 --- a/arch/um/drivers/mcast_user.c +++ b/arch/um/drivers/mcast_user.c @@ -38,7 +38,7 @@ static struct sockaddr_in *new_addr(char *addr, unsigned short port) } sin->sin_family = AF_INET; sin->sin_addr.s_addr = in_aton(addr); - sin->sin_port = port; + sin->sin_port = htons(port); return(sin); } @@ -55,28 +55,25 @@ static int mcast_open(void *data) struct mcast_data *pri = data; struct sockaddr_in *sin = pri->mcast_addr; struct ip_mreq mreq; - int fd, yes = 1; + int fd = -EINVAL, yes = 1, err = -EINVAL;; - if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) { - fd = -EINVAL; + if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) goto out; - } fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0){ printk("mcast_open : data socket failed, errno = %d\n", errno); - fd = -ENOMEM; + fd = -errno; goto out; } if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", errno); - os_close_file(fd); - fd = -EINVAL; - goto out; + goto out_close; } /* set ttl according to config */ @@ -84,26 +81,20 @@ static int mcast_open(void *data) sizeof(pri->ttl)) < 0) { printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", errno); - os_close_file(fd); - fd = -EINVAL; - goto out; + goto out_close; } /* set LOOP, so data does get fed back to local sockets */ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", errno); - os_close_file(fd); - fd = -EINVAL; - goto out; + goto out_close; } /* bind socket to mcast address */ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { printk("mcast_open : data bind failed, errno = %d\n", errno); - os_close_file(fd); - fd = -EINVAL; - goto out; + goto out_close; } /* subscribe to the multicast group */ @@ -117,12 +108,15 @@ static int mcast_open(void *data) "interface on the host.\n"); printk("eth0 should be configured in order to use the " "multicast transport.\n"); - os_close_file(fd); - fd = -EINVAL; + goto out_close; } out: - return(fd); + return fd; + + out_close: + os_close_file(fd); + return err; } static void mcast_close(int fd, void *data) @@ -164,14 +158,3 @@ struct net_user_info mcast_user_info = { .delete_address = NULL, .max_packet = MAX_PACKET - ETH_HEADER_OTHER }; - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c index a63231d..a37a5ac 100644 --- a/arch/um/drivers/mmapper_kern.c +++ b/arch/um/drivers/mmapper_kern.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/smp_lock.h> +#include <linux/miscdevice.h> #include <asm/uaccess.h> #include <asm/irq.h> #include <asm/pgtable.h> @@ -117,24 +118,39 @@ static struct file_operations mmapper_fops = { .release = mmapper_release, }; +static struct miscdevice mmapper_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mmapper", + .fops = &mmapper_fops +}; + static int __init mmapper_init(void) { + int err; + printk(KERN_INFO "Mapper v0.1\n"); v_buf = (char *) find_iomem("mmapper", &mmapper_size); if(mmapper_size == 0){ printk(KERN_ERR "mmapper_init - find_iomem failed\n"); - return(0); + goto out; } - p_buf = __pa(v_buf); + err = misc_register(&mmapper_dev); + if(err){ + printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", + err); + goto out; + } - devfs_mk_cdev(MKDEV(30, 0), S_IFCHR|S_IRUGO|S_IWUGO, "mmapper"); - return(0); + p_buf = __pa(v_buf); +out: + return 0; } static void mmapper_exit(void) { + misc_deregister(&mmapper_dev); } module_init(mmapper_init); diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c index 47229fe..3730d4f 100644 --- a/arch/um/drivers/net_user.c +++ b/arch/um/drivers/net_user.c @@ -32,7 +32,7 @@ int tap_open_common(void *dev, char *gate_addr) return(0); } -void tap_check_ips(char *gate_addr, char *eth_addr) +void tap_check_ips(char *gate_addr, unsigned char *eth_addr) { int tap_addr[4]; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index d43e9fa..f9e2219 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -1,5 +1,10 @@ -/* Much of this ripped from hw_random.c */ - +/* Copyright (C) 2005 Jeff Dike <jdike@addtoit.com> */ +/* Much of this ripped from drivers/char/hw_random.c, see there for other + * copyright. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ #include <linux/module.h> #include <linux/fs.h> #include <linux/miscdevice.h> @@ -12,8 +17,6 @@ */ #define RNG_VERSION "1.0.0" #define RNG_MODULE_NAME "random" -#define RNG_DRIVER_NAME RNG_MODULE_NAME " virtual driver " RNG_VERSION -#define PFX RNG_MODULE_NAME ": " #define RNG_MISCDEV_MINOR 183 /* official */ @@ -98,7 +101,7 @@ static int __init rng_init (void) err = misc_register (&rng_miscdev); if (err) { - printk (KERN_ERR PFX "misc device register failed\n"); + printk (KERN_ERR RNG_MODULE_NAME ": misc device register failed\n"); goto err_out_cleanup_hw; } @@ -120,3 +123,6 @@ static void __exit rng_cleanup (void) module_init (rng_init); module_exit (rng_cleanup); + +MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h index 495f2f1..bb0dab4 100644 --- a/arch/um/drivers/slip.h +++ b/arch/um/drivers/slip.h @@ -1,10 +1,7 @@ #ifndef __UM_SLIP_H #define __UM_SLIP_H -#define BUF_SIZE 1500 - /* two bytes each for a (pathological) max packet of escaped chars + * - * terminating END char + initial END char */ -#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) +#include "slip_common.h" struct slip_data { void *dev; @@ -12,28 +9,12 @@ struct slip_data { char *addr; char *gate_addr; int slave; - char ibuf[ENC_BUF_SIZE]; - char obuf[ENC_BUF_SIZE]; - int more; /* more data: do not read fd until ibuf has been drained */ - int pos; - int esc; + struct slip_proto slip; }; extern struct net_user_info slip_user_info; -extern int set_umn_addr(int fd, char *addr, char *ptp_addr); extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c new file mode 100644 index 0000000..e89cfc6 --- /dev/null +++ b/arch/um/drivers/slip_common.c @@ -0,0 +1,54 @@ +#include <string.h> +#include "slip_common.h" +#include "net_user.h" + +int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip) +{ + int i, n, size, start; + + if(slip->more > 0){ + i = 0; + while(i < slip->more){ + size = slip_unesc(slip->ibuf[i++], slip->ibuf, + &slip->pos, &slip->esc); + if(size){ + memcpy(buf, slip->ibuf, size); + memmove(slip->ibuf, &slip->ibuf[i], + slip->more - i); + slip->more = slip->more - i; + return size; + } + } + slip->more = 0; + } + + n = net_read(fd, &slip->ibuf[slip->pos], + sizeof(slip->ibuf) - slip->pos); + if(n <= 0) + return n; + + start = slip->pos; + for(i = 0; i < n; i++){ + size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos, + &slip->esc); + if(size){ + memcpy(buf, slip->ibuf, size); + memmove(slip->ibuf, &slip->ibuf[start+i+1], + n - (i + 1)); + slip->more = n - (i + 1); + return size; + } + } + return 0; +} + +int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip) +{ + int actual, n; + + actual = slip_esc(buf, slip->obuf, len); + n = net_write(fd, slip->obuf, actual); + if(n < 0) + return n; + else return len; +} diff --git a/arch/um/drivers/slip_proto.h b/arch/um/drivers/slip_common.h index 7206361..2ae76d8 100644 --- a/arch/um/drivers/slip_proto.h +++ b/arch/um/drivers/slip_common.h @@ -1,10 +1,10 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ +#ifndef __UM_SLIP_COMMON_H +#define __UM_SLIP_COMMON_H -#ifndef __UM_SLIP_PROTO_H__ -#define __UM_SLIP_PROTO_H__ +#define BUF_SIZE 1500 + /* two bytes each for a (pathological) max packet of escaped chars + * + * terminating END char + initial END char */ +#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) /* SLIP protocol characters. */ #define SLIP_END 0300 /* indicates end of frame */ @@ -12,7 +12,8 @@ #define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ #define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ -static inline int slip_unesc(unsigned char c,char *buf,int *pos, int *esc) +static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos, + int *esc) { int ret; @@ -79,15 +80,25 @@ static inline int slip_esc(unsigned char *s, unsigned char *d, int len) return (ptr - d); } -#endif +struct slip_proto { + unsigned char ibuf[ENC_BUF_SIZE]; + unsigned char obuf[ENC_BUF_SIZE]; + int more; /* more data: do not read fd until ibuf has been drained */ + int pos; + int esc; +}; + +#define SLIP_PROTO_INIT { \ + .ibuf = { '\0' }, \ + .obuf = { '\0' }, \ + .more = 0, \ + .pos = 0, \ + .esc = 0 \ +} -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +extern int slip_proto_read(int fd, void *buf, int len, + struct slip_proto *slip); +extern int slip_proto_write(int fd, void *buf, int len, + struct slip_proto *slip); + +#endif diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c index 0886eed..9a6f5c8 100644 --- a/arch/um/drivers/slip_kern.c +++ b/arch/um/drivers/slip_kern.c @@ -26,16 +26,16 @@ void slip_init(struct net_device *dev, void *data) .addr = NULL, .gate_addr = init->gate_addr, .slave = -1, - .ibuf = { '\0' }, - .obuf = { '\0' }, - .pos = 0, - .esc = 0, + .slip = SLIP_PROTO_INIT, .dev = dev }); dev->init = NULL; + dev->header_cache_update = NULL; + dev->hard_header_cache = NULL; + dev->hard_header = NULL; dev->hard_header_len = 0; - dev->addr_len = 4; - dev->type = ARPHRD_ETHER; + dev->addr_len = 0; + dev->type = ARPHRD_SLIP; dev->tx_queue_len = 256; dev->flags = IFF_NOARP; printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index d94846b..71af444 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -13,7 +13,7 @@ #include "user.h" #include "net_user.h" #include "slip.h" -#include "slip_proto.h" +#include "slip_common.h" #include "helper.h" #include "os.h" @@ -77,41 +77,51 @@ static int slip_tramp(char **argv, int fd) err = os_pipe(fds, 1, 0); if(err < 0){ printk("slip_tramp : pipe failed, err = %d\n", -err); - return(err); + goto out; } err = 0; pe_data.stdin = fd; pe_data.stdout = fds[1]; pe_data.close_me = fds[0]; - pid = run_helper(slip_pre_exec, &pe_data, argv, NULL); + err = run_helper(slip_pre_exec, &pe_data, argv, NULL); + if(err < 0) + goto out_close; + pid = err; + + output_len = page_size(); + output = um_kmalloc(output_len); + if(output == NULL){ + printk("slip_tramp : failed to allocate output buffer\n"); + os_kill_process(pid, 1); + err = -ENOMEM; + goto out_free; + } - if(pid < 0) err = pid; - else { - output_len = page_size(); - output = um_kmalloc(output_len); - if(output == NULL) - printk("slip_tramp : failed to allocate output " - "buffer\n"); - - os_close_file(fds[1]); - read_output(fds[0], output, output_len); - if(output != NULL){ - printk("%s", output); - kfree(output); - } - CATCH_EINTR(err = waitpid(pid, &status, 0)); - if(err < 0) - err = errno; - else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ - printk("'%s' didn't exit with status 0\n", argv[0]); - err = -EINVAL; - } + os_close_file(fds[1]); + read_output(fds[0], output, output_len); + printk("%s", output); + + CATCH_EINTR(err = waitpid(pid, &status, 0)); + if(err < 0) + err = errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ + printk("'%s' didn't exit with status 0\n", argv[0]); + err = -EINVAL; } + else err = 0; os_close_file(fds[0]); - return(err); +out_free: + kfree(output); + return err; + +out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); +out: + return err; } static int slip_open(void *data) @@ -123,21 +133,26 @@ static int slip_open(void *data) NULL }; int sfd, mfd, err; - mfd = get_pty(); - if(mfd < 0){ - printk("umn : Failed to open pty, err = %d\n", -mfd); - return(mfd); + err = get_pty(); + if(err < 0){ + printk("slip-open : Failed to open pty, err = %d\n", -err); + goto out; } - sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); - if(sfd < 0){ - printk("Couldn't open tty for slip line, err = %d\n", -sfd); - os_close_file(mfd); - return(sfd); + mfd = err; + + err = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); + if(err < 0){ + printk("Couldn't open tty for slip line, err = %d\n", -err); + goto out_close; } - if(set_up_tty(sfd)) return(-1); + sfd = err; + + if(set_up_tty(sfd)) + goto out_close2; + pri->slave = sfd; - pri->pos = 0; - pri->esc = 0; + pri->slip.pos = 0; + pri->slip.esc = 0; if(pri->gate_addr != NULL){ sprintf(version_buf, "%d", UML_NET_VERSION); strcpy(gate_buf, pri->gate_addr); @@ -146,12 +161,12 @@ static int slip_open(void *data) if(err < 0){ printk("slip_tramp failed - err = %d\n", -err); - return(err); + goto out_close2; } err = os_get_ifname(pri->slave, pri->name); if(err < 0){ printk("get_ifname failed, err = %d\n", -err); - return(err); + goto out_close2; } iter_addresses(pri->dev, open_addr, pri->name); } @@ -160,10 +175,16 @@ static int slip_open(void *data) if(err < 0){ printk("Failed to set slip discipline encapsulation - " "err = %d\n", -err); - return(err); + goto out_close2; } } return(mfd); +out_close2: + os_close_file(sfd); +out_close: + os_close_file(mfd); +out: + return err; } static void slip_close(int fd, void *data) @@ -190,48 +211,12 @@ static void slip_close(int fd, void *data) int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) { - int i, n, size, start; - - if(pri->more>0) { - i = 0; - while(i < pri->more) { - size = slip_unesc(pri->ibuf[i++], - pri->ibuf, &pri->pos, &pri->esc); - if(size){ - memcpy(buf, pri->ibuf, size); - memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); - pri->more=pri->more-i; - return(size); - } - } - pri->more=0; - } - - n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); - if(n <= 0) return(n); - - start = pri->pos; - for(i = 0; i < n; i++){ - size = slip_unesc(pri->ibuf[start + i], - pri->ibuf, &pri->pos, &pri->esc); - if(size){ - memcpy(buf, pri->ibuf, size); - memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); - pri->more=n-(i+1); - return(size); - } - } - return(0); + return slip_proto_read(fd, buf, len, &pri->slip); } int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) { - int actual, n; - - actual = slip_esc(buf, pri->obuf, len); - n = net_write(fd, pri->obuf, actual); - if(n < 0) return(n); - else return(len); + return slip_proto_write(fd, buf, len, &pri->slip); } static int slip_set_mtu(int mtu, void *data) @@ -267,14 +252,3 @@ struct net_user_info slip_user_info = { .delete_address = slip_del_addr, .max_packet = BUF_SIZE }; - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h index 04e407d..6cf88ab 100644 --- a/arch/um/drivers/slirp.h +++ b/arch/um/drivers/slirp.h @@ -1,10 +1,7 @@ #ifndef __UM_SLIRP_H #define __UM_SLIRP_H -#define BUF_SIZE 1500 - /* two bytes each for a (pathological) max packet of escaped chars + * - * terminating END char + initial END char */ -#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) +#include "slip_common.h" #define SLIRP_MAX_ARGS 100 /* @@ -24,28 +21,13 @@ struct slirp_data { struct arg_list_dummy_wrapper argw; int pid; int slave; - char ibuf[ENC_BUF_SIZE]; - char obuf[ENC_BUF_SIZE]; - int more; /* more data: do not read fd until ibuf has been drained */ - int pos; - int esc; + struct slip_proto slip; }; extern struct net_user_info slirp_user_info; -extern int set_umn_addr(int fd, char *addr, char *ptp_addr); extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -extern int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri); +extern int slirp_user_write(int fd, void *buf, int len, + struct slirp_data *pri); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c index c9d6b52..9864d27 100644 --- a/arch/um/drivers/slirp_kern.c +++ b/arch/um/drivers/slirp_kern.c @@ -25,10 +25,7 @@ void slirp_init(struct net_device *dev, void *data) { .argw = init->argw, .pid = -1, .slave = -1, - .ibuf = { '\0' }, - .obuf = { '\0' }, - .pos = 0, - .esc = 0, + .slip = SLIP_PROTO_INIT, .dev = dev }); dev->init = NULL; diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c index c322515..8d91f66 100644 --- a/arch/um/drivers/slirp_user.c +++ b/arch/um/drivers/slirp_user.c @@ -12,7 +12,7 @@ #include "user.h" #include "net_user.h" #include "slirp.h" -#include "slip_proto.h" +#include "slip_common.h" #include "helper.h" #include "os.h" @@ -48,47 +48,32 @@ static int slirp_tramp(char **argv, int fd) return(pid); } -/* XXX This is just a trivial wrapper around os_pipe */ -static int slirp_datachan(int *mfd, int *sfd) -{ - int fds[2], err; - - err = os_pipe(fds, 1, 1); - if(err < 0){ - printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); - return(err); - } - - *mfd = fds[0]; - *sfd = fds[1]; - return(0); -} - static int slirp_open(void *data) { struct slirp_data *pri = data; - int sfd, mfd, pid, err; + int fds[2], pid, err; - err = slirp_datachan(&mfd, &sfd); + err = os_pipe(fds, 1, 1); if(err) return(err); - pid = slirp_tramp(pri->argw.argv, sfd); - - if(pid < 0){ - printk("slirp_tramp failed - errno = %d\n", -pid); - os_close_file(sfd); - os_close_file(mfd); - return(pid); + err = slirp_tramp(pri->argw.argv, fds[1]); + if(err < 0){ + printk("slirp_tramp failed - errno = %d\n", -err); + goto out; } - - pri->slave = sfd; - pri->pos = 0; - pri->esc = 0; - - pri->pid = pid; - - return(mfd); + pid = err; + + pri->slave = fds[1]; + pri->slip.pos = 0; + pri->slip.esc = 0; + pri->pid = err; + + return(fds[0]); +out: + os_close_file(fds[0]); + os_close_file(fds[1]); + return err; } static void slirp_close(int fd, void *data) @@ -129,48 +114,12 @@ static void slirp_close(int fd, void *data) int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) { - int i, n, size, start; - - if(pri->more>0) { - i = 0; - while(i < pri->more) { - size = slip_unesc(pri->ibuf[i++], - pri->ibuf,&pri->pos,&pri->esc); - if(size){ - memcpy(buf, pri->ibuf, size); - memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); - pri->more=pri->more-i; - return(size); - } - } - pri->more=0; - } - - n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); - if(n <= 0) return(n); - - start = pri->pos; - for(i = 0; i < n; i++){ - size = slip_unesc(pri->ibuf[start + i], - pri->ibuf,&pri->pos,&pri->esc); - if(size){ - memcpy(buf, pri->ibuf, size); - memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); - pri->more=n-(i+1); - return(size); - } - } - return(0); + return slip_proto_read(fd, buf, len, &pri->slip); } int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) { - int actual, n; - - actual = slip_esc(buf, pri->obuf, len); - n = net_write(fd, pri->obuf, actual); - if(n < 0) return(n); - else return(len); + return slip_proto_write(fd, buf, len, &pri->slip); } static int slirp_set_mtu(int mtu, void *data) @@ -188,14 +137,3 @@ struct net_user_info slirp_user_info = { .delete_address = NULL, .max_packet = BUF_SIZE }; - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index a2bac42..b32a770 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -22,7 +22,6 @@ #include "init.h" #include "irq_user.h" #include "mconsole_kern.h" -#include "2_5compat.h" static int ssl_version = 1; diff --git a/arch/um/drivers/stderr_console.c b/arch/um/drivers/stderr_console.c index 98565b5..429ae8e 100644 --- a/arch/um/drivers/stderr_console.c +++ b/arch/um/drivers/stderr_console.c @@ -22,9 +22,9 @@ static void stderr_console_write(struct console *console, const char *string, } static struct console stderr_console = { - .name "stderr", - .write stderr_console_write, - .flags CON_PRINTBUFFER, + .name = "stderr", + .write = stderr_console_write, + .flags = CON_PRINTBUFFER, }; static int __init stderr_console_init(void) diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index 361d0be..afbe1e7 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -28,7 +28,6 @@ #include "irq_user.h" #include "mconsole_kern.h" #include "init.h" -#include "2_5compat.h" #define MAX_TTYS (16) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 9a56ff9..2a7f689 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -49,13 +49,12 @@ #include "irq_user.h" #include "irq_kern.h" #include "ubd_user.h" -#include "2_5compat.h" #include "os.h" #include "mem.h" #include "mem_kern.h" #include "cow.h" -enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; +enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { enum ubd_req op; @@ -68,8 +67,6 @@ struct io_thread_req { unsigned long sector_mask; unsigned long long cow_offset; unsigned long bitmap_words[2]; - int map_fd; - unsigned long long map_offset; int error; }; @@ -122,10 +119,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file, #define MAX_DEV (8) -/* Changed in early boot */ -static int ubd_do_mmap = 0; -#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE - static struct block_device_operations ubd_blops = { .owner = THIS_MODULE, .open = ubd_open, @@ -175,12 +168,6 @@ struct ubd { int no_cow; struct cow cow; struct platform_device pdev; - - int map_writes; - int map_reads; - int nomap_writes; - int nomap_reads; - int write_maps; }; #define DEFAULT_COW { \ @@ -200,11 +187,6 @@ struct ubd { .openflags = OPEN_FLAGS, \ .no_cow = 0, \ .cow = DEFAULT_COW, \ - .map_writes = 0, \ - .map_reads = 0, \ - .nomap_writes = 0, \ - .nomap_reads = 0, \ - .write_maps = 0, \ } struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; @@ -314,13 +296,6 @@ static int ubd_setup_common(char *str, int *index_out) int major; str++; - if(!strcmp(str, "mmap")){ - CHOOSE_MODE(printk("mmap not supported by the ubd " - "driver in tt mode\n"), - ubd_do_mmap = 1); - return(0); - } - if(!strcmp(str, "sync")){ global_openflags = of_sync(global_openflags); return(0); @@ -464,9 +439,9 @@ static int udb_setup(char *str) __setup("udb", udb_setup); __uml_help(udb_setup, "udb\n" -" This option is here solely to catch ubd -> udb typos, which can be\n\n" -" to impossible to catch visually unless you specifically look for\n\n" -" them. The only result of any option starting with 'udb' is an error\n\n" +" This option is here solely to catch ubd -> udb typos, which can be\n" +" to impossible to catch visually unless you specifically look for\n" +" them. The only result of any option starting with 'udb' is an error\n" " in the boot output.\n\n" ); @@ -524,7 +499,7 @@ static void ubd_handler(void) { struct io_thread_req req; struct request *rq = elv_next_request(ubd_queue); - int n, err; + int n; do_ubd = NULL; intr_count++; @@ -538,19 +513,6 @@ static void ubd_handler(void) return; } - if((req.op != UBD_MMAP) && - ((req.offset != ((__u64) (rq->sector)) << 9) || - (req.length != (rq->current_nr_sectors) << 9))) - panic("I/O op mismatch"); - - if(req.map_fd != -1){ - err = physmem_subst_mapping(req.buffer, req.map_fd, - req.map_offset, 1); - if(err) - printk("ubd_handler - physmem_subst_mapping failed, " - "err = %d\n", -err); - } - ubd_finish(rq, req.error); reactivate_fd(thread_fd, UBD_IRQ); do_ubd_request(ubd_queue); @@ -583,14 +545,10 @@ static int ubd_file_size(struct ubd *dev, __u64 *size_out) static void ubd_close(struct ubd *dev) { - if(ubd_do_mmap) - physmem_forget_descriptor(dev->fd); os_close_file(dev->fd); if(dev->cow.file == NULL) return; - if(ubd_do_mmap) - physmem_forget_descriptor(dev->cow.fd); os_close_file(dev->cow.fd); vfree(dev->cow.bitmap); dev->cow.bitmap = NULL; @@ -1010,94 +968,13 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, req->bitmap_words, bitmap_len); } -static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) -{ - __u64 sector; - unsigned char *bitmap; - int bit, i; - - /* mmap must have been requested on the command line */ - if(!ubd_do_mmap) - return(-1); - - /* The buffer must be page aligned */ - if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) - return(-1); - - /* The request must be a page long */ - if((req->current_nr_sectors << 9) != PAGE_SIZE) - return(-1); - - if(dev->cow.file == NULL) - return(dev->fd); - - sector = offset >> 9; - bitmap = (unsigned char *) dev->cow.bitmap; - bit = ubd_test_bit(sector, bitmap); - - for(i = 1; i < req->current_nr_sectors; i++){ - if(ubd_test_bit(sector + i, bitmap) != bit) - return(-1); - } - - if(bit || (rq_data_dir(req) == WRITE)) - offset += dev->cow.data_offset; - - /* The data on disk must be page aligned */ - if((offset % UBD_MMAP_BLOCK_SIZE) != 0) - return(-1); - - return(bit ? dev->fd : dev->cow.fd); -} - -static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, - struct request *req, - struct io_thread_req *io_req) -{ - int err; - - if(rq_data_dir(req) == WRITE){ - /* Writes are almost no-ops since the new data is already in the - * host page cache - */ - dev->map_writes++; - if(dev->cow.file != NULL) - cowify_bitmap(io_req->offset, io_req->length, - &io_req->sector_mask, &io_req->cow_offset, - dev->cow.bitmap, dev->cow.bitmap_offset, - io_req->bitmap_words, - dev->cow.bitmap_len); - } - else { - int w; - - if((dev->cow.file != NULL) && (fd == dev->cow.fd)) - w = 0; - else w = dev->openflags.w; - - if((dev->cow.file != NULL) && (fd == dev->fd)) - offset += dev->cow.data_offset; - - err = physmem_subst_mapping(req->buffer, fd, offset, w); - if(err){ - printk("physmem_subst_mapping failed, err = %d\n", - -err); - return(1); - } - dev->map_reads++; - } - io_req->op = UBD_MMAP; - io_req->buffer = req->buffer; - return(0); -} - /* Called with ubd_io_lock held */ static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; __u64 offset; - int len, fd; + int len; if(req->rq_status == RQ_INACTIVE) return(1); @@ -1114,34 +991,12 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; - io_req->map_fd = -1; io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; io_req->sector_mask = 0; - fd = mmap_fd(req, dev, io_req->offset); - if(fd > 0){ - /* If mmapping is otherwise OK, but the first access to the - * page is a write, then it's not mapped in yet. So we have - * to write the data to disk first, then we can map the disk - * page in and continue normally from there. - */ - if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ - io_req->map_fd = dev->fd; - io_req->map_offset = io_req->offset + - dev->cow.data_offset; - dev->write_maps++; - } - else return(prepare_mmap_request(dev, fd, io_req->offset, req, - io_req)); - } - - if(rq_data_dir(req) == READ) - dev->nomap_reads++; - else dev->nomap_writes++; - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; @@ -1229,143 +1084,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file, return(-EINVAL); } -static int ubd_check_remapped(int fd, unsigned long address, int is_write, - __u64 offset) -{ - __u64 bitmap_offset; - unsigned long new_bitmap[2]; - int i, err, n; - - /* If it's not a write access, we can't do anything about it */ - if(!is_write) - return(0); - - /* We have a write */ - for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ - struct ubd *dev = &ubd_dev[i]; - - if((dev->fd != fd) && (dev->cow.fd != fd)) - continue; - - /* It's a write to a ubd device */ - - /* This should be impossible now */ - if(!dev->openflags.w){ - /* It's a write access on a read-only device - probably - * shouldn't happen. If the kernel is trying to change - * something with no intention of writing it back out, - * then this message will clue us in that this needs - * fixing - */ - printk("Write access to mapped page from readonly ubd " - "device %d\n", i); - return(0); - } - - /* It's a write to a writeable ubd device - it must be COWed - * because, otherwise, the page would have been mapped in - * writeable - */ - - if(!dev->cow.file) - panic("Write fault on writeable non-COW ubd device %d", - i); - - /* It should also be an access to the backing file since the - * COW pages should be mapped in read-write - */ - - if(fd == dev->fd) - panic("Write fault on a backing page of ubd " - "device %d\n", i); - - /* So, we do the write, copying the backing data to the COW - * file... - */ - - err = os_seek_file(dev->fd, offset + dev->cow.data_offset); - if(err < 0) - panic("Couldn't seek to %lld in COW file of ubd " - "device %d, err = %d", - offset + dev->cow.data_offset, i, -err); - - n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); - if(n != PAGE_SIZE) - panic("Couldn't copy data to COW file of ubd " - "device %d, err = %d", i, -n); - - /* ... updating the COW bitmap... */ - - cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, - dev->cow.bitmap, dev->cow.bitmap_offset, - new_bitmap, dev->cow.bitmap_len); - - err = os_seek_file(dev->fd, bitmap_offset); - if(err < 0) - panic("Couldn't seek to %lld in COW file of ubd " - "device %d, err = %d", bitmap_offset, i, -err); - - n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); - if(n != sizeof(new_bitmap)) - panic("Couldn't update bitmap of ubd device %d, " - "err = %d", i, -n); - - /* Maybe we can map the COW page in, and maybe we can't. If - * it is a pre-V3 COW file, we can't, since the alignment will - * be wrong. If it is a V3 or later COW file which has been - * moved to a system with a larger page size, then maybe we - * can't, depending on the exact location of the page. - */ - - offset += dev->cow.data_offset; - - /* Remove the remapping, putting the original anonymous page - * back. If the COW file can be mapped in, that is done. - * Otherwise, the COW page is read in. - */ - - if(!physmem_remove_mapping((void *) address)) - panic("Address 0x%lx not remapped by ubd device %d", - address, i); - if((offset % UBD_MMAP_BLOCK_SIZE) == 0) - physmem_subst_mapping((void *) address, dev->fd, - offset, 1); - else { - err = os_seek_file(dev->fd, offset); - if(err < 0) - panic("Couldn't seek to %lld in COW file of " - "ubd device %d, err = %d", offset, i, - -err); - - n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); - if(n != PAGE_SIZE) - panic("Failed to read page from offset %llx of " - "COW file of ubd device %d, err = %d", - offset, i, -n); - } - - return(1); - } - - /* It's not a write on a ubd device */ - return(0); -} - -static struct remapper ubd_remapper = { - .list = LIST_HEAD_INIT(ubd_remapper.list), - .proc = ubd_check_remapped, -}; - -static int ubd_remapper_setup(void) -{ - if(ubd_do_mmap) - register_remapper(&ubd_remapper); - - return(0); -} - -__initcall(ubd_remapper_setup); - static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) { struct uml_stat buf1, buf2; @@ -1568,15 +1286,6 @@ void do_io(struct io_thread_req *req) int err; __u64 off; - if(req->op == UBD_MMAP){ - /* Touch the page to force the host to do any necessary IO to - * get it into memory - */ - n = *((volatile int *) req->buffer); - req->error = update_bitmap(req); - return; - } - nsectors = req->length / req->sectorsize; start = 0; do { diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 7917b9d..a4fdf35 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -7,7 +7,6 @@ #include "linux/slab.h" #include "linux/signal.h" #include "linux/interrupt.h" -#include "asm/semaphore.h" #include "asm/irq.h" #include "irq_user.h" #include "irq_kern.h" diff --git a/arch/um/include/2_5compat.h b/arch/um/include/2_5compat.h deleted file mode 100644 index abdb015..0000000 --- a/arch/um/include/2_5compat.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __2_5_COMPAT_H__ -#define __2_5_COMPAT_H__ - -#define INIT_HARDSECT(arr, maj, sizes) - -#define SET_PRI(task) do ; while(0) - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h new file mode 100644 index 0000000..d705daa --- /dev/null +++ b/arch/um/include/common-offsets.h @@ -0,0 +1,14 @@ +/* for use by sys-$SUBARCH/kernel-offsets.c */ + +OFFSET(TASK_REGS, task_struct, thread.regs); +OFFSET(TASK_PID, task_struct, pid); +DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); +DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); +DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); +DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); +DEFINE_STR(UM_KERN_ERR, KERN_ERR); +DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); +DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); +DEFINE_STR(UM_KERN_INFO, KERN_INFO); +DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 15389c8..e5fec55 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -8,6 +8,7 @@ #include "linux/threads.h" #include "sysdep/ptrace.h" +#include "sysdep/faultinfo.h" extern int ncpus; extern char *linux_prog; @@ -31,8 +32,8 @@ extern int current_pid(void); extern unsigned long alloc_stack(int order, int atomic); extern int do_signal(void); extern int is_stack_fault(unsigned long sp); -extern unsigned long segv(unsigned long address, unsigned long ip, - int is_write, int is_user, void *sc); +extern unsigned long segv(struct faultinfo fi, unsigned long ip, + int is_user, void *sc); extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); extern void syscall_ready(void); @@ -82,7 +83,7 @@ extern void timer_irq(union uml_pt_regs *regs); extern void unprotect_stack(unsigned long stack); extern void do_uml_exitcalls(void); extern int attach_debugger(int idle_pid, int pid, int stop); -extern void bad_segv(unsigned long address, unsigned long ip, int is_write); +extern void bad_segv(struct faultinfo fi, unsigned long ip); extern int config_gdb(char *str); extern int remove_gdb(void); extern char *uml_strdup(char *string); diff --git a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h index 9fbe308..cfa368e 100644 --- a/arch/um/include/mconsole.h +++ b/arch/um/include/mconsole.h @@ -56,7 +56,7 @@ struct mc_request int as_interrupt; int originating_fd; - int originlen; + unsigned int originlen; unsigned char origin[128]; /* sockaddr_un */ struct mconsole_request request; diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h index 36807b7..89885a7 100644 --- a/arch/um/include/net_user.h +++ b/arch/um/include/net_user.h @@ -35,7 +35,7 @@ extern void *get_output_buffer(int *len_out); extern void free_output_buffer(void *buffer); extern int tap_open_common(void *dev, char *gate_addr); -extern void tap_check_ips(char *gate_addr, char *eth_addr); +extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr); extern void read_output(int fd, char *output_out, int len); diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 07340c8..881d298 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -136,7 +136,7 @@ extern int os_seek_file(int fd, __u64 offset); extern int os_open_file(char *file, struct openflags flags, int mode); extern int os_read_file(int fd, void *buf, int len); extern int os_write_file(int fd, const void *buf, int count); -extern int os_file_size(char *file, long long *size_out); +extern int os_file_size(char *file, unsigned long long *size_out); extern int os_file_modtime(char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd, int owner); @@ -160,6 +160,7 @@ extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); extern void os_usr1_process(int pid); extern int os_getpid(void); +extern int os_getpgrp(void); extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); diff --git a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h index cfb5fb4..cd2327d 100644 --- a/arch/um/include/skas_ptrace.h +++ b/arch/um/include/skas_ptrace.h @@ -6,22 +6,11 @@ #ifndef __SKAS_PTRACE_H #define __SKAS_PTRACE_H -struct ptrace_faultinfo { - int is_write; - unsigned long addr; -}; - -struct ptrace_ldt { - int func; - void *ptr; - unsigned long bytecount; -}; - #define PTRACE_FAULTINFO 52 -#define PTRACE_SIGPENDING 53 -#define PTRACE_LDT 54 #define PTRACE_SWITCH_MM 55 +#include "sysdep/skas_ptrace.h" + #endif /* diff --git a/arch/um/include/sysdep-i386/checksum.h b/arch/um/include/sysdep-i386/checksum.h index 3a2a458..764ba4d 100644 --- a/arch/um/include/sysdep-i386/checksum.h +++ b/arch/um/include/sysdep-i386/checksum.h @@ -24,19 +24,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum); /* - * the same as csum_partial, but copies from src while it - * checksums, and handles user-space pointer exceptions correctly, when needed. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -unsigned int csum_partial_copy_to(const unsigned char *src, unsigned char *dst, - int len, int sum, int *err_ptr); -unsigned int csum_partial_copy_from(const unsigned char *src, unsigned char *dst, - int len, int sum, int *err_ptr); - -/* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. * @@ -52,11 +39,24 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char * return(csum_partial(dst, len, sum)); } +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + static __inline__ unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, int len, int sum, int *err_ptr) { - return csum_partial_copy_from(src, dst, len, sum, err_ptr); + if(copy_from_user(dst, src, len)){ + *err_ptr = -EFAULT; + return(-1); + } + + return csum_partial(dst, len, sum); } /* @@ -67,7 +67,6 @@ unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char */ #define csum_partial_copy_fromuser csum_partial_copy_from_user -unsigned int csum_partial_copy(const unsigned char *src, unsigned char *dst, int len, int sum); /* * This is a version of ip_compute_csum() optimized for IP headers, @@ -196,8 +195,14 @@ static __inline__ unsigned int csum_and_copy_to_user(const unsigned char *src, unsigned char *dst, int len, int sum, int *err_ptr) { - if (access_ok(VERIFY_WRITE, dst, len)) - return(csum_partial_copy_to(src, dst, len, sum, err_ptr)); + if (access_ok(VERIFY_WRITE, dst, len)){ + if(copy_to_user(dst, src, len)){ + *err_ptr = -EFAULT; + return(-1); + } + + return csum_partial(src, len, sum); + } if (len) *err_ptr = -EFAULT; diff --git a/arch/um/include/sysdep-i386/faultinfo.h b/arch/um/include/sysdep-i386/faultinfo.h new file mode 100644 index 0000000..db437cc --- /dev/null +++ b/arch/um/include/sysdep-i386/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_I386_H +#define __FAULTINFO_I386_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 0 + +#endif diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h index 661d495..c8ee955 100644 --- a/arch/um/include/sysdep-i386/ptrace.h +++ b/arch/um/include/sysdep-i386/ptrace.h @@ -8,6 +8,8 @@ #include "uml-config.h" #include "user_constants.h" +#include "sysdep/faultinfo.h" +#include "choose-mode.h" #define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long)) #define MAX_REG_OFFSET (UM_FRAME_SIZE) @@ -53,24 +55,17 @@ extern int sysemu_supported; #define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) -#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) - -#define REGS_FAULT_ADDR(r) ((r)->fault_addr) - -#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) - #endif #ifndef PTRACE_SYSEMU_SINGLESTEP #define PTRACE_SYSEMU_SINGLESTEP 32 #endif -#include "choose-mode.h" - union uml_pt_regs { #ifdef UML_CONFIG_MODE_TT struct tt_regs { long syscall; void *sc; + struct faultinfo faultinfo; } tt; #endif #ifdef UML_CONFIG_MODE_SKAS @@ -78,9 +73,7 @@ union uml_pt_regs { unsigned long regs[HOST_FRAME_SIZE]; unsigned long fp[HOST_FP_SIZE]; unsigned long xfp[HOST_XFP_SIZE]; - unsigned long fault_addr; - unsigned long fault_type; - unsigned long trap_type; + struct faultinfo faultinfo; long syscall; int is_user; } skas; @@ -217,15 +210,8 @@ struct syscall_args { #define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) #define UPT_SYSCALL_RET(r) UPT_EAX(r) -#define UPT_SEGV_IS_FIXABLE(r) \ - CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ - REGS_SEGV_IS_FIXABLE(&r->skas)) - -#define UPT_FAULT_ADDR(r) \ - __CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) - -#define UPT_FAULT_WRITE(r) \ - CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) +#define UPT_FAULTINFO(r) \ + CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo)) #endif diff --git a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h index dfee589..1fe7292 100644 --- a/arch/um/include/sysdep-i386/sigcontext.h +++ b/arch/um/include/sysdep-i386/sigcontext.h @@ -13,15 +13,12 @@ #define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc)) #define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result) -#define SC_FAULT_ADDR(sc) SC_CR2(sc) -#define SC_FAULT_TYPE(sc) SC_ERR(sc) - -#define FAULT_WRITE(err) (err & 2) -#define TO_SC_ERR(is_write) ((is_write) ? 2 : 0) - -#define SC_FAULT_WRITE(sc) (FAULT_WRITE(SC_ERR(sc))) - -#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) +#define GET_FAULTINFO_FROM_SC(fi,sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } /* ptrace expects that, at the start of a system call, %eax contains * -ENOSYS, so this makes it so. @@ -29,9 +26,7 @@ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) /* This is Page Fault */ -#define SEGV_IS_FIXABLE(trap) (trap == 14) - -#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) extern unsigned long *sc_sigmask(void *sc_ptr); extern int sc_get_fpregs(unsigned long buf, void *sc_ptr); diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h index b1e1f7a..07518b1 100644 --- a/arch/um/include/sysdep-i386/signal.h +++ b/arch/um/include/sysdep-i386/signal.h @@ -8,6 +8,8 @@ #include <signal.h> +#define ARCH_SIGHDLR_PARAM int sig + #define ARCH_GET_SIGCONTEXT(sc, sig) \ do sc = (struct sigcontext *) (&sig + 1); while(0) diff --git a/arch/um/include/sysdep-i386/skas_ptrace.h b/arch/um/include/sysdep-i386/skas_ptrace.h new file mode 100644 index 0000000..e27b8a7 --- /dev/null +++ b/arch/um/include/sysdep-i386/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_SKAS_PTRACE_H +#define __SYSDEP_I386_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-ia64/skas_ptrace.h b/arch/um/include/sysdep-ia64/skas_ptrace.h new file mode 100644 index 0000000..25a38e7 --- /dev/null +++ b/arch/um/include/sysdep-ia64/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SKAS_PTRACE_H +#define __SYSDEP_IA64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-ppc/skas_ptrace.h b/arch/um/include/sysdep-ppc/skas_ptrace.h new file mode 100644 index 0000000..d9fbbac --- /dev/null +++ b/arch/um/include/sysdep-ppc/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_PPC_SKAS_PTRACE_H +#define __SYSDEP_PPC_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-x86_64/checksum.h b/arch/um/include/sysdep-x86_64/checksum.h index 572c6c1..ea97005 100644 --- a/arch/um/include/sysdep-x86_64/checksum.h +++ b/arch/um/include/sysdep-x86_64/checksum.h @@ -9,8 +9,6 @@ #include "linux/in6.h" #include "asm/uaccess.h" -extern unsigned int csum_partial_copy_from(const unsigned char *src, unsigned char *dst, int len, - int sum, int *err_ptr); extern unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum); @@ -31,10 +29,15 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char * } static __inline__ -unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, - int len, int sum, int *err_ptr) +unsigned int csum_partial_copy_from_user(const unsigned char *src, + unsigned char *dst, int len, int sum, + int *err_ptr) { - return csum_partial_copy_from(src, dst, len, sum, err_ptr); + if(copy_from_user(dst, src, len)){ + *err_ptr = -EFAULT; + return(-1); + } + return csum_partial(dst, len, sum); } /** @@ -137,15 +140,6 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b) return a; } -#endif +extern unsigned short ip_compute_csum(unsigned char * buff, int len); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#endif diff --git a/arch/um/include/sysdep-x86_64/faultinfo.h b/arch/um/include/sysdep-x86_64/faultinfo.h new file mode 100644 index 0000000..cb917b0 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_X86_64_H +#define __FAULTINFO_X86_64_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 1 + +#endif diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 915c82d..be8acd5 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -9,6 +9,7 @@ #include "uml-config.h" #include "user_constants.h" +#include "sysdep/faultinfo.h" #define MAX_REG_OFFSET (UM_FRAME_SIZE) #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) @@ -83,6 +84,7 @@ union uml_pt_regs { long syscall; unsigned long orig_rax; void *sc; + struct faultinfo faultinfo; } tt; #endif #ifdef UML_CONFIG_MODE_SKAS @@ -90,9 +92,7 @@ union uml_pt_regs { /* XXX */ unsigned long regs[27]; unsigned long fp[65]; - unsigned long fault_addr; - unsigned long fault_type; - unsigned long trap_type; + struct faultinfo faultinfo; long syscall; int is_user; } skas; @@ -135,6 +135,7 @@ extern int mode_tt; __CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs)) #define UPT_SC(r) ((r)->tt.sc) #define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall) +#define UPT_SYSCALL_RET(r) UPT_RAX(r) extern int user_context(unsigned long sp); @@ -196,32 +197,32 @@ struct syscall_args { #define UPT_SET(regs, reg, val) \ - ({ unsigned long val; \ + ({ unsigned long __upt_val = val; \ switch(reg){ \ - case R8: UPT_R8(regs) = val; break; \ - case R9: UPT_R9(regs) = val; break; \ - case R10: UPT_R10(regs) = val; break; \ - case R11: UPT_R11(regs) = val; break; \ - case R12: UPT_R12(regs) = val; break; \ - case R13: UPT_R13(regs) = val; break; \ - case R14: UPT_R14(regs) = val; break; \ - case R15: UPT_R15(regs) = val; break; \ - case RIP: UPT_IP(regs) = val; break; \ - case RSP: UPT_SP(regs) = val; break; \ - case RAX: UPT_RAX(regs) = val; break; \ - case RBX: UPT_RBX(regs) = val; break; \ - case RCX: UPT_RCX(regs) = val; break; \ - case RDX: UPT_RDX(regs) = val; break; \ - case RSI: UPT_RSI(regs) = val; break; \ - case RDI: UPT_RDI(regs) = val; break; \ - case RBP: UPT_RBP(regs) = val; break; \ - case ORIG_RAX: UPT_ORIG_RAX(regs) = val; break; \ - case CS: UPT_CS(regs) = val; break; \ - case DS: UPT_DS(regs) = val; break; \ - case ES: UPT_ES(regs) = val; break; \ - case FS: UPT_FS(regs) = val; break; \ - case GS: UPT_GS(regs) = val; break; \ - case EFLAGS: UPT_EFLAGS(regs) = val; break; \ + case R8: UPT_R8(regs) = __upt_val; break; \ + case R9: UPT_R9(regs) = __upt_val; break; \ + case R10: UPT_R10(regs) = __upt_val; break; \ + case R11: UPT_R11(regs) = __upt_val; break; \ + case R12: UPT_R12(regs) = __upt_val; break; \ + case R13: UPT_R13(regs) = __upt_val; break; \ + case R14: UPT_R14(regs) = __upt_val; break; \ + case R15: UPT_R15(regs) = __upt_val; break; \ + case RIP: UPT_IP(regs) = __upt_val; break; \ + case RSP: UPT_SP(regs) = __upt_val; break; \ + case RAX: UPT_RAX(regs) = __upt_val; break; \ + case RBX: UPT_RBX(regs) = __upt_val; break; \ + case RCX: UPT_RCX(regs) = __upt_val; break; \ + case RDX: UPT_RDX(regs) = __upt_val; break; \ + case RSI: UPT_RSI(regs) = __upt_val; break; \ + case RDI: UPT_RDI(regs) = __upt_val; break; \ + case RBP: UPT_RBP(regs) = __upt_val; break; \ + case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \ + case CS: UPT_CS(regs) = __upt_val; break; \ + case DS: UPT_DS(regs) = __upt_val; break; \ + case ES: UPT_ES(regs) = __upt_val; break; \ + case FS: UPT_FS(regs) = __upt_val; break; \ + case GS: UPT_GS(regs) = __upt_val; break; \ + case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \ default : \ panic("Bad register in UPT_SET : %d\n", reg); \ break; \ @@ -241,24 +242,7 @@ struct syscall_args { CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ REGS_SEGV_IS_FIXABLE(&r->skas)) -#define UPT_FAULT_ADDR(r) \ - __CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) - -#define UPT_FAULT_WRITE(r) \ - CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) - -#define UPT_TRAP(r) __CHOOSE_MODE(SC_TRAP_TYPE(UPT_SC(r)), REGS_TRAP(&r->skas)) -#define UPT_ERR(r) __CHOOSE_MODE(SC_FAULT_TYPE(UPT_SC(r)), REGS_ERR(&r->skas)) +#define UPT_FAULTINFO(r) \ + CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo)) #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-x86_64/sigcontext.h b/arch/um/include/sysdep-x86_64/sigcontext.h index 1e38a54..2a78260 100644 --- a/arch/um/include/sysdep-x86_64/sigcontext.h +++ b/arch/um/include/sysdep-x86_64/sigcontext.h @@ -17,11 +17,12 @@ #define SC_FAULT_ADDR(sc) SC_CR2(sc) #define SC_FAULT_TYPE(sc) SC_ERR(sc) -#define FAULT_WRITE(err) ((err) & 2) - -#define SC_FAULT_WRITE(sc) FAULT_WRITE(SC_FAULT_TYPE(sc)) - -#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) +#define GET_FAULTINFO_FROM_SC(fi,sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } /* ptrace expects that, at the start of a system call, %eax contains * -ENOSYS, so this makes it so. @@ -29,8 +30,8 @@ #define SC_START_SYSCALL(sc) do SC_RAX(sc) = -ENOSYS; while(0) -#define SEGV_IS_FIXABLE(trap) ((trap) == 14) -#define SC_SEGV_IS_FIXABLE(sc) SEGV_IS_FIXABLE(SC_TRAP_TYPE(sc)) +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) extern unsigned long *sc_sigmask(void *sc_ptr); diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h index e5e5275..6142897 100644 --- a/arch/um/include/sysdep-x86_64/signal.h +++ b/arch/um/include/sysdep-x86_64/signal.h @@ -6,6 +6,8 @@ #ifndef __X86_64_SIGNAL_H_ #define __X86_64_SIGNAL_H_ +#define ARCH_SIGHDLR_PARAM int sig + #define ARCH_GET_SIGCONTEXT(sc, sig_addr) \ do { \ struct ucontext *__uc; \ diff --git a/arch/um/include/sysdep-x86_64/skas_ptrace.h b/arch/um/include/sysdep-x86_64/skas_ptrace.h new file mode 100644 index 0000000..95db4be7 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H +#define __SYSDEP_X86_64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysrq.h b/arch/um/include/sysrq.h index 2ce9423..c8d332b 100644 --- a/arch/um/include/sysrq.h +++ b/arch/um/include/sysrq.h @@ -1,6 +1,7 @@ #ifndef __UM_SYSRQ_H #define __UM_SYSRQ_H -extern void show_trace(unsigned long *stack); +struct task_struct; +extern void show_trace(struct task_struct* task, unsigned long *stack); #endif diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index 103cd32..7b6a24d 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -41,9 +41,6 @@ extern unsigned long highmem; extern char host_info[]; extern char saved_command_line[]; -extern char command_line[]; - -extern char *tempdir; extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; extern unsigned long _unprotected_end; @@ -67,7 +64,6 @@ extern void *um_kmalloc(int size); extern int switcheroo(int fd, int prot, void *from, void *to, int size); extern void setup_machinename(char *machine_out); extern void setup_hostinfo(void); -extern void add_arg(char *arg); extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); extern void init_new_thread_signals(int altstack); extern void do_exec(int old_pid, int new_pid); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 246f0e7..a8918e80 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -4,9 +4,9 @@ # extra-y := vmlinux.lds -clean-files := vmlinux.lds.S config.tmp +clean-files := -obj-y = checksum.o config.o exec_kern.o exitcode.o \ +obj-y = config.o exec_kern.o exitcode.o \ helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ @@ -14,7 +14,7 @@ obj-y = checksum.o config.o exec_kern.o exitcode.o \ tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ user_util.o -obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o +obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_TTY_LOG) += tty_log.o @@ -23,18 +23,14 @@ obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ -# This needs be compiled with frame pointers regardless of how the rest of the -# kernel is built. -CFLAGS_frame.o := -fno-omit-frame-pointer - user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ - time.o tty_log.o umid.o user_util.o frame.o + time.o tty_log.o umid.o user_util.o include arch/um/scripts/Makefile.rules -targets += config.c +targets := config.c config.tmp # Be careful with the below Sed code - sed is pitfall-rich! # We use sed to lower build requirements, for "embedded" builders for instance. diff --git a/arch/um/kernel/checksum.c b/arch/um/kernel/checksum.c deleted file mode 100644 index e69b2be..0000000 --- a/arch/um/kernel/checksum.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "asm/uaccess.h" -#include "linux/errno.h" -#include "linux/module.h" - -unsigned int arch_csum_partial(const unsigned char *buff, int len, int sum); - -unsigned int csum_partial(unsigned char *buff, int len, int sum) -{ - return arch_csum_partial(buff, len, sum); -} - -EXPORT_SYMBOL(csum_partial); - -unsigned int csum_partial_copy_to(const unsigned char *src, - unsigned char __user *dst, int len, int sum, - int *err_ptr) -{ - if(copy_to_user(dst, src, len)){ - *err_ptr = -EFAULT; - return(-1); - } - - return(arch_csum_partial(src, len, sum)); -} - -unsigned int csum_partial_copy_from(const unsigned char __user *src, - unsigned char *dst, int len, int sum, - int *err_ptr) -{ - if(copy_from_user(dst, src, len)){ - *err_ptr = -EFAULT; - return(-1); - } - - return arch_csum_partial(dst, len, sum); -} diff --git a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c index 49ddabe..efd222f 100644 --- a/arch/um/kernel/exec_kern.c +++ b/arch/um/kernel/exec_kern.c @@ -16,7 +16,6 @@ #include "kern.h" #include "irq_user.h" #include "tlb.h" -#include "2_5compat.h" #include "os.h" #include "time_user.h" #include "choose-mode.h" diff --git a/arch/um/kernel/initrd_kern.c b/arch/um/kernel/initrd.c index fc568af..82ecf90 100644 --- a/arch/um/kernel/initrd_kern.c +++ b/arch/um/kernel/initrd.c @@ -41,12 +41,31 @@ static int __init uml_initrd_setup(char *line, int *add) return 0; } -__uml_setup("initrd=", uml_initrd_setup, +__uml_setup("initrd=", uml_initrd_setup, "initrd=<initrd image>\n" " This is used to boot UML from an initrd image. The argument is the\n" " name of the file containing the image.\n\n" ); +int load_initrd(char *filename, void *buf, int size) +{ + int fd, n; + + fd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Opening '%s' failed - err = %d\n", filename, -fd); + return(-1); + } + n = os_read_file(fd, buf, size); + if(n != size){ + printk("Read of %d bytes from '%s' failed, err = %d\n", size, + filename, -n); + return(-1); + } + + os_close_file(fd); + return(0); +} /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/kernel/initrd_user.c b/arch/um/kernel/initrd_user.c deleted file mode 100644 index cb90681..0000000 --- a/arch/um/kernel/initrd_user.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> - -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "initrd.h" -#include "os.h" - -int load_initrd(char *filename, void *buf, int size) -{ - int fd, n; - - fd = os_open_file(filename, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("Opening '%s' failed - err = %d\n", filename, -fd); - return(-1); - } - n = os_read_file(fd, buf, size); - if(n != size){ - printk("Read of %d bytes from '%s' failed, err = %d\n", size, - filename, -n); - return(-1); - } - - os_close_file(fd); - return(0); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index d71e8f0..d44fb52 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -163,7 +163,6 @@ void __init init_IRQ(void) irq_desc[i].handler = &SIGIO_irq_type; enable_irq(i); } - init_irq_signals(0); } /* diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c index 6d6f948..b3074cb 100644 --- a/arch/um/kernel/irq_user.c +++ b/arch/um/kernel/irq_user.c @@ -236,9 +236,15 @@ static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) (*prev)->fd, pollfds[i].fd); goto out; } - memcpy(&pollfds[i], &pollfds[i + 1], - (pollfds_num - i - 1) * sizeof(pollfds[0])); + pollfds_num--; + + /* This moves the *whole* array after pollfds[i] (though + * it doesn't spot as such)! */ + + memmove(&pollfds[i], &pollfds[i + 1], + (pollfds_num - i) * sizeof(pollfds[0])); + if(last_irq_ptr == &old_fd->next) last_irq_ptr = prev; *prev = (*prev)->next; diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index b41d339..99439fa 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -10,7 +10,6 @@ #include "linux/spinlock.h" #include "linux/highmem.h" #include "asm/current.h" -#include "asm/delay.h" #include "asm/processor.h" #include "asm/unistd.h" #include "asm/pgalloc.h" @@ -28,8 +27,6 @@ EXPORT_SYMBOL(uml_physmem); EXPORT_SYMBOL(set_signals); EXPORT_SYMBOL(get_signals); EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(sys_waitpid); EXPORT_SYMBOL(task_size); EXPORT_SYMBOL(flush_tlb_range); @@ -60,6 +57,7 @@ EXPORT_SYMBOL(copy_to_user_tt); EXPORT_SYMBOL(strncpy_from_user_skas); EXPORT_SYMBOL(copy_to_user_skas); EXPORT_SYMBOL(copy_from_user_skas); +EXPORT_SYMBOL(clear_user_skas); #endif EXPORT_SYMBOL(uml_strdup); diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index a17c497..e59f581 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -24,8 +24,6 @@ #include "mode.h" #include "choose-mode.h" #include "uml-config.h" -#include "irq_user.h" -#include "time_user.h" #include "os.h" /* Set in set_stklim, which is called from main and __wrap_malloc. @@ -71,7 +69,7 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - CHOOSE_MODE(kmalloc_ok = 0, (void) 0); + kmalloc_ok = 0; signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); @@ -87,7 +85,7 @@ int main(int argc, char **argv, char **envp) { char **new_argv; sigset_t mask; - int ret, i; + int ret, i, err; /* Enable all signals except SIGIO - in some environments, we can * enter with some signals blocked @@ -160,27 +158,29 @@ int main(int argc, char **argv, char **envp) */ change_sig(SIGPROF, 0); - /* Reboot */ - if(ret){ - int err; + /* This signal stuff used to be in the reboot case. However, + * sometimes a SIGVTALRM can come in when we're halting (reproducably + * when writing out gcov information, presumably because that takes + * some time) and cause a segfault. + */ - printf("\n"); + /* stop timers and set SIG*ALRM to be ignored */ + disable_timer(); - /* stop timers and set SIG*ALRM to be ignored */ - disable_timer(); + /* disable SIGIO for the fds and set SIGIO to be ignored */ + err = deactivate_all_fds(); + if(err) + printf("deactivate_all_fds failed, errno = %d\n", -err); - /* disable SIGIO for the fds and set SIGIO to be ignored */ - err = deactivate_all_fds(); - if(err) - printf("deactivate_all_fds failed, errno = %d\n", - -err); - - /* Let any pending signals fire now. This ensures - * that they won't be delivered after the exec, when - * they are definitely not expected. - */ - unblock_signals(); + /* Let any pending signals fire now. This ensures + * that they won't be delivered after the exec, when + * they are definitely not expected. + */ + unblock_signals(); + /* Reboot */ + if(ret){ + printf("\n"); execvp(new_argv[0], new_argv); perror("Failed to exec kernel"); ret = 1; diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index f156661..c22825f 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -100,12 +100,37 @@ void mem_init(void) #endif } +/* + * Create a page table and place a pointer to it in a middle page + * directory entry. + */ +static void __init one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_KERNPG_TABLE + + (unsigned long) __pa(pte))); + if (pte != pte_offset_kernel(pmd, 0)) + BUG(); + } +} + +static void __init one_md_table_init(pud_t *pud) +{ +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table))); + if (pmd_table != pmd_offset(pud, 0)) + BUG(); +#endif +} + static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; - pte_t *pte; int i, j; unsigned long vaddr; @@ -115,15 +140,12 @@ static void __init fixrange_init(unsigned long start, unsigned long end, pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { - pmd = (pmd_t *)pgd; + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) + one_md_table_init(pud); + pmd = pmd_offset(pud, vaddr); for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(_KERNPG_TABLE + - (unsigned long) __pa(pte))); - if (pte != pte_offset_kernel(pmd, 0)) - BUG(); - } + one_page_table_init(pmd); vaddr += PMD_SIZE; } j = 0; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f76a269..1b5ef3e 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -30,7 +30,6 @@ #include "init.h" #include "os.h" #include "uml-config.h" -#include "ptrace_user.h" #include "choose-mode.h" #include "mode.h" #ifdef UML_CONFIG_MODE_SKAS @@ -65,8 +64,6 @@ void init_new_thread_signals(int altstack) SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGUSR2, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); signal(SIGHUP, SIG_IGN); @@ -133,7 +130,7 @@ int start_fork_tramp(void *thread_arg, unsigned long temp_stack, return(arg.pid); } -static int ptrace_child(void *arg) +static int ptrace_child(void) { int ret; int pid = os_getpid(), ppid = getppid(); @@ -162,20 +159,16 @@ static int ptrace_child(void *arg) _exit(ret); } -static int start_ptraced_child(void **stack_out) +static int start_ptraced_child(void) { - void *stack; - unsigned long sp; int pid, n, status; - stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(stack == MAP_FAILED) - panic("check_ptrace : mmap failed, errno = %d", errno); - sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); + pid = fork(); + if(pid == 0) + ptrace_child(); + if(pid < 0) - panic("check_ptrace : clone failed, errno = %d", errno); + panic("check_ptrace : fork failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if(n < 0) panic("check_ptrace : wait failed, errno = %d", errno); @@ -183,7 +176,6 @@ static int start_ptraced_child(void **stack_out) panic("check_ptrace : expected SIGSTOP, got status = %d", status); - *stack_out = stack; return(pid); } @@ -191,12 +183,12 @@ static int start_ptraced_child(void **stack_out) * just avoid using sysemu, not panic, but only if SYSEMU features are broken. * So only for SYSEMU features we test mustpanic, while normal host features * must work anyway!*/ -static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) +static int stop_ptraced_child(int pid, int exitcode, int mustexit) { int status, n, ret = 0; if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", errno); + panic("stop_ptraced_child : ptrace failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, 0)); if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { int exit_with = WEXITSTATUS(status); @@ -207,15 +199,13 @@ static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) printk("check_ptrace : child exited with exitcode %d, while " "expecting %d; status 0x%x", exit_with, exitcode, status); - if (mustpanic) + if (mustexit) panic("\n"); else printk("\n"); ret = -1; } - if(munmap(stack, PAGE_SIZE) < 0) - panic("check_ptrace : munmap failed, errno = %d", errno); return ret; } @@ -237,12 +227,11 @@ __uml_setup("nosysemu", nosysemu_cmd_param, static void __init check_sysemu(void) { - void *stack; int pid, syscall, n, status, count=0; printk("Checking syscall emulation patch for ptrace..."); sysemu_supported = 0; - pid = start_ptraced_child(&stack); + pid = start_ptraced_child(); if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) goto fail; @@ -260,7 +249,7 @@ static void __init check_sysemu(void) panic("check_sysemu : failed to modify system " "call return, errno = %d", errno); - if (stop_ptraced_child(pid, stack, 0, 0) < 0) + if (stop_ptraced_child(pid, 0, 0) < 0) goto fail_stopped; sysemu_supported = 1; @@ -268,7 +257,7 @@ static void __init check_sysemu(void) set_using_sysemu(!force_sysemu_disabled); printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(&stack); + pid = start_ptraced_child(); while(1){ count++; if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) @@ -293,7 +282,7 @@ static void __init check_sysemu(void) break; } } - if (stop_ptraced_child(pid, stack, 0, 0) < 0) + if (stop_ptraced_child(pid, 0, 0) < 0) goto fail_stopped; sysemu_supported = 2; @@ -304,18 +293,17 @@ static void __init check_sysemu(void) return; fail: - stop_ptraced_child(pid, stack, 1, 0); + stop_ptraced_child(pid, 1, 0); fail_stopped: printk("missing\n"); } void __init check_ptrace(void) { - void *stack; int pid, syscall, n, status; printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(&stack); + pid = start_ptraced_child(); if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); @@ -342,7 +330,7 @@ void __init check_ptrace(void) break; } } - stop_ptraced_child(pid, stack, 0, 1); + stop_ptraced_child(pid, 0, 1); printk("OK\n"); check_sysemu(); } @@ -374,11 +362,10 @@ void forward_pending_sigio(int target) static inline int check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; - void *stack; int pid, n, ret = 1; printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(&stack); + pid = start_ptraced_child(); n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { @@ -393,7 +380,7 @@ static inline int check_skas3_ptrace_support(void) } init_registers(pid); - stop_ptraced_child(pid, stack, 1, 1); + stop_ptraced_child(pid, 1, 1); return(ret); } diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 7a94369..804c6bb 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -43,7 +43,6 @@ #include "tlb.h" #include "frame_kern.h" #include "sigcontext.h" -#include "2_5compat.h" #include "os.h" #include "mode.h" #include "mode_kern.h" @@ -55,18 +54,6 @@ */ struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -struct task_struct *get_task(int pid, int require) -{ - struct task_struct *ret; - - read_lock(&tasklist_lock); - ret = find_task_by_pid(pid); - read_unlock(&tasklist_lock); - - if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); - return(ret); -} - int external_pid(void *t) { struct task_struct *task = t ? t : current; @@ -115,16 +102,6 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) return(pid); } -void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (prev != next) - cpu_clear(cpu, prev->cpu_vm_mask); - cpu_set(cpu, next->cpu_vm_mask); -} - void set_current(void *t) { struct task_struct *task = t; @@ -152,7 +129,6 @@ void release_thread(struct task_struct *task) void exit_thread(void) { - CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); unprotect_stack((unsigned long) current_thread); } @@ -200,7 +176,6 @@ void default_idle(void) while(1){ /* endless idle loop with no priority at all */ - SET_PRI(current); /* * although we are an idle CPU, we do not want to @@ -223,11 +198,6 @@ int page_size(void) return(PAGE_SIZE); } -unsigned long page_mask(void) -{ - return(PAGE_MASK); -} - void *um_virt_to_phys(struct task_struct *task, unsigned long addr, pte_t *pte_out) { @@ -360,11 +330,6 @@ char *uml_strdup(char *string) return(new); } -void *get_init_task(void) -{ - return(&init_thread_union.thread_info.task); -} - int copy_to_user_proc(void __user *to, void *from, int size) { return(copy_to_user(to, from, size)); @@ -476,21 +441,18 @@ int singlestepping(void * t) return 2; } +/* + * Only x86 and x86_64 have an arch_align_stack(). + * All other arches have "#define arch_align_stack(x) (x)" + * in their asm/system.h + * As this is included in UML from asm-um/system-generic.h, + * we can use it to behave as the subarch does. + */ +#ifndef arch_align_stack unsigned long arch_align_stack(unsigned long sp) { if (randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } - - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#endif diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index e50e60f..71af4d5 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -19,15 +19,30 @@ #include "skas_ptrace.h" #include "sysdep/ptrace.h" +static inline void set_singlestepping(struct task_struct *child, int on) +{ + if (on) + child->ptrace |= PT_DTRACE; + else + child->ptrace &= ~PT_DTRACE; + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, on); +#endif +} + /* * Called by kernel/ptrace.c when detaching.. */ void ptrace_disable(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child,0); } +extern int peek_user(struct task_struct * child, long addr, long data); +extern int poke_user(struct task_struct * child, long addr, long data); + long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -67,6 +82,10 @@ long sys_ptrace(long request, long pid, long addr, long data) goto out_tsk; } +#ifdef SUBACH_PTRACE_SPECIAL + SUBARCH_PTRACE_SPECIAL(child,request,addr,data); +#endif + ret = ptrace_check_attach(child, request == PTRACE_KILL); if (ret < 0) goto out_tsk; @@ -87,26 +106,9 @@ long sys_ptrace(long request, long pid, long addr, long data) } /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0) - break; - - tmp = 0; /* Default return condition */ - if(addr < MAX_REG_OFFSET){ - tmp = getreg(child, addr); - } - else if((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ - addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; - tmp = child->thread.arch.debugregs[addr]; - } - ret = put_user(tmp, (unsigned long __user *) data); - break; - } + case PTRACE_PEEKUSR: + ret = peek_user(child, addr, data); + break; /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ @@ -119,26 +121,8 @@ long sys_ptrace(long request, long pid, long addr, long data) break; case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0) - break; - - if (addr < MAX_REG_OFFSET) { - ret = putreg(child, addr, data); - break; - } -#if 0 /* XXX x86_64 */ - else if((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ - addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; - if((addr == 4) || (addr == 5)) break; - child->thread.arch.debugregs[addr] = data; - ret = 0; - } -#endif - - break; + ret = poke_user(child, addr, data); + break; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ @@ -146,8 +130,7 @@ long sys_ptrace(long request, long pid, long addr, long data) if (!valid_signal(data)) break; - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 0); if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } @@ -170,8 +153,7 @@ long sys_ptrace(long request, long pid, long addr, long data) if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 0); child->exit_code = SIGKILL; wake_up_process(child); break; @@ -182,8 +164,7 @@ long sys_ptrace(long request, long pid, long addr, long data) if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->ptrace |= PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 1); child->exit_code = data; /* give it a chance to run. */ wake_up_process(child); @@ -250,23 +231,19 @@ long sys_ptrace(long request, long pid, long addr, long data) break; #endif case PTRACE_FAULTINFO: { - struct ptrace_faultinfo fault; - - fault = ((struct ptrace_faultinfo) - { .is_write = child->thread.err, - .addr = child->thread.cr2 }); - ret = copy_to_user((unsigned long __user *) data, &fault, - sizeof(fault)); + /* Take the info from thread->arch->faultinfo, + * but transfer max. sizeof(struct ptrace_faultinfo). + * On i386, ptrace_faultinfo is smaller! + */ + ret = copy_to_user((unsigned long __user *) data, + &child->thread.arch.faultinfo, + sizeof(struct ptrace_faultinfo)); if(ret) break; break; } - case PTRACE_SIGPENDING: - ret = copy_to_user((unsigned long __user *) data, - &child->pending.signal, - sizeof(child->pending.signal)); - break; +#ifdef PTRACE_LDT case PTRACE_LDT: { struct ptrace_ldt ldt; @@ -282,6 +259,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } +#endif #ifdef CONFIG_PROC_MM case PTRACE_SWITCH_MM: { struct mm_struct *old = child->mm; @@ -337,15 +315,16 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit) if (unlikely(current->audit_context)) { if (!entryexit) - audit_syscall_entry(current, - UPT_SYSCALL_NR(®s->regs), - UPT_SYSCALL_ARG1(®s->regs), - UPT_SYSCALL_ARG2(®s->regs), - UPT_SYSCALL_ARG3(®s->regs), - UPT_SYSCALL_ARG4(®s->regs)); - else - audit_syscall_exit(current, - UPT_SYSCALL_RET(®s->regs)); + audit_syscall_entry(current, + HOST_AUDIT_ARCH, + UPT_SYSCALL_NR(regs), + UPT_SYSCALL_ARG1(regs), + UPT_SYSCALL_ARG2(regs), + UPT_SYSCALL_ARG3(regs), + UPT_SYSCALL_ARG4(regs)); + else audit_syscall_exit(current, + AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), + UPT_SYSCALL_RET(regs)); } /* Fake a debug trap */ @@ -375,14 +354,3 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit) current->exit_code = 0; } } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c index 668df13..e892189 100644 --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -182,6 +182,7 @@ static int write_sigio_thread(void *unused) int i, n, respond_fd; char c; + signal(SIGWINCH, SIG_IGN); fds = ¤t_poll; while(1){ n = poll(fds->poll, fds->used, -1); diff --git a/arch/um/kernel/skas/include/mode_kern-skas.h b/arch/um/kernel/skas/include/mode_kern-skas.h index 94c5649..e484900 100644 --- a/arch/um/kernel/skas/include/mode_kern-skas.h +++ b/arch/um/kernel/skas/include/mode_kern-skas.h @@ -18,7 +18,6 @@ extern int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct *p, struct pt_regs *regs); extern void release_thread_skas(struct task_struct *task); -extern void exit_thread_skas(void); extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); extern void init_idle_skas(void); extern void flush_tlb_kernel_range_skas(unsigned long start, diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index f0702c2..96b51db 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -27,9 +27,10 @@ extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, extern int unmap(int fd, void *addr, unsigned long len); extern int protect(int fd, unsigned long addr, unsigned long len, int r, int w, int x); -extern void user_signal(int sig, union uml_pt_regs *regs); +extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern void start_userspace(int cpu); +extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); #endif diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h index c356203..cd6c280 100644 --- a/arch/um/kernel/skas/include/uaccess-skas.h +++ b/arch/um/kernel/skas/include/uaccess-skas.h @@ -18,8 +18,8 @@ ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))) -static inline int __deprecated verify_area_skas(int type, const void * addr, - unsigned long size) +static inline int verify_area_skas(int type, const void * addr, + unsigned long size) { return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index b4ffaaa..773cd2b 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -4,6 +4,7 @@ */ #include <stdlib.h> +#include <string.h> #include <unistd.h> #include <errno.h> #include <signal.h> @@ -27,27 +28,37 @@ #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "process.h" int is_skas_winch(int pid, int fd, void *data) { - if(pid != os_getpid()) + if(pid != os_getpgrp()) return(0); register_winch_irq(-1, fd, -1, data); return(1); } -static void handle_segv(int pid) +void get_skas_faultinfo(int pid, struct faultinfo * fi) { - struct ptrace_faultinfo fault; int err; - err = ptrace(PTRACE_FAULTINFO, pid, 0, &fault); + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); if(err) - panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", - errno); + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); +} - segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); +static void handle_segv(int pid, union uml_pt_regs * regs) +{ + get_skas_faultinfo(pid, ®s->skas.faultinfo); + segv(regs->skas.faultinfo, 0, 1, NULL); } /*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/ @@ -163,7 +174,7 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid); + handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -177,7 +188,7 @@ void userspace(union uml_pt_regs *regs) case SIGBUS: case SIGFPE: case SIGWINCH: - user_signal(WSTOPSIG(status), regs); + user_signal(WSTOPSIG(status), regs, pid); break; default: printk("userspace - child stopped with signal " @@ -190,6 +201,11 @@ void userspace(union uml_pt_regs *regs) } } } +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_REMOVE_SIGSTACK 1 +#define INIT_JMP_CALLBACK 2 +#define INIT_JMP_HALT 3 +#define INIT_JMP_REBOOT 4 void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) @@ -225,7 +241,7 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; if(sigsetjmp(buf, 1) == 0) - siglongjmp(*fork_buf, 1); + siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK); } void switch_threads(void *me, void *next) @@ -249,23 +265,31 @@ int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) sigjmp_buf **switch_buf = switch_buf_ptr; int n; + set_handler(SIGWINCH, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM, + SIGVTALRM, -1); + *fork_buf_ptr = &initial_jmpbuf; n = sigsetjmp(initial_jmpbuf, 1); - if(n == 0) - new_thread_proc((void *) stack, new_thread_handler); - else if(n == 1) - remove_sigstack(); - else if(n == 2){ + switch(n){ + case INIT_JMP_NEW_THREAD: + new_thread_proc((void *) stack, new_thread_handler); + break; + case INIT_JMP_REMOVE_SIGSTACK: + remove_sigstack(); + break; + case INIT_JMP_CALLBACK: (*cb_proc)(cb_arg); siglongjmp(*cb_back, 1); - } - else if(n == 3){ + break; + case INIT_JMP_HALT: kmalloc_ok = 0; return(0); - } - else if(n == 4){ + case INIT_JMP_REBOOT: kmalloc_ok = 0; return(1); + default: + panic("Bad sigsetjmp return in start_idle_thread - %d\n", n); } siglongjmp(**switch_buf, 1); } @@ -290,7 +314,7 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) block_signals(); if(sigsetjmp(here, 1) == 0) - siglongjmp(initial_jmpbuf, 2); + siglongjmp(initial_jmpbuf, INIT_JMP_CALLBACK); unblock_signals(); cb_proc = NULL; @@ -301,13 +325,13 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { block_signals(); - siglongjmp(initial_jmpbuf, 3); + siglongjmp(initial_jmpbuf, INIT_JMP_HALT); } void reboot_skas(void) { block_signals(); - siglongjmp(initial_jmpbuf, 4); + siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } void switch_mm_skas(int mm_fd) diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 5d096ea..fc71ef2 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -68,8 +68,11 @@ void new_thread_handler(int sig) * 0 if it just exits */ n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); - if(n == 1) + if(n == 1){ + /* Handle any immediate reschedules or signals */ + interrupt_end(); userspace(¤t->thread.regs.regs); + } else do_exit(0); } @@ -83,10 +86,6 @@ void release_thread_skas(struct task_struct *task) { } -void exit_thread_skas(void) -{ -} - void fork_handler(int sig) { change_sig(SIGUSR1, 1); @@ -100,6 +99,8 @@ void fork_handler(int sig) schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; + /* Handle any immediate reschedules or signals */ + interrupt_end(); userspace(¤t->thread.regs.regs); } diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c index 8e9b46d..0dee1d9 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/kernel/skas/trap_user.c @@ -5,12 +5,15 @@ #include <signal.h> #include <errno.h> -#include "sysdep/ptrace.h" #include "signal_user.h" #include "user_util.h" #include "kern_util.h" #include "task.h" #include "sigcontext.h" +#include "skas.h" +#include "ptrace_user.h" +#include "sysdep/ptrace.h" +#include "sysdep/ptrace_user.h" void sig_handler_common_skas(int sig, void *sc_ptr) { @@ -31,9 +34,11 @@ void sig_handler_common_skas(int sig, void *sc_ptr) r = &TASK_REGS(get_current())->skas; save_user = r->is_user; r->is_user = 0; - r->fault_addr = SC_FAULT_ADDR(sc); - r->fault_type = SC_FAULT_TYPE(sc); - r->trap_type = SC_TRAP_TYPE(sc); + if ( sig == SIGFPE || sig == SIGSEGV || + sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP ) { + GET_FAULTINFO_FROM_SC(r->faultinfo, sc); + } change_sig(SIGUSR1, 1); info = &sig_info[sig]; @@ -45,14 +50,17 @@ void sig_handler_common_skas(int sig, void *sc_ptr) r->is_user = save_user; } -void user_signal(int sig, union uml_pt_regs *regs) +extern int ptrace_faultinfo; + +void user_signal(int sig, union uml_pt_regs *regs, int pid) { struct signal_info *info; + int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || + (sig == SIGILL) || (sig == SIGTRAP)); regs->skas.is_user = 1; - regs->skas.fault_addr = 0; - regs->skas.fault_type = 0; - regs->skas.trap_type = 0; + if (segv) + get_skas_faultinfo(pid, ®s->skas.faultinfo); info = &sig_info[sig]; (*info->handler)(sig, regs); diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index f7da9d0..7519528 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -29,9 +29,12 @@ static unsigned long maybe_map(unsigned long virt, int is_write) if(IS_ERR(phys) || (is_write && !pte_write(pte))){ err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); if(err) - return(0); + return(-1UL); phys = um_virt_to_phys(current, virt, NULL); } + if(IS_ERR(phys)) + phys = (void *) -1; + return((unsigned long) phys); } @@ -42,7 +45,7 @@ static int do_op(unsigned long addr, int len, int is_write, int n; addr = maybe_map(addr, is_write); - if(addr == -1) + if(addr == -1UL) return(-1); page = phys_to_page(addr); diff --git a/arch/um/kernel/skas/util/Makefile b/arch/um/kernel/skas/util/Makefile index 17f5909..f7b7eba 100644 --- a/arch/um/kernel/skas/util/Makefile +++ b/arch/um/kernel/skas/util/Makefile @@ -2,3 +2,4 @@ hostprogs-y := mk_ptregs always := $(hostprogs-y) mk_ptregs-objs := mk_ptregs-$(SUBARCH).o +HOSTCFLAGS_mk_ptregs-$(SUBARCH).o := -I$(objtree)/arch/um diff --git a/arch/um/kernel/skas/util/mk_ptregs-i386.c b/arch/um/kernel/skas/util/mk_ptregs-i386.c index 0788dd0..1f96e1e 100644 --- a/arch/um/kernel/skas/util/mk_ptregs-i386.c +++ b/arch/um/kernel/skas/util/mk_ptregs-i386.c @@ -1,8 +1,7 @@ #include <stdio.h> -#include <asm/ptrace.h> -#include <asm/user.h> +#include <user-offsets.h> -#define PRINT_REG(name, val) printf("#define HOST_%s %d\n", (name), (val)) +#define SHOW(name) printf("#define %s %d\n", #name, name) int main(int argc, char **argv) { @@ -12,28 +11,27 @@ int main(int argc, char **argv) printf("#ifndef __SKAS_PT_REGS_\n"); printf("#define __SKAS_PT_REGS_\n"); printf("\n"); - printf("#define HOST_FRAME_SIZE %d\n", FRAME_SIZE); - printf("#define HOST_FP_SIZE %d\n", - sizeof(struct user_i387_struct) / sizeof(unsigned long)); - printf("#define HOST_XFP_SIZE %d\n", - sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); + SHOW(HOST_FRAME_SIZE); + SHOW(HOST_FP_SIZE); + SHOW(HOST_XFP_SIZE); + + SHOW(HOST_IP); + SHOW(HOST_SP); + SHOW(HOST_EFLAGS); + SHOW(HOST_EAX); + SHOW(HOST_EBX); + SHOW(HOST_ECX); + SHOW(HOST_EDX); + SHOW(HOST_ESI); + SHOW(HOST_EDI); + SHOW(HOST_EBP); + SHOW(HOST_CS); + SHOW(HOST_SS); + SHOW(HOST_DS); + SHOW(HOST_FS); + SHOW(HOST_ES); + SHOW(HOST_GS); - PRINT_REG("IP", EIP); - PRINT_REG("SP", UESP); - PRINT_REG("EFLAGS", EFL); - PRINT_REG("EAX", EAX); - PRINT_REG("EBX", EBX); - PRINT_REG("ECX", ECX); - PRINT_REG("EDX", EDX); - PRINT_REG("ESI", ESI); - PRINT_REG("EDI", EDI); - PRINT_REG("EBP", EBP); - PRINT_REG("CS", CS); - PRINT_REG("SS", SS); - PRINT_REG("DS", DS); - PRINT_REG("FS", FS); - PRINT_REG("ES", ES); - PRINT_REG("GS", GS); printf("\n"); printf("#endif\n"); return(0); diff --git a/arch/um/kernel/skas/util/mk_ptregs-x86_64.c b/arch/um/kernel/skas/util/mk_ptregs-x86_64.c index 67aee92..5fccbfe 100644 --- a/arch/um/kernel/skas/util/mk_ptregs-x86_64.c +++ b/arch/um/kernel/skas/util/mk_ptregs-x86_64.c @@ -5,11 +5,10 @@ */ #include <stdio.h> -#define __FRAME_OFFSETS -#include <asm/ptrace.h> +#include <user-offsets.h> -#define PRINT_REG(name, val) \ - printf("#define HOST_%s (%d / sizeof(unsigned long))\n", (name), (val)) +#define SHOW(name) \ + printf("#define %s (%d / sizeof(unsigned long))\n", #name, name) int main(int argc, char **argv) { @@ -18,36 +17,35 @@ int main(int argc, char **argv) printf("\n"); printf("#ifndef __SKAS_PT_REGS_\n"); printf("#define __SKAS_PT_REGS_\n"); - printf("#define HOST_FRAME_SIZE (%d / sizeof(unsigned long))\n", - FRAME_SIZE); - PRINT_REG("RBX", RBX); - PRINT_REG("RCX", RCX); - PRINT_REG("RDI", RDI); - PRINT_REG("RSI", RSI); - PRINT_REG("RDX", RDX); - PRINT_REG("RBP", RBP); - PRINT_REG("RAX", RAX); - PRINT_REG("R8", R8); - PRINT_REG("R9", R9); - PRINT_REG("R10", R10); - PRINT_REG("R11", R11); - PRINT_REG("R12", R12); - PRINT_REG("R13", R13); - PRINT_REG("R14", R14); - PRINT_REG("R15", R15); - PRINT_REG("ORIG_RAX", ORIG_RAX); - PRINT_REG("CS", CS); - PRINT_REG("SS", SS); - PRINT_REG("EFLAGS", EFLAGS); + SHOW(HOST_FRAME_SIZE); + SHOW(HOST_RBX); + SHOW(HOST_RCX); + SHOW(HOST_RDI); + SHOW(HOST_RSI); + SHOW(HOST_RDX); + SHOW(HOST_RBP); + SHOW(HOST_RAX); + SHOW(HOST_R8); + SHOW(HOST_R9); + SHOW(HOST_R10); + SHOW(HOST_R11); + SHOW(HOST_R12); + SHOW(HOST_R13); + SHOW(HOST_R14); + SHOW(HOST_R15); + SHOW(HOST_ORIG_RAX); + SHOW(HOST_CS); + SHOW(HOST_SS); + SHOW(HOST_EFLAGS); #if 0 - PRINT_REG("FS", FS); - PRINT_REG("GS", GS); - PRINT_REG("DS", DS); - PRINT_REG("ES", ES); + SHOW(HOST_FS); + SHOW(HOST_GS); + SHOW(HOST_DS); + SHOW(HOST_ES); #endif - PRINT_REG("IP", RIP); - PRINT_REG("SP", RSP); + SHOW(HOST_IP); + SHOW(HOST_SP); printf("#define HOST_FP_SIZE 0\n"); printf("#define HOST_XFP_SIZE 0\n"); printf("\n"); diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index 42731e0..b7a5525 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -17,7 +17,6 @@ #include "linux/utime.h" #include "asm/mman.h" #include "asm/uaccess.h" -#include "asm/ipc.h" #include "kern_util.h" #include "user_util.h" #include "sysdep/syscalls.h" diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index e630438..f808500 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/kernel.h" #include "linux/module.h" @@ -12,14 +13,14 @@ #include "sysrq.h" #include "user_util.h" -void show_trace(unsigned long * stack) +/* Catch non-i386 SUBARCH's. */ +#if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT) +void show_trace(struct task_struct *task, unsigned long * stack) { - /* XXX: Copy the CONFIG_FRAME_POINTER stack-walking backtrace from - * arch/i386/kernel/traps.c, and then move this to sys-i386/sysrq.c.*/ unsigned long addr; if (!stack) { - stack = (unsigned long*) &stack; + stack = (unsigned long*) &stack; WARN_ON(1); } @@ -35,6 +36,7 @@ void show_trace(unsigned long * stack) } printk("\n"); } +#endif /* * stack dumps generator - this is used by arch-independent code. @@ -44,7 +46,7 @@ void dump_stack(void) { unsigned long stack; - show_trace(&stack); + show_trace(current, &stack); } EXPORT_SYMBOL(dump_stack); @@ -59,7 +61,11 @@ void show_stack(struct task_struct *task, unsigned long *esp) int i; if (esp == NULL) { - if (task != current) { + if (task != current && task != NULL) { + /* XXX: Isn't this bogus? I.e. isn't this the + * *userspace* stack of this task? If not so, use this + * even when task == current (as in i386). + */ esp = (unsigned long *) KSTK_ESP(task); /* Which one? No actual difference - just coding style.*/ //esp = (unsigned long *) PT_REGS_IP(&task->thread.regs); @@ -77,5 +83,6 @@ void show_stack(struct task_struct *task, unsigned long *esp) printk("%08lx ", *stack++); } - show_trace(esp); + printk("Call Trace: \n"); + show_trace(current, esp); } diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index 2461cd7..6516fc5 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -48,8 +48,6 @@ static unsigned long long prev_usecs; static long long delta; /* Deviation per interval */ #endif -#define MILLION 1000000 - void timer_irq(union uml_pt_regs *regs) { unsigned long long ticks = 0; @@ -136,22 +134,6 @@ long um_stime(int __user *tptr) return 0; } -void __udelay(unsigned long usecs) -{ - int i, n; - - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i<n;i++) ; -} - -void __const_udelay(unsigned long usecs) -{ - int i, n; - - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i<n;i++) ; -} - void timer_handler(int sig, union uml_pt_regs *regs) { local_irq_disable(); diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index 47e766e..c20aef1 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -23,7 +23,6 @@ #include "kern.h" #include "chan_kern.h" #include "mconsole_kern.h" -#include "2_5compat.h" #include "mem.h" #include "mem_kern.h" @@ -48,7 +47,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, goto good_area; else if(!(vma->vm_flags & VM_GROWSDOWN)) goto out; - else if(!ARCH_IS_STACKGROW(address)) + else if(is_user && !ARCH_IS_STACKGROW(address)) goto out; else if(expand_stack(vma, address)) goto out; @@ -57,10 +56,11 @@ int handle_page_fault(unsigned long address, unsigned long ip, *code_out = SEGV_ACCERR; if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; + + if(!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + page = address & PAGE_MASK; - pgd = pgd_offset(mm, page); - pud = pud_offset(pgd, page); - pmd = pmd_offset(pud, page); do { survive: switch (handle_mm_fault(mm, vma, address, is_write)){ @@ -106,46 +106,24 @@ out_of_memory: goto out; } -LIST_HEAD(physmem_remappers); - -void register_remapper(struct remapper *info) -{ - list_add(&info->list, &physmem_remappers); -} - -static int check_remapped_addr(unsigned long address, int is_write) -{ - struct remapper *remapper; - struct list_head *ele; - __u64 offset; - int fd; - - fd = phys_mapping(__pa(address), &offset); - if(fd == -1) - return(0); - - list_for_each(ele, &physmem_remappers){ - remapper = list_entry(ele, struct remapper, list); - if((*remapper->proc)(fd, address, is_write, offset)) - return(1); - } - - return(0); -} - -unsigned long segv(unsigned long address, unsigned long ip, int is_write, - int is_user, void *sc) +/* + * We give a *copy* of the faultinfo in the regs to segv. + * This must be done, since nesting SEGVs could overwrite + * the info in the regs. A pointer to the info then would + * give us bad data! + */ +unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) { struct siginfo si; void *catcher; int err; + int is_write = FAULT_WRITE(fi); + unsigned long address = FAULT_ADDRESS(fi); if(!is_user && (address >= start_vm) && (address < end_vm)){ flush_tlb_kernel_vm(); return(0); } - else if(check_remapped_addr(address & PAGE_MASK, is_write)) - return(0); else if(current->mm == NULL) panic("Segfault with no mm"); err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); @@ -159,7 +137,7 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, } else if(current->thread.fault_addr != NULL) panic("fault_addr set but no fault catcher"); - else if(arch_fixup(ip, sc)) + else if(!is_user && arch_fixup(ip, sc)) return(0); if(!is_user) @@ -171,6 +149,7 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, si.si_errno = 0; si.si_code = BUS_ADRERR; si.si_addr = (void *)address; + current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); } else if(err == -ENOMEM){ @@ -180,22 +159,20 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, else { si.si_signo = SIGSEGV; si.si_addr = (void *) address; - current->thread.cr2 = address; - current->thread.err = is_write; + current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } return(0); } -void bad_segv(unsigned long address, unsigned long ip, int is_write) +void bad_segv(struct faultinfo fi, unsigned long ip) { struct siginfo si; si.si_signo = SIGSEGV; si.si_code = SEGV_ACCERR; - si.si_addr = (void *) address; - current->thread.cr2 = address; - current->thread.err = is_write; + si.si_addr = (void *) FAULT_ADDRESS(fi); + current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } @@ -204,6 +181,7 @@ void relay_signal(int sig, union uml_pt_regs *regs) if(arch_handle_signal(sig, regs)) return; if(!UPT_IS_USER(regs)) panic("Kernel mode signal %d", sig); + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); force_sig(sig, current); } diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c index 50a4042..f825a6e 100644 --- a/arch/um/kernel/trap_user.c +++ b/arch/um/kernel/trap_user.c @@ -54,23 +54,22 @@ struct { void segv_handler(int sig, union uml_pt_regs *regs) { int index, max; + struct faultinfo * fi = UPT_FAULTINFO(regs); - if(UPT_IS_USER(regs) && !UPT_SEGV_IS_FIXABLE(regs)){ - bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), - UPT_FAULT_WRITE(regs)); + if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ + bad_segv(*fi, UPT_IP(regs)); return; } max = sizeof(segfault_record)/sizeof(segfault_record[0]); index = next_trap_index(max); nsegfaults++; - segfault_record[index].address = UPT_FAULT_ADDR(regs); + segfault_record[index].address = FAULT_ADDRESS(*fi); segfault_record[index].pid = os_getpid(); - segfault_record[index].is_write = UPT_FAULT_WRITE(regs); + segfault_record[index].is_write = FAULT_WRITE(*fi); segfault_record[index].sp = UPT_SP(regs); segfault_record[index].is_user = UPT_IS_USER(regs); - segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs), - UPT_IS_USER(regs), regs); + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); } void usr2_handler(int sig, union uml_pt_regs *regs) diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile index 3d5177d..c3faea2 100644 --- a/arch/um/kernel/tt/Makefile +++ b/arch/um/kernel/tt/Makefile @@ -4,6 +4,7 @@ # extra-y := unmap_fin.o +targets := unmap.o clean-files := unmap_tmp.o obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ diff --git a/arch/um/kernel/tt/include/mode_kern-tt.h b/arch/um/kernel/tt/include/mode_kern-tt.h index 28aaab3..e0ca0e0 100644 --- a/arch/um/kernel/tt/include/mode_kern-tt.h +++ b/arch/um/kernel/tt/include/mode_kern-tt.h @@ -19,7 +19,6 @@ extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct *p, struct pt_regs *regs); extern void release_thread_tt(struct task_struct *task); -extern void exit_thread_tt(void); extern void initial_thread_cb_tt(void (*proc)(void *), void *arg); extern void init_idle_tt(void); extern void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end); diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h index bb69d6b..3fbb5fe 100644 --- a/arch/um/kernel/tt/include/uaccess-tt.h +++ b/arch/um/kernel/tt/include/uaccess-tt.h @@ -33,8 +33,8 @@ extern unsigned long uml_physmem; (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ (under_task_size(addr, size) || is_stack(addr, size)))) -static inline int __deprecated verify_area_tt(int type, const void * addr, - unsigned long size) +static inline int verify_area_tt(int type, const void * addr, + unsigned long size) { return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); } diff --git a/arch/um/kernel/tt/ksyms.c b/arch/um/kernel/tt/ksyms.c index 92ec85d..84a9385 100644 --- a/arch/um/kernel/tt/ksyms.c +++ b/arch/um/kernel/tt/ksyms.c @@ -12,6 +12,7 @@ EXPORT_SYMBOL(__do_copy_to_user); EXPORT_SYMBOL(__do_strncpy_from_user); EXPORT_SYMBOL(__do_strnlen_user); EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(clear_user_tt); EXPORT_SYMBOL(tracing_pid); EXPORT_SYMBOL(honeypot); diff --git a/arch/um/kernel/tt/mem.c b/arch/um/kernel/tt/mem.c index 74346a0..bcb8796 100644 --- a/arch/um/kernel/tt/mem.c +++ b/arch/um/kernel/tt/mem.c @@ -21,14 +21,8 @@ void before_mem_tt(unsigned long brk_start) remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(&_end), 1); } -#ifdef CONFIG_HOST_2G_2G -#define TOP 0x80000000 -#else -#define TOP 0xc0000000 -#endif - #define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000) -#define START (TOP - SIZE) +#define START (CONFIG_TOP_ADDR - SIZE) unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, unsigned long *task_size_out) diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index f19f7c1..776310f 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -32,10 +32,6 @@ void *switch_to_tt(void *prev, void *next, void *last) unsigned long flags; int err, vtalrm, alrm, prof, cpu; char c; - /* jailing and SMP are incompatible, so this doesn't need to be - * made per-cpu - */ - static int reading; from = prev; to = next; @@ -59,14 +55,11 @@ void *switch_to_tt(void *prev, void *next, void *last) c = 0; set_current(to); - reading = 0; err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); if(err != sizeof(c)) panic("write of switch_pipe failed, err = %d", -err); - reading = 1; - if((from->exit_state == EXIT_ZOMBIE) || - (from->exit_state == EXIT_DEAD)) + if(from->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(os_getpid(), 0); err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); @@ -81,8 +74,7 @@ void *switch_to_tt(void *prev, void *next, void *last) * in case it has not already killed itself. */ prev_sched = current->thread.prev_sched; - if((prev_sched->exit_state == EXIT_ZOMBIE) || - (prev_sched->exit_state == EXIT_DEAD)) + if(prev_sched->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1); change_sig(SIGVTALRM, vtalrm); @@ -101,14 +93,18 @@ void release_thread_tt(struct task_struct *task) { int pid = task->thread.mode.tt.extern_pid; + /* + * We first have to kill the other process, before + * closing its switch_pipe. Else it might wake up + * and receive "EOF" before we could kill it. + */ if(os_getpid() != pid) os_kill_process(pid, 0); -} -void exit_thread_tt(void) -{ - os_close_file(current->thread.mode.tt.switch_pipe[0]); - os_close_file(current->thread.mode.tt.switch_pipe[1]); + os_close_file(task->thread.mode.tt.switch_pipe[0]); + os_close_file(task->thread.mode.tt.switch_pipe[1]); + /* use switch_pipe as flag: thread is released */ + task->thread.mode.tt.switch_pipe[0] = -1; } void suspend_new_thread(int fd) diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c index e4e7e9c..b218316 100644 --- a/arch/um/kernel/tt/syscall_user.c +++ b/arch/um/kernel/tt/syscall_user.c @@ -63,6 +63,10 @@ void do_syscall(void *task, int pid, int local_using_sysemu) UPT_SYSCALL_NR(TASK_REGS(task)) = PT_SYSCALL_NR(proc_regs); +#ifdef UPT_ORIGGPR2 + UPT_ORIGGPR2(TASK_REGS(task)) = REGS_ORIGGPR2(proc_regs); +#endif + if(((unsigned long *) PT_IP(proc_regs) >= &_stext) && ((unsigned long *) PT_IP(proc_regs) <= &_etext)) tracer_panic("I'm tracing myself and I can't get out"); diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c index 7b5d937..d11e739 100644 --- a/arch/um/kernel/tt/tracer.c +++ b/arch/um/kernel/tt/tracer.c @@ -26,6 +26,7 @@ #include "kern_util.h" #include "chan_user.h" #include "ptrace_user.h" +#include "irq_user.h" #include "mode.h" #include "tt.h" @@ -33,7 +34,7 @@ static int tracer_winch[2]; int is_tracer_winch(int pid, int fd, void *data) { - if(pid != tracing_pid) + if(pid != os_getpgrp()) return(0); register_winch_irq(tracer_winch[0], fd, -1, data); @@ -89,8 +90,10 @@ void tracer_panic(char *format, ...) static void tracer_segv(int sig, struct sigcontext sc) { + struct faultinfo fi; + GET_FAULTINFO_FROM_SC(fi, &sc); printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n", - SC_FAULT_ADDR(&sc), SC_IP(&sc)); + FAULT_ADDRESS(fi), SC_IP(&sc)); while(1) pause(); } @@ -117,6 +120,7 @@ static int signal_tramp(void *arg) signal(SIGSEGV, (__sighandler_t) sig_handler); set_cmdline("(idle thread)"); set_init_pid(os_getpid()); + init_irq_signals(0); proc = arg; return((*proc)(NULL)); } diff --git a/arch/um/kernel/tt/trap_user.c b/arch/um/kernel/tt/trap_user.c index 92a3820..fc10861 100644 --- a/arch/um/kernel/tt/trap_user.c +++ b/arch/um/kernel/tt/trap_user.c @@ -7,6 +7,7 @@ #include <errno.h> #include <signal.h> #include "sysdep/ptrace.h" +#include "sysdep/sigcontext.h" #include "signal_user.h" #include "user_util.h" #include "kern_util.h" @@ -28,6 +29,11 @@ void sig_handler_common_tt(int sig, void *sc_ptr) change_sig(SIGSEGV, 1); r = &TASK_REGS(get_current())->tt; + if ( sig == SIGFPE || sig == SIGSEGV || + sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP ) { + GET_FAULTINFO_FROM_SC(r->faultinfo, sc); + } save_regs = *r; is_user = user_context(SC_SP(sc)); r->sc = sc; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 5c49d88..8736d09 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -23,9 +23,9 @@ #include "asm/ptrace.h" #include "asm/elf.h" #include "asm/user.h" +#include "asm/setup.h" #include "ubd_user.h" #include "asm/current.h" -#include "asm/setup.h" #include "user_util.h" #include "kern_util.h" #include "kern.h" @@ -42,9 +42,9 @@ #define DEFAULT_COMMAND_LINE "root=98:0" /* Changed in linux_main and setup_arch, which run before SMP is started */ -char command_line[COMMAND_LINE_SIZE] = { 0 }; +static char command_line[COMMAND_LINE_SIZE] = { 0 }; -void add_arg(char *arg) +static void add_arg(char *arg) { if (strlen(command_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { printf("add_arg: Too many command line arguments!\n"); @@ -110,12 +110,6 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -pte_t * __bad_pagetable(void) -{ - panic("Someone should implement __bad_pagetable"); - return(NULL); -} - /* Set in linux_main */ unsigned long host_task_size; unsigned long task_size; @@ -449,7 +443,7 @@ void __init setup_arch(char **cmdline_p) { notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); paging_init(); - strlcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); + strlcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(); } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 76eadb3..dd53555 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -73,6 +73,8 @@ SECTIONS .got : { *(.got.plt) *(.got) } .dynamic : { *(.dynamic) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } /* We want the small data sections together, so single-instruction offsets can access them all, and initialized data all before uninitialized, so we can shorten the on-disk segment size. */ diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S new file mode 100644 index 0000000..1660a76 --- /dev/null +++ b/arch/um/kernel/vmlinux.lds.S @@ -0,0 +1,6 @@ +#include <linux/config.h> +#ifdef CONFIG_LD_SCRIPT_STATIC +#include "uml.lds.S" +#else +#include "dyn.lds.S" +#endif diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 9aee0b6..f0d6060 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -45,7 +45,11 @@ __init void scan_elf_aux( char **envp) elf_aux_hwcap = auxv->a_un.a_val; break; case AT_PLATFORM: - elf_aux_platform = auxv->a_un.a_ptr; + /* elf.h removed the pointer elements from + * a_un, so we have to use a_val, which is + * all that's left. + */ + elf_aux_platform = (char *) auxv->a_un.a_val; break; case AT_PAGESZ: page_size = auxv->a_un.a_val; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 77d4066..fd45bb2 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -363,7 +363,7 @@ int os_write_file(int fd, const void *buf, int len) (int (*)(int, void *, int)) write, copy_to_user_proc)); } -int os_file_size(char *file, long long *size_out) +int os_file_size(char *file, unsigned long long *size_out) { struct uml_stat buf; int err; diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index ba9ca1c..1e126bf 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -123,6 +123,11 @@ int os_getpid(void) return(getpid()); } +int os_getpgrp(void) +{ + return getpgrp(); +} + int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 7eac1ba..c7bfd5e 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -8,7 +8,7 @@ #include "mode.h" #include "sysdep/signal.h" -void sig_handler(int sig) +void sig_handler(ARCH_SIGHDLR_PARAM) { struct sigcontext *sc; @@ -19,7 +19,7 @@ void sig_handler(int sig) extern int timer_irq_inited; -void alarm_handler(int sig) +void alarm_handler(ARCH_SIGHDLR_PARAM) { struct sigcontext *sc; diff --git a/arch/um/os-Linux/util/Makefile b/arch/um/os-Linux/util/Makefile index fb00ddf..9778aed 100644 --- a/arch/um/os-Linux/util/Makefile +++ b/arch/um/os-Linux/util/Makefile @@ -1,4 +1,4 @@ hostprogs-y := mk_user_constants always := $(hostprogs-y) -mk_user_constants-objs := mk_user_constants.o +HOSTCFLAGS_mk_user_constants.o := -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/util/mk_user_constants.c b/arch/um/os-Linux/util/mk_user_constants.c index 0933518..4838f30 100644 --- a/arch/um/os-Linux/util/mk_user_constants.c +++ b/arch/um/os-Linux/util/mk_user_constants.c @@ -1,11 +1,5 @@ #include <stdio.h> -#include <asm/types.h> -/* For some reason, x86_64 nowhere defines u64 and u32, even though they're - * used throughout the headers. - */ -typedef __u64 u64; -typedef __u32 u32; -#include <asm/user.h> +#include <user-offsets.h> int main(int argc, char **argv) { @@ -20,7 +14,7 @@ int main(int argc, char **argv) * x86_64 (216 vs 168 bytes). user_regs_struct is the correct size on * both x86_64 and i386. */ - printf("#define UM_FRAME_SIZE %d\n", (int) sizeof(struct user_regs_struct)); + printf("#define UM_FRAME_SIZE %d\n", __UM_FRAME_SIZE); printf("\n"); printf("#endif\n"); diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index 143f6fe..98346c7 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -2,12 +2,27 @@ # arch/um: Generic definitions # =========================================================================== -USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) - +USER_SINGLE_OBJS := \ + $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) +USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -$(USER_OBJS): c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(notdir $@)) +$(USER_OBJS) : c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \ + $(CFLAGS_$(notdir $@)) quiet_cmd_make_link = SYMLINK $@ -cmd_make_link = rm -f $@; ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ +cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ + +# this needs to be before the foreach, because targets does not accept +# complete paths like $(obj)/$(f). To make sure this works, use a := assignment +# or we will get $(obj)/$(f) in the "targets" value. +# Also, this forces you to use the := syntax when assigning to targets. +# Otherwise the line below will cause an infinite loop (if you don't know why, +# just do it). + +targets := $(targets) $(SYMLINKS) + +SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$(f)) + +$(SYMLINKS): FORCE + $(call if_changed,make_link) diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 950781e..4351e56 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -7,24 +7,13 @@ obj-$(CONFIG_MODULES) += module.o USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o -include arch/um/scripts/Makefile.rules - SYMLINKS = bitops.c semaphore.c highmem.c module.c -# this needs to be before the foreach, because clean-files does not accept -# complete paths like $(src)/$f. -clean-files := $(SYMLINKS) - -targets += $(SYMLINKS) - -SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$f) +include arch/um/scripts/Makefile.rules bitops.c-dir = lib semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel -$(SYMLINKS): FORCE - $(call if_changed,make_link) - subdir- := util diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S index a11171f..d98b2ff 100644 --- a/arch/um/sys-i386/checksum.S +++ b/arch/um/sys-i386/checksum.S @@ -38,7 +38,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) .text .align 4 -.globl arch_csum_partial +.globl csum_partial #ifndef CONFIG_X86_USE_PPRO_CHECKSUM @@ -49,7 +49,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ -arch_csum_partial: +csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum @@ -119,7 +119,7 @@ arch_csum_partial: /* Version for PentiumII/PPro */ -arch_csum_partial: +csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c index 20d37db..2c11b97 100644 --- a/arch/um/sys-i386/delay.c +++ b/arch/um/sys-i386/delay.c @@ -1,3 +1,8 @@ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <asm/param.h> + void __delay(unsigned long time) { /* Stolen from the i386 __loop_delay */ @@ -12,3 +17,24 @@ void __delay(unsigned long time) :"0" (time)); } +void __udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__udelay); + +void __const_udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__const_udelay); diff --git a/arch/um/sys-i386/kernel-offsets.c b/arch/um/sys-i386/kernel-offsets.c new file mode 100644 index 0000000..9f8ecd1 --- /dev/null +++ b/arch/um/sys-i386/kernel-offsets.c @@ -0,0 +1,25 @@ +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <asm/page.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define STR(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(TASK_DEBUGREGS, task_struct, thread.arch.debugregs); +#ifdef CONFIG_MODE_TT + OFFSET(TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif +#include <common-offsets.h> +} diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c index 74f70a1..db524ab3 100644 --- a/arch/um/sys-i386/ksyms.c +++ b/arch/um/sys-i386/ksyms.c @@ -2,6 +2,7 @@ #include "linux/in6.h" #include "linux/rwsem.h" #include "asm/byteorder.h" +#include "asm/delay.h" #include "asm/semaphore.h" #include "asm/uaccess.h" #include "asm/checksum.h" @@ -13,5 +14,8 @@ EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_from); -EXPORT_SYMBOL(csum_partial_copy_to); +EXPORT_SYMBOL(csum_partial); + +/* delay core functions */ +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index 31bcb2f..dc755b0 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -25,7 +25,7 @@ int sys_modify_ldt_tt(int func, void __user *ptr, unsigned long bytecount) #endif #ifdef CONFIG_MODE_SKAS -extern int userspace_pid; +extern int userspace_pid[]; #include "skas_ptrace.h" @@ -56,7 +56,8 @@ int sys_modify_ldt_skas(int func, void __user *ptr, unsigned long bytecount) ldt = ((struct ptrace_ldt) { .func = func, .ptr = buf, .bytecount = bytecount }); - res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); +#warning Need to look up userspace_pid by cpu + res = ptrace(PTRACE_LDT, userspace_pid[0], 0, (unsigned long) &ldt); if(res < 0) goto out; diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index e470d28..e839ce6 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -73,6 +73,25 @@ int putreg(struct task_struct *child, int regno, unsigned long value) return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + unsigned long getreg(struct task_struct *child, int regno) { unsigned long retval = ~0UL; @@ -93,6 +112,27 @@ unsigned long getreg(struct task_struct *child, int regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ +/* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + struct i387_fxsave_struct { unsigned short cwd; unsigned short swd; diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 76ba872..03913ca 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -47,9 +47,6 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, REGS_CS(regs->regs.skas.regs) = sc.cs; REGS_EFLAGS(regs->regs.skas.regs) = sc.eflags; REGS_SS(regs->regs.skas.regs) = sc.ss; - regs->regs.skas.fault_addr = sc.cr2; - regs->regs.skas.fault_type = FAULT_WRITE(sc.err); - regs->regs.skas.trap_type = sc.trapno; err = restore_fp_registers(userspace_pid[0], fpregs); if(err < 0){ @@ -62,11 +59,11 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, } int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, - struct pt_regs *regs, unsigned long fault_addr, - int fault_type) + struct pt_regs *regs) { struct sigcontext sc; unsigned long fpregs[HOST_FP_SIZE]; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; int err; sc.gs = REGS_GS(regs->regs.skas.regs); @@ -86,9 +83,9 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, sc.eflags = REGS_EFLAGS(regs->regs.skas.regs); sc.esp_at_signal = regs->regs.skas.regs[UESP]; sc.ss = regs->regs.skas.regs[SS]; - sc.cr2 = fault_addr; - sc.err = TO_SC_ERR(fault_type); - sc.trapno = regs->regs.skas.trap_type; + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; err = save_fp_registers(userspace_pid[0], fpregs); if(err < 0){ @@ -167,9 +164,7 @@ static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), sizeof(*fp)), - copy_sc_to_user_skas(to, fp, from, - current->thread.cr2, - current->thread.err))); + copy_sc_to_user_skas(to, fp, from))); } static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, diff --git a/arch/um/sys-i386/sysrq.c b/arch/um/sys-i386/sysrq.c index 281fc7b..e3706d1 100644 --- a/arch/um/sys-i386/sysrq.c +++ b/arch/um/sys-i386/sysrq.c @@ -3,12 +3,15 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/kernel.h" #include "linux/smp.h" #include "linux/sched.h" +#include "linux/kallsyms.h" #include "asm/ptrace.h" #include "sysrq.h" +/* This is declared by <linux/sched.h> */ void show_regs(struct pt_regs *regs) { printk("\n"); @@ -31,5 +34,80 @@ void show_regs(struct pt_regs *regs) 0xffff & PT_REGS_DS(regs), 0xffff & PT_REGS_ES(regs)); - show_trace((unsigned long *) ®s); + show_trace(NULL, (unsigned long *) ®s); } + +/* Copied from i386. */ +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* Adapted from i386 (we also print the address we read from). */ +static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + printk("%08lx: [<%08lx>]", ebp + 4, addr); + print_symbol(" %s", addr); + printk("\n"); + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); + print_symbol(" %s", addr); + printk("\n"); + } + stack++; + } +#endif + return ebp; +} + +void show_trace(struct task_struct* task, unsigned long * stack) +{ + unsigned long ebp; + struct thread_info *context; + + /* Turn this into BUG_ON if possible. */ + if (!stack) { + stack = (unsigned long*) &stack; + printk("show_trace: got NULL stack, implicit assumption task == current"); + WARN_ON(1); + } + + if (!task) + task = current; + + if (task != current) { + //ebp = (unsigned long) KSTK_EBP(task); + /* Which one? No actual difference - just coding style.*/ + ebp = (unsigned long) PT_REGS_EBP(&task->thread.regs); + } else { + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } + + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + print_context_stack(context, stack, ebp); + + /*while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); + print_symbol(" %s", addr); + printk("\n"); + } + stack++; + }*/ + printk("\n"); +} + diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c new file mode 100644 index 0000000..3ceaabc --- /dev/null +++ b/arch/um/sys-i386/user-offsets.c @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <signal.h> +#include <asm/ptrace.h> +#include <asm/user.h> +#include <linux/stddef.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(SC_IP, sigcontext, eip); + OFFSET(SC_SP, sigcontext, esp); + OFFSET(SC_FS, sigcontext, fs); + OFFSET(SC_GS, sigcontext, gs); + OFFSET(SC_DS, sigcontext, ds); + OFFSET(SC_ES, sigcontext, es); + OFFSET(SC_SS, sigcontext, ss); + OFFSET(SC_CS, sigcontext, cs); + OFFSET(SC_EFLAGS, sigcontext, eflags); + OFFSET(SC_EAX, sigcontext, eax); + OFFSET(SC_EBX, sigcontext, ebx); + OFFSET(SC_ECX, sigcontext, ecx); + OFFSET(SC_EDX, sigcontext, edx); + OFFSET(SC_EDI, sigcontext, edi); + OFFSET(SC_ESI, sigcontext, esi); + OFFSET(SC_EBP, sigcontext, ebp); + OFFSET(SC_TRAPNO, sigcontext, trapno); + OFFSET(SC_ERR, sigcontext, err); + OFFSET(SC_CR2, sigcontext, cr2); + OFFSET(SC_FPSTATE, sigcontext, fpstate); + OFFSET(SC_SIGMASK, sigcontext, oldmask); + OFFSET(SC_FP_CW, _fpstate, cw); + OFFSET(SC_FP_SW, _fpstate, sw); + OFFSET(SC_FP_TAG, _fpstate, tag); + OFFSET(SC_FP_IPOFF, _fpstate, ipoff); + OFFSET(SC_FP_CSSEL, _fpstate, cssel); + OFFSET(SC_FP_DATAOFF, _fpstate, dataoff); + OFFSET(SC_FP_DATASEL, _fpstate, datasel); + OFFSET(SC_FP_ST, _fpstate, _st); + OFFSET(SC_FXSR_ENV, _fpstate, _fxsr_env); + + DEFINE(HOST_FRAME_SIZE, FRAME_SIZE); + DEFINE(HOST_FP_SIZE, + sizeof(struct user_i387_struct) / sizeof(unsigned long)); + DEFINE(HOST_XFP_SIZE, + sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); + + DEFINE(HOST_IP, EIP); + DEFINE(HOST_SP, UESP); + DEFINE(HOST_EFLAGS, EFL); + DEFINE(HOST_EAX, EAX); + DEFINE(HOST_EBX, EBX); + DEFINE(HOST_ECX, ECX); + DEFINE(HOST_EDX, EDX); + DEFINE(HOST_ESI, ESI); + DEFINE(HOST_EDI, EDI); + DEFINE(HOST_EBP, EBP); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_FS, FS); + DEFINE(HOST_ES, ES); + DEFINE(HOST_GS, GS); + DEFINE(__UM_FRAME_SIZE, sizeof(struct user_regs_struct)); +} diff --git a/arch/um/sys-i386/util/Makefile b/arch/um/sys-i386/util/Makefile index 34860f9..bf61afd 100644 --- a/arch/um/sys-i386/util/Makefile +++ b/arch/um/sys-i386/util/Makefile @@ -1,8 +1,5 @@ - hostprogs-y := mk_sc mk_thread always := $(hostprogs-y) -mk_thread-objs := mk_thread_kern.o mk_thread_user.o - -HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) +HOSTCFLAGS_mk_sc.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_thread.o := -I$(objtree)/arch/um diff --git a/arch/um/sys-i386/util/mk_sc.c b/arch/um/sys-i386/util/mk_sc.c index 85cbd30..04c0d73 100644 --- a/arch/um/sys-i386/util/mk_sc.c +++ b/arch/um/sys-i386/util/mk_sc.c @@ -1,52 +1,51 @@ #include <stdio.h> -#include <signal.h> -#include <linux/stddef.h> +#include <user-offsets.h> #define SC_OFFSET(name, field) \ - printf("#define " name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ - offsetof(struct sigcontext, field)) + printf("#define " #name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ + name) #define SC_FP_OFFSET(name, field) \ - printf("#define " name \ + printf("#define " #name \ "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) + name) #define SC_FP_OFFSET_PTR(name, field, type) \ - printf("#define " name \ + printf("#define " #name \ "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) + name) int main(int argc, char **argv) { - SC_OFFSET("SC_IP", eip); - SC_OFFSET("SC_SP", esp); - SC_OFFSET("SC_FS", fs); - SC_OFFSET("SC_GS", gs); - SC_OFFSET("SC_DS", ds); - SC_OFFSET("SC_ES", es); - SC_OFFSET("SC_SS", ss); - SC_OFFSET("SC_CS", cs); - SC_OFFSET("SC_EFLAGS", eflags); - SC_OFFSET("SC_EAX", eax); - SC_OFFSET("SC_EBX", ebx); - SC_OFFSET("SC_ECX", ecx); - SC_OFFSET("SC_EDX", edx); - SC_OFFSET("SC_EDI", edi); - SC_OFFSET("SC_ESI", esi); - SC_OFFSET("SC_EBP", ebp); - SC_OFFSET("SC_TRAPNO", trapno); - SC_OFFSET("SC_ERR", err); - SC_OFFSET("SC_CR2", cr2); - SC_OFFSET("SC_FPSTATE", fpstate); - SC_OFFSET("SC_SIGMASK", oldmask); - SC_FP_OFFSET("SC_FP_CW", cw); - SC_FP_OFFSET("SC_FP_SW", sw); - SC_FP_OFFSET("SC_FP_TAG", tag); - SC_FP_OFFSET("SC_FP_IPOFF", ipoff); - SC_FP_OFFSET("SC_FP_CSSEL", cssel); - SC_FP_OFFSET("SC_FP_DATAOFF", dataoff); - SC_FP_OFFSET("SC_FP_DATASEL", datasel); - SC_FP_OFFSET_PTR("SC_FP_ST", _st, "struct _fpstate"); - SC_FP_OFFSET_PTR("SC_FXSR_ENV", _fxsr_env, "void"); + SC_OFFSET(SC_IP, eip); + SC_OFFSET(SC_SP, esp); + SC_OFFSET(SC_FS, fs); + SC_OFFSET(SC_GS, gs); + SC_OFFSET(SC_DS, ds); + SC_OFFSET(SC_ES, es); + SC_OFFSET(SC_SS, ss); + SC_OFFSET(SC_CS, cs); + SC_OFFSET(SC_EFLAGS, eflags); + SC_OFFSET(SC_EAX, eax); + SC_OFFSET(SC_EBX, ebx); + SC_OFFSET(SC_ECX, ecx); + SC_OFFSET(SC_EDX, edx); + SC_OFFSET(SC_EDI, edi); + SC_OFFSET(SC_ESI, esi); + SC_OFFSET(SC_EBP, ebp); + SC_OFFSET(SC_TRAPNO, trapno); + SC_OFFSET(SC_ERR, err); + SC_OFFSET(SC_CR2, cr2); + SC_OFFSET(SC_FPSTATE, fpstate); + SC_OFFSET(SC_SIGMASK, oldmask); + SC_FP_OFFSET(SC_FP_CW, cw); + SC_FP_OFFSET(SC_FP_SW, sw); + SC_FP_OFFSET(SC_FP_TAG, tag); + SC_FP_OFFSET(SC_FP_IPOFF, ipoff); + SC_FP_OFFSET(SC_FP_CSSEL, cssel); + SC_FP_OFFSET(SC_FP_DATAOFF, dataoff); + SC_FP_OFFSET(SC_FP_DATASEL, datasel); + SC_FP_OFFSET_PTR(SC_FP_ST, _st, "struct _fpstate"); + SC_FP_OFFSET_PTR(SC_FXSR_ENV, _fxsr_env, "void"); return(0); } diff --git a/arch/um/sys-i386/util/mk_thread.c b/arch/um/sys-i386/util/mk_thread.c new file mode 100644 index 0000000..7470d0d --- /dev/null +++ b/arch/um/sys-i386/util/mk_thread.c @@ -0,0 +1,22 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_thread\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_THREAD_H\n"); + printf("#define __UM_THREAD_H\n"); + printf("\n"); + printf("#define TASK_DEBUGREGS(task) ((unsigned long *) " + "&(((char *) (task))[%d]))\n", TASK_DEBUGREGS); +#ifdef TASK_EXTERN_PID + printf("#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[%d]))\n", + TASK_EXTERN_PID); +#endif + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/sys-i386/util/mk_thread_kern.c b/arch/um/sys-i386/util/mk_thread_kern.c deleted file mode 100644 index 948b1ce..0000000 --- a/arch/um/sys-i386/util/mk_thread_kern.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "linux/config.h" -#include "linux/stddef.h" -#include "linux/sched.h" - -extern void print_head(void); -extern void print_constant_ptr(char *name, int value); -extern void print_constant(char *name, char *type, int value); -extern void print_tail(void); - -#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) - -int main(int argc, char **argv) -{ - print_head(); - print_constant_ptr("TASK_DEBUGREGS", THREAD_OFFSET(arch.debugregs)); -#ifdef CONFIG_MODE_TT - print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -#endif - print_tail(); - return(0); -} - diff --git a/arch/um/sys-i386/util/mk_thread_user.c b/arch/um/sys-i386/util/mk_thread_user.c deleted file mode 100644 index 2620cd6..0000000 --- a/arch/um/sys-i386/util/mk_thread_user.c +++ /dev/null @@ -1,30 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_thread\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_THREAD_H\n"); - printf("#define __UM_THREAD_H\n"); - printf("\n"); -} - -void print_constant_ptr(char *name, int value) -{ - printf("#define %s(task) ((unsigned long *) " - "&(((char *) (task))[%d]))\n", name, value); -} - -void print_constant(char *name, char *type, int value) -{ - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, - value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c index a971366d..8e71b47 100644 --- a/arch/um/sys-ppc/ptrace.c +++ b/arch/um/sys-ppc/ptrace.c @@ -8,6 +8,25 @@ int putreg(struct task_struct *child, unsigned long regno, return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + unsigned long getreg(struct task_struct *child, unsigned long regno) { unsigned long retval = ~0UL; @@ -16,6 +35,27 @@ unsigned long getreg(struct task_struct *child, unsigned long regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c index 82d6e93..2f816f1 100644 --- a/arch/um/sys-ppc/sysrq.c +++ b/arch/um/sys-ppc/sysrq.c @@ -27,17 +27,5 @@ void show_regs(struct pt_regs_subarch *regs) 0xffff & regs->xds, 0xffff & regs->xes); #endif - show_trace(®s->gpr[1]); + show_trace(current, ®s->gpr[1]); } - - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index d7ed2f7..608466a 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -4,24 +4,20 @@ # Licensed under the GPL # +#XXX: why into lib-y? lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \ syscalls.o sysrq.o thunk.o syscall_table.o -USER_OBJS := ptrace_user.o sigcontext.o +obj-y := ksyms.o +obj-$(CONFIG_MODULES) += module.o um_module.o -include arch/um/scripts/Makefile.rules +USER_OBJS := ptrace_user.o sigcontext.o SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \ - semaphore.c thunk.S - -# this needs to be before the foreach, because clean-files does not accept -# complete paths like $(src)/$f. -clean-files := $(SYMLINKS) + semaphore.c thunk.S module.c -targets += $(SYMLINKS) - -SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$f) +include arch/um/scripts/Makefile.rules bitops.c-dir = lib csum-copy.S-dir = lib @@ -30,8 +26,6 @@ csum-wrappers.c-dir = lib memcpy.S-dir = lib semaphore.c-dir = kernel thunk.S-dir = lib +module.c-dir = kernel -$(SYMLINKS): FORCE - $(call if_changed,make_link) - -CFLAGS_csum-partial.o := -Dcsum_partial=arch_csum_partial +subdir- := util diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c index f3b5187..137f444 100644 --- a/arch/um/sys-x86_64/delay.c +++ b/arch/um/sys-x86_64/delay.c @@ -5,22 +5,37 @@ * Licensed under the GPL */ -#include "asm/processor.h" +#include <linux/module.h> +#include <linux/delay.h> +#include <asm/processor.h> +#include <asm/param.h> void __delay(unsigned long loops) { unsigned long i; - for(i = 0; i < loops; i++) ; + for(i = 0; i < loops; i++) + cpu_relax(); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +void __udelay(unsigned long usecs) +{ + unsigned long i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__udelay); + +void __const_udelay(unsigned long usecs) +{ + unsigned long i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__const_udelay); diff --git a/arch/um/sys-x86_64/kernel-offsets.c b/arch/um/sys-x86_64/kernel-offsets.c new file mode 100644 index 0000000..220e875 --- /dev/null +++ b/arch/um/sys-x86_64/kernel-offsets.c @@ -0,0 +1,24 @@ +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <asm/page.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_STR1(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#ifdef CONFIG_MODE_TT + OFFSET(TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif +#include <common-offsets.h> +} diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c new file mode 100644 index 0000000..8592738 --- /dev/null +++ b/arch/um/sys-x86_64/ksyms.c @@ -0,0 +1,19 @@ +#include "linux/module.h" +#include "linux/in6.h" +#include "linux/rwsem.h" +#include "asm/byteorder.h" +#include "asm/semaphore.h" +#include "asm/uaccess.h" +#include "asm/checksum.h" +#include "asm/errno.h" + +EXPORT_SYMBOL(__down_failed); +EXPORT_SYMBOL(__down_failed_interruptible); +EXPORT_SYMBOL(__down_failed_trylock); +EXPORT_SYMBOL(__up_wakeup); + +/*XXX: we need them because they would be exported by x86_64 */ +EXPORT_SYMBOL(__memcpy); + +/* Networking helper routines. */ +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index 8c146b2..74eee5c 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -5,10 +5,11 @@ */ #define __FRAME_OFFSETS -#include "asm/ptrace.h" -#include "linux/sched.h" -#include "linux/errno.h" -#include "asm/elf.h" +#include <asm/ptrace.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <asm/uaccess.h> +#include <asm/elf.h> /* XXX x86_64 */ unsigned long not_ss; @@ -62,6 +63,27 @@ int putreg(struct task_struct *child, int regno, unsigned long value) return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + +#if 0 /* Need x86_64 debugregs handling */ + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } +#endif + return -EIO; +} + unsigned long getreg(struct task_struct *child, int regno) { unsigned long retval = ~0UL; @@ -84,6 +106,29 @@ unsigned long getreg(struct task_struct *child, int regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } +#if 0 /* Need x86_64 debugregs handling */ + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } +#endif + return put_user(tmp, (unsigned long *) data); +} + void arch_switch(void) { /* XXX diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index 5bc5a0d..73a7926 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -57,7 +57,7 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, struct pt_regs *regs, unsigned long mask) { - unsigned long eflags; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; int err = 0; err |= __put_user(0, &to->gs); @@ -84,14 +84,16 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, err |= PUTREG(regs, R14, to, r14); err |= PUTREG(regs, R15, to, r15); err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */ - err |= __put_user(current->thread.err, &to->err); - err |= __put_user(current->thread.trap_no, &to->trapno); + + err |= __put_user(fi->cr2, &to->cr2); + err |= __put_user(fi->error_code, &to->err); + err |= __put_user(fi->trap_no, &to->trapno); + err |= PUTREG(regs, RIP, to, rip); err |= PUTREG(regs, EFLAGS, to, eflags); #undef PUTREG err |= __put_user(mask, &to->oldmask); - err |= __put_user(current->thread.cr2, &to->cr2); return(err); } @@ -166,7 +168,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8; - frame -= 128; + ((unsigned char *) frame) -= 128; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto out; diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index ab4b0ab..6f44f40 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -7,12 +7,15 @@ #include "linux/linkage.h" #include "linux/slab.h" #include "linux/shm.h" +#include "linux/utsname.h" +#include "linux/personality.h" #include "asm/uaccess.h" #define __FRAME_OFFSETS #include "asm/ptrace.h" #include "asm/unistd.h" #include "asm/prctl.h" /* XXX This should get the constants from libc */ #include "choose-mode.h" +#include "kern.h" asmlinkage long sys_uname64(struct new_utsname __user * name) { @@ -42,6 +45,8 @@ long sys_modify_ldt_tt(int func, void *ptr, unsigned long bytecount) #ifdef CONFIG_MODE_SKAS extern int userspace_pid[]; +#include "skas_ptrace.h" + long sys_modify_ldt_skas(int func, void *ptr, unsigned long bytecount) { struct ptrace_ldt ldt; @@ -128,23 +133,27 @@ static long arch_prctl_tt(int code, unsigned long addr) #ifdef CONFIG_MODE_SKAS +/* XXX: Must also call arch_prctl in the host, beside saving the segment bases! */ static long arch_prctl_skas(int code, unsigned long addr) { long ret = 0; switch(code){ - case ARCH_SET_GS: - current->thread.regs.regs.skas.regs[GS_BASE / sizeof(unsigned long)] = addr; - break; case ARCH_SET_FS: current->thread.regs.regs.skas.regs[FS_BASE / sizeof(unsigned long)] = addr; break; + case ARCH_SET_GS: + current->thread.regs.regs.skas.regs[GS_BASE / sizeof(unsigned long)] = addr; + break; case ARCH_GET_FS: - ret = put_user(current->thread.regs.regs.skas.regs[GS / sizeof(unsigned long)], &addr); + ret = put_user(current->thread.regs.regs.skas. + regs[FS_BASE / sizeof(unsigned long)], + (unsigned long __user *)addr); break; case ARCH_GET_GS: - ret = put_user(current->thread.regs.regs.skas.regs[FS / sizeof(unsigned \ -long)], &addr); + ret = put_user(current->thread.regs.regs.skas. + regs[GS_BASE / sizeof(unsigned long)], + (unsigned long __user *)addr); break; default: ret = -EINVAL; diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c index ddf7469..d0a25af 100644 --- a/arch/um/sys-x86_64/sysrq.c +++ b/arch/um/sys-x86_64/sysrq.c @@ -36,14 +36,5 @@ void __show_regs(struct pt_regs * regs) void show_regs(struct pt_regs *regs) { __show_regs(regs); - show_trace((unsigned long *) ®s); + show_trace(current, (unsigned long *) ®s); } - -/* Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c new file mode 100644 index 0000000..8b8eff1 --- /dev/null +++ b/arch/um/sys-x86_64/um_module.c @@ -0,0 +1,19 @@ +#include <linux/vmalloc.h> +#include <linux/moduleloader.h> + +/*Copied from i386 arch/i386/kernel/module.c */ +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c new file mode 100644 index 0000000..513d17c --- /dev/null +++ b/arch/um/sys-x86_64/user-offsets.c @@ -0,0 +1,86 @@ +#include <stdio.h> +#include <stddef.h> +#include <signal.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/types.h> +/* For some reason, x86_64 defines u64 and u32 only in <pci/types.h>, which I + * refuse to include here, even though they're used throughout the headers. + * These are used in asm/user.h, and that include can't be avoided because of + * the sizeof(struct user_regs_struct) below. + */ +typedef __u64 u64; +typedef __u32 u32; +#include <asm/user.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(SC_RBX, sigcontext, rbx); + OFFSET(SC_RCX, sigcontext, rcx); + OFFSET(SC_RDX, sigcontext, rdx); + OFFSET(SC_RSI, sigcontext, rsi); + OFFSET(SC_RDI, sigcontext, rdi); + OFFSET(SC_RBP, sigcontext, rbp); + OFFSET(SC_RAX, sigcontext, rax); + OFFSET(SC_R8, sigcontext, r8); + OFFSET(SC_R9, sigcontext, r9); + OFFSET(SC_R10, sigcontext, r10); + OFFSET(SC_R11, sigcontext, r11); + OFFSET(SC_R12, sigcontext, r12); + OFFSET(SC_R13, sigcontext, r13); + OFFSET(SC_R14, sigcontext, r14); + OFFSET(SC_R15, sigcontext, r15); + OFFSET(SC_IP, sigcontext, rip); + OFFSET(SC_SP, sigcontext, rsp); + OFFSET(SC_CR2, sigcontext, cr2); + OFFSET(SC_ERR, sigcontext, err); + OFFSET(SC_TRAPNO, sigcontext, trapno); + OFFSET(SC_CS, sigcontext, cs); + OFFSET(SC_FS, sigcontext, fs); + OFFSET(SC_GS, sigcontext, gs); + OFFSET(SC_EFLAGS, sigcontext, eflags); + OFFSET(SC_SIGMASK, sigcontext, oldmask); +#if 0 + OFFSET(SC_ORIG_RAX, sigcontext, orig_rax); + OFFSET(SC_DS, sigcontext, ds); + OFFSET(SC_ES, sigcontext, es); + OFFSET(SC_SS, sigcontext, ss); +#endif + + DEFINE(HOST_FRAME_SIZE, FRAME_SIZE); + DEFINE(HOST_RBX, RBX); + DEFINE(HOST_RCX, RCX); + DEFINE(HOST_RDI, RDI); + DEFINE(HOST_RSI, RSI); + DEFINE(HOST_RDX, RDX); + DEFINE(HOST_RBP, RBP); + DEFINE(HOST_RAX, RAX); + DEFINE(HOST_R8, R8); + DEFINE(HOST_R9, R9); + DEFINE(HOST_R10, R10); + DEFINE(HOST_R11, R11); + DEFINE(HOST_R12, R12); + DEFINE(HOST_R13, R13); + DEFINE(HOST_R14, R14); + DEFINE(HOST_R15, R15); + DEFINE(HOST_ORIG_RAX, ORIG_RAX); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_EFLAGS, EFLAGS); +#if 0 + DEFINE(HOST_FS, FS); + DEFINE(HOST_GS, GS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_ES, ES); +#endif + + DEFINE(HOST_IP, RIP); + DEFINE(HOST_SP, RSP); + DEFINE(__UM_FRAME_SIZE, sizeof(struct user_regs_struct)); +} diff --git a/arch/um/sys-x86_64/util/Makefile b/arch/um/sys-x86_64/util/Makefile index 0026079..75b052c 100644 --- a/arch/um/sys-x86_64/util/Makefile +++ b/arch/um/sys-x86_64/util/Makefile @@ -4,7 +4,5 @@ hostprogs-y := mk_sc mk_thread always := $(hostprogs-y) -mk_thread-objs := mk_thread_kern.o mk_thread_user.o - -HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) +HOSTCFLAGS_mk_sc.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_thread.o := -I$(objtree)/arch/um diff --git a/arch/um/sys-x86_64/util/mk_sc.c b/arch/um/sys-x86_64/util/mk_sc.c index c236e21..7619bc3 100644 --- a/arch/um/sys-x86_64/util/mk_sc.c +++ b/arch/um/sys-x86_64/util/mk_sc.c @@ -3,56 +3,45 @@ */ #include <stdio.h> -#include <signal.h> -#include <linux/stddef.h> +#include <user-offsets.h> -#define SC_OFFSET(name, field) \ - printf("#define " name \ - "(sc) *((unsigned long *) &(((char *) (sc))[%ld]))\n",\ - offsetof(struct sigcontext, field)) - -#define SC_FP_OFFSET(name, field) \ - printf("#define " name \ - "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%ld]))\n",\ - offsetof(struct _fpstate, field)) - -#define SC_FP_OFFSET_PTR(name, field, type) \ - printf("#define " name \ - "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) +#define SC_OFFSET(name) \ + printf("#define " #name \ + "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ + name) int main(int argc, char **argv) { - SC_OFFSET("SC_RBX", rbx); - SC_OFFSET("SC_RCX", rcx); - SC_OFFSET("SC_RDX", rdx); - SC_OFFSET("SC_RSI", rsi); - SC_OFFSET("SC_RDI", rdi); - SC_OFFSET("SC_RBP", rbp); - SC_OFFSET("SC_RAX", rax); - SC_OFFSET("SC_R8", r8); - SC_OFFSET("SC_R9", r9); - SC_OFFSET("SC_R10", r10); - SC_OFFSET("SC_R11", r11); - SC_OFFSET("SC_R12", r12); - SC_OFFSET("SC_R13", r13); - SC_OFFSET("SC_R14", r14); - SC_OFFSET("SC_R15", r15); - SC_OFFSET("SC_IP", rip); - SC_OFFSET("SC_SP", rsp); - SC_OFFSET("SC_CR2", cr2); - SC_OFFSET("SC_ERR", err); - SC_OFFSET("SC_TRAPNO", trapno); - SC_OFFSET("SC_CS", cs); - SC_OFFSET("SC_FS", fs); - SC_OFFSET("SC_GS", gs); - SC_OFFSET("SC_EFLAGS", eflags); - SC_OFFSET("SC_SIGMASK", oldmask); + SC_OFFSET(SC_RBX); + SC_OFFSET(SC_RCX); + SC_OFFSET(SC_RDX); + SC_OFFSET(SC_RSI); + SC_OFFSET(SC_RDI); + SC_OFFSET(SC_RBP); + SC_OFFSET(SC_RAX); + SC_OFFSET(SC_R8); + SC_OFFSET(SC_R9); + SC_OFFSET(SC_R10); + SC_OFFSET(SC_R11); + SC_OFFSET(SC_R12); + SC_OFFSET(SC_R13); + SC_OFFSET(SC_R14); + SC_OFFSET(SC_R15); + SC_OFFSET(SC_IP); + SC_OFFSET(SC_SP); + SC_OFFSET(SC_CR2); + SC_OFFSET(SC_ERR); + SC_OFFSET(SC_TRAPNO); + SC_OFFSET(SC_CS); + SC_OFFSET(SC_FS); + SC_OFFSET(SC_GS); + SC_OFFSET(SC_EFLAGS); + SC_OFFSET(SC_SIGMASK); #if 0 - SC_OFFSET("SC_ORIG_RAX", orig_rax); - SC_OFFSET("SC_DS", ds); - SC_OFFSET("SC_ES", es); - SC_OFFSET("SC_SS", ss); + SC_OFFSET(SC_ORIG_RAX); + SC_OFFSET(SC_DS); + SC_OFFSET(SC_ES); + SC_OFFSET(SC_SS); #endif return(0); } diff --git a/arch/um/sys-x86_64/util/mk_thread.c b/arch/um/sys-x86_64/util/mk_thread.c new file mode 100644 index 0000000..1551739 --- /dev/null +++ b/arch/um/sys-x86_64/util/mk_thread.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_thread\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_THREAD_H\n"); + printf("#define __UM_THREAD_H\n"); + printf("\n"); +#ifdef TASK_EXTERN_PID + printf("#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[%d]))\n", + TASK_EXTERN_PID); +#endif + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/sys-x86_64/util/mk_thread_kern.c b/arch/um/sys-x86_64/util/mk_thread_kern.c deleted file mode 100644 index a281673..0000000 --- a/arch/um/sys-x86_64/util/mk_thread_kern.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "linux/config.h" -#include "linux/stddef.h" -#include "linux/sched.h" - -extern void print_head(void); -extern void print_constant_ptr(char *name, int value); -extern void print_constant(char *name, char *type, int value); -extern void print_tail(void); - -#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) - -int main(int argc, char **argv) -{ - print_head(); -#ifdef CONFIG_MODE_TT - print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -#endif - print_tail(); - return(0); -} - diff --git a/arch/um/sys-x86_64/util/mk_thread_user.c b/arch/um/sys-x86_64/util/mk_thread_user.c deleted file mode 100644 index 7989725..0000000 --- a/arch/um/sys-x86_64/util/mk_thread_user.c +++ /dev/null @@ -1,30 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_thread\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_THREAD_H\n"); - printf("#define __UM_THREAD_H\n"); - printf("\n"); -} - -void print_constant_ptr(char *name, int value) -{ - printf("#define %s(task) ((unsigned long *) " - "&(((char *) (task))[%d]))\n", name, value); -} - -void print_constant(char *name, char *type, int value) -{ - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, - value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/util/Makefile b/arch/um/util/Makefile index e2ab712..4c7551c 100644 --- a/arch/um/util/Makefile +++ b/arch/um/util/Makefile @@ -1,8 +1,5 @@ hostprogs-y := mk_task mk_constants always := $(hostprogs-y) -mk_task-objs := mk_task_user.o mk_task_kern.o -mk_constants-objs := mk_constants_user.o mk_constants_kern.o - -HOSTCFLAGS_mk_task_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_constants_kern.o := $(CFLAGS) $(CPPFLAGS) +HOSTCFLAGS_mk_task.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_constants.o := -I$(objtree)/arch/um diff --git a/arch/um/util/mk_constants.c b/arch/um/util/mk_constants.c new file mode 100644 index 0000000..ab217be --- /dev/null +++ b/arch/um/util/mk_constants.c @@ -0,0 +1,32 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +#define SHOW_INT(sym) printf("#define %s %d\n", #sym, sym) +#define SHOW_STR(sym) printf("#define %s %s\n", #sym, sym) + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_constants\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_CONSTANTS_H\n"); + printf("#define __UM_CONSTANTS_H\n"); + printf("\n"); + + SHOW_INT(UM_KERN_PAGE_SIZE); + + SHOW_STR(UM_KERN_EMERG); + SHOW_STR(UM_KERN_ALERT); + SHOW_STR(UM_KERN_CRIT); + SHOW_STR(UM_KERN_ERR); + SHOW_STR(UM_KERN_WARNING); + SHOW_STR(UM_KERN_NOTICE); + SHOW_STR(UM_KERN_INFO); + SHOW_STR(UM_KERN_DEBUG); + + SHOW_INT(UM_NSEC_PER_SEC); + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c deleted file mode 100644 index cdcb123..0000000 --- a/arch/um/util/mk_constants_kern.c +++ /dev/null @@ -1,28 +0,0 @@ -#include "linux/kernel.h" -#include "linux/stringify.h" -#include "linux/time.h" -#include "asm/page.h" - -extern void print_head(void); -extern void print_constant_str(char *name, char *value); -extern void print_constant_int(char *name, int value); -extern void print_tail(void); - -int main(int argc, char **argv) -{ - print_head(); - print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); - - print_constant_str("UM_KERN_EMERG", KERN_EMERG); - print_constant_str("UM_KERN_ALERT", KERN_ALERT); - print_constant_str("UM_KERN_CRIT", KERN_CRIT); - print_constant_str("UM_KERN_ERR", KERN_ERR); - print_constant_str("UM_KERN_WARNING", KERN_WARNING); - print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); - print_constant_str("UM_KERN_INFO", KERN_INFO); - print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); - - print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); - print_tail(); - return(0); -} diff --git a/arch/um/util/mk_constants_user.c b/arch/um/util/mk_constants_user.c deleted file mode 100644 index 8f4d7e5..0000000 --- a/arch/um/util/mk_constants_user.c +++ /dev/null @@ -1,28 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_constants\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_CONSTANTS_H\n"); - printf("#define __UM_CONSTANTS_H\n"); - printf("\n"); -} - -void print_constant_str(char *name, char *value) -{ - printf("#define %s \"%s\"\n", name, value); -} - -void print_constant_int(char *name, int value) -{ - printf("#define %s %d\n", name, value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/util/mk_task_user.c b/arch/um/util/mk_task.c index 9db849f..36c9606 100644 --- a/arch/um/util/mk_task_user.c +++ b/arch/um/util/mk_task.c @@ -1,18 +1,19 @@ #include <stdio.h> +#include <kernel-offsets.h> -void print(char *name, char *type, int offset) +void print_ptr(char *name, char *type, int offset) { - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, + printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, offset); } -void print_ptr(char *name, char *type, int offset) +void print(char *name, char *type, int offset) { - printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, + printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, offset); } -void print_head(void) +int main(int argc, char **argv) { printf("/*\n"); printf(" * Generated by mk_task\n"); @@ -21,10 +22,9 @@ void print_head(void) printf("#ifndef __TASK_H\n"); printf("#define __TASK_H\n"); printf("\n"); -} - -void print_tail(void) -{ + print_ptr("TASK_REGS", "union uml_pt_regs", TASK_REGS); + print("TASK_PID", "int", TASK_PID); printf("\n"); printf("#endif\n"); + return(0); } diff --git a/arch/um/util/mk_task_kern.c b/arch/um/util/mk_task_kern.c deleted file mode 100644 index c218103..0000000 --- a/arch/um/util/mk_task_kern.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "linux/sched.h" -#include "linux/stddef.h" - -extern void print(char *name, char *type, int offset); -extern void print_ptr(char *name, char *type, int offset); -extern void print_head(void); -extern void print_tail(void); - -int main(int argc, char **argv) -{ - print_head(); - print_ptr("TASK_REGS", "union uml_pt_regs", - offsetof(struct task_struct, thread.regs)); - print("TASK_PID", "int", offsetof(struct task_struct, pid)); - print_tail(); - return(0); -} diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 80c38c5..289f448 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -303,6 +303,21 @@ config HPET_TIMER as it is off-chip. You can find the HPET spec at <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. +config X86_PM_TIMER + bool "PM timer" + depends on ACPI + default y + help + Support the ACPI PM timer for time keeping. This is slow, + but is useful on some chipsets without HPET on systems with more + than one CPU. On a single processor or single socket multi core + system it is normally not required. + When the PM timer is active 64bit vsyscalls are disabled + and should not be enabled (/proc/sys/kernel/vsyscall64 should + not be changed). + The kernel selects the PM timer only as a last resort, so it is + useful to enable just in case. + config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y @@ -379,6 +394,11 @@ config GENERIC_IRQ_PROBE bool default y +# we have no ISA slots, but we do have ISA-style DMA. +config ISA_DMA_API + bool + default y + menu "Power management options" source kernel/power/Kconfig @@ -402,7 +422,7 @@ config PCI_DIRECT config PCI_MMCONFIG bool "Support mmconfig PCI config space access" - depends on PCI + depends on PCI && ACPI select ACPI_BOOT config UNORDERED_IO diff --git a/arch/x86_64/boot/bootsect.S b/arch/x86_64/boot/bootsect.S index bb15d40..011b7a4 100644 --- a/arch/x86_64/boot/bootsect.S +++ b/arch/x86_64/boot/bootsect.S @@ -63,7 +63,7 @@ msg_loop: jz die movb $0xe, %ah movw $7, %bx - int $0x10 + int $0x10 jmp msg_loop die: @@ -71,7 +71,7 @@ die: xorw %ax, %ax int $0x16 int $0x19 - + # int 0x19 should never return. In case it does anyway, # invoke the BIOS reset code... ljmp $0xf000,$0xfff0 diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 9ce51de..569595b 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-bk7 -# Sat Mar 12 23:43:44 2005 +# Linux kernel version: 2.6.12-rc4 +# Fri May 13 06:39:11 2005 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -11,8 +11,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_CMPXCHG=y CONFIG_EARLY_PRINTK=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y @@ -22,6 +20,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -33,7 +32,6 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=18 # CONFIG_HOTPLUG is not set CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y @@ -43,10 +41,11 @@ CONFIG_IKCONFIG_PROC=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 @@ -93,6 +92,9 @@ CONFIG_DISCONTIGMEM=y CONFIG_NUMA=y CONFIG_HAVE_DEC_LOCK=y CONFIG_NR_CPUS=8 +CONFIG_HPET_TIMER=y +CONFIG_X86_PM_TIMER=y +CONFIG_HPET_EMULATE_RTC=y CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y @@ -100,6 +102,7 @@ CONFIG_X86_MCE_INTEL=y CONFIG_SECCOMP=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_ISA_DMA_API=y # # Power management options @@ -129,7 +132,7 @@ CONFIG_ACPI_NUMA=y # CONFIG_ACPI_IBM is not set CONFIG_ACPI_TOSHIBA=y CONFIG_ACPI_BLACKLIST_YEAR=2001 -CONFIG_ACPI_DEBUG=y +# CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_BUS=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y @@ -141,6 +144,7 @@ CONFIG_ACPI_SYSTEM=y # CPU Frequency scaling # CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_STAT_DETAILS is not set @@ -150,7 +154,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_TABLE=y # # CPUFreq processor drivers @@ -164,6 +167,7 @@ CONFIG_X86_ACPI_CPUFREQ=y # shared options # CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set # # Bus options (PCI etc.) @@ -172,9 +176,11 @@ CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_UNORDERED_IO=y +# CONFIG_PCIEPORTBUS is not set CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_NAMES is not set +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -182,10 +188,6 @@ CONFIG_PCI_MSI=y # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set @@ -254,7 +256,7 @@ CONFIG_LBD=y # IO Schedulers # CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y +# CONFIG_IOSCHED_AS is not set CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set @@ -308,7 +310,8 @@ CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_PIIX=y # CONFIG_BLK_DEV_NS87415 is not set # CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set +CONFIG_BLK_DEV_PDC202XX_NEW=y +# CONFIG_PDC202XX_FORCE is not set # CONFIG_BLK_DEV_SVWKS is not set # CONFIG_BLK_DEV_SIIMAGE is not set # CONFIG_BLK_DEV_SIS5513 is not set @@ -353,7 +356,7 @@ CONFIG_BLK_DEV_SD=y # # SCSI low-level drivers # -CONFIG_BLK_DEV_3W_XXXX_RAID=y +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set @@ -384,7 +387,6 @@ CONFIG_SCSI_SATA_VIA=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -392,7 +394,6 @@ CONFIG_SCSI_SATA_VIA=y # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y @@ -401,6 +402,7 @@ CONFIG_SCSI_QLA2XXX=y # CONFIG_SCSI_QLA2300 is not set # CONFIG_SCSI_QLA2322 is not set # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -437,7 +439,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -502,7 +503,7 @@ CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set +CONFIG_TUN=y # # ARCnet devices @@ -525,8 +526,7 @@ CONFIG_MII=y # CONFIG_HP100 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set -CONFIG_AMD8111_ETH=y -# CONFIG_AMD8111E_NAPI is not set +# CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set CONFIG_FORCEDETH=y @@ -536,7 +536,7 @@ CONFIG_FORCEDETH=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=m +CONFIG_8139CP=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set # CONFIG_8139TOO_TUNE_TWISTER is not set @@ -671,6 +671,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -696,6 +697,7 @@ CONFIG_RTC=y # CONFIG_AGP=y CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y # CONFIG_DRM is not set # CONFIG_MWAVE is not set CONFIG_RAW_DRIVER=y @@ -703,7 +705,7 @@ CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set CONFIG_HPET_MMAP=y CONFIG_MAX_RAW_DEVS=256 -CONFIG_HANGCHECK_TIMER=y +# CONFIG_HANGCHECK_TIMER is not set # # TPM devices @@ -786,6 +788,8 @@ CONFIG_SOUND_ICH=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -797,8 +801,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -826,7 +828,6 @@ CONFIG_USB_PRINTER=y # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_RW_DETECT is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set @@ -965,7 +966,7 @@ CONFIG_AUTOFS_FS=y # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set +CONFIG_JOLIET=y # CONFIG_ZISOFS is not set # CONFIG_UDF_FS is not set @@ -1092,9 +1093,10 @@ CONFIG_OPROFILE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y -# CONFIG_PRINTK_TIME is not set +CONFIG_LOG_BUF_SHIFT=18 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0a3318e..5ca4a45 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index a491f72..504e634 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -33,12 +33,10 @@ int fallback_aper_force __initdata = 0; int fix_aperture __initdata = 1; -#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16)) +/* This code runs before the PCI subsystem is initialized, so just + access the northbridge directly. */ -static struct resource aper_res = { - .name = "Aperture", - .flags = IORESOURCE_MEM, -}; +#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16)) static u32 __init allocate_aperture(void) { @@ -55,24 +53,11 @@ static u32 __init allocate_aperture(void) aper_size = (32 * 1024 * 1024) << fallback_aper_order; /* - * Aperture has to be naturally aligned. This means an 2GB - * aperture won't have much chances to find a place in the - * lower 4GB of memory. Unfortunately we cannot move it up - * because that would make the IOMMU useless. + * Aperture has to be naturally aligned. This means an 2GB aperture won't + * have much chances to find a place in the lower 4GB of memory. + * Unfortunately we cannot move it up because that would make the + * IOMMU useless. */ - - /* First try to find some free unused space */ - if (!allocate_resource(&iomem_resource, &aper_res, - aper_size, - 0, 0xffffffff, - aper_size, - NULL, NULL)) { - printk(KERN_INFO "Putting aperture at %lx-%lx\n", - aper_res.start, aper_res.end); - return aper_res.start; - } - - /* No free space found. Go on to waste some memory... */ p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { printk("Cannot allocate aperture memory hole (%p,%uK)\n", @@ -81,7 +66,7 @@ static u32 __init allocate_aperture(void) free_bootmem_node(nd0, (unsigned long)p, aper_size); return 0; } - printk("Mapping aperture over %d KB of precious RAM @ %lx\n", + printk("Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, __pa(p)); return (u32)__pa(p); } @@ -102,16 +87,10 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); return 0; } - /* Don't check the resource here because the aperture is usually - in an e820 reserved area, and we allocated these earlier. */ return 1; } -/* - * Find a PCI capability. - * This code runs before the PCI subsystem is initialized, so just - * access the northbridge directly. - */ +/* Find a PCI capability */ static __u32 __init find_cap(int num, int slot, int func, int cap) { u8 pos; @@ -276,6 +255,8 @@ void __init iommu_hole_init(void) fallback_aper_force) { printk("Your BIOS doesn't leave a aperture memory hole\n"); printk("Please enable the IOMMU option in the BIOS setup\n"); + printk("This costs you %d MB of RAM\n", + 32 << fallback_aper_order); aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7e13545..f8e6cc4 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -33,6 +33,7 @@ #include <asm/mpspec.h> #include <asm/pgalloc.h> #include <asm/mach_apic.h> +#include <asm/nmi.h> int apic_verbosity; @@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void) unsigned id; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - bitmap_empty(clustermap, NUM_APIC_CLUSTERS); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { id = bios_cpu_apicid[i]; @@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void) nr_ioapics = 0; #endif setup_boot_APIC_clock(); - + check_nmi_watchdog(); return 0; } diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 1086b5f..2881749 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -220,13 +220,18 @@ sysret_careful: jmp sysret_check /* Handle a signal */ - /* edx: work flags (arg3) */ sysret_signal: sti + testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + jz 1f + + /* Really a signal */ + /* edx: work flags (arg3) */ leaq do_notify_resume(%rip),%rax leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common +1: movl $_TIF_NEED_RESCHED,%edi jmp sysret_check /* Do syscall tracing */ @@ -484,6 +489,8 @@ retint_careful: jmp retint_check retint_signal: + testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + jz retint_swapgs sti SAVE_REST movq $-1,ORIG_RAX(%rsp) @@ -492,8 +499,8 @@ retint_signal: call do_notify_resume RESTORE_REST cli + movl $_TIF_NEED_RESCHED,%edi GET_THREAD_INFO(%rcx) - movl $_TIF_WORK_MASK,%edi jmp retint_check #ifdef CONFIG_PREEMPT diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 60be586..afd87e6 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -37,11 +37,14 @@ #include <asm/desc.h> #include <asm/proto.h> #include <asm/mach_apic.h> +#include <asm/acpi.h> #define __apicdebuginit __init int sis_apic_bug; /* not actually supported, dummy for compile */ +static int no_timer_check; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -1601,7 +1604,7 @@ static inline void check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ unmask_IO_APIC_irq(0); - if (timer_irq_works()) { + if (!no_timer_check && timer_irq_works()) { nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); @@ -1671,6 +1674,13 @@ static inline void check_timer(void) panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); } +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * * IRQ's that are handled by the PIC in the MPS IOAPIC case. @@ -1804,76 +1814,6 @@ device_initcall(ioapic_init_sysfs); #define IO_APIC_MAX_ID 0xFE -int __init io_apic_get_unique_id (int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = phys_cpu_present_map; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= IO_APIC_MAX_ID) { - apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (physid_isset(apic_id, apic_id_map)) { - - for (i = 0; i < IO_APIC_MAX_ID; i++) { - if (!physid_isset(i, apic_id_map)) - break; - } - - if (i == IO_APIC_MAX_ID) - panic("Max apic_id exceeded!\n"); - - apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - physid_set(apic_id, apic_id_map); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); - } - - apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - - int __init io_apic_get_version (int ioapic) { union IO_APIC_reg_01 reg_01; diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 4f2a852..f77f8a0 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -355,6 +355,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) *tos &= ~(TF_MASK | IF_MASK); *tos |= kprobe_old_rflags; break; + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + regs->eflags &= ~TF_MASK; + /* rip is already adjusted, no more changes required*/ + return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_rip + (*tos - copy_rip); break; diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index c2ffea8..bac195c 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -30,9 +30,12 @@ #define DEBUGP(fmt...) +#ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ } void *module_alloc(unsigned long size) @@ -51,6 +54,7 @@ void *module_alloc(unsigned long size) return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); } +#endif /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 7ec031c..61a63be 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -30,6 +30,7 @@ #include <asm/pgalloc.h> #include <asm/io_apic.h> #include <asm/proto.h> +#include <asm/acpi.h> /* Have we found an MP table */ int smp_found_config; @@ -107,6 +108,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) static void __init MP_processor_info (struct mpc_config_processor *m) { int ver; + static int found_bsp=0; if (!(m->mpc_cpuflag & CPU_ENABLED)) return; @@ -126,11 +128,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) " Processor ignored.\n", NR_CPUS); return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } num_processors++; @@ -150,7 +147,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m) ver = 0x10; } apic_version[m->mpc_apicid] = ver; - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + /* + * bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + bios_cpu_apicid[0] = m->mpc_apicid; + x86_cpu_to_apicid[0] = m->mpc_apicid; + found_bsp = 1; + } else { + bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid; + x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid; + } } static void __init MP_bus_info (struct mpc_config_bus *m) @@ -759,7 +768,7 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 61de0b3..31c0f2e 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -33,6 +33,7 @@ #include <asm/msr.h> #include <asm/proto.h> #include <asm/kdebug.h> +#include <asm/local.h> /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -59,7 +60,8 @@ int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; static unsigned int nmi_hz = HZ; -unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_p4_cccr_val; /* Note that these events don't tick when the CPU idles. This means the frequency varies with CPU load. */ @@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED +#define MSR_P4_MISC_ENABLE 0x1A0 +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) +#define MSR_P4_PERFCTR0 0x300 +#define MSR_P4_CCCR0 0x360 +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. [IA32-Vol3, Section 14.9.9] */ +#define MSR_P4_IQ_COUNTER0 0x30C +#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) +#define P4_NMI_IQ_CCCR0 \ + (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) + +static __init inline int nmi_known_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return boot_cpu_data.x86 == 15; + case X86_VENDOR_INTEL: + return boot_cpu_data.x86 == 15; + } + return 0; +} /* Run after command line and cpu_init init, but before all other checks */ void __init nmi_watchdog_default(void) { if (nmi_watchdog != NMI_DEFAULT) return; - - /* For some reason the IO APIC watchdog doesn't work on the AMD - 8111 chipset. For now switch to local APIC mode using - perfctr0 there. On Intel CPUs we don't have code to handle - the perfctr and the IO-APIC seems to work, so use that. */ - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - nmi_watchdog = NMI_LOCAL_APIC; - printk(KERN_INFO - "Using local APIC NMI watchdog using perfctr0\n"); - } else { - printk(KERN_INFO "Using IO APIC NMI watchdog\n"); + if (nmi_known_cpu()) + nmi_watchdog = NMI_LOCAL_APIC; + else nmi_watchdog = NMI_IO_APIC; - } } -/* Why is there no CPUID flag for this? */ -static __init int cpu_has_lapic(void) +#ifdef CONFIG_SMP +/* The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - case X86_VENDOR_AMD: - return boot_cpu_data.x86 >= 6; - /* .... add more cpus here or find a different way to figure this out. */ - default: - return 0; - } + volatile int *endflag = data; + local_irq_enable(); + /* Intentionally don't use cpu_relax here. This is + to make sure that the performance counter really ticks, + even if there is a simulator or similar that catches the + pause instruction. On a real HT machine this is fine because + all other CPUs are busy with "useless" delay loops and don't + care if they get somewhat less cycles. */ + while (*endflag == 0) + barrier(); } +#endif -static int __init check_nmi_watchdog (void) +int __init check_nmi_watchdog (void) { - int counts[NR_CPUS]; + volatile int endflag = 0; + int *counts; int cpu; - if (nmi_watchdog == NMI_NONE) - return 0; + counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!counts) + return -1; - if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { - nmi_watchdog = NMI_NONE; - return -1; - } + printk(KERN_INFO "testing NMI watchdog ... "); - printk(KERN_INFO "Testing NMI watchdog ... "); + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; @@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void) mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu)) + continue; if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck (%d)!\n", + endflag = 1; + printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", cpu, + counts[cpu], cpu_pda[cpu].__nmi_count); nmi_active = 0; lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; + nmi_perfctr_msr = 0; + kfree(counts); return -1; } } + endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = 1; + kfree(counts); return 0; } -/* Have this called later during boot so counters are updating */ -late_initcall(check_nmi_watchdog); int __init setup_nmi_watchdog(char *str) { @@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str) if (nmi >= NMI_INVALID) return 0; - nmi_watchdog = nmi; + nmi_watchdog = nmi; return 1; } @@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_K7_EVNTSEL0, 0, 0); break; case X86_VENDOR_INTEL: - wrmsr(MSR_IA32_EVNTSEL0, 0, 0); + if (boot_cpu_data.x86 == 15) { + wrmsr(MSR_P4_IQ_CCCR0, 0, 0); + wrmsr(MSR_P4_CRU_ESCR0, 0, 0); + } break; } nmi_active = -1; @@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void) static int nmi_pm_active; /* nmi_active before suspend */ -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) +static int lapic_nmi_suspend(struct sys_device *dev, u32 state) { nmi_pm_active = nmi_active; disable_lapic_nmi_watchdog(); @@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs); * Original code written by Keith Owens. */ +static void clear_msr_range(unsigned int base, unsigned int n) +{ + unsigned int i; + + for(i = 0; i < n; ++i) + wrmsr(base+i, 0, 0); +} + static void setup_k7_watchdog(void) { int i; unsigned int evntsel; - /* No check, so can start with slow frequency */ - nmi_hz = 1; - - /* XXX should check these in EFER */ - nmi_perfctr_msr = MSR_K7_PERFCTR0; for(i = 0; i < 4; ++i) { /* Simulator may not support it */ - if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) + if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { + nmi_perfctr_msr = 0; return; + } wrmsrl(MSR_K7_PERFCTR0+i, 0UL); } @@ -325,12 +367,54 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); + wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); } + +static int setup_p4_watchdog(void) +{ + unsigned int misc_enable, dummy; + + rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + + nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; + nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; +#endif + + if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) + clear_msr_range(0x3F1, 2); + /* MSR 0x3F0 seems to have a default value of 0xFC00, but current + docs doesn't fully define it, so leave it alone for now. */ + if (boot_cpu_data.x86_model >= 0x3) { + /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ + clear_msr_range(0x3A0, 26); + clear_msr_range(0x3BC, 3); + } else { + clear_msr_range(0x3A0, 31); + } + clear_msr_range(0x3C0, 6); + clear_msr_range(0x3C8, 6); + clear_msr_range(0x3E0, 2); + clear_msr_range(MSR_P4_CCCR0, 18); + clear_msr_range(MSR_P4_PERFCTR0, 18); + + wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); + Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); + wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + apic_write(APIC_LVTPC, APIC_DM_NMI); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); + return 1; +} + void setup_apic_nmi_watchdog(void) { switch (boot_cpu_data.x86_vendor) { @@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void) return; setup_k7_watchdog(); break; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 != 15) + return; + if (!setup_p4_watchdog()) + return; + break; + default: return; } @@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void) * * as these watchdog NMI IRQs are generated on every CPU, we only * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] */ -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; +static DEFINE_PER_CPU(unsigned, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog (void) { int i; /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. */ for (i = 0; i < NR_CPUS; i++) - alert_counter[i] = 0; + per_cpu(nmi_touch, i) = 1; } void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { - int sum, cpu; + int sum; + int touched = 0; - cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); - if (last_irq_sums[cpu] == sum) { + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) { + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { - alert_counter[cpu] = 0; + local_set(&__get_cpu_var(alert_counter), 0); return; } die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); } } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); } - if (nmi_perfctr_msr) + if (nmi_perfctr_msr) { + if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); + apic_write(APIC_LVTPC, APIC_DM_NMI); + } wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + } } static int dummy_nmi_callback(struct pt_regs * regs, int cpu) diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c new file mode 100644 index 0000000..feb5f10 --- /dev/null +++ b/arch/x86_64/kernel/pmtimer.c @@ -0,0 +1,101 @@ +/* Ported over from i386 by AK, original copyright was: + * + * (C) Dominik Brodowski <linux@brodo.de> 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + * + * Dropped all the hardware bug workarounds for now. Hopefully they + * are not needed on 64bit chipsets. + */ + +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/cpumask.h> +#include <asm/io.h> +#include <asm/proto.h> +#include <asm/msr.h> +#include <asm/vsyscall.h> + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/kernel/acpi/boot.c */ +u32 pmtmr_ioport; + +/* value of the Power timer at last timer interrupt */ +static u32 offset_delay; +static u32 last_pmtmr_tick; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +int pmtimer_mark_offset(void) +{ + static int first_run = 1; + unsigned long tsc; + u32 lost; + + u32 tick = inl(pmtmr_ioport); + u32 delta; + + delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); + + last_pmtmr_tick = tick; + monotonic_base += delta * NSEC_PER_USEC; + + delta += offset_delay; + + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + rdtscll(tsc); + vxtime.last_tsc = tsc - offset_delay * cpu_khz; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } + + return lost - 1; +} + +unsigned int do_gettimeoffset_pm(void) +{ + u32 now, offset, delta = 0; + + offset = last_pmtmr_tick; + now = inl(pmtmr_ioport); + delta = (now - offset) & ACPI_PM_MASK; + + return offset_delay + cyc2us(delta); +} + + +static int __init nopmtimer_setup(char *s) +{ + pmtmr_ioport = 0; + return 0; +} + +__setup("nopmtimer", nopmtimer_setup); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index c64b9c9..525f6a1 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -257,13 +257,13 @@ static int putreg(struct task_struct *child, value &= 0xffff; return 0; case offsetof(struct user_regs_struct,fs_base): - if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) - return -EIO; + if (value >= TASK_SIZE) + return -EIO; child->thread.fs = value; return 0; case offsetof(struct user_regs_struct,gs_base): - if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) - return -EIO; + if (value >= TASK_SIZE) + return -EIO; child->thread.gs = value; return 0; case offsetof(struct user_regs_struct, eflags): @@ -277,6 +277,11 @@ static int putreg(struct task_struct *child, return -EIO; value &= 0xffff; break; + case offsetof(struct user_regs_struct, rip): + /* Check if the new RIP address is canonical */ + if (value >= TASK_SIZE) + return -EIO; + break; } put_stack_long(child, regno - sizeof(struct pt_regs), value); return 0; @@ -375,7 +380,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; switch (addr) { - case 0 ... sizeof(struct user_regs_struct): + case 0 ... sizeof(struct user_regs_struct) - sizeof(long): tmp = getreg(child, addr); break; case offsetof(struct user, u_debugreg[0]): @@ -420,7 +425,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; switch (addr) { - case 0 ... sizeof(struct user_regs_struct): + case 0 ... sizeof(struct user_regs_struct) - sizeof(long): ret = putreg(child, addr, data); break; /* Disallows to set a breakpoint into the vsyscall */ @@ -635,20 +640,29 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) /* do the secure computing check first */ secure_computing(regs->orig_rax); - if (unlikely(current->audit_context)) - audit_syscall_entry(current, regs->orig_rax, - regs->rdi, regs->rsi, - regs->rdx, regs->r10); - if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); + + if (unlikely(current->audit_context)) { + if (test_thread_flag(TIF_IA32)) { + audit_syscall_entry(current, AUDIT_ARCH_I386, + regs->orig_rax, + regs->rbx, regs->rcx, + regs->rdx, regs->rsi); + } else { + audit_syscall_entry(current, AUDIT_ARCH_X86_64, + regs->orig_rax, + regs->rdi, regs->rsi, + regs->rdx, regs->r10); + } + } } asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs->rax); + audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 2129cf9..99f038e 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -719,7 +719,6 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_SMP /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. * Assumes number of cores is a power of two. @@ -727,17 +726,26 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c) static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - int cpu = c->x86_apicid; + int cpu = smp_processor_id(); int node = 0; + unsigned bits; if (c->x86_num_cores == 1) return; - cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); + + bits = 0; + while ((1 << bits) < c->x86_num_cores) + bits++; + + /* Low order bits define the core id (index of core in socket) */ + cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); + /* Convert the APIC ID into the socket ID */ + phys_proc_id[cpu] >>= bits; #ifdef CONFIG_NUMA /* When an ACPI SRAT table is available use the mappings from SRAT instead. */ if (acpi_numa <= 0) { - node = cpu_core_id[cpu]; + node = phys_proc_id[cpu]; if (!node_online(node)) node = first_node(node_online_map); cpu_to_node[cpu] = node; @@ -745,15 +753,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) node = cpu_to_node[cpu]; } #endif + printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif } -#else -static void __init amd_detect_cmp(struct cpuinfo_x86 *c) -{ -} -#endif static int __init init_amd(struct cpuinfo_x86 *c) { @@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) c->x86_clflush_size = 64; c->x86_cache_alignment = c->x86_clflush_size; c->x86_num_cores = 1; - c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data; c->extended_cpuid_level = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -954,11 +957,14 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) } if (c->x86_capability[0] & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_apicid = misc >> 24; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } + +#ifdef CONFIG_SMP + phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; +#endif } /* @@ -1088,7 +1094,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Other (Linux-defined) */ - "cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+", + "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index d439ced..429c026 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -28,6 +28,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/proto.h> +#include <asm/ia32_unistd.h> /* #define DEBUG_SIG 1 */ @@ -452,7 +453,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) regs->rip -= 2; } if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { - regs->rax = __NR_restart_syscall; + regs->rax = test_thread_flag(TIF_IA32) ? + __NR_ia32_restart_syscall : + __NR_restart_syscall; regs->rip -= 2; } } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 73f7e8b..f1ec0f3 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -56,6 +56,7 @@ #include <asm/kdebug.h> #include <asm/tlbflush.h> #include <asm/proto.h> +#include <asm/nmi.h> /* Change for real CPU hotplug. Note other files need to be fixed first too. */ @@ -93,6 +94,7 @@ int smp_threads_ready; cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_core_map); /* * Trampoline 80x86 program as an array. @@ -125,96 +127,210 @@ static void __cpuinit smp_store_cpu_info(int id) *c = boot_cpu_data; identify_cpu(c); + print_cpu_info(c); } /* - * Synchronize TSCs of CPUs + * New Funky TSC sync algorithm borrowed from IA64. + * Main advantage is that it doesn't reset the TSCs fully and + * in general looks more robust and it works better than my earlier + * attempts. I believe it was written by David Mosberger. Some minor + * adjustments for x86-64 by me -AK * - * This new algorithm is less accurate than the old "zero TSCs" - * one, but we cannot zero TSCs anymore in the new hotplug CPU - * model. + * Original comment reproduced below. + * + * Synchronize TSC of the current (slave) CPU with the TSC of the + * MASTER CPU (normally the time-keeper CPU). We use a closed loop to + * eliminate the possibility of unaccounted-for errors (such as + * getting a machine check in the middle of a calibration step). The + * basic idea is for the slave to ask the master what itc value it has + * and to read its own itc before and after the master responds. Each + * iteration gives us three timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's TSC such that tm falls exactly + * half-way between t0 and t1. If we achieve this, the clocks are + * synchronized provided the interconnect between the slave and the + * master is symmetric. Even if the interconnect were asymmetric, we + * would still know that the synchronization error is smaller than the + * roundtrip latency (t0 - t1). + * + * When the interconnect is quiet and symmetric, this lets us + * synchronize the TSC to within one or two cycles. However, we can + * only *guarantee* that the synchronization is accurate to within a + * round-trip time, which is typically in the range of several hundred + * cycles (e.g., ~500 cycles). In practice, this means that the TSCs + * are usually almost perfectly synchronized, but we shouldn't assume + * that the accuracy is much better than half a micro second or so. + * + * [there are other errors like the latency of RDTSC and of the + * WRMSR. These can also account to hundreds of cycles. So it's + * probably worse. It claims 153 cycles error on a dual Opteron, + * but I suspect the numbers are actually somewhat worse -AK] */ -static atomic_t __cpuinitdata tsc_flag; +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/8) + +/* Intentionally don't use cpu_relax() while TSC synchronization + because we don't want to go into funky power save modi or cause + hypervisors to schedule us away. Going to sleep would likely affect + latency and low latency is the primary objective here. -AK */ +#define no_cpu_relax() barrier() + static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); -static unsigned long long __cpuinitdata bp_tsc, ap_tsc; +static volatile __cpuinitdata unsigned long go[SLAVE + 1]; +static int notscsync __cpuinitdata; + +#undef DEBUG_TSC_SYNC -#define NR_LOOPS 5 +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ -static void __cpuinit sync_tsc_bp_init(int init) +/* Callback on boot CPU */ +static __cpuinit void sync_master(void *arg) { - if (init) - _raw_spin_lock(&tsc_sync_lock); - else - _raw_spin_unlock(&tsc_sync_lock); - atomic_set(&tsc_flag, 0); + unsigned long flags, i; + + if (smp_processor_id() != boot_cpu_id) + return; + + go[MASTER] = 0; + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { + while (!go[MASTER]) + no_cpu_relax(); + go[MASTER] = 0; + rdtscll(go[SLAVE]); + } + } + local_irq_restore(flags); } /* - * Synchronize TSC on AP with BP. + * Return the number of cycles by which our tsc differs from the tsc + * on the master (time-keeper) CPU. A positive number indicates our + * tsc is ahead of the master, negative that it is behind. */ -static void __cpuinit __sync_tsc_ap(void) +static inline long +get_delta(long *rt, long *master) { - if (!cpu_has_tsc) - return; - Dprintk("AP %d syncing TSC\n", smp_processor_id()); + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + int i; - while (atomic_read(&tsc_flag) != 0) - cpu_relax(); - atomic_inc(&tsc_flag); - mb(); - _raw_spin_lock(&tsc_sync_lock); - wrmsrl(MSR_IA32_TSC, bp_tsc); - _raw_spin_unlock(&tsc_sync_lock); - rdtscll(ap_tsc); - mb(); - atomic_inc(&tsc_flag); - mb(); + for (i = 0; i < NUM_ITERS; ++i) { + rdtscll(t0); + go[MASTER] = 1; + while (!(tm = go[SLAVE])) + no_cpu_relax(); + go[SLAVE] = 0; + rdtscll(t1); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; } -static void __cpuinit sync_tsc_ap(void) +static __cpuinit void sync_tsc(void) { - int i; - for (i = 0; i < NR_LOOPS; i++) - __sync_tsc_ap(); + int i, done = 0; + long delta, adj, adjust_latency = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_TSC_SYNC + static struct syncdebug { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of tsc adjustment latency */ + } t[NUM_ROUNDS] __cpuinitdata; +#endif + + go[MASTER] = 1; + + smp_call_function(sync_master, NULL, 1, 0); + + while (go[MASTER]) /* wait for master to be ready */ + no_cpu_relax(); + + spin_lock_irqsave(&tsc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + unsigned long t; + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + rdtscll(t); + wrmsrl(MSR_IA32_TSC, t + adj); + } +#if DEBUG_TSC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + spin_unlock_irqrestore(&tsc_sync_lock, flags); + +#if DEBUG_TSC_SYNC + for (i = 0; i < NUM_ROUNDS; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO + "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " + "maxerr %lu cycles)\n", + smp_processor_id(), boot_cpu_id, delta, rt); } -/* - * Synchronize TSC from BP to AP. - */ -static void __cpuinit __sync_tsc_bp(int cpu) +static void __cpuinit tsc_sync_wait(void) { - if (!cpu_has_tsc) + if (notscsync || !cpu_has_tsc) return; - - /* Wait for AP */ - while (atomic_read(&tsc_flag) == 0) - cpu_relax(); - /* Save BPs TSC */ - sync_core(); - rdtscll(bp_tsc); - /* Don't do the sync core here to avoid too much latency. */ - mb(); - /* Start the AP */ - _raw_spin_unlock(&tsc_sync_lock); - /* Wait for AP again */ - while (atomic_read(&tsc_flag) < 2) - cpu_relax(); - rdtscl(bp_tsc); - barrier(); + printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(), + boot_cpu_id); + sync_tsc(); } -static void __cpuinit sync_tsc_bp(int cpu) +static __init int notscsync_setup(char *s) { - int i; - for (i = 0; i < NR_LOOPS - 1; i++) { - __sync_tsc_bp(cpu); - sync_tsc_bp_init(1); - } - __sync_tsc_bp(cpu); - printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n", - cpu, ap_tsc - bp_tsc); + notscsync = 1; + return 0; } +__setup("notscsync", notscsync_setup); static atomic_t init_deasserted __cpuinitdata; @@ -315,11 +431,6 @@ void __cpuinit start_secondary(void) cpu_init(); smp_callin(); - /* - * Synchronize the TSC with the BP - */ - sync_tsc_ap(); - /* otherwise gcc will move up the smp_processor_id before the cpu_init */ barrier(); @@ -334,7 +445,6 @@ void __cpuinit start_secondary(void) enable_8259A_irq(0); } - enable_APIC_timer(); /* @@ -343,6 +453,11 @@ void __cpuinit start_secondary(void) cpu_set(smp_processor_id(), cpu_online_map); mb(); + /* Wait for TSC sync to not schedule things before. + We still process interrupts, which could see an inconsistent + time in that window unfortunately. */ + tsc_sync_wait(); + cpu_idle(); } @@ -531,7 +646,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) printk("failed fork for CPU %d\n", cpu); return PTR_ERR(idle); } - x86_cpu_to_apicid[cpu] = apicid; cpu_pda[cpu].pcurrent = idle; @@ -600,8 +714,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) if (cpu_isset(cpu, cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - print_cpu_info(&cpu_data[cpu]); Dprintk("CPU has booted.\n"); } else { boot_error = 1; @@ -842,7 +954,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus) GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); /* Or can we switch back to PIC here? */ } - x86_cpu_to_apicid[0] = boot_cpu_id; /* * Now start the IO-APICs @@ -889,18 +1000,14 @@ int __cpuinit __cpu_up(unsigned int cpu) printk("__cpu_up: bad cpu %d\n", cpu); return -EINVAL; } - sync_tsc_bp_init(1); /* Boot it! */ err = do_boot_cpu(cpu, apicid); if (err < 0) { - sync_tsc_bp_init(0); Dprintk("do_boot_cpu failed %d\n", err); return err; } - sync_tsc_bp(cpu); - /* Unleash the CPU! */ Dprintk("waiting for cpu %d\n", cpu); @@ -923,4 +1030,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus) detect_siblings(); time_init_gtod(); + + check_nmi_watchdog(); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 0e3b60e..fb8c809 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -26,6 +26,10 @@ #include <linux/sysdev.h> #include <linux/bcd.h> #include <linux/kallsyms.h> +#include <linux/acpi.h> +#ifdef CONFIG_ACPI +#include <acpi/achware.h> /* for PM timer frequency */ +#endif #include <asm/8253pit.h> #include <asm/pgtable.h> #include <asm/vsyscall.h> @@ -396,6 +400,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; vxtime.last = offset; +#ifdef CONFIG_X86_PM_TIMER + } else if (vxtime.mode == VXTIME_PMTMR) { + lost = pmtimer_mark_offset(); +#endif } else { offset = (((tsc - vxtime.last_tsc) * vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); @@ -898,6 +906,13 @@ void __init time_init(void) hpet_period; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; +#ifdef CONFIG_X86_PM_TIMER + } else if (pmtmr_ioport) { + vxtime_hz = PM_TIMER_FREQUENCY; + timename = "PM"; + pit_init(); + cpu_khz = pit_calibrate_tsc(); +#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); @@ -923,35 +938,50 @@ void __init time_init(void) } /* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +static __init int unsynchronized_tsc(void) +{ +#ifdef CONFIG_SMP + if (oem_force_hpet_timer()) + return 1; + /* Intel systems are normally all synchronized. Exceptions + are handled in the OEM check above. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return 0; + /* All in a single socket - should be synchronized */ + if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) + return 0; +#endif + /* Assume multi socket systems are not synchronized */ + return num_online_cpus() > 1; +} + +/* * Decide after all CPUs are booted what mode gettimeofday should use. */ void __init time_init_gtod(void) { char *timetype; - /* - * AMD systems with more than one CPU don't have fully synchronized - * TSCs. Always use HPET gettimeofday for these, although it is slower. - * Intel SMP systems usually have synchronized TSCs, so use always - * the TSC. - * - * Exceptions: - * IBM Summit2 checked by oem_force_hpet_timer(). - * AMD dual core may also not need HPET. Check me. - * - * Can be turned off with "notsc". - */ - if (num_online_cpus() > 1 && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - notsc = 1; - /* Some systems will want to disable TSC and use HPET. */ - if (oem_force_hpet_timer()) + if (unsynchronized_tsc()) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = "HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; +#ifdef CONFIG_X86_PM_TIMER + /* Using PM for gettimeofday is quite slow, but we have no other + choice because the TSC is too unreliable on some systems. */ + } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { + timetype = "PM"; + do_gettimeoffset = do_gettimeoffset_pm; + vxtime.mode = VXTIME_PMTMR; + sysctl_vsyscall = 0; + printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); +#endif } else { timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 65a37f5..3dfec8f 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -332,10 +332,12 @@ void handle_BUG(struct pt_regs *regs) printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line); } +#ifdef CONFIG_BUG void out_of_line_bug(void) { BUG(); } +#endif static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b4b8dc5..2e57344 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) usec = (__xtime.tv_nsec / 1000) + (__jiffies - __wall_jiffies) * (1000000 / HZ); - if (__vxtime.mode == VXTIME_TSC) { + if (__vxtime.mode != VXTIME_HPET) { sync_core(); rdtscll(t); if (t < __vxtime.last_tsc) @@ -217,8 +217,9 @@ static int __init vsyscall_init(void) BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); - sysctl_vsyscall = 1; +#ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); +#endif return 0; } diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 88626e6..68ec030 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -139,35 +139,23 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memmove #undef memchr #undef strlen -#undef strcpy #undef strncmp #undef strncpy #undef strchr -#undef strcmp -#undef strcpy -#undef strcat -#undef memcmp extern void * memset(void *,int,__kernel_size_t); extern size_t strlen(const char *); extern void * memmove(void * dest,const void *src,size_t count); -extern char * strcpy(char * dest,const char *src); -extern int strcmp(const char * cs,const char * ct); extern void *memchr(const void *s, int c, size_t n); extern void * memcpy(void *,const void *,__kernel_size_t); extern void * __memcpy(void *,const void *,__kernel_size_t); -extern char * strcat(char *, const char *); -extern int memcmp(const void * cs,const void * ct,size_t count); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strcat); EXPORT_SYMBOL(strncat); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(strrchr); @@ -175,7 +163,6 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); -EXPORT_SYMBOL(memcmp); #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM /* prototypes are wrong, these are assembly with custom calling functions */ @@ -206,8 +193,9 @@ EXPORT_SYMBOL(smp_num_siblings); extern void do_softirq_thunk(void); EXPORT_SYMBOL(do_softirq_thunk); -void out_of_line_bug(void); +#ifdef CONFIG_BUG EXPORT_SYMBOL(out_of_line_bug); +#endif EXPORT_SYMBOL(init_level4_pgt); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index e033092..5d6b211 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -234,6 +234,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, /* * Handle a fault on the vmalloc or module mapping area + * + * This assumes no large pages in there. */ static int vmalloc_fault(unsigned long address) { @@ -272,7 +274,10 @@ static int vmalloc_fault(unsigned long address) if (!pte_present(*pte_ref)) return -1; pte = pte_offset_kernel(pmd, address); - if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref)) + /* Don't use pte_page here, because the mappings can point + outside mem_map, and the NUMA hash lookup cannot handle + that. */ + if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) BUG(); __flush_tlb_all(); return 0; @@ -346,7 +351,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) * protection error (error_code & 1) == 0. */ if (unlikely(address >= TASK_SIZE)) { - if (!(error_code & 5)) { + if (!(error_code & 5) && + ((address >= VMALLOC_START && address < VMALLOC_END) || + (address >= MODULES_VADDR && address < MODULES_END))) { if (vmalloc_fault(address) < 0) goto bad_area_nosemaphore; return; diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 74ec855..58aac23 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -133,7 +133,7 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size, unsigned long flags) { int err = 0; - if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { + if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; unsigned long vaddr = (unsigned long) __va(phys_addr); @@ -214,7 +214,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); return NULL; } - if (ioremap_change_attr(phys_addr, size, flags) < 0) { + if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) { area->flags &= 0xffffff; vunmap(addr); return NULL; @@ -251,7 +251,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) void iounmap(volatile void __iomem *addr) { - struct vm_struct *p, **pprev; + struct vm_struct *p; if (addr <= high_memory) return; @@ -260,24 +260,11 @@ void iounmap(volatile void __iomem *addr) return; write_lock(&vmlist_lock); - for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev) - if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr)) - break; - if (!p) { - printk("__iounmap: bad address %p\n", addr); - goto out_unlock; - } - *pprev = p->next; - unmap_vm_area(p); - if ((p->flags >> 20) && - p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { - /* p->size includes the guard page, but cpa doesn't like that */ - change_page_attr(virt_to_page(__va(p->phys_addr)), - p->size >> PAGE_SHIFT, - PAGE_KERNEL); - global_flush_tlb(); - } -out_unlock: + p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); + if (!p) + printk("iounmap: bad address %p\n", addr); + else if (p->flags >> 20) + ioremap_change_attr(p->phys_addr, p->size, 0); write_unlock(&vmlist_lock); kfree(p); } |