diff options
Diffstat (limited to 'arch/i386')
88 files changed, 1292 insertions, 1087 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6004bb0..5b1a7d4 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -29,10 +29,6 @@ config MMU config SBUS bool -config UID16 - bool - default y - config GENERIC_ISA_DMA bool default y @@ -45,6 +41,10 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config DMI + bool + default y + source "init/Kconfig" menu "Processor type and features" @@ -442,12 +442,50 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" + depends on X86_CMPXCHG64 help Select this if you have a 32-bit processor and more than 4 gigabytes of physical RAM. endchoice +choice + depends on EXPERIMENTAL && !X86_PAE + prompt "Memory split" + default VMSPLIT_3G + help + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x78000000 if VMSPLIT_2G + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + config HIGHMEM bool depends on HIGHMEM64G || HIGHMEM4G @@ -464,7 +502,6 @@ config NUMA depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) - select SPARSEMEM_STATIC # Need comments to help the hapless user trying to turn on NUMA support comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" @@ -493,6 +530,10 @@ config HAVE_ARCH_ALLOC_REMAP depends on NUMA default y +config ARCH_FLATMEM_ENABLE + def_bool y + depends on (ARCH_SELECT_MEMORY_MODEL && X86_PC) + config ARCH_DISCONTIGMEM_ENABLE def_bool y depends on NUMA @@ -503,7 +544,8 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on NUMA + depends on (NUMA || (X86_PC && EXPERIMENTAL)) + select SPARSEMEM_STATIC config ARCH_SELECT_MEMORY_MODEL def_bool y @@ -626,10 +668,6 @@ config REGPARM and passes the first three arguments of a function call in registers. This will probably break binary only modules. - This feature is only enabled for gcc-3.0 and later - earlier compilers - generate incorrect output with certain kernel constructs when - -mregparm=3 is used. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS @@ -649,17 +687,6 @@ config SECCOMP source kernel/Kconfig.hz -config PHYSICAL_START - hex "Physical address where the kernel is loaded" if EMBEDDED - default "0x100000" - help - This gives the physical address where the kernel is loaded. - Primarily used in the case of kexec on panic where the - fail safe kernel needs to run at a different address than - the panic-ed kernel. - - Don't change this unless you know what you are doing. - config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on EXPERIMENTAL @@ -679,11 +706,49 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps (EXPERIMENTAL)" - depends on EMBEDDED depends on EXPERIMENTAL depends on HIGHMEM help Generate crash dump after being started by kexec. + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + + default "0x1000000" if CRASH_DUMP + default "0x100000" + help + This gives the physical address where the kernel is loaded. Normally + for regular kernels this value is 0x100000 (1MB). But in the case + of kexec on panic the fail safe kernel needs to run at a different + address than the panic-ed kernel. This option is used to set the load + address for kernels used to capture crash dump on being kexec'ed + after panic. The default value for crash dump kernels is + 0x1000000 (16MB). This can also be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Don't change this unless you know what you are doing. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + + Say N. + +config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EMBEDDED + help + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k and might cause you much additional grey + hair. + endmenu @@ -699,7 +764,7 @@ depends on PM && !X86_VISWS config APM tristate "APM (Advanced Power Management) BIOS support" - depends on PM && PM_LEGACY + depends on PM ---help--- APM is a BIOS specification for saving power using several different techniques. This is mostly useful for battery powered laptops with @@ -970,15 +1035,6 @@ config SCx200 This support is also available as a module. If compiled as a module, it will be called scx200. -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL - ---help--- - Say Y here to experiment with turning CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu. - - Say N. - source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" @@ -1004,6 +1060,7 @@ source "arch/i386/oprofile/Kconfig" config KPROBES bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes @@ -1055,3 +1112,7 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y + +config KTIME_SCALAR + bool + default y diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 53bbb3c..79603b3 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -39,6 +39,7 @@ config M386 - "Winchip-2" for IDT Winchip 2. - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" For AMD Geode GX and LX processors. - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). @@ -171,6 +172,11 @@ config MGEODEGX1 help Select this for a Geode GX1 (Cyrix MediaGX) chip. +config MGEODE_LX + bool "Geode GX/LX" + help + Select this for AMD Geode GX and LX processors. + config MCYRIXIII bool "CyrixIII/VIA-C3" help @@ -220,8 +226,8 @@ config X86_XADD config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC - default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK @@ -290,12 +296,12 @@ config X86_INTEL_USERCOPY config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX default y config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on MCYRIXIII || MK7 || MGEODE_LX default y config X86_OOSTORE diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index c48b424..bf32ecc 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -42,6 +42,16 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + depends on DEBUG_KERNEL + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const + data. This option may have a slight performance impact because a + portion of the kernel code won't be covered by a 2MB TLB anymore. + If in doubt, say "N". + config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on DEBUG_KERNEL diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d121ea1..36bef65 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -37,14 +37,11 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu -# -mregparm=3 works ok on gcc-3.0 and later -# -GCC_VERSION := $(call cc-version) -cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) +cflags-$(CONFIG_REGPARM) += -mregparm=3 -# Disable unit-at-a-time mode, it makes gcc use a lot more stack -# due to the lack of sharing of stacklots. -CFLAGS += $(call cc-option,-fno-unit-at-a-time) +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots: +CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) CFLAGS += $(cflags-y) @@ -103,7 +100,7 @@ AFLAGS += $(mflags-y) boot := arch/i386/boot .PHONY: zImage bzImage compressed zlilo bzlilo \ - zdisk bzdisk fdimage fdimage144 fdimage288 install kernel_install + zdisk bzdisk fdimage fdimage144 fdimage288 install all: bzImage @@ -125,8 +122,7 @@ zdisk bzdisk: vmlinux fdimage fdimage144 fdimage288: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ -install: vmlinux -install kernel_install: +install: $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install archclean: diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index 8e51456..dcd936e 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -1,7 +1,7 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. -#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95. +#-mtune exists since gcc 3.4 HAS_MTUNE := $(call cc-option-yn, -mtune=i386) ifeq ($(HAS_MTUNE),y) tune = $(call cc-option,-mtune=$(1),) @@ -14,7 +14,7 @@ cflags-$(CONFIG_M386) += -march=i386 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx cflags-$(CONFIG_M686) += -march=i686 cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) @@ -23,8 +23,8 @@ cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -37,5 +37,5 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx diff --git a/arch/i386/boot/.gitignore b/arch/i386/boot/.gitignore new file mode 100644 index 0000000..495f20c --- /dev/null +++ b/arch/i386/boot/.gitignore @@ -0,0 +1,3 @@ +bootsect +bzImage +setup diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index 1e71382..f136752 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile @@ -100,5 +100,5 @@ zlilo: $(BOOTIMAGE) cp System.map $(INSTALL_PATH)/ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi -install: $(BOOTIMAGE) - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)" +install: + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 82a807f..f19f3a7 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -11,7 +11,7 @@ #include <linux/linkage.h> #include <linux/vmalloc.h> -#include <linux/tty.h> +#include <linux/screen_info.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh index f17b40d..5e44c73 100644 --- a/arch/i386/boot/install.sh +++ b/arch/i386/boot/install.sh @@ -19,6 +19,20 @@ # $4 - default install path (blank if root directory) # +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi diff --git a/arch/i386/boot/tools/.gitignore b/arch/i386/boot/tools/.gitignore new file mode 100644 index 0000000..378eac2 --- /dev/null +++ b/arch/i386/boot/tools/.gitignore @@ -0,0 +1 @@ +build diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 92f6694..2ac40c8 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -97,7 +97,6 @@ #define PARAM_VESAPM_OFF 0x30 #define PARAM_LFB_PAGES 0x32 #define PARAM_VESA_ATTRIB 0x34 -#define PARAM_CAPABILITIES 0x36 /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ #ifdef CONFIG_VIDEO_RETAIN @@ -234,10 +233,6 @@ mopar_gr: movw 18(%di), %ax movl %eax, %fs:(PARAM_LFB_SIZE) -# store mode capabilities - movl 10(%di), %eax - movl %eax, %fs:(PARAM_CAPABILITIES) - # switching the DAC to 8-bit is for <= 8 bpp only movw %fs:(PARAM_LFB_DEPTH), %ax cmpw $8, %ax diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S index 7b73c67..911b153 100644 --- a/arch/i386/crypto/aes-i586-asm.S +++ b/arch/i386/crypto/aes-i586-asm.S @@ -255,18 +255,17 @@ aes_enc_blk: xor 8(%ebp),%r4 xor 12(%ebp),%r5 - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - sub $10,%r3 - je 4f // 10 rounds for 128-bit key - add $32,%ebp - sub $2,%r3 - je 3f // 12 rounds for 128-bit key - add $32,%ebp - -2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 128-bit key + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key fwd_rnd2( -48(%ebp) ,ft_tab) -3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 128-bit key +3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key fwd_rnd2( -16(%ebp) ,ft_tab) 4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key fwd_rnd2( +16(%ebp) ,ft_tab) @@ -334,18 +333,17 @@ aes_dec_blk: xor 8(%ebp),%r4 xor 12(%ebp),%r5 - sub $8,%esp // space for register saves on stack - sub $16,%ebp // increment to next round key - sub $10,%r3 - je 4f // 10 rounds for 128-bit key - sub $32,%ebp - sub $2,%r3 - je 3f // 12 rounds for 128-bit key - sub $32,%ebp + sub $8,%esp // space for register saves on stack + sub $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea -32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea -32(%ebp),%ebp -2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 128-bit key +2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key inv_rnd2( +48(%ebp), it_tab) -3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 128-bit key +3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key inv_rnd2( +16(%ebp), it_tab) 4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key inv_rnd2( -16(%ebp), it_tab) diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c index 88ee85c..a50397b 100644 --- a/arch/i386/crypto/aes.c +++ b/arch/i386/crypto/aes.c @@ -36,6 +36,8 @@ * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> * */ + +#include <asm/byteorder.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -59,7 +61,6 @@ struct aes_ctx { }; #define WPOLY 0x011b -#define u32_in(x) le32_to_cpup((const __le32 *)(x)) #define bytes2word(b0, b1, b2, b3) \ (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) @@ -93,7 +94,6 @@ static u32 rcon_tab[RC_LENGTH]; u32 ft_tab[4][256]; u32 fl_tab[4][256]; -static u32 ls_tab[4][256]; static u32 im_tab[4][256]; u32 il_tab[4][256]; u32 it_tab[4][256]; @@ -144,15 +144,6 @@ static void gen_tabs(void) fl_tab[2][i] = upr(w, 2); fl_tab[3][i] = upr(w, 3); - /* - * table for key schedule if fl_tab above is - * not of the required form - */ - ls_tab[0][i] = w; - ls_tab[1][i] = upr(w, 1); - ls_tab[2][i] = upr(w, 2); - ls_tab[3][i] = upr(w, 3); - b = fi(inv_affine((u8)i)); w = bytes2word(fe(b), f9(b), fd(b), fb(b)); @@ -393,13 +384,14 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) int i; u32 ss[8]; struct aes_ctx *ctx = ctx_arg; + const __le32 *key = (const __le32 *)in_key; /* encryption schedule */ - ctx->ekey[0] = ss[0] = u32_in(in_key); - ctx->ekey[1] = ss[1] = u32_in(in_key + 4); - ctx->ekey[2] = ss[2] = u32_in(in_key + 8); - ctx->ekey[3] = ss[3] = u32_in(in_key + 12); + ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); switch(key_len) { case 16: @@ -410,8 +402,8 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) break; case 24: - ctx->ekey[4] = ss[4] = u32_in(in_key + 16); - ctx->ekey[5] = ss[5] = u32_in(in_key + 20); + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); for (i = 0; i < 7; i++) ke6(ctx->ekey, i); kel6(ctx->ekey, 7); @@ -419,10 +411,10 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) break; case 32: - ctx->ekey[4] = ss[4] = u32_in(in_key + 16); - ctx->ekey[5] = ss[5] = u32_in(in_key + 20); - ctx->ekey[6] = ss[6] = u32_in(in_key + 24); - ctx->ekey[7] = ss[7] = u32_in(in_key + 28); + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); + ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); for (i = 0; i < 6; i++) ke8(ctx->ekey, i); kel8(ctx->ekey, 6); @@ -436,10 +428,10 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) /* decryption schedule */ - ctx->dkey[0] = ss[0] = u32_in(in_key); - ctx->dkey[1] = ss[1] = u32_in(in_key + 4); - ctx->dkey[2] = ss[2] = u32_in(in_key + 8); - ctx->dkey[3] = ss[3] = u32_in(in_key + 12); + ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); switch (key_len) { case 16: @@ -450,8 +442,8 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) break; case 24: - ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16)); - ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20)); + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); kdf6(ctx->dkey, 0); for (i = 1; i < 7; i++) kd6(ctx->dkey, i); @@ -459,10 +451,10 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) break; case 32: - ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16)); - ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20)); - ctx->dkey[6] = ff(ss[6] = u32_in(in_key + 24)); - ctx->dkey[7] = ff(ss[7] = u32_in(in_key + 28)); + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); + ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); kdf8(ctx->dkey, 0); for (i = 1; i < 6; i++) kd8(ctx->dkey, i); @@ -484,6 +476,8 @@ static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src) static struct crypto_alg aes_alg = { .cra_name = "aes", + .cra_driver_name = "aes-i586", + .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aes_ctx), diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 6a431b9..3cbe6e9 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -644,6 +644,8 @@ CONFIG_8139TOO_PIO=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set # CONFIG_E1000 is not set +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set diff --git a/arch/i386/kernel/.gitignore b/arch/i386/kernel/.gitignore new file mode 100644 index 0000000..40836ad --- /dev/null +++ b/arch/i386/kernel/.gitignore @@ -0,0 +1 @@ +vsyscall.lds diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index f10de0f..65656c0 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -4,14 +4,14 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + quirks.o i8237.o topology.o obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI) += acpi/ +obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o @@ -25,6 +25,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -33,6 +34,8 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o +obj-$(CONFIG_DOUBLEFAULT) += doublefault.o +obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 267ca48..7e9ac99 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,8 +1,8 @@ -obj-y := boot.o +obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o +obj-y += cstate.o processor.o endif diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 447fa9e..f1a2194 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -108,7 +108,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return NULL; - if (phys_addr < (end_pfn_map << PAGE_SHIFT)) + if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) return __va(phys_addr); return NULL; @@ -248,10 +248,17 @@ acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) acpi_table_print_madt_entry(header); - /* Register even disabled CPUs for cpu hotplug */ - - x86_acpiid_to_apicid[processor->acpi_id] = processor->id; + /* Record local apic id only when enabled */ + if (processor->flags.enabled) + x86_acpiid_to_apicid[processor->acpi_id] = processor->id; + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. This allows us to size + * cpus_possible_map more accurately, to permit + * to not preallocating memory for all NR_CPUS + * when we use CPU hotplug. + */ mp_register_lapic(processor->id, /* APIC ID */ processor->flags.enabled); /* Enabled? */ @@ -464,7 +471,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) +int acpi_register_gsi(u32 gsi, int triggering, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -476,14 +483,14 @@ int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { extern void eisa_set_level_irq(unsigned int irq); - if (edge_level == ACPI_LEVEL_SENSITIVE) + if (triggering == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } #endif #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low); + plat_gsi = mp_register_gsi(gsi, triggering, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); @@ -1104,9 +1111,6 @@ int __init acpi_boot_table_init(void) disable_acpi(); return error; } -#ifdef __i386__ - check_acpi_pci(); -#endif acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c index 4c3036b..25db49e 100644 --- a/arch/i386/kernel/acpi/cstate.c +++ b/arch/i386/kernel/acpi/cstate.c @@ -14,64 +14,6 @@ #include <acpi/processor.h> #include <asm/acpi.h> -static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power - *pow) -{ - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } - - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_C_CAPABILITY_SMP; - - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pow->pdc = obj_list; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void acpi_processor_power_init_pdc(struct acpi_processor_power *pow, - unsigned int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - - pow->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) - acpi_processor_power_init_intel_pdc(pow); - - return; -} - -EXPORT_SYMBOL(acpi_processor_power_init_pdc); - /* * Initialize bm_flags based on the CPU cache properties * On SMP it depends on cache configuration diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index f1b9d2a..2e3b643 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -7,14 +7,22 @@ #include <linux/pci.h> #include <asm/pci-direct.h> #include <asm/acpi.h> +#include <asm/apic.h> static int __init check_bridge(int vendor, int device) { +#ifdef CONFIG_ACPI /* According to Nvidia all timer overrides are bogus. Just ignore them all. */ if (vendor == PCI_VENDOR_ID_NVIDIA) { acpi_skip_timer_override = 1; } +#endif + if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO "ATI board detected. Disabling timer routing " + "over 8254.\n"); + } return 0; } diff --git a/arch/i386/kernel/acpi/processor.c b/arch/i386/kernel/acpi/processor.c new file mode 100644 index 0000000..9f4cc02 --- /dev/null +++ b/arch/i386/kernel/acpi/processor.c @@ -0,0 +1,75 @@ +/* + * arch/i386/kernel/acpi/processor.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * - Added _PDC for platforms with Intel CPUs + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> + +#include <acpi/processor.h> +#include <asm/acpi.h> + +static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + return; +} + +/* Initialize _PDC data based on the CPU vendor */ +void arch_acpi_processor_init_pdc(struct acpi_processor *pr) +{ + unsigned int cpu = pr->id; + struct cpuinfo_x86 *c = cpu_data + cpu; + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + init_intel_pdc(pr, c); + + return; +} + +EXPORT_SYMBOL(arch_acpi_processor_init_pdc); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 496a2c9..776c909 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/cpu.h> +#include <linux/module.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -37,10 +38,17 @@ #include <asm/i8253.h> #include <mach_apic.h> +#include <mach_ipi.h> #include "io_ports.h" /* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_bcast_ipi; + +/* * Knob to control our willingness to enable the local APIC. */ int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ @@ -67,8 +75,10 @@ void ack_bad_irq(unsigned int irq) * holds up an irq slot - in excessive cases (when multiple * unexpected vectors occur) that might lock up the APIC * completely. + * But only ack when the APIC is enabled -AK */ - ack_APIC_irq(); + if (cpu_has_apic) + ack_APIC_irq(); } void __init apic_intr_init(void) @@ -92,10 +102,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -564,16 +570,18 @@ void __devinit setup_local_APIC(void) */ void lapic_shutdown(void) { + unsigned long flags; + if (!cpu_has_apic) return; - local_irq_disable(); + local_irq_save(flags); clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); - local_irq_enable(); + local_irq_restore(flags); } #ifdef CONFIG_PM @@ -721,7 +729,7 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_VERBOSE; else printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug", str); + " use apic=verbose or apic=debug\n", str); return 0; } @@ -935,11 +943,16 @@ void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + + if (cpu_isset(cpu, timer_bcast_ipi)) + lvtt_value |= APIC_LVT_MASKED; + apic_write_around(APIC_LVTT, lvtt_value); /* @@ -1072,7 +1085,7 @@ void __devinit setup_secondary_APIC_clock(void) setup_APIC_timer(calibration_result); } -void __devinit disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; @@ -1084,7 +1097,10 @@ void __devinit disable_APIC_timer(void) void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_bcast_ipi)) { unsigned long v; v = apic_read(APIC_LVTT); @@ -1092,33 +1108,31 @@ void enable_APIC_timer(void) } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_bcast_ipi)) { + disable_APIC_timer(); + cpu_set(cpu, timer_bcast_ipi); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - return 0; +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_bcast_ipi)) { + cpu_clear(cpu, timer_bcast_ipi); + enable_APIC_timer(); + } } +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); #undef APIC_DIVISOR @@ -1134,32 +1148,10 @@ int setup_profiling_timer(unsigned int multiplier) inline void smp_local_timer_interrupt(struct pt_regs * regs) { - int cpu = smp_processor_id(); - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - #ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode_vm(regs)); #endif - } /* * We take the 'long' return path, and there every subsystem @@ -1206,6 +1198,43 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } +#ifndef CONFIG_SMP +static void up_apic_timer_interrupt_call(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * the NMI deadlock-detector uses this. + */ + per_cpu(irq_stat, cpu).apic_timer_irqs++; + + smp_local_timer_interrupt(regs); +} +#endif + +void smp_send_timer_broadcast_ipi(struct pt_regs *regs) +{ + cpumask_t mask; + + cpus_and(mask, cpu_online_map, timer_bcast_ipi); + if (!cpus_empty(mask)) { +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#else + /* + * We can directly call the apic timer interrupt handler + * in UP case. Minus all irq related functions + */ + up_apic_timer_interrupt_call(regs); +#endif + } +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + /* * This interrupt should _never_ happen with our APIC/SMP architecture */ @@ -1278,6 +1307,7 @@ int __init APIC_init_uniprocessor (void) if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); return -1; } diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 1e60acb..05312a8 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -219,6 +219,7 @@ #include <linux/sched.h> #include <linux/pm.h> #include <linux/pm_legacy.h> +#include <linux/capability.h> #include <linux/device.h> #include <linux/kernel.h> #include <linux/smp.h> @@ -303,17 +304,6 @@ extern int (*console_blank_hook)(int); #include "apm.h" /* - * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is - * supposed to provide limit information that it recognizes. Many machines - * do this correctly, but many others do not restrict themselves to their - * claimed limit. When this happens, they will cause a segmentation - * violation in the kernel at boot time. Most BIOS's, however, will - * respect a 64k limit, so we use that. If you want to be pedantic and - * hold your BIOS to its claims, then undefine this. - */ -#define APM_RELAX_SEGMENTS - -/* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. * This patched by Chad Miller <cmiller@surfsouth.com>, original code by * David Chen <chen@ctpa04.mit.edu> @@ -1075,22 +1065,23 @@ static int apm_engage_power_management(u_short device, int enable) static int apm_console_blank(int blank) { - int error; - u_short state; + int error, i; + u_short state; + static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; state = blank ? APM_STATE_STANDBY : APM_STATE_READY; - /* Blank the first display device */ - error = set_power_state(0x100, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) { - /* try to blank them all instead */ - error = set_power_state(0x1ff, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) - /* try to blank device one instead */ - error = set_power_state(0x101, state); + + for (i = 0; i < ARRAY_SIZE(dev); i++) { + error = set_power_state(dev[i], state); + + if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) + return 1; + + if (error == APM_NOT_ENGAGED) + break; } - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - if (error == APM_NOT_ENGAGED) { + + if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) { static int tried; int eng_error; if (tried++ == 0) { @@ -2233,8 +2224,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { static int __init apm_init(void) { struct proc_dir_entry *apm_proc; + struct desc_struct *gdt; int ret; - int i; dmi_check_system(apm_dmi_table); @@ -2301,7 +2292,9 @@ static int __init apm_init(void) apm_info.disabled = 1; return -ENODEV; } +#ifdef CONFIG_PM_LEGACY pm_active = 1; +#endif /* * Set up a segment that references the real mode segment 0x40 @@ -2312,45 +2305,30 @@ static int __init apm_init(void) set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + /* + * Set up the long jump entry point to the APM BIOS, which is called + * from inline assembly. + */ apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; - for (i = 0; i < NR_CPUS; i++) { - struct desc_struct *gdt = get_cpu_gdt_table(i); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); -#ifndef APM_RELAX_SEGMENTS - if (apm_info.bios.version == 0x100) { -#endif - /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); - /* For some unknown machine. */ - _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); - /* For the DEC Hinote Ultra CT475 (and others?) */ - _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); -#ifndef APM_RELAX_SEGMENTS - } else { - _set_limit((char *)&gdt[APM_CS >> 3], - (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_CS_16 >> 3], - (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_DS >> 3], - (apm_info.bios.dseg_len - 1) & 0xffff); - /* workaround for broken BIOSes */ - if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); - if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ - /* for the BIOS that assumes granularity = 1 */ - gdt[APM_DS >> 3].b |= 0x800000; - printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); - } - } -#endif - } + /* + * The APM 1.1 BIOS is supposed to provide limit information that it + * recognizes. Many machines do this correctly, but many others do + * not restrict themselves to their claimed limit. When this happens, + * they will cause a segmentation violation in the kernel at boot time. + * Most BIOS's, however, will respect a 64k limit, so we use that. + * + * Note we only set APM segments on CPU zero, since we pin the APM + * code to that CPU. + */ + gdt = get_cpu_gdt_table(0); + set_base(gdt[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt[APM_CS_16 >> 3], + __va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_base(gdt[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); if (apm_proc) @@ -2407,7 +2385,9 @@ static void __exit apm_exit(void) exit_kapmd = 1; while (kapmd_running) schedule(); +#ifdef CONFIG_PM_LEGACY pm_active = 0; +#endif } module_init(apm_init); diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e344ef8..0810f81 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -161,8 +161,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); break; } - break; + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ /* Bit 15 of Athlon specific MSR 15, needs to be 0 @@ -211,6 +216,12 @@ static void __init init_amd(struct cpuinfo_x86 *c) c->x86_max_cores = 1; } + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } + #ifdef CONFIG_X86_HT /* * On a AMD dual core setup the lower bits of the APIC id @@ -228,6 +239,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif + } static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) @@ -270,3 +282,11 @@ int __init amd_init_cpu(void) } //early_arch_initcall(amd_init_cpu); + +static int __init amd_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_AMD] = NULL; + return 0; +} + +late_initcall(amd_exit_cpu); diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index 394814e..f52669e 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -405,10 +405,6 @@ static void __init init_centaur(struct cpuinfo_x86 *c) winchip2_protect_mcr(); #endif break; - case 10: - name="4"; - /* no info on the WC4 yet */ - break; default: name="??"; } @@ -474,3 +470,11 @@ int __init centaur_init_cpu(void) } //early_arch_initcall(centaur_init_cpu); + +static int __init centaur_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_CENTAUR] = NULL; + return 0; +} + +late_initcall(centaur_exit_cpu); diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog deleted file mode 100644 index cef76b8..0000000 --- a/arch/i386/kernel/cpu/changelog +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean - * and Martin Mares, November 1997. - * - * Force Cyrix 6x86(MX) and M II processors to report MTRR capability - * and Cyrix "coma bug" recognition by - * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999. - * - * Force Centaur C6 processors to report MTRR capability. - * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999. - * - * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999. - * - * IDT Winchip tweaks, misc clean ups. - * Dave Jones <davej@suse.de>, August 1999 - * - * Better detection of Centaur/IDT WinChip models. - * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999. - * - * Cleaned up cache-detection code - * Dave Jones <davej@suse.de>, October 1999 - * - * Added proper L2 cache detection for Coppermine - * Dragan Stancevic <visitor@valinux.com>, October 1999 - * - * Added the original array for capability flags but forgot to credit - * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff - * Jauder Ho <jauderho@carumba.com>, January 2000 - * - * Detection for Celeron coppermine, identify_cpu() overhauled, - * and a few other clean ups. - * Dave Jones <davej@suse.de>, April 2000 - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * - * Added proper Cascades CPU and L2 cache detection for Cascades - * and 8-way type cache happy bunch from Intel:^) - * Dragan Stancevic <visitor@valinux.com>, May 2000 - * - * Forward port AMD Duron errata T13 from 2.2.17pre - * Dave Jones <davej@suse.de>, August 2000 - * - * Forward port lots of fixes/improvements from 2.2.18pre - * Cyrix III, Pentium IV support. - * Dave Jones <davej@suse.de>, October 2000 - * - * Massive cleanup of CPU detection and bug handling; - * Transmeta CPU detection, - * H. Peter Anvin <hpa@zytor.com>, November 2000 - * - * VIA C3 Support. - * Dave Jones <davej@suse.de>, March 2001 - * - * AMD Athlon/Duron/Thunderbird bluesmoke support. - * Dave Jones <davej@suse.de>, April 2001. - * - * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. - * Dave Jones <davej@suse.de>, September, October 2001. - * - */ - diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 31e344b..e6bd095 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include <linux/module.h> #include <linux/percpu.h> +#include <linux/bootmem.h> #include <asm/semaphore.h> #include <asm/processor.h> #include <asm/i387.h> @@ -18,8 +19,8 @@ #include "cpu.h" -DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); +DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); @@ -47,6 +48,7 @@ static void default_init(struct cpuinfo_x86 * c) static struct cpu_dev default_cpu = { .c_init = default_init, + .c_vendor = "Unknown", }; static struct cpu_dev * this_cpu = &default_cpu; @@ -153,6 +155,7 @@ static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; + static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { if (cpu_devs[i]) { @@ -162,10 +165,17 @@ static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) c->x86_vendor = i; if (!early) this_cpu = cpu_devs[i]; - break; + return; } } } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; } @@ -207,7 +217,10 @@ static int __devinit have_cpuid_p(void) /* Do minimum CPU detection early. Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. */ + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ static void __init early_cpu_detect(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -239,12 +252,6 @@ static void __init early_cpu_detect(void) if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; } - - early_intel_workaround(c); - -#ifdef CONFIG_X86_HT - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; -#endif } void __devinit generic_identify(struct cpuinfo_x86 * c) @@ -271,10 +278,10 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; } else { /* Have CPUID level 0 only - unheard of */ @@ -292,6 +299,12 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) get_model_name(c); /* Default name */ } } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; +#endif } static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -562,8 +575,9 @@ void __devinit cpu_init(void) int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct *gdt; __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -581,6 +595,25 @@ void __devinit cpu_init(void) } /* + * This is a horrible hack to allocate the GDT. The problem + * is that cpu_init() is called really early for the boot CPU + * (and hence needs bootmem) but much later for the secondary + * CPUs, when bootmem will have gone away + */ + if (NODE_DATA(0)->bdata->node_bootmem_map) { + gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); + /* alloc_bootmem_pages panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + } else { + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + if (unlikely(!gdt)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); + for (;;) + local_irq_enable(); + } + } + + /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ @@ -592,18 +625,13 @@ void __devinit cpu_init(void) ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = (unsigned long)gdt; + cpu_gdt_descr->size = GDT_SIZE - 1; + cpu_gdt_descr->address = (unsigned long)gdt; - load_gdt(&cpu_gdt_descr[cpu]); + load_gdt(cpu_gdt_descr); load_idt(&idt_descr); /* - * Delete NT - */ - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - - /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); @@ -617,8 +645,10 @@ void __devinit cpu_init(void) load_TR_desc(); load_LDT(&init_mm.context); +#ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 0f1eb50..26892d2 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -96,6 +96,7 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 871366b..3852d0a 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -40,8 +40,6 @@ #include <linux/acpi.h> #include <acpi/processor.h> -#include "speedstep-est-common.h" - #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); @@ -297,68 +295,6 @@ acpi_cpufreq_guess_freq ( } -/* - * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities - * of this driver - * @perf: processor-specific acpi_io_data struct - * @cpu: CPU being initialized - * - * To avoid issues with legacy OSes, some BIOSes require to be informed of - * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC - * accordingly, for Enhanced Speedstep. Actual call to _PDC is done in - * driver/acpi/processor.c - */ -static void -acpi_processor_cpu_init_pdc_est( - struct acpi_processor_performance *perf, - unsigned int cpu, - struct acpi_object_list *obj_list - ) -{ - union acpi_object *obj; - u32 *buf; - struct cpuinfo_x86 *c = cpu_data + cpu; - dprintk("acpi_processor_cpu_init_pdc_est\n"); - - if (!cpu_has(c, X86_FEATURE_EST)) - return; - - /* Initialize pdc. It will be used later. */ - if (!obj_list) - return; - - if (!(obj_list->count && obj_list->pointer)) - return; - - obj = obj_list->pointer; - if ((obj->buffer.length == 12) && obj->buffer.pointer) { - buf = (u32 *)obj->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; - perf->pdc = obj_list; - } - return; -} - - -/* CPU specific PDC initialization */ -static void -acpi_processor_cpu_init_pdc( - struct acpi_processor_performance *perf, - unsigned int cpu, - struct acpi_object_list *obj_list - ) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - dprintk("acpi_processor_cpu_init_pdc\n"); - perf->pdc = NULL; - if (cpu_has(c, X86_FEATURE_EST)) - acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list); - return; -} - - static int acpi_cpufreq_cpu_init ( struct cpufreq_policy *policy) @@ -367,15 +303,9 @@ acpi_cpufreq_cpu_init ( unsigned int cpu = policy->cpu; struct cpufreq_acpi_io *data; unsigned int result = 0; - - union acpi_object arg0 = {ACPI_TYPE_BUFFER}; - u32 arg0_buf[3]; - struct acpi_object_list arg_list = {1, &arg0}; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; dprintk("acpi_cpufreq_cpu_init\n"); - /* setup arg_list for _PDC settings */ - arg0.buffer.length = 12; - arg0.buffer.pointer = (u8 *) arg0_buf; data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) @@ -383,14 +313,12 @@ acpi_cpufreq_cpu_init ( acpi_io_data[cpu] = data; - acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list); result = acpi_processor_register_performance(&data->acpi_data, cpu); - data->acpi_data.pdc = NULL; if (result) goto err_free; - if (is_const_loops_cpu(cpu)) { + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; } diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index 270f218..cc73a7a 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -52,6 +52,7 @@ enum { static int has_N44_O17_errata[NR_CPUS]; +static int has_N60_errata[NR_CPUS]; static unsigned int stock_freq; static struct cpufreq_driver p4clockmod_driver; static unsigned int cpufreq_p4_get(unsigned int cpu); @@ -226,6 +227,12 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) case 0x0f12: has_N44_O17_errata[policy->cpu] = 1; dprintk("has errata -- disabling low frequencies\n"); + break; + + case 0x0f29: + has_N60_errata[policy->cpu] = 1; + dprintk("has errata -- disabling frequencies lower than 2ghz\n"); + break; } /* get max frequency */ @@ -237,6 +244,8 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { if ((i<2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; + else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; } diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 0fbbd4c..e11a092 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -980,7 +980,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) } /* per CPU init entry point to the driver */ -static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) +static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; @@ -1141,7 +1141,7 @@ static struct cpufreq_driver cpufreq_amd64_driver = { }; /* driver entry point for init */ -static int __init powernowk8_init(void) +static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index edb9873..c173c0f 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -35,8 +35,6 @@ #include <asm/processor.h> #include <asm/cpufeature.h> -#include "speedstep-est-common.h" - #define PFX "speedstep-centrino: " #define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>" @@ -364,22 +362,10 @@ static struct acpi_processor_performance p; */ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { - union acpi_object arg0 = {ACPI_TYPE_BUFFER}; - u32 arg0_buf[3]; - struct acpi_object_list arg_list = {1, &arg0}; unsigned long cur_freq; int result = 0, i; unsigned int cpu = policy->cpu; - /* _PDC settings */ - arg0.buffer.length = 12; - arg0.buffer.pointer = (u8 *) arg0_buf; - arg0_buf[0] = ACPI_PDC_REVISION_ID; - arg0_buf[1] = 1; - arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR; - - p.pdc = &arg_list; - /* register with ACPI core */ if (acpi_processor_register_performance(&p, cpu)) { dprintk(KERN_INFO PFX "obtaining ACPI data failed\n"); @@ -493,12 +479,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) unsigned l, h; int ret; int i; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; /* Only Intel makes Enhanced Speedstep-capable CPUs */ if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - if (is_const_loops_cpu(policy->cpu)) { + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { centrino_driver.flags |= CPUFREQ_CONST_LOOPS; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h deleted file mode 100644 index 5ce995c..0000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Routines common for drivers handling Enhanced Speedstep Technology - * Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * - * Licensed under the terms of the GNU GPL License version 2 -- see - * COPYING for details. - */ - -static inline int is_const_loops_cpu(unsigned int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - - if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST)) - return 0; - - /* - * on P-4s, the TSC runs with constant frequency independent of cpu freq - * when we use EST - */ - if (c->x86 == 0xf) - return 1; - - return 0; -} - diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ff87cc2..00f2e05 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -343,6 +343,31 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) } /* + * Handle National Semiconductor branded processors + */ +static void __init init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC aquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected * by the fact that they preserve the flags across the division of 5/2. * PII and PPro exhibit this behavior too, but they have cpuid available. @@ -419,10 +444,18 @@ int __init cyrix_init_cpu(void) //early_arch_initcall(cyrix_init_cpu); +static int __init cyrix_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_CYRIX] = NULL; + return 0; +} + +late_initcall(cyrix_exit_cpu); + static struct cpu_dev nsc_cpu_dev __initdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, - .c_init = init_cyrix, + .c_init = init_nsc, .c_identify = generic_identify, }; @@ -433,3 +466,11 @@ int __init nsc_init_cpu(void) } //early_arch_initcall(nsc_init_cpu); + +static int __init nsc_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_NSC] = NULL; + return 0; +} + +late_initcall(nsc_exit_cpu); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5e2da70..8c01201 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -183,10 +183,13 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) } #endif - if (c->x86 == 15) + if (c->x86 == 15) set_bit(X86_FEATURE_P4, c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); } diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index fbfd374..ffe58ce 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -43,13 +43,23 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ @@ -57,6 +67,7 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ + { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -141,6 +152,7 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le return 0; } +/* will only be called once; __init is safe here */ static int __init find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog deleted file mode 100644 index af13685..0000000 --- a/arch/i386/kernel/cpu/mtrr/changelog +++ /dev/null @@ -1,229 +0,0 @@ - ChangeLog - - Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de> - Initial register-setting code (from proform-1.0). - 19971216 Richard Gooch <rgooch@atnf.csiro.au> - Original version for /proc/mtrr interface, SMP-safe. - v1.0 - 19971217 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix for ioctls()'s. - Added sample code in Documentation/mtrr.txt - v1.1 - 19971218 Richard Gooch <rgooch@atnf.csiro.au> - Disallow overlapping regions. - 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de> - Register-setting fixups. - v1.2 - 19971222 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.75. - v1.3 - 19971229 David Wragg <dpw@doc.ic.ac.uk> - Register-setting fixups and conformity with Intel conventions. - 19971229 Richard Gooch <rgooch@atnf.csiro.au> - Cosmetic changes and wrote this ChangeLog ;-) - 19980106 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.78. - v1.4 - 19980119 David Wragg <dpw@doc.ic.ac.uk> - Included passive-release enable code (elsewhere in PCI setup). - v1.5 - 19980131 Richard Gooch <rgooch@atnf.csiro.au> - Replaced global kernel lock with private spinlock. - v1.6 - 19980201 Richard Gooch <rgooch@atnf.csiro.au> - Added wait for other CPUs to complete changes. - v1.7 - 19980202 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix in definition of <set_mtrr> for UP. - v1.8 - 19980319 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.90. - 19980323 Richard Gooch <rgooch@atnf.csiro.au> - Move SMP BIOS fixup before secondary CPUs call <calibrate_delay> - v1.9 - 19980325 Richard Gooch <rgooch@atnf.csiro.au> - Fixed test for overlapping regions: confused by adjacent regions - 19980326 Richard Gooch <rgooch@atnf.csiro.au> - Added wbinvd in <set_mtrr_prepare>. - 19980401 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix for non-SMP compilation. - 19980418 David Wragg <dpw@doc.ic.ac.uk> - Fixed-MTRR synchronisation for SMP and use atomic operations - instead of spinlocks. - 19980418 Richard Gooch <rgooch@atnf.csiro.au> - Differentiate different MTRR register classes for BIOS fixup. - v1.10 - 19980419 David Wragg <dpw@doc.ic.ac.uk> - Bug fix in variable MTRR synchronisation. - v1.11 - 19980419 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.97. - v1.12 - 19980421 Richard Gooch <rgooch@atnf.csiro.au> - Safer synchronisation across CPUs when changing MTRRs. - v1.13 - 19980423 Richard Gooch <rgooch@atnf.csiro.au> - Bugfix for SMP systems without MTRR support. - v1.14 - 19980427 Richard Gooch <rgooch@atnf.csiro.au> - Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines. - v1.15 - 19980427 Richard Gooch <rgooch@atnf.csiro.au> - Use atomic bitops for setting SMP change mask. - v1.16 - 19980428 Richard Gooch <rgooch@atnf.csiro.au> - Removed spurious diagnostic message. - v1.17 - 19980429 Richard Gooch <rgooch@atnf.csiro.au> - Moved register-setting macros into this file. - Moved setup code from init/main.c to i386-specific areas. - v1.18 - 19980502 Richard Gooch <rgooch@atnf.csiro.au> - Moved MTRR detection outside conditionals in <mtrr_init>. - v1.19 - 19980502 Richard Gooch <rgooch@atnf.csiro.au> - Documentation improvement: mention Pentium II and AGP. - v1.20 - 19980521 Richard Gooch <rgooch@atnf.csiro.au> - Only manipulate interrupt enable flag on local CPU. - Allow enclosed uncachable regions. - v1.21 - 19980611 Richard Gooch <rgooch@atnf.csiro.au> - Always define <main_lock>. - v1.22 - 19980901 Richard Gooch <rgooch@atnf.csiro.au> - Removed module support in order to tidy up code. - Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>. - Created addition queue for prior to SMP commence. - v1.23 - 19980902 Richard Gooch <rgooch@atnf.csiro.au> - Ported patch to kernel 2.1.120-pre3. - v1.24 - 19980910 Richard Gooch <rgooch@atnf.csiro.au> - Removed sanity checks and addition queue: Linus prefers an OOPS. - v1.25 - 19981001 Richard Gooch <rgooch@atnf.csiro.au> - Fixed harmless compiler warning in include/asm-i386/mtrr.h - Fixed version numbering and history for v1.23 -> v1.24. - v1.26 - 19990118 Richard Gooch <rgooch@atnf.csiro.au> - Added devfs support. - v1.27 - 19990123 Richard Gooch <rgooch@atnf.csiro.au> - Changed locking to spin with reschedule. - Made use of new <smp_call_function>. - v1.28 - 19990201 Zoltán Böszörményi <zboszor@mail.externet.hu> - Extended the driver to be able to use Cyrix style ARRs. - 19990204 Richard Gooch <rgooch@atnf.csiro.au> - Restructured Cyrix support. - v1.29 - 19990204 Zoltán Böszörményi <zboszor@mail.externet.hu> - Refined ARR support: enable MAPEN in set_mtrr_prepare() - and disable MAPEN in set_mtrr_done(). - 19990205 Richard Gooch <rgooch@atnf.csiro.au> - Minor cleanups. - v1.30 - 19990208 Zoltán Böszörményi <zboszor@mail.externet.hu> - Protect plain 6x86s (and other processors without the - Page Global Enable feature) against accessing CR4 in - set_mtrr_prepare() and set_mtrr_done(). - 19990210 Richard Gooch <rgooch@atnf.csiro.au> - Turned <set_mtrr_up> and <get_mtrr> into function pointers. - v1.31 - 19990212 Zoltán Böszörményi <zboszor@mail.externet.hu> - Major rewrite of cyrix_arr_init(): do not touch ARRs, - leave them as the BIOS have set them up. - Enable usage of all 8 ARRs. - Avoid multiplications by 3 everywhere and other - code clean ups/speed ups. - 19990213 Zoltán Böszörményi <zboszor@mail.externet.hu> - Set up other Cyrix processors identical to the boot cpu. - Since Cyrix don't support Intel APIC, this is l'art pour l'art. - Weigh ARRs by size: - If size <= 32M is given, set up ARR# we were given. - If size > 32M is given, set up ARR7 only if it is free, - fail otherwise. - 19990214 Zoltán Böszörményi <zboszor@mail.externet.hu> - Also check for size >= 256K if we are to set up ARR7, - mtrr_add() returns the value it gets from set_mtrr() - 19990218 Zoltán Böszörményi <zboszor@mail.externet.hu> - Remove Cyrix "coma bug" workaround from here. - Moved to linux/arch/i386/kernel/setup.c and - linux/include/asm-i386/bugs.h - 19990228 Richard Gooch <rgooch@atnf.csiro.au> - Added MTRRIOC_KILL_ENTRY ioctl(2) - Trap for counter underflow in <mtrr_file_del>. - Trap for 4 MiB aligned regions for PPro, stepping <= 7. - 19990301 Richard Gooch <rgooch@atnf.csiro.au> - Created <get_free_region> hook. - 19990305 Richard Gooch <rgooch@atnf.csiro.au> - Temporarily disable AMD support now MTRR capability flag is set. - v1.32 - 19990308 Zoltán Böszörményi <zboszor@mail.externet.hu> - Adjust my changes (19990212-19990218) to Richard Gooch's - latest changes. (19990228-19990305) - v1.33 - 19990309 Richard Gooch <rgooch@atnf.csiro.au> - Fixed typo in <printk> message. - 19990310 Richard Gooch <rgooch@atnf.csiro.au> - Support K6-II/III based on Alan Cox's <alan@redhat.com> patches. - v1.34 - 19990511 Bart Hartgers <bart@etpmod.phys.tue.nl> - Support Centaur C6 MCR's. - 19990512 Richard Gooch <rgooch@atnf.csiro.au> - Minor cleanups. - v1.35 - 19990707 Zoltán Böszörményi <zboszor@mail.externet.hu> - Check whether ARR3 is protected in cyrix_get_free_region() - and mtrr_del(). The code won't attempt to delete or change it - from now on if the BIOS protected ARR3. It silently skips ARR3 - in cyrix_get_free_region() or returns with an error code from - mtrr_del(). - 19990711 Zoltán Böszörményi <zboszor@mail.externet.hu> - Reset some bits in the CCRs in cyrix_arr_init() to disable SMM - if ARR3 isn't protected. This is needed because if SMM is active - and ARR3 isn't protected then deleting and setting ARR3 again - may lock up the processor. With SMM entirely disabled, it does - not happen. - 19990812 Zoltán Böszörményi <zboszor@mail.externet.hu> - Rearrange switch() statements so the driver accomodates to - the fact that the AMD Athlon handles its MTRRs the same way - as Intel does. - 19990814 Zoltán Böszörményi <zboszor@mail.externet.hu> - Double check for Intel in mtrr_add()'s big switch() because - that revision check is only valid for Intel CPUs. - 19990819 Alan Cox <alan@redhat.com> - Tested Zoltan's changes on a pre production Athlon - 100% - success. - 19991008 Manfred Spraul <manfreds@colorfullife.com> - replaced spin_lock_reschedule() with a normal semaphore. - v1.36 - 20000221 Richard Gooch <rgooch@atnf.csiro.au> - Compile fix if procfs and devfs not enabled. - Formatting changes. - v1.37 - 20001109 H. Peter Anvin <hpa@zytor.com> - Use the new centralized CPU feature detects. - - v1.38 - 20010309 Dave Jones <davej@suse.de> - Add support for Cyrix III. - - v1.39 - 20010312 Dave Jones <davej@suse.de> - Ugh, I broke AMD support. - Reworked fix by Troels Walsted Hansen <troels@thule.no> - - v1.40 - 20010327 Dave Jones <davej@suse.de> - Adapted Cyrix III support to include VIA C3. - - v2.0 - 20020306 Patrick Mochel <mochel@osdl.org> - Split mtrr.c -> mtrr/*.c - Converted to Linux Kernel Coding Style - Fixed several minor nits in form - Moved some SMP-only functions out, so they can be used - for power management in the future. - TODO: Fix user interface cruft. diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index cf39e20..5ac051b 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -1,5 +1,6 @@ #include <linux/init.h> #include <linux/proc_fs.h> +#include <linux/capability.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/seq_file.h> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 1e9db19..3b4618b 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -44,12 +44,10 @@ #include <asm/msr.h> #include "mtrr.h" -#define MTRR_VERSION "2.0 (20020519)" - u32 num_var_ranges = 0; unsigned int *usage_table; -static DECLARE_MUTEX(main_lock); +static DECLARE_MUTEX(mtrr_sem); u32 size_or_mask, size_and_mask; @@ -335,7 +333,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); /* Search for existing MTRR */ - down(&main_lock); + down(&mtrr_sem); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); if (base >= lbase + lsize) @@ -373,7 +371,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; out: - up(&main_lock); + up(&mtrr_sem); unlock_cpu_hotplug(); return error; } @@ -466,7 +464,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); - down(&main_lock); + down(&mtrr_sem); if (reg < 0) { /* Search for existing MTRR */ for (i = 0; i < max; ++i) { @@ -505,7 +503,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) set_mtrr(reg, 0, 0, 0); error = reg; out: - up(&main_lock); + up(&mtrr_sem); unlock_cpu_hotplug(); return error; } @@ -671,7 +669,6 @@ void __init mtrr_bp_init(void) break; } } - printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION); if (mtrr_if) { set_num_var_ranges(); @@ -688,7 +685,7 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold main_lock here to avoid mtrr entries changed, + * Ideally we should hold mtrr_sem here to avoid mtrr entries changed, * but this routine will be called in cpu boot time, holding the lock * breaks it. This routine is called in two cases: 1.very earily time * of software resume, when there absolutely isn't mtrr entry changes; diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index 30898a2..ad87fa5 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -61,3 +61,11 @@ int __init nexgen_init_cpu(void) } //early_arch_initcall(nexgen_init_cpu); + +static int __init nexgen_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_NEXGEN] = NULL; + return 0; +} + +late_initcall(nexgen_exit_cpu); diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 6d91b27..89a85af 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, @@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -57,11 +57,21 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; + static char *x86_power_flags[] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + NULL, + /* nothing */ /* constant_tsc - moved to flags */ + }; struct cpuinfo_x86 *c = v; int i, n = c - cpu_data; int fpu_exception; @@ -131,6 +141,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) x86_cap_flags[i] != NULL ) seq_printf(m, " %s", x86_cap_flags[i]); + for (i = 0; i < 32; i++) + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index 8602425..d08d5a2 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c @@ -51,3 +51,11 @@ int __init rise_init_cpu(void) } //early_arch_initcall(rise_init_cpu); + +static int __init rise_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_RISE] = NULL; + return 0; +} + +late_initcall(rise_exit_cpu); diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index fc42638..7214c9b 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include <linux/kernel.h> +#include <linux/mm.h> #include <linux/init.h> #include <asm/processor.h> #include <asm/msr.h> @@ -84,7 +85,7 @@ static void __init init_transmeta(struct cpuinfo_x86 *c) #endif } -static void transmeta_identify(struct cpuinfo_x86 * c) +static void __init transmeta_identify(struct cpuinfo_x86 * c) { u32 xlvl; generic_identify(c); @@ -111,3 +112,11 @@ int __init transmeta_init_cpu(void) } //early_arch_initcall(transmeta_init_cpu); + +static int __init transmeta_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_TRANSMETA] = NULL; + return 0; +} + +late_initcall(transmeta_exit_cpu); diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 264fcad..2cd988f 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c @@ -31,3 +31,11 @@ int __init umc_init_cpu(void) } //early_arch_initcall(umc_init_cpu); + +static int __init umc_exit_cpu(void) +{ + cpu_devs[X86_VENDOR_UMC] = NULL; + return 0; +} + +late_initcall(umc_exit_cpu); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 13bae79..006141d 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -117,14 +117,13 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, { char __user *tmp = buf; u32 data[4]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); if (count % 16) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 16) { + for (; count; count -= 16) { do_cpuid(cpu, reg, data); if (copy_to_user(tmp, &data, 16)) return -EFAULT; diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 0248e08..d49dbe8 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -25,7 +25,6 @@ #include <mach_ipi.h> -note_buf_t crash_notes[NR_CPUS]; /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; @@ -72,7 +71,9 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) * squirrelled away. ELF notes happen to provide * all of that that no need to invent something new. */ - buf = &crash_notes[cpu][0]; + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; elf_core_copy_regs(&prstatus.pr_reg, regs); @@ -81,51 +82,12 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) final_note(buf); } -static void crash_get_current_regs(struct pt_regs *regs) -{ - __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); - __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); - __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); - __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); - __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); - __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); - __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); - __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); - __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); - __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); - __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); - __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); - __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); - - regs->eip = (unsigned long)current_text_addr(); -} - -/* CPU does not save ss and esp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) -{ - memcpy(newregs, oldregs, sizeof(*newregs)); - newregs->esp = (unsigned long)&(oldregs->esp); - __asm__ __volatile__("xorl %eax, %eax;"); - __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss)); -} - -/* We may have saved_regs from where the error came from - * or it is NULL if via a direct panic(). - */ -static void crash_save_self(struct pt_regs *saved_regs) +static void crash_save_self(struct pt_regs *regs) { - struct pt_regs regs; int cpu; cpu = smp_processor_id(); - if (saved_regs) - crash_setup_regs(®s, saved_regs); - else - crash_get_current_regs(®s); - crash_save_this_cpu(®s, cpu); + crash_save_this_cpu(regs, cpu); } #ifdef CONFIG_SMP @@ -144,7 +106,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) local_irq_disable(); if (!user_mode(regs)) { - crash_setup_regs(&fixed_regs, regs); + crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); diff --git a/arch/i386/kernel/crash_dump.c b/arch/i386/kernel/crash_dump.c new file mode 100644 index 0000000..3f532df --- /dev/null +++ b/arch/i386/kernel/crash_dump.c @@ -0,0 +1,74 @@ +/* + * kernel/crash_dump.c - Memory preserving reboot related code. + * + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM Corporation, 2004. All rights reserved + */ + +#include <linux/errno.h> +#include <linux/highmem.h> +#include <linux/crash_dump.h> + +#include <asm/uaccess.h> + +static void *kdump_buf_page; + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + * + * Calling copy_to_user() in atomic context is not desirable. Hence first + * copying the data to a pre-allocated kernel page and then copying to user + * space in non-atomic context. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = kmap_atomic_pfn(pfn, KM_PTE0); + + if (!userbuf) { + memcpy(buf, (vaddr + offset), csize); + kunmap_atomic(vaddr, KM_PTE0); + } else { + if (!kdump_buf_page) { + printk(KERN_WARNING "Kdump: Kdump buffer page not" + " allocated\n"); + return -EFAULT; + } + copy_page(kdump_buf_page, vaddr); + kunmap_atomic(vaddr, KM_PTE0); + if (copy_to_user(buf, (kdump_buf_page + offset), csize)) + return -EFAULT; + } + + return csize; +} + +static int __init kdump_buf_page_init(void) +{ + int ret = 0; + + kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!kdump_buf_page) { + printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" + " page\n"); + ret = -ENOMEM; + } + + return ret; +} +arch_initcall(kdump_buf_page_init); diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 58516e2..6a93d75 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -4,7 +4,7 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/bootmem.h> - +#include <linux/slab.h> static char * __init dmi_string(struct dmi_header *dm, u8 s) { @@ -19,7 +19,7 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) } if (*bp != 0) { - str = alloc_bootmem(strlen(bp) + 1); + str = dmi_alloc(strlen(bp) + 1); if (str != NULL) strcpy(str, bp); else @@ -40,7 +40,7 @@ static int __init dmi_table(u32 base, int len, int num, u8 *buf, *data; int i = 0; - buf = bt_ioremap(base, len); + buf = dmi_ioremap(base, len); if (buf == NULL) return -1; @@ -65,7 +65,7 @@ static int __init dmi_table(u32 base, int len, int num, data += 2; i++; } - bt_iounmap(buf, len); + dmi_iounmap(buf, len); return 0; } @@ -112,7 +112,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) if ((*d & 0x80) == 0) continue; - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_devices: out of memory.\n"); break; @@ -131,7 +131,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) struct dmi_device *dev; void * data; - data = alloc_bootmem(dm->length); + data = dmi_alloc(dm->length); if (data == NULL) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -139,7 +139,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) memcpy(data, dm, dm->length); - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -221,7 +221,7 @@ void __init dmi_scan_machine(void) } } -out: printk(KERN_INFO "DMI not present.\n"); +out: printk(KERN_INFO "DMI not present or invalid.\n"); } diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index ecad519..c9cad7b 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -70,10 +70,13 @@ static void efi_call_phys_prelog(void) { unsigned long cr4; unsigned long temp; + struct Xgt_desc_struct *cpu_gdt_descr; spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); + cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); + /* * If I don't have PSE, I should just duplicate two entries in page * directory. If I have PSE, I just need to duplicate one entry in @@ -103,17 +106,18 @@ static void efi_call_phys_prelog(void) */ local_flush_tlb(); - cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); - load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); + cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); + load_gdt(cpu_gdt_descr); } static void efi_call_phys_epilog(void) { unsigned long cr4; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); + + cpu_gdt_descr->address = __va(cpu_gdt_descr->address); + load_gdt(cpu_gdt_descr); - cpu_gdt_descr[0].address = - (unsigned long) __va(cpu_gdt_descr[0].address); - load_gdt(&cpu_gdt_descr[0]); cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e50b9315..4d70472 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -323,6 +323,7 @@ work_notifysig: # deal with pending signals and ALIGN work_notifysig_v86: +#ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl %ecx @@ -330,6 +331,7 @@ work_notifysig_v86: xorl %edx, %edx call do_notify_resume jmp resume_userspace +#endif # perform syscall exit tracing ALIGN @@ -657,6 +659,7 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e437fb3..e0b7c63 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -398,7 +398,11 @@ ignore_int: pushl 32(%esp) pushl 40(%esp) pushl $int_msg +#ifdef CONFIG_EARLY_PRINTK + call early_printk +#else call printk +#endif addl $(5*4),%esp popl %ds popl %es @@ -504,19 +508,24 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x90 32-bit code */ - .quad 0x00809a0000000000 /* 0x98 16-bit code */ - .quad 0x0080920000000000 /* 0xa0 16-bit data */ - .quad 0x0080920000000000 /* 0xa8 16-bit data */ - .quad 0x0080920000000000 /* 0xb0 16-bit data */ + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + .quad 0x00409a000000ffff /* 0x90 32-bit code */ + .quad 0x00009a000000ffff /* 0x98 16-bit code */ + .quad 0x000092000000ffff /* 0xa0 16-bit data */ + .quad 0x0000920000000000 /* 0xa8 16-bit data */ + .quad 0x0000920000000000 /* 0xb0 16-bit data */ + /* * The APM segments have byte granularity and their bases - * and limits are set at run time. + * are set at run time. All have 64k limits. */ - .quad 0x00409a0000000000 /* 0xb8 APM CS code */ - .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0xc8 APM DS data */ + .quad 0x00409a000000ffff /* 0xb8 APM CS code */ + .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004092000000ffff /* 0xc8 APM DS data */ .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 180f070..0553250 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -3,9 +3,6 @@ #include <asm/checksum.h> #include <asm/desc.h> -/* This is definitely a GPL-only symbol */ -EXPORT_SYMBOL_GPL(cpu_gdt_table); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 9caa8e8..cff95d1 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 22c8675..39d9a5f 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -51,6 +51,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); +int timer_over_8254 __initdata = 1; + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -1649,7 +1651,7 @@ static void __init enable_IO_APIC(void) for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ - for(pin = 0; pin < nr_ioapic_registers[i]; pin++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); @@ -1722,8 +1724,8 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = 0; - + entry.dest.physical.physical_dest = + GET_APIC_ID(apic_read(APIC_ID)); /* * Add it to the IO-APIC irq-routing table: @@ -2267,7 +2269,8 @@ static inline void check_timer(void) apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); timer_ack = 1; - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2392,6 +2395,20 @@ void __init setup_IO_APIC(void) print_IO_APIC(); } +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path @@ -2566,8 +2583,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ - if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } } apic_printk(APIC_VERBOSE, KERN_INFO diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index b59a34d..79026f0 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/ioport.h> diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 1a201a9..f3a9c78 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -19,7 +19,7 @@ #include <linux/cpu.h> #include <linux/delay.h> -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); #ifndef CONFIG_X86_LOCAL_APIC diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 19edcd5..694a139 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -58,13 +58,14 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) int __kprobes arch_prepare_kprobe(struct kprobe *p) { - return 0; -} + /* insn: must be on special executable page on i386. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -83,6 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { + down(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + up(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -119,7 +123,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) if (p->opcode == BREAKPOINT_INSTRUCTION) regs->eip = (unsigned long)p->addr; else - regs->eip = (unsigned long)&p->ainsn.insn; + regs->eip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ @@ -196,6 +200,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_REENTER; return 1; } else { + if (regs->eflags & VM_MASK) { + /* We are in virtual-8086 mode. Return 0 */ + goto no_kprobe; + } + if (*addr != BREAKPOINT_INSTRUCTION) { + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + regs->eip -= sizeof(kprobe_opcode_t); + ret = 1; + goto no_kprobe; + } p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; @@ -346,7 +363,7 @@ static void __kprobes resume_execution(struct kprobe *p, { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; - unsigned long copy_eip = (unsigned long)&p->ainsn.insn; + unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; switch (p->ainsn.insn[0]) { diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index a912fed..f73d737 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -116,13 +116,13 @@ static void load_segments(void) __asm__ __volatile__ ( "\tljmp $"STR(__KERNEL_CS)",$1f\n" "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%eax\n" - "\tmovl %eax,%ds\n" - "\tmovl %eax,%es\n" - "\tmovl %eax,%fs\n" - "\tmovl %eax,%gs\n" - "\tmovl %eax,%ss\n" - ); + "\tmovl $"STR(__KERNEL_DS)",%%eax\n" + "\tmovl %%eax,%%ds\n" + "\tmovl %%eax,%%es\n" + "\tmovl %%eax,%%fs\n" + "\tmovl %%eax,%%gs\n" + "\tmovl %%eax,%%ss\n" + ::: "eax", "memory"); #undef STR #undef __STR } diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 165f131..5390b52 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -70,9 +70,11 @@ */ //#define DEBUG /* pr_debug */ +#include <linux/capability.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/cpumask.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -165,7 +167,7 @@ static void collect_cpu_info (void *unused) wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ - serialize_cpu(); + sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", @@ -249,8 +251,8 @@ static int find_matching_ucodes (void) error = -EINVAL; goto out; } - - for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) { + + for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ continue; @@ -292,7 +294,7 @@ static int find_matching_ucodes (void) error = -EFAULT; goto out; } - for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) { + for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ continue; @@ -303,7 +305,9 @@ static int find_matching_ucodes (void) } } /* now check if any cpu has matched */ - for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) { + allocated_flag = 0; + sum = 0; + for_each_online_cpu(cpu_num) { if (ucode_cpu_info[cpu_num].err == MC_MARKED) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (!allocated_flag) { @@ -379,7 +383,7 @@ static void do_update_one (void * unused) wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ - serialize_cpu(); + sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); @@ -414,12 +418,12 @@ static int do_microcode_update (void) } out_free: - for (i = 0; i < num_online_cpus(); i++) { + for_each_online_cpu(i) { if (ucode_cpu_info[i].mc) { int j; void *tmp = ucode_cpu_info[i].mc; vfree(tmp); - for (j = i; j < num_online_cpus(); j++) { + for_each_online_cpu(j) { if (ucode_cpu_info[j].mc == tmp) ucode_cpu_info[j].mc = NULL; } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1ca5269..e6e2f43 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -38,6 +38,12 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; +#ifdef CONFIG_HOTPLUG_CPU +#define CPU_HOTPLUG_ENABLED (1) +#else +#define CPU_HOTPLUG_ENABLED (0) +#endif + /* * Various Linux-internal data structures created from the * MP-table. @@ -219,14 +225,18 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) cpu_set(num_processors, cpu_possible_map); num_processors++; - if ((num_processors > 8) && - ((APIC_XAPIC(ver) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) || - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))) - def_to_bigsmp = 1; - else - def_to_bigsmp = 0; - + if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(ver)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } @@ -700,7 +710,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); @@ -905,6 +915,7 @@ void __init mp_register_ioapic ( u32 gsi_base) { int idx = 0; + int tmpid; if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " @@ -925,9 +936,14 @@ void __init mp_register_ioapic ( set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + tmpid = io_apic_get_unique_id(idx, id); else - mp_ioapics[idx].mpc_apicid = id; + tmpid = id; + if (tmpid == -1) { + nr_ioapics--; + return; + } + mp_ioapics[idx].mpc_apicid = tmpid; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* @@ -1070,7 +1086,7 @@ void __init mp_config_acpi_legacy_irqs (void) #define MAX_GSI_NUM 4096 -int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) +int mp_register_gsi (u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; @@ -1119,7 +1135,7 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); - if (edge_level) { + if (triggering == ACPI_LEVEL_SENSITIVE) { /* * For PCI devices assign IRQs in order, avoiding gaps * due to unused I/O APIC pins. @@ -1141,8 +1157,8 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) } io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, - active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 44470fe..1d0a55e 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -172,7 +172,6 @@ static ssize_t msr_read(struct file *file, char __user * buf, { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); int err; @@ -180,7 +179,7 @@ static ssize_t msr_read(struct file *file, char __user * buf, if (count % 8) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 8) { + for (; count; count -= 8) { err = do_rdmsr(cpu, reg, &data[0], &data[1]); if (err) return err; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index d661703..be87c5e 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -138,7 +138,7 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - for (cpu = 0; cpu < NR_CPUS; cpu++) + for_each_cpu(cpu) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks @@ -357,7 +357,7 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } -static inline void write_watchdog_counter(const char *descr) +static void write_watchdog_counter(const char *descr) { u64 count = (u64)cpu_khz * 1000; @@ -544,7 +544,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * die_nmi will return ONLY if NOTIFY_STOP happens.. */ die_nmi(regs, "NMI Watchdog detected LOCKUP"); - + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 2333aea..0480454 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -48,6 +48,7 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/desc.h> +#include <asm/vm86.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif @@ -296,8 +297,10 @@ void show_regs(struct pt_regs * regs) if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s)\n", - regs->eflags, print_tainted(), system_utsname.release); + printk(" EFLAGS: %08lx %s (%s %.*s)\n", + regs->eflags, print_tainted(), system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", @@ -308,9 +311,7 @@ void show_regs(struct pt_regs * regs) cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); - if (current_cpu_data.x86 > 4) { - cr4 = read_cr4(); - } + cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); show_trace(NULL, ®s->esp); } @@ -404,17 +405,7 @@ void flush_thread(void) void release_thread(struct task_struct *dead_task) { - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - + BUG_ON(dead_task->mm); release_vm86_irqs(dead_task); } @@ -435,18 +426,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, struct task_struct *tsk; int err; - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - /* - * The below -8 is to reserve 8 bytes on top of the ring0 stack. - * This is necessary to guarantee that the entire "struct pt_regs" - * is accessable even if the CPU haven't stored the SS/ESP registers - * on the stack (interrupt gate does not save these registers - * when switching to the same priv ring). - * Therefore beware: accessing the xss/esp fields of the - * "struct pt_regs" is possible, but they may contain the - * completely wrong values. - */ - childregs = (struct pt_regs *) ((unsigned long) childregs - 8); + childregs = task_pt_regs(p); *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -551,12 +531,7 @@ EXPORT_SYMBOL(dump_thread); */ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { - struct pt_regs ptregs; - - ptregs = *(struct pt_regs *) - ((unsigned long)tsk->thread_info + - /* see comments in copy_thread() about -8 */ - THREAD_SIZE - sizeof(ptregs) - 8); + struct pt_regs ptregs = *task_pt_regs(tsk); ptregs.xcs &= 0xffff; ptregs.xds &= 0xffff; ptregs.xes &= 0xffff; @@ -612,8 +587,8 @@ static inline void disable_tsc(struct task_struct *prev_p, * gcc should eliminate the ->thread_info dereference if * has_secure_computing returns 0 at compile time (SECCOMP=n). */ - prev = prev_p->thread_info; - next = next_p->thread_info; + prev = task_thread_info(prev_p); + next = task_thread_info(next_p); if (has_secure_computing(prev) || has_secure_computing(next)) { /* slow path here */ @@ -798,7 +773,7 @@ unsigned long get_wchan(struct task_struct *p) int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack_page = (unsigned long)p->thread_info; + stack_page = (unsigned long)task_stack_page(p); esp = p->thread.esp; if (!stack_page || esp < stack_page || esp > top_esp+stack_page) return 0; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5ffbb4b..5c1fb6a 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -32,9 +32,12 @@ * in exit.c or in signal.c. */ -/* determines which flags the user has access to. */ -/* 1 = access 0 = no access */ -#define FLAG_MASK 0x00044dd5 +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Also masks reserved bits (31-22, 15, 5, 3, 1). + */ +#define FLAG_MASK 0x00054dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index aaf89cb..87ccdac 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -25,8 +25,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) /* enable access to config space*/ pci_read_config_byte(dev, 0xf4, &config); - config |= 0x2; - pci_write_config_byte(dev, 0xf4, config); + pci_write_config_byte(dev, 0xf4, config|0x2); /* read xTPR register */ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); @@ -42,9 +41,9 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) #endif } - config &= ~0x2; - /* disable access to config space*/ - pci_write_config_byte(dev, 0xf4, config); + /* put back the original value for config space*/ + if (!(config & 0x2)) + pci_write_config_byte(dev, 0xf4, config); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 2afe0f8..d207242 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/dmi.h> #include <linux/ctype.h> +#include <linux/pm.h> #include <asm/uaccess.h> #include <asm/apic.h> #include <asm/desc.h> @@ -111,12 +112,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, - { /* Handle problems with rebooting on HP nc6120 */ + { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, - .ident = "HP Compaq nc6120", + .ident = "HP Compaq Laptop", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6120"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, { } @@ -355,10 +356,10 @@ void machine_halt(void) void machine_power_off(void) { - machine_shutdown(); - - if (pm_power_off) + if (pm_power_off) { + machine_shutdown(); pm_power_off(); + } } diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 9c968ae..321f5fd 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -143,7 +143,7 @@ static int __init scx200_init(void) { printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); - return pci_module_init(&scx200_pci_driver); + return pci_register_driver(&scx200_pci_driver); } static void __exit scx200_cleanup(void) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index fdfcb0c..ab62a9f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -45,6 +45,7 @@ #include <linux/nodemask.h> #include <linux/kexec.h> #include <linux/crash_dump.h> +#include <linux/dmi.h> #include <video/edid.h> @@ -146,7 +147,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void dmi_scan_machine(void); extern void generic_apic_probe(char *); extern int root_mountflags; @@ -898,7 +898,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } #endif -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. */ @@ -954,6 +954,12 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) return 0; } +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, start, end); + return 0; +} /* * Find the highest page frame number we have available @@ -965,6 +971,7 @@ void __init find_max_pfn(void) max_pfn = 0; if (efi_enabled) { efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); return; } @@ -979,6 +986,7 @@ void __init find_max_pfn(void) continue; if (end > max_pfn) max_pfn = end; + memory_present(0, start, end); } } @@ -1576,7 +1584,7 @@ void __init setup_arch(char **cmdline_p) if (s) { extern void setup_early_printk(char *); - setup_early_printk(s); + setup_early_printk(strchr(s, '=') + 1); printk("early console enabled\n"); } } @@ -1591,6 +1599,10 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ +#endif + #ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index adcd069..963616d 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -37,51 +37,17 @@ asmlinkage int sys_sigsuspend(int history0, int history1, old_sigset_t mask) { - struct pt_regs * regs = (struct pt_regs *) &history0; - sigset_t saveset; - mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; + current->saved_sigmask = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_rt_sigsuspend(struct pt_regs regs) -{ - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (regs.ecx != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs.eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(®s, &saveset)) - return -EINTR; - } + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } asmlinkage int @@ -433,11 +399,11 @@ static int setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return 1; + return 0; give_sigsegv: force_sigsegv(sig, current); - return 0; + return -EFAULT; } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -527,11 +493,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return 1; + return 0; give_sigsegv: force_sigsegv(sig, current); - return 0; + return -EFAULT; } /* @@ -581,7 +547,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else ret = setup_frame(sig, ka, oldset, regs); - if (ret) { + if (ret == 0) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) @@ -598,11 +564,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) +static void fastcall do_signal(struct pt_regs *regs) { siginfo_t info; int signr; struct k_sigaction ka; + sigset_t *oldset; /* * We want the common case to go fast, which @@ -613,12 +580,14 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * CS suffices. */ if (!user_mode(regs)) - return 1; + return; if (try_to_freeze()) goto no_signal; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); @@ -628,38 +597,55 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * have been cleared if the watchpoint triggered * inside the kernel. */ - if (unlikely(current->thread.debugreg[7])) { + if (unlikely(current->thread.debugreg[7])) set_debugreg(current->thread.debugreg[7], 7); - } /* Whee! Actually deliver the signal. */ - return handle_signal(signr, &info, &ka, oldset, regs); + if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { + /* a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } + + return; } - no_signal: +no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ - if (regs->eax == -ERESTARTNOHAND || - regs->eax == -ERESTARTSYS || - regs->eax == -ERESTARTNOINTR) { + switch (regs->eax) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: regs->eax = regs->orig_eax; regs->eip -= 2; - } - if (regs->eax == -ERESTART_RESTARTBLOCK){ + break; + + case -ERESTART_RESTARTBLOCK: regs->eax = __NR_restart_syscall; regs->eip -= 2; + break; } } - return 0; + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } } /* * notification of userspace execution resumption - * - triggered by current->work.notify_resume + * - triggered by the TIF_WORK_MASK flags */ __attribute__((regparm(3))) -void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, +void do_notify_resume(struct pt_regs *regs, void *_unused, __u32 thread_info_flags) { /* Pending single-step? */ @@ -667,9 +653,10 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, regs->eflags |= TF_MASK; clear_thread_flag(TIF_SINGLESTEP); } + /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs,oldset); + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + do_signal(regs); clear_thread_flag(TIF_IRET); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9ed449a..7007e17 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -87,11 +87,7 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); -#ifdef CONFIG_HOTPLUG_CPU -cpumask_t cpu_possible_map = CPU_MASK_ALL; -#else cpumask_t cpu_possible_map; -#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; @@ -875,8 +871,7 @@ static inline struct task_struct * alloc_idle_task(int cpu) /* initialize thread_struct. we really want to avoid destroy * idle tread */ - idle->thread.esp = (unsigned long)(((struct pt_regs *) - (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); + idle->thread.esp = (unsigned long)task_pt_regs(idle); init_idle(idle, cpu); return idle; } @@ -1034,6 +1029,16 @@ int __devinit smp_prepare_cpu(int cpu) int apicid, ret; lock_cpu_hotplug(); + + /* + * On x86, CPU0 is never offlined. Trying to bring up an + * already-booted CPU will hang. So check for that case. + */ + if (cpu_online(cpu)) { + ret = -EINVAL; + goto exit; + } + apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { ret = -ENODEV; @@ -1090,6 +1095,7 @@ static void smp_tune_scheduling (void) cachesize = 16; /* Pentiums, 2x8kB cache */ bandwidth = 100; } + max_cache_size = cachesize * 1024; } } diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 9b21a31..ac687d0 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -1,4 +1,3 @@ -.data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit @@ -294,3 +293,20 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat /* 295 */ + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 /* 300 */ + .long sys_unlinkat + .long sys_renameat + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat /* 305 */ + .long sys_fchmodat + .long sys_faccessat + .long sys_pselect6 + .long sys_ppoll + .long sys_unshare /* 310 */ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 41c5b2d..9d30747 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -302,6 +302,12 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) do_timer_interrupt(irq, regs); write_sequnlock(&xtime_lock); + +#ifdef CONFIG_X86_LOCAL_APIC + if (using_apic_timer) + smp_send_timer_broadcast_ipi(regs); +#endif + return IRQ_HANDLED; } @@ -406,9 +412,9 @@ static int timer_resume(struct sys_device *dev) write_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = sec; xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; + jiffies_64 += sleep_length; wall_jiffies += sleep_length; + write_sequnlock_irqrestore(&xtime_lock, flags); if (last_timer->resume) last_timer->resume(); cur_timer = last_timer; diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 9caeaa3..a529f0c 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include <linux/mc146818rtc.h> #include <linux/rtc.h> -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index d395e3b..a7f5a2a 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -45,6 +45,15 @@ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ static unsigned long long monotonic_base; static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +/* Avoid compensating for lost ticks before TSCs are synched */ +static int detect_lost_ticks; +static int __init start_lost_tick_compensation(void) +{ + detect_lost_ticks = 1; + return 0; +} +late_initcall(start_lost_tick_compensation); + /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -196,7 +205,8 @@ static void mark_offset_tsc_hpet(void) /* lost tick compensation */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { + if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0)) + && detect_lost_ticks) { int lost_ticks = (offset - hpet_last) / hpet_tick; jiffies_64 += lost_ticks; } @@ -272,6 +282,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (val != CPUFREQ_RESUMECHANGE) write_seqlock_irq(&xtime_lock); if (!ref_freq) { + if (!freq->old){ + ref_freq = freq->new; + goto end; + } ref_freq = freq->old; loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; #ifndef CONFIG_SMP @@ -297,6 +311,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, #endif } +end: if (val != CPUFREQ_RESUMECHANGE) write_sequnlock_irq(&xtime_lock); @@ -330,7 +345,9 @@ int recalibrate_cpu_khz(void) unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { + local_irq_disable(); init_cpu_khz(); + local_irq_enable(); cpu_data[0].loops_per_jiffy = cpufreq_scale(cpu_data[0].loops_per_jiffy, cpu_khz_old, @@ -419,7 +436,7 @@ static void mark_offset_tsc(void) delta += delay_at_last_interrupt; lost = delta/(1000000/HZ); delay = delta%(1000000/HZ); - if (lost >= 2) { + if (lost >= 2 && detect_lost_ticks) { jiffies_64 += lost-1; /* sanity check to ensure we're not always losing ticks */ diff --git a/arch/i386/mach-default/topology.c b/arch/i386/kernel/topology.c index b643140..67a0e1b 100644 --- a/arch/i386/mach-default/topology.c +++ b/arch/i386/kernel/topology.c @@ -1,12 +1,12 @@ /* - * arch/i386/mach-generic/topology.c - Populate driverfs with topology information + * arch/i386/kernel/topology.c - Populate driverfs with topology information * * Written by: Matthew Dobson, IBM Corporation * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL * * Copyright (C) 2002, IBM Corp. * - * All rights reserved. + * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,7 +34,7 @@ static struct i386_cpu cpu_devices[NR_CPUS]; int arch_register_cpu(int num){ struct node *parent = NULL; - + #ifdef CONFIG_NUMA int node = cpu_to_node(num); if (node_online(node)) diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index ab0e943..b814dbd 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -112,33 +112,38 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } +static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +{ + printk(log_lvl); + printk(" [<%08lx>] ", addr); + print_symbol("%s", addr); + printk("\n"); +} + static inline unsigned long print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long ebp) + unsigned long *stack, unsigned long ebp, + char *log_lvl) { unsigned long addr; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - printk(" [<%08lx>] ", addr); - print_symbol("%s", addr); - printk("\n"); + print_addr_and_symbol(addr, log_lvl); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (__kernel_text_address(addr)) { - printk(" [<%08lx>]", addr); - print_symbol(" %s", addr); - printk("\n"); - } + if (__kernel_text_address(addr)) + print_addr_and_symbol(addr, log_lvl); } #endif return ebp; } -void show_trace(struct task_struct *task, unsigned long * stack) +static void show_trace_log_lvl(struct task_struct *task, + unsigned long *stack, char *log_lvl) { unsigned long ebp; @@ -157,15 +162,22 @@ void show_trace(struct task_struct *task, unsigned long * stack) struct thread_info *context; context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = print_context_stack(context, stack, ebp); + ebp = print_context_stack(context, stack, ebp, log_lvl); stack = (unsigned long*)context->previous_esp; if (!stack) break; + printk(log_lvl); printk(" =======================\n"); } } -void show_stack(struct task_struct *task, unsigned long *esp) +void show_trace(struct task_struct *task, unsigned long * stack) +{ + show_trace_log_lvl(task, stack, ""); +} + +static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, + char *log_lvl) { unsigned long *stack; int i; @@ -178,15 +190,26 @@ void show_stack(struct task_struct *task, unsigned long *esp) } stack = esp; + printk(log_lvl); for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) - printk("\n "); + if (i && ((i % 8) == 0)) { + printk("\n"); + printk(log_lvl); + printk(" "); + } printk("%08lx ", *stack++); } - printk("\nCall Trace:\n"); - show_trace(task, esp); + printk("\n"); + printk(log_lvl); + printk("Call Trace:\n"); + show_trace_log_lvl(task, esp, log_lvl); +} + +void show_stack(struct task_struct *task, unsigned long *esp) +{ + show_stack_log_lvl(task, esp, ""); } /* @@ -216,18 +239,20 @@ void show_registers(struct pt_regs *regs) ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx" - " (%s) \n", + printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" + "EFLAGS: %08lx (%s %.*s) \n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, system_utsname.release); - print_symbol("EIP is at %s\n", regs->eip); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + print_tainted(), regs->eflags, system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); + print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); + printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", + printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)", current->comm, current->pid, current_thread_info(), current); /* * When in-kernel, we also print out the stack and code at the @@ -236,10 +261,10 @@ void show_registers(struct pt_regs *regs) if (in_kernel) { u8 __user *eip; - printk("\nStack: "); - show_stack(NULL, (unsigned long*)esp); + printk("\n" KERN_EMERG "Stack: "); + show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); - printk("Code: "); + printk(KERN_EMERG "Code: "); eip = (u8 __user *)regs->eip - 43; for (i = 0; i < 64; i++, eip++) { @@ -280,15 +305,15 @@ static void handle_BUG(struct pt_regs *regs) (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) file = "<bad filename>"; - printk("------------[ cut here ]------------\n"); - printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); + printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); no_bug: return; /* Here we know it was a BUG but file-n-line is unavailable */ bug: - printk("Kernel BUG\n"); + printk(KERN_EMERG "Kernel BUG\n"); } /* This is gone through when something in the kernel @@ -306,28 +331,35 @@ void die(const char * str, struct pt_regs * regs, long err) .lock_owner_depth = 0 }; static int die_counter; + unsigned long flags; if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irq(&die.lock); + spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } + else + local_save_flags(flags); if (++die.lock_owner_depth < 3) { int nl = 0; handle_BUG(regs); - printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + printk(KERN_EMERG "PREEMPT "); nl = 1; #endif #ifdef CONFIG_SMP + if (!nl) + printk(KERN_EMERG); printk("SMP "); nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC + if (!nl) + printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); nl = 1; #endif @@ -336,11 +368,11 @@ void die(const char * str, struct pt_regs * regs, long err) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_registers(regs); } else - printk(KERN_ERR "Recursive die() failure, output suppressed\n"); + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); bust_spinlocks(0); die.lock_owner = -1; - spin_unlock_irq(&die.lock); + spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current)) crash_kexec(regs); @@ -524,8 +556,10 @@ gp_in_kernel: static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips\n"); + printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " + "to continue\n"); + printk(KERN_EMERG "You probably have a hardware problem with your RAM " + "chips\n"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -535,7 +569,7 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) { unsigned long i; - printk("NMI: IOCK error (debug interrupt?)\n"); + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); /* Re-enable the IOCK line, wait for a few seconds */ @@ -577,11 +611,11 @@ void die_nmi (struct pt_regs *regs, const char *msg) * to get a message out. */ bust_spinlocks(1); - printk(msg); + printk(KERN_EMERG "%s", msg); printk(" on CPU%d, eip %08lx, registers:\n", smp_processor_id(), regs->eip); show_registers(regs); - printk("console shuts up ...\n"); + printk(KERN_EMERG "console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); @@ -987,8 +1021,8 @@ asmlinkage void math_state_restore(struct pt_regs regs) asmlinkage void math_emulate(long arg) { - printk("math-emulation not enabled and no coprocessor found.\n"); - printk("killing %s.\n",current->comm); + printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n",current->comm); force_sig(SIGFPE,current); schedule(); } @@ -1075,9 +1109,9 @@ void __init trap_init(void) set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); - set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ + set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ set_system_gate(4,&overflow); - set_system_gate(5,&bounds); + set_trap_gate(5,&bounds); set_trap_gate(6,&invalid_op); set_trap_gate(7,&device_not_available); set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); @@ -1095,6 +1129,28 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); + if (cpu_has_fxsr) { + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generates a compile-time "error: zero width for bit-field" if + * the alignment is wrong. + */ + struct fxsrAlignAssert { + int _:!(offsetof(struct task_struct, + thread.i387.fxsave) & 15); + }; + + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO "Enabling unmasked SIMD FPU exception " + "support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } + set_system_gate(SYSCALL_VECTOR,&system_call); /* diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index fc19935..f51c894 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -4,7 +4,7 @@ * Copyright (C) 1994 Linus Torvalds * * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 - * stack - Manfred Spraul <manfreds@colorfullife.com> + * stack - Manfred Spraul <manfred@colorfullife.com> * * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle * them correctly. Now the emulation will be in a @@ -30,6 +30,7 @@ * */ +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/interrupt.h> @@ -310,7 +311,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk "movl %1,%%ebp\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (tsk->thread_info) : "ax"); + :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); /* we never return here */ } diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 4daefb2..76b7281 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -7,6 +7,21 @@ * for details. */ +/* + * The caller puts arg2 in %ecx, which gets pushed. The kernel will use + * %ecx itself for arg2. The pushing is because the sysexit instruction + * (found in entry.S) requires that we clobber %ecx with the desired %esp. + * User code might expect that %ecx is unclobbered though, as it would be + * for returning via the iret instruction, so we must push and pop. + * + * The caller puts arg3 in %edx, which the sysexit instruction requires + * for %eip. Thus, exactly as for arg2, we must push and pop. + * + * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter + * instruction clobbers %esp, the user's %esp won't even survive entry + * into the kernel. We store %esp in %ebp. Code in entry.S must fetch + * arg6 from the stack. + */ .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function diff --git a/arch/i386/mach-default/Makefile b/arch/i386/mach-default/Makefile index e95bb02..012fe34 100644 --- a/arch/i386/mach-default/Makefile +++ b/arch/i386/mach-default/Makefile @@ -2,4 +2,4 @@ # Makefile for the linux kernel. # -obj-y := setup.o topology.o +obj-y := setup.o diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c index aa49a33..b584060 100644 --- a/arch/i386/mach-voyager/voyager_basic.c +++ b/arch/i386/mach-voyager/voyager_basic.c @@ -23,6 +23,8 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/sysrq.h> +#include <linux/smp.h> +#include <linux/nodemask.h> #include <asm/io.h> #include <asm/voyager.h> #include <asm/vic.h> @@ -328,4 +330,3 @@ void machine_power_off(void) if (pm_power_off) pm_power_off(); } - diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 72a1b9c..8165626 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -240,7 +240,7 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_callout_map); -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_possible_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ @@ -402,6 +402,7 @@ find_smp_config(void) cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16; cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24; + cpu_possible_map = phys_cpu_present_map; printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]); /* Here we set up the VIC to enable SMP */ /* enable the CPIs by writing the base vector to their register */ diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 06e26f0..2700f01 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -268,7 +268,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -static void __devinit free_new_highpage(struct page *page) +static void __meminit free_new_highpage(struct page *page) { set_page_count(page, 1); __free_page(page); @@ -735,6 +735,30 @@ void free_initmem(void) printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10); } +#ifdef CONFIG_DEBUG_RODATA + +extern char __start_rodata, __end_rodata; +void mark_rodata_ro(void) +{ + unsigned long addr = (unsigned long)&__start_rodata; + + for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) + change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); + + printk ("Write protecting the kernel read-only data: %luk\n", + (unsigned long)(&__end_rodata - &__start_rodata) >> 10); + + /* + * change_page_attr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + + #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index f600fc2..d0cadb3 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -13,6 +13,7 @@ #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> +#include <asm/sections.h> static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); @@ -36,7 +37,8 @@ pte_t *lookup_address(unsigned long address) return pte_offset_kernel(pmd, address); } -static struct page *split_large_page(unsigned long address, pgprot_t prot) +static struct page *split_large_page(unsigned long address, pgprot_t prot, + pgprot_t ref_prot) { int i; unsigned long addr; @@ -54,7 +56,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot) pbase = (pte_t *)page_address(base); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : PAGE_KERNEL)); + addr == address ? prot : ref_prot)); } return base; } @@ -98,11 +100,18 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) */ static inline void revert_page(struct page *kpte_page, unsigned long address) { - pte_t *linear = (pte_t *) + pgprot_t ref_prot; + pte_t *linear; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; + + linear = (pte_t *) pmd_offset(pud_offset(pgd_offset_k(address), address), address); set_pmd_pte(linear, address, pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE)); + ref_prot)); } static int @@ -123,10 +132,16 @@ __change_page_attr(struct page *page, pgprot_t prot) if ((pte_val(*kpte) & _PAGE_PSE) == 0) { set_pte_atomic(kpte, mk_pte(page, prot)); } else { - struct page *split = split_large_page(address, prot); + pgprot_t ref_prot; + struct page *split; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_EXEC : PAGE_KERNEL; + split = split_large_page(address, prot, ref_prot); if (!split) return -ENOMEM; - set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL)); + set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); kpte_page = split; } get_page(kpte_page); @@ -207,6 +222,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) return; + if (!enable) + mutex_debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 21654be..c049ce4 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -20,7 +20,20 @@ struct frame_head { } __attribute__((packed)); static struct frame_head * -dump_backtrace(struct frame_head * head) +dump_kernel_backtrace(struct frame_head * head) +{ + oprofile_add_trace(head->ret); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= head->ebp) + return NULL; + + return head->ebp; +} + +static struct frame_head * +dump_user_backtrace(struct frame_head * head) { struct frame_head bufhead[2]; @@ -49,7 +62,9 @@ dump_backtrace(struct frame_head * head) * | stack | * --------------- saved regs->ebp value if valid (frame_head address) * . . - * --------------- struct pt_regs stored on stack (struct pt_regs *) + * --------------- saved regs->rsp value if x86_64 + * | | + * --------------- struct pt_regs * stored on stack if 32-bit * | | * . . * | | @@ -57,13 +72,26 @@ dump_backtrace(struct frame_head * head) * | | * | | \/ Lower addresses * - * Thus, &pt_regs <-> stack base restricts the valid(ish) ebp values + * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the + * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other + * exceptions use special stacks, maintained by the interrupt stack table + * (IST). These stacks are set up in trap_init() in + * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point + * to the kernel stack; instead, it points to some location on the NMI + * stack. On the other hand, regs->rsp is the stack pointer saved when the + * NMI occurred. (2) For 32-bit, regs->esp is not valid because the + * processor does not save %esp on the kernel stack when interrupts occur + * in the kernel mode. */ #ifdef CONFIG_FRAME_POINTER static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) { unsigned long headaddr = (unsigned long)head; +#ifdef CONFIG_X86_64 + unsigned long stack = (unsigned long)regs->rsp; +#else unsigned long stack = (unsigned long)regs; +#endif unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; return headaddr > stack && headaddr < stack_base; @@ -90,10 +118,10 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode_vm(regs)) { while (depth-- && valid_kernel_stack(head, regs)) - head = dump_backtrace(head); + head = dump_kernel_backtrace(head); return; } while (depth-- && head) - head = dump_backtrace(head); + head = dump_user_backtrace(head); } diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index 4c4522b..b33aea8 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -53,7 +53,7 @@ static int __init pci_acpi_init(void) * don't use pci_enable_device(). */ printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) + for_each_pci_dev(dev) acpi_pci_irq_enable(dev); } else printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n"); diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index eeb1b1f..83c3645 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -413,6 +413,13 @@ static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), }, }, + { + .ident = "Toshiba A40 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), + }, + }, { } }; @@ -442,3 +449,19 @@ static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, pci_post_fixup_toshiba_ohci1394); + + +/* + * Prevent the BIOS trapping accesses to the Cyrix CS5530A video device + * configuration space. + */ +static void __devinit pci_early_fixup_cyrix_5530(struct pci_dev *dev) +{ + u8 r; + /* clear 'F4 Video Configuration Trap' bit */ + pci_read_config_byte(dev, 0x42, &r); + r &= 0xfd; + pci_write_config_byte(dev, 0x42, r); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 19e6f48..3ca59ca 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -78,7 +78,7 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) for (i=0; i < rt->size; i++) sum += addr[i]; if (!sum) { - DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt); + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); return rt; } return NULL; @@ -128,7 +128,7 @@ static void __init pirq_peer_trick(void) #ifdef DEBUG { int j; - DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); + DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); for(j=0; j<4; j++) DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); DBG("\n"); @@ -160,10 +160,10 @@ void eisa_set_level_irq(unsigned int irq) return; eisa_irq_mask |= (1 << irq); - printk("PCI: setting IRQ %u as level-triggered\n", irq); + printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); val = inb(port); if (!(val & mask)) { - DBG(" -> edge"); + DBG(KERN_DEBUG " -> edge"); outb(val | mask, port); } } @@ -539,6 +539,11 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH7_30: case PCI_DEVICE_ID_INTEL_ICH7_31: case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; @@ -677,11 +682,11 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: - printk("PCI: Using ALI IRQ Router\n"); - r->name = "ALI"; - r->get = pirq_ali_get; - r->set = pirq_ali_set; - return 1; + printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); + r->name = "ALI"; + r->get = pirq_ali_get; + r->set = pirq_ali_set; + return 1; } return 0; } @@ -749,12 +754,13 @@ static void __init pirq_find_router(struct irq_router *r) r->get = NULL; r->set = NULL; - DBG("PCI: Attempting to find IRQ router for %04x:%04x\n", + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); if (!pirq_router_dev) { - DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); + DBG(KERN_DEBUG "PCI: Interrupt router not found at " + "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); return; } @@ -799,7 +805,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) /* Find IRQ pin */ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) { - DBG(" -> no interrupt pin\n"); + DBG(KERN_DEBUG " -> no interrupt pin\n"); return 0; } pin = pin - 1; @@ -809,16 +815,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - DBG("IRQ for %s[%c]", pci_name(dev), 'A' + pin); + DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { - DBG(" -> not found in routing table\n"); + DBG(" -> not found in routing table\n" KERN_DEBUG); return 0; } pirq = info->irq[pin].link; mask = info->irq[pin].bitmap; if (!pirq) { - DBG(" -> not routed\n"); + DBG(" -> not routed\n" KERN_DEBUG); return 0; } DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); @@ -846,9 +852,12 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) * reported by the device if possible. */ newirq = dev->irq; - if (!((1 << newirq) & mask)) { + if (newirq && !((1 << newirq) & mask)) { if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, pci_name(dev)); + else printk("\n" KERN_WARNING + "PCI: IRQ %i for device %s doesn't match PIRQ mask " + "- try pci=usepirqmask\n" KERN_DEBUG, newirq, + pci_name(dev)); } if (!newirq && assign) { for (i = 0; i < 16; i++) { @@ -923,14 +932,14 @@ static void __init pcibios_fixup_irqs(void) struct pci_dev *dev = NULL; u8 pin; - DBG("PCI: IRQ fixup\n"); + DBG(KERN_DEBUG "PCI: IRQ fixup\n"); while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { /* * If the BIOS has set an out of range IRQ number, just ignore it. * Also keep track of which IRQ's are already in use. */ if (dev->irq >= 16) { - DBG("%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); + DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); dev->irq = 0; } /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ @@ -1039,7 +1048,7 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { static int __init pcibios_irq_init(void) { - DBG("PCI: IRQ init\n"); + DBG(KERN_DEBUG "PCI: IRQ init\n"); if (pcibios_enable_irq || raw_pci_ops == NULL) return 0; diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 4bb4d4b..0ee8a98 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -36,8 +36,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) while (1) { ++cfg_num; if (cfg_num >= pci_mmcfg_config_num) { - /* Not found - fallback to type 1 */ - return 0; + break; } cfg = &pci_mmcfg_config[cfg_num]; if (cfg->pci_segment_group_number != seg) @@ -46,6 +45,18 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) (cfg->end_bus_number >= bus)) return cfg->base_address; } + + /* Handle more broken MCFG tables on Asus etc. + They only contain a single entry for bus 0-0. Assume + this applies to all busses. */ + cfg = &pci_mmcfg_config[0]; + if (pci_mmcfg_config_num == 1 && + cfg->pci_segment_group_number == 0 && + (cfg->start_bus_number | cfg->end_bus_number) == 0) + return cfg->base_address; + + /* Fall back to type 0 */ + return 0; } static inline void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) |