diff options
Diffstat (limited to 'arch')
72 files changed, 2175 insertions, 979 deletions
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index f1b9d2a..1ae2aee 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -7,6 +7,7 @@ #include <linux/pci.h> #include <asm/pci-direct.h> #include <asm/acpi.h> +#include <asm/apic.h> static int __init check_bridge(int vendor, int device) { @@ -15,6 +16,15 @@ static int __init check_bridge(int vendor, int device) if (vendor == PCI_VENDOR_ID_NVIDIA) { acpi_skip_timer_override = 1; } +#ifdef CONFIG_X86_LOCAL_APIC + /* + * ATI IXP chipsets get double timer interrupts. + * For now just do this for all ATI chipsets. + * FIXME: this needs to be checked for the non ACPI case too. + */ + if (vendor == PCI_VENDOR_ID_ATI) + disable_timer_pin_1 = 1; +#endif return 0; } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3aad038..9e24f7b 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -319,7 +319,7 @@ work_notifysig: # deal with pending signals and # vm86-space xorl %edx, %edx call do_notify_resume - jmp restore_all + jmp resume_userspace ALIGN work_notifysig_v86: @@ -329,7 +329,7 @@ work_notifysig_v86: movl %eax, %esp xorl %edx, %edx call do_notify_resume - jmp restore_all + jmp resume_userspace # perform syscall exit tracing ALIGN diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 35d3ce2..378313b 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -60,6 +60,8 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; +int disable_timer_pin_1 __initdata; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -2211,6 +2213,8 @@ static inline void check_timer(void) setup_nmi(); enable_8259A_irq(0); } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); return; } clear_IO_APIC_pin(0, pin1); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f3d8084..dc39ca6 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -851,6 +851,11 @@ static void __init parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_X86_LOCAL_APIC + if (!memcmp(from, "disable_timer_pin_1", 19)) + disable_timer_pin_1 = 1; + if (!memcmp(from, "enable_timer_pin_1", 18)) + disable_timer_pin_1 = -1; + /* disable IO-APIC */ else if (!memcmp(from, "noapic", 6)) disable_ioapic_setup(); diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 7b3b27d..516bf56 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -213,12 +213,18 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } +static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ + +int pxm_to_node(int pxm) +{ + return pxm_to_nid_map[pxm]; +} + /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index 42913f4..2941674 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -3,16 +3,31 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/hw_irq.h> +#include <asm/numa.h> #include "pci.h" struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) { + struct pci_bus *bus; + if (domain != 0) { printk(KERN_WARNING "PCI: Multiple domains not supported\n"); return NULL; } - return pcibios_scan_root(busnum); + bus = pcibios_scan_root(busnum); +#ifdef CONFIG_ACPI_NUMA + if (bus != NULL) { + int pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) { + bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm); + printk("bus %d -> pxm %d -> node %ld\n", + busnum, pxm, (long)(bus->sysdata)); + } + } +#endif + + return bus; } extern int pci_routeirq; diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 60f0e7a..dfbf80c 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -127,13 +127,6 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) goto out; - /* Kludge for now. Don't use mmconfig on AMD systems because - those have some busses where mmconfig doesn't work, - and we don't parse ACPI MCFG well enough to handle that. - Remove when proper handling is added. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - goto out; - printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index e29a8a5..3fa67ec 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -2327,7 +2327,7 @@ sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count) ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/arch/m68knommu/platform/68360/head-ram.S b/arch/m68knommu/platform/68360/head-ram.S new file mode 100644 index 0000000..a5c639a --- /dev/null +++ b/arch/m68knommu/platform/68360/head-ram.S @@ -0,0 +1,408 @@ +/* arch/m68knommu/platform/68360/head-ram.S + * + * Startup code for Motorola 68360 + * + * Copyright 2001 (C) SED Systems, a Division of Calian Ltd. + * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S + * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7 + * uClinux Kernel + * Copyright (C) Michael Leslie <mleslie@lineo.com> + * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S + * Copyright (C) 1998 D. Jeff Dionne <jeff@uclinux.org>, + * + */ +#define ASSEMBLY +#include <linux/config.h> + +.global _stext +.global _start + +.global _rambase +.global __ramvec +.global _ramvec +.global _ramstart +.global _ramend + +.global _quicc_base +.global _periph_base + +#define REGB 0x1000 +#define PEPAR (_dprbase + REGB + 0x0016) +#define GMR (_dprbase + REGB + 0x0040) +#define OR0 (_dprbase + REGB + 0x0054) +#define BR0 (_dprbase + REGB + 0x0050) +#define OR1 (_dprbase + REGB + 0x0064) +#define BR1 (_dprbase + REGB + 0x0060) +#define OR4 (_dprbase + REGB + 0x0094) +#define BR4 (_dprbase + REGB + 0x0090) +#define OR6 (_dprbase + REGB + 0x00b4) +#define BR6 (_dprbase + REGB + 0x00b0) +#define OR7 (_dprbase + REGB + 0x00c4) +#define BR7 (_dprbase + REGB + 0x00c0) + +#define MCR (_dprbase + REGB + 0x0000) +#define AVR (_dprbase + REGB + 0x0008) + +#define SYPCR (_dprbase + REGB + 0x0022) + +#define PLLCR (_dprbase + REGB + 0x0010) +#define CLKOCR (_dprbase + REGB + 0x000C) +#define CDVCR (_dprbase + REGB + 0x0014) + +#define BKAR (_dprbase + REGB + 0x0030) +#define BKCR (_dprbase + REGB + 0x0034) +#define SWIV (_dprbase + REGB + 0x0023) +#define PICR (_dprbase + REGB + 0x0026) +#define PITR (_dprbase + REGB + 0x002A) + +/* Define for all memory configuration */ +#define MCU_SIM_GMR 0x00000000 +#define SIM_OR_MASK 0x0fffffff + +/* Defines for chip select zero - the flash */ +#define SIM_OR0_MASK 0x20000002 +#define SIM_BR0_MASK 0x00000001 + + +/* Defines for chip select one - the RAM */ +#define SIM_OR1_MASK 0x10000000 +#define SIM_BR1_MASK 0x00000001 + +#define MCU_SIM_MBAR_ADRS 0x0003ff00 +#define MCU_SIM_MBAR_BA_MASK 0xfffff000 +#define MCU_SIM_MBAR_AS_MASK 0x00000001 + +#define MCU_SIM_PEPAR 0x00B4 + +#define MCU_DISABLE_INTRPTS 0x2700 +#define MCU_SIM_AVR 0x00 + +#define MCU_SIM_MCR 0x00005cff + +#define MCU_SIM_CLKOCR 0x00 +#define MCU_SIM_PLLCR 0x8000 +#define MCU_SIM_CDVCR 0x0000 + +#define MCU_SIM_SYPCR 0x0000 +#define MCU_SIM_SWIV 0x00 +#define MCU_SIM_PICR 0x0000 +#define MCU_SIM_PITR 0x0000 + + +#include <asm/m68360_regs.h> + + +/* + * By the time this RAM specific code begins to execute, DPRAM + * and DRAM should already be mapped and accessible. + */ + + .text +_start: +_stext: + nop + ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */ + /* We should not need to setup the boot stack the reset should do it. */ + movea.l #__ramend, %sp /*set up stack at the end of DRAM:*/ + +set_mbar_register: + moveq.l #0x07, %d1 /* Setup MBAR */ + movec %d1, %dfc + + lea.l MCU_SIM_MBAR_ADRS, %a0 + move.l #_dprbase, %d0 + andi.l #MCU_SIM_MBAR_BA_MASK, %d0 + ori.l #MCU_SIM_MBAR_AS_MASK, %d0 + moves.l %d0, %a0@ + + moveq.l #0x05, %d1 + movec.l %d1, %dfc + + /* Now we can begin to access registers in DPRAM */ + +set_sim_mcr: + /* Set Module Configuration Register */ + move.l #MCU_SIM_MCR, MCR + + /* to do: Determine cause of reset */ + + /* + * configure system clock MC68360 p. 6-40 + * (value +1)*osc/128 = system clock + */ +set_sim_clock: + move.w #MCU_SIM_PLLCR, PLLCR + move.b #MCU_SIM_CLKOCR, CLKOCR + move.w #MCU_SIM_CDVCR, CDVCR + + /* Wait for the PLL to settle */ + move.w #16384, %d0 +pll_settle_wait: + subi.w #1, %d0 + bne pll_settle_wait + + /* Setup the system protection register, and watchdog timer register */ + move.b #MCU_SIM_SWIV, SWIV + move.w #MCU_SIM_PICR, PICR + move.w #MCU_SIM_PITR, PITR + move.w #MCU_SIM_SYPCR, SYPCR + + /* Clear DPRAM - system + parameter */ + movea.l #_dprbase, %a0 + movea.l #_dprbase+0x2000, %a1 + + /* Copy 0 to %a0 until %a0 == %a1 */ +clear_dpram: + movel #0, %a0@+ + cmpal %a0, %a1 + bhi clear_dpram + +configure_memory_controller: + /* Set up Global Memory Register (GMR) */ + move.l #MCU_SIM_GMR, %d0 + move.l %d0, GMR + +configure_chip_select_0: + move.l #__ramend, %d0 + subi.l #__ramstart, %d0 + subq.l #0x01, %d0 + eori.l #SIM_OR_MASK, %d0 + ori.l #SIM_OR0_MASK, %d0 + move.l %d0, OR0 + + move.l #__ramstart, %d0 + ori.l #SIM_BR0_MASK, %d0 + move.l %d0, BR0 + +configure_chip_select_1: + move.l #__rom_end, %d0 + subi.l #__rom_start, %d0 + subq.l #0x01, %d0 + eori.l #SIM_OR_MASK, %d0 + ori.l #SIM_OR1_MASK, %d0 + move.l %d0, OR1 + + move.l #__rom_start, %d0 + ori.l #SIM_BR1_MASK, %d0 + move.l %d0, BR1 + + move.w #MCU_SIM_PEPAR, PEPAR + + /* point to vector table: */ + move.l #_romvec, %a0 + move.l #_ramvec, %a1 +copy_vectors: + move.l %a0@, %d0 + move.l %d0, %a1@ + move.l %a0@, %a1@ + addq.l #0x04, %a0 + addq.l #0x04, %a1 + cmp.l #_start, %a0 + blt copy_vectors + + move.l #_ramvec, %a1 + movec %a1, %vbr + + + /* Copy data segment from ROM to RAM */ + moveal #_stext, %a0 + moveal #_sdata, %a1 + moveal #_edata, %a2 + + /* Copy %a0 to %a1 until %a1 == %a2 */ +LD1: + move.l %a0@, %d0 + addq.l #0x04, %a0 + move.l %d0, %a1@ + addq.l #0x04, %a1 + cmp.l #_edata, %a1 + blt LD1 + + moveal #_sbss, %a0 + moveal #_ebss, %a1 + + /* Copy 0 to %a0 until %a0 == %a1 */ +L1: + movel #0, %a0@+ + cmpal %a0, %a1 + bhi L1 + +load_quicc: + move.l #_dprbase, _quicc_base + +store_ram_size: + /* Set ram size information */ + move.l #_sdata, _rambase + move.l #_ebss, _ramstart + move.l #__ramend, %d0 + sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/ + move.l %d0, _ramend /* Different from __ramend.*/ + +store_flash_size: + /* Set rom size information */ + move.l #__rom_end, %d0 + sub.l #__rom_start, %d0 + move.l %d0, rom_length + + pea 0 + pea env + pea %sp@(4) + pea 0 + + lea init_thread_union, %a2 + lea 0x2000(%a2), %sp + +lp: + jsr start_kernel + +_exit: + jmp _exit + + + .data + .align 4 +env: + .long 0 +_quicc_base: + .long 0 +_periph_base: + .long 0 +_ramvec: + .long 0 +_rambase: + .long 0 +_ramstart: + .long 0 +_ramend: + .long 0 +_dprbase: + .long 0xffffe000 + + .text + + /* + * These are the exception vectors at boot up, they are copied into RAM + * and then overwritten as needed. + */ + +.section ".data.initvect","awx" + .long __ramend /* Reset: Initial Stack Pointer - 0. */ + .long _start /* Reset: Initial Program Counter - 1. */ + .long buserr /* Bus Error - 2. */ + .long trap /* Address Error - 3. */ + .long trap /* Illegal Instruction - 4. */ + .long trap /* Divide by zero - 5. */ + .long trap /* CHK, CHK2 Instructions - 6. */ + .long trap /* TRAPcc, TRAPV Instructions - 7. */ + .long trap /* Privilege Violation - 8. */ + .long trap /* Trace - 9. */ + .long trap /* Line 1010 Emulator - 10. */ + .long trap /* Line 1111 Emualtor - 11. */ + .long trap /* Harware Breakpoint - 12. */ + .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */ + .long trap /* Format Error - 14. */ + .long trap /* Uninitialized Interrupt - 15. */ + .long trap /* (Unassigned, Reserver) - 16. */ + .long trap /* (Unassigned, Reserver) - 17. */ + .long trap /* (Unassigned, Reserver) - 18. */ + .long trap /* (Unassigned, Reserver) - 19. */ + .long trap /* (Unassigned, Reserver) - 20. */ + .long trap /* (Unassigned, Reserver) - 21. */ + .long trap /* (Unassigned, Reserver) - 22. */ + .long trap /* (Unassigned, Reserver) - 23. */ + .long trap /* Spurious Interrupt - 24. */ + .long trap /* Level 1 Interrupt Autovector - 25. */ + .long trap /* Level 2 Interrupt Autovector - 26. */ + .long trap /* Level 3 Interrupt Autovector - 27. */ + .long trap /* Level 4 Interrupt Autovector - 28. */ + .long trap /* Level 5 Interrupt Autovector - 29. */ + .long trap /* Level 6 Interrupt Autovector - 30. */ + .long trap /* Level 7 Interrupt Autovector - 31. */ + .long system_call /* Trap Instruction Vectors 0 - 32. */ + .long trap /* Trap Instruction Vectors 1 - 33. */ + .long trap /* Trap Instruction Vectors 2 - 34. */ + .long trap /* Trap Instruction Vectors 3 - 35. */ + .long trap /* Trap Instruction Vectors 4 - 36. */ + .long trap /* Trap Instruction Vectors 5 - 37. */ + .long trap /* Trap Instruction Vectors 6 - 38. */ + .long trap /* Trap Instruction Vectors 7 - 39. */ + .long trap /* Trap Instruction Vectors 8 - 40. */ + .long trap /* Trap Instruction Vectors 9 - 41. */ + .long trap /* Trap Instruction Vectors 10 - 42. */ + .long trap /* Trap Instruction Vectors 11 - 43. */ + .long trap /* Trap Instruction Vectors 12 - 44. */ + .long trap /* Trap Instruction Vectors 13 - 45. */ + .long trap /* Trap Instruction Vectors 14 - 46. */ + .long trap /* Trap Instruction Vectors 15 - 47. */ + .long 0 /* (Reserved for Coprocessor) - 48. */ + .long 0 /* (Reserved for Coprocessor) - 49. */ + .long 0 /* (Reserved for Coprocessor) - 50. */ + .long 0 /* (Reserved for Coprocessor) - 51. */ + .long 0 /* (Reserved for Coprocessor) - 52. */ + .long 0 /* (Reserved for Coprocessor) - 53. */ + .long 0 /* (Reserved for Coprocessor) - 54. */ + .long 0 /* (Reserved for Coprocessor) - 55. */ + .long 0 /* (Reserved for Coprocessor) - 56. */ + .long 0 /* (Reserved for Coprocessor) - 57. */ + .long 0 /* (Reserved for Coprocessor) - 58. */ + .long 0 /* (Unassigned, Reserved) - 59. */ + .long 0 /* (Unassigned, Reserved) - 60. */ + .long 0 /* (Unassigned, Reserved) - 61. */ + .long 0 /* (Unassigned, Reserved) - 62. */ + .long 0 /* (Unassigned, Reserved) - 63. */ + /* The assignment of these vectors to the CPM is */ + /* dependent on the configuration of the CPM vba */ + /* fields. */ + .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ + .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ + .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */ + .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */ + .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */ + .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */ + .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */ + .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */ + .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */ + .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */ + .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */ + .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */ + .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */ + .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */ + .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */ + .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. */ + .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */ + .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */ + .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. */ + .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */ + .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */ + .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */ + .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */ + .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */ + .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */ + .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */ + .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */ + .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */ + .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */ + .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */ + .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */ + .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */ + /* I don't think anything uses the vectors after here. */ + .long 0 /* (User-Defined Vectors 34) - 96. */ + .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */ + .long 0,0,0 /* (User-Defined Vectors 190 - 192). */ +.text +ignore: rte diff --git a/arch/m68knommu/platform/68360/head-rom.S b/arch/m68knommu/platform/68360/head-rom.S new file mode 100644 index 0000000..0da357a --- /dev/null +++ b/arch/m68knommu/platform/68360/head-rom.S @@ -0,0 +1,420 @@ +/* arch/m68knommu/platform/68360/head-rom.S + * + * Startup code for Motorola 68360 + * + * Copyright (C) SED Systems, a Division of Calian Ltd. + * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S + * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7 + * uClinux Kernel + * Copyright (C) Michael Leslie <mleslie@lineo.com> + * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S + * Copyright (C) 1998 D. Jeff Dionne <jeff@uclinux.org>, + * + */ +#include <linux/config.h> + +.global _stext +.global _sbss +.global _start + +.global _rambase +.global __ramvec +.global _ramvec +.global _ramstart +.global _ramend + +.global _quicc_base +.global _periph_base + +#define REGB 0x1000 +#define PEPAR (_dprbase + REGB + 0x0016) +#define GMR (_dprbase + REGB + 0x0040) +#define OR0 (_dprbase + REGB + 0x0054) +#define BR0 (_dprbase + REGB + 0x0050) + +#define OR1 (_dprbase + REGB + 0x0064) +#define BR1 (_dprbase + REGB + 0x0060) + +#define OR2 (_dprbase + REGB + 0x0074) +#define BR2 (_dprbase + REGB + 0x0070) + +#define OR3 (_dprbase + REGB + 0x0084) +#define BR3 (_dprbase + REGB + 0x0080) + +#define OR4 (_dprbase + REGB + 0x0094) +#define BR4 (_dprbase + REGB + 0x0090) + +#define OR5 (_dprbase + REGB + 0x00A4) +#define BR5 (_dprbase + REGB + 0x00A0) + +#define OR6 (_dprbase + REGB + 0x00b4) +#define BR6 (_dprbase + REGB + 0x00b0) + +#define OR7 (_dprbase + REGB + 0x00c4) +#define BR7 (_dprbase + REGB + 0x00c0) + +#define MCR (_dprbase + REGB + 0x0000) +#define AVR (_dprbase + REGB + 0x0008) + +#define SYPCR (_dprbase + REGB + 0x0022) + +#define PLLCR (_dprbase + REGB + 0x0010) +#define CLKOCR (_dprbase + REGB + 0x000C) +#define CDVCR (_dprbase + REGB + 0x0014) + +#define BKAR (_dprbase + REGB + 0x0030) +#define BKCR (_dprbase + REGB + 0x0034) +#define SWIV (_dprbase + REGB + 0x0023) +#define PICR (_dprbase + REGB + 0x0026) +#define PITR (_dprbase + REGB + 0x002A) + +/* Define for all memory configuration */ +#define MCU_SIM_GMR 0x00000000 +#define SIM_OR_MASK 0x0fffffff + +/* Defines for chip select zero - the flash */ +#define SIM_OR0_MASK 0x20000000 +#define SIM_BR0_MASK 0x00000001 + +/* Defines for chip select one - the RAM */ +#define SIM_OR1_MASK 0x10000000 +#define SIM_BR1_MASK 0x00000001 + +#define MCU_SIM_MBAR_ADRS 0x0003ff00 +#define MCU_SIM_MBAR_BA_MASK 0xfffff000 +#define MCU_SIM_MBAR_AS_MASK 0x00000001 + +#define MCU_SIM_PEPAR 0x00B4 + +#define MCU_DISABLE_INTRPTS 0x2700 +#define MCU_SIM_AVR 0x00 + +#define MCU_SIM_MCR 0x00005cff + +#define MCU_SIM_CLKOCR 0x00 +#define MCU_SIM_PLLCR 0x8000 +#define MCU_SIM_CDVCR 0x0000 + +#define MCU_SIM_SYPCR 0x0000 +#define MCU_SIM_SWIV 0x00 +#define MCU_SIM_PICR 0x0000 +#define MCU_SIM_PITR 0x0000 + + +#include <asm/m68360_regs.h> + + +/* + * By the time this RAM specific code begins to execute, DPRAM + * and DRAM should already be mapped and accessible. + */ + + .text +_start: +_stext: + nop + ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */ + /* We should not need to setup the boot stack the reset should do it. */ + movea.l #__ramend, %sp /* set up stack at the end of DRAM:*/ + + +set_mbar_register: + moveq.l #0x07, %d1 /* Setup MBAR */ + movec %d1, %dfc + + lea.l MCU_SIM_MBAR_ADRS, %a0 + move.l #_dprbase, %d0 + andi.l #MCU_SIM_MBAR_BA_MASK, %d0 + ori.l #MCU_SIM_MBAR_AS_MASK, %d0 + moves.l %d0, %a0@ + + moveq.l #0x05, %d1 + movec.l %d1, %dfc + + /* Now we can begin to access registers in DPRAM */ + +set_sim_mcr: + /* Set Module Configuration Register */ + move.l #MCU_SIM_MCR, MCR + + /* to do: Determine cause of reset */ + + /* + * configure system clock MC68360 p. 6-40 + * (value +1)*osc/128 = system clock + * or + * (value + 1)*osc = system clock + * You do not need to divide the oscillator by 128 unless you want to. + */ +set_sim_clock: + move.w #MCU_SIM_PLLCR, PLLCR + move.b #MCU_SIM_CLKOCR, CLKOCR + move.w #MCU_SIM_CDVCR, CDVCR + + /* Wait for the PLL to settle */ + move.w #16384, %d0 +pll_settle_wait: + subi.w #1, %d0 + bne pll_settle_wait + + /* Setup the system protection register, and watchdog timer register */ + move.b #MCU_SIM_SWIV, SWIV + move.w #MCU_SIM_PICR, PICR + move.w #MCU_SIM_PITR, PITR + move.w #MCU_SIM_SYPCR, SYPCR + + /* Clear DPRAM - system + parameter */ + movea.l #_dprbase, %a0 + movea.l #_dprbase+0x2000, %a1 + + /* Copy 0 to %a0 until %a0 == %a1 */ +clear_dpram: + movel #0, %a0@+ + cmpal %a0, %a1 + bhi clear_dpram + +configure_memory_controller: + /* Set up Global Memory Register (GMR) */ + move.l #MCU_SIM_GMR, %d0 + move.l %d0, GMR + +configure_chip_select_0: + move.l #0x00400000, %d0 + subq.l #0x01, %d0 + eori.l #SIM_OR_MASK, %d0 + ori.l #SIM_OR0_MASK, %d0 + move.l %d0, OR0 + + move.l #__rom_start, %d0 + ori.l #SIM_BR0_MASK, %d0 + move.l %d0, BR0 + + move.l #0x0, BR1 + move.l #0x0, BR2 + move.l #0x0, BR3 + move.l #0x0, BR4 + move.l #0x0, BR5 + move.l #0x0, BR6 + move.l #0x0, BR7 + + move.w #MCU_SIM_PEPAR, PEPAR + + /* point to vector table: */ + move.l #_romvec, %a0 + move.l #_ramvec, %a1 +copy_vectors: + move.l %a0@, %d0 + move.l %d0, %a1@ + move.l %a0@, %a1@ + addq.l #0x04, %a0 + addq.l #0x04, %a1 + cmp.l #_start, %a0 + blt copy_vectors + + move.l #_ramvec, %a1 + movec %a1, %vbr + + + /* Copy data segment from ROM to RAM */ + moveal #_etext, %a0 + moveal #_sdata, %a1 + moveal #_edata, %a2 + + /* Copy %a0 to %a1 until %a1 == %a2 */ +LD1: + move.l %a0@, %d0 + addq.l #0x04, %a0 + move.l %d0, %a1@ + addq.l #0x04, %a1 + cmp.l #_edata, %a1 + blt LD1 + + moveal #_sbss, %a0 + moveal #_ebss, %a1 + + /* Copy 0 to %a0 until %a0 == %a1 */ +L1: + movel #0, %a0@+ + cmpal %a0, %a1 + bhi L1 + +load_quicc: + move.l #_dprbase, _quicc_base + +store_ram_size: + /* Set ram size information */ + move.l #_sdata, _rambase + move.l #_ebss, _ramstart + move.l #__ramend, %d0 + sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/ + move.l %d0, _ramend /* Different from __ramend.*/ + +store_flash_size: + /* Set rom size information */ + move.l #__rom_end, %d0 + sub.l #__rom_start, %d0 + move.l %d0, rom_length + + pea 0 + pea env + pea %sp@(4) + pea 0 + + lea init_thread_union, %a2 + lea 0x2000(%a2), %sp + +lp: + jsr start_kernel + +_exit: + jmp _exit + + + .data + .align 4 +env: + .long 0 +_quicc_base: + .long 0 +_periph_base: + .long 0 +_ramvec: + .long 0 +_rambase: + .long 0 +_ramstart: + .long 0 +_ramend: + .long 0 +_dprbase: + .long 0xffffe000 + + + .text + + /* + * These are the exception vectors at boot up, they are copied into RAM + * and then overwritten as needed. + */ + +.section ".data.initvect","awx" + .long __ramend /* Reset: Initial Stack Pointer - 0. */ + .long _start /* Reset: Initial Program Counter - 1. */ + .long buserr /* Bus Error - 2. */ + .long trap /* Address Error - 3. */ + .long trap /* Illegal Instruction - 4. */ + .long trap /* Divide by zero - 5. */ + .long trap /* CHK, CHK2 Instructions - 6. */ + .long trap /* TRAPcc, TRAPV Instructions - 7. */ + .long trap /* Privilege Violation - 8. */ + .long trap /* Trace - 9. */ + .long trap /* Line 1010 Emulator - 10. */ + .long trap /* Line 1111 Emualtor - 11. */ + .long trap /* Harware Breakpoint - 12. */ + .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */ + .long trap /* Format Error - 14. */ + .long trap /* Uninitialized Interrupt - 15. */ + .long trap /* (Unassigned, Reserver) - 16. */ + .long trap /* (Unassigned, Reserver) - 17. */ + .long trap /* (Unassigned, Reserver) - 18. */ + .long trap /* (Unassigned, Reserver) - 19. */ + .long trap /* (Unassigned, Reserver) - 20. */ + .long trap /* (Unassigned, Reserver) - 21. */ + .long trap /* (Unassigned, Reserver) - 22. */ + .long trap /* (Unassigned, Reserver) - 23. */ + .long trap /* Spurious Interrupt - 24. */ + .long trap /* Level 1 Interrupt Autovector - 25. */ + .long trap /* Level 2 Interrupt Autovector - 26. */ + .long trap /* Level 3 Interrupt Autovector - 27. */ + .long trap /* Level 4 Interrupt Autovector - 28. */ + .long trap /* Level 5 Interrupt Autovector - 29. */ + .long trap /* Level 6 Interrupt Autovector - 30. */ + .long trap /* Level 7 Interrupt Autovector - 31. */ + .long system_call /* Trap Instruction Vectors 0 - 32. */ + .long trap /* Trap Instruction Vectors 1 - 33. */ + .long trap /* Trap Instruction Vectors 2 - 34. */ + .long trap /* Trap Instruction Vectors 3 - 35. */ + .long trap /* Trap Instruction Vectors 4 - 36. */ + .long trap /* Trap Instruction Vectors 5 - 37. */ + .long trap /* Trap Instruction Vectors 6 - 38. */ + .long trap /* Trap Instruction Vectors 7 - 39. */ + .long trap /* Trap Instruction Vectors 8 - 40. */ + .long trap /* Trap Instruction Vectors 9 - 41. */ + .long trap /* Trap Instruction Vectors 10 - 42. */ + .long trap /* Trap Instruction Vectors 11 - 43. */ + .long trap /* Trap Instruction Vectors 12 - 44. */ + .long trap /* Trap Instruction Vectors 13 - 45. */ + .long trap /* Trap Instruction Vectors 14 - 46. */ + .long trap /* Trap Instruction Vectors 15 - 47. */ + .long 0 /* (Reserved for Coprocessor) - 48. */ + .long 0 /* (Reserved for Coprocessor) - 49. */ + .long 0 /* (Reserved for Coprocessor) - 50. */ + .long 0 /* (Reserved for Coprocessor) - 51. */ + .long 0 /* (Reserved for Coprocessor) - 52. */ + .long 0 /* (Reserved for Coprocessor) - 53. */ + .long 0 /* (Reserved for Coprocessor) - 54. */ + .long 0 /* (Reserved for Coprocessor) - 55. */ + .long 0 /* (Reserved for Coprocessor) - 56. */ + .long 0 /* (Reserved for Coprocessor) - 57. */ + .long 0 /* (Reserved for Coprocessor) - 58. */ + .long 0 /* (Unassigned, Reserved) - 59. */ + .long 0 /* (Unassigned, Reserved) - 60. */ + .long 0 /* (Unassigned, Reserved) - 61. */ + .long 0 /* (Unassigned, Reserved) - 62. */ + .long 0 /* (Unassigned, Reserved) - 63. */ + /* The assignment of these vectors to the CPM is */ + /* dependent on the configuration of the CPM vba */ + /* fields. */ + .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ + .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ + .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */ + .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */ + .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */ + .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */ + .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */ + .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */ + .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */ + .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */ + .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */ + .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */ + .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */ + .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */ + .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */ + .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. */ + .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */ + .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */ + .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. */ + .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */ + .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */ + .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */ + .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */ + .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */ + .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */ + .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */ + .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */ + .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */ + .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */ + .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */ + .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */ + .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */ + /* I don't think anything uses the vectors after here. */ + .long 0 /* (User-Defined Vectors 34) - 96. */ + .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */ + .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */ + .long 0,0,0 /* (User-Defined Vectors 190 - 192). */ +.text +ignore: rte diff --git a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c index 356e4fd..fbc273c 100644 --- a/arch/ppc64/kernel/iSeries_pci.c +++ b/arch/ppc64/kernel/iSeries_pci.c @@ -252,7 +252,7 @@ unsigned long __init find_and_init_phbs(void) phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return -ENOMEM; - pci_setup_pci_controller(phb); + pci_setup_pci_controller(phb); phb->pci_mem_offset = phb->local_number = bus; phb->first_busno = bus; diff --git a/arch/ppc64/kernel/maple_pci.c b/arch/ppc64/kernel/maple_pci.c index 5a8b4d8..1d297e0 100644 --- a/arch/ppc64/kernel/maple_pci.c +++ b/arch/ppc64/kernel/maple_pci.c @@ -283,7 +283,7 @@ static void __init setup_u3_agp(struct pci_controller* hose) * the reg address cell, we shall fix that by killing struct * reg_property and using some accessor functions instead */ - hose->first_busno = 0xf0; + hose->first_busno = 0xf0; hose->last_busno = 0xff; hose->ops = &u3_agp_pci_ops; hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); @@ -315,24 +315,24 @@ static int __init add_bridge(struct device_node *dev) char* disp_name; int *bus_range; int primary = 1; - struct property *of_prop; + struct property *of_prop; DBG("Adding PCI host bridge %s\n", dev->full_name); - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", - dev->full_name); - } + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } hose = alloc_bootmem(sizeof(struct pci_controller)); if (hose == NULL) return -ENOMEM; - pci_setup_pci_controller(hose); + pci_setup_pci_controller(hose); - hose->arch_data = dev; - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; of_prop = alloc_bootmem(sizeof(struct property) + sizeof(hose->global_number)); @@ -346,25 +346,25 @@ static int __init add_bridge(struct device_node *dev) } disp_name = NULL; - if (device_is_compatible(dev, "u3-agp")) { - setup_u3_agp(hose); - disp_name = "U3-AGP"; - primary = 0; - } else if (device_is_compatible(dev, "u3-ht")) { - setup_u3_ht(hose); - disp_name = "U3-HT"; - primary = 1; - } - printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev); + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev); pci_setup_phb_io(hose, primary); - /* Fixup "bus-range" OF property */ - fixup_bus_range(dev); + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); return 0; } diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 9490b6c..bfadccc 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -590,6 +590,13 @@ static int pseries_shared_idle(void) return 0; } +static int pSeries_pci_probe_mode(struct pci_bus *bus) +{ + if (systemcfg->platform & PLATFORM_LPAR) + return PCI_PROBE_DEVTREE; + return PCI_PROBE_NORMAL; +} + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, @@ -597,6 +604,7 @@ struct machdep_calls __initdata pSeries_md = { .get_cpuinfo = pSeries_get_cpuinfo, .log_error = pSeries_log_error, .pcibios_fixup = pSeries_final_fixup, + .pci_probe_mode = pSeries_pci_probe_mode, .irq_bus_setup = pSeries_irq_bus_setup, .restart = rtas_restart, .power_off = rtas_power_off, diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 79c7f32..d2c7e2c 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -272,6 +272,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) unsigned long start_here = __pa((u32)*((unsigned long *) pSeries_secondary_smp_init)); unsigned int pcpu; + int start_cpu; if (cpu_isset(lcpu, of_spin_map)) /* Already started by OF and sitting in spin loop */ @@ -282,12 +283,20 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ paca[lcpu].__current->thread_info->preempt_count = 0; - status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL, - pcpu, start_here, lcpu); + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_token("start-cpu"); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); if (status != 0) { printk(KERN_ERR "start-cpu failed: %i\n", status); return 0; } + return 1; } diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 8447dcc..861138a 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -51,6 +51,10 @@ unsigned long io_page_mask; EXPORT_SYMBOL(io_page_mask); +#ifdef CONFIG_PPC_MULTIPLATFORM +static void fixup_resource(struct resource *res, struct pci_dev *dev); +static void do_bus_setup(struct pci_bus *bus); +#endif unsigned int pcibios_assign_all_busses(void) { @@ -225,10 +229,287 @@ static void __init pcibios_claim_of_setup(void) } #endif +#ifdef CONFIG_PPC_MULTIPLATFORM +static u32 get_int_prop(struct device_node *np, const char *name, u32 def) +{ + u32 *prop; + int len; + + prop = (u32 *) get_property(np, name, &len); + if (prop && len >= 4) + return *prop; + return def; +} + +static unsigned int pci_parse_of_flags(u32 addr0) +{ + unsigned int flags = 0; + + if (addr0 & 0x02000000) { + flags |= IORESOURCE_MEM; + if (addr0 & 0x40000000) + flags |= IORESOURCE_PREFETCH; + } else if (addr0 & 0x01000000) + flags |= IORESOURCE_IO; + return flags; +} + +#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1]) + +static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) +{ + u64 base, size; + unsigned int flags; + struct resource *res; + u32 *addrs, i; + int proplen; + + addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); + if (!addrs) + return; + for (; proplen >= 20; proplen -= 20, addrs += 5) { + flags = pci_parse_of_flags(addrs[0]); + if (!flags) + continue; + base = GET_64BIT(addrs, 1); + size = GET_64BIT(addrs, 3); + if (!size) + continue; + i = addrs[0] & 0xff; + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { + res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; + } else if (i == dev->rom_base_reg) { + res = &dev->resource[PCI_ROM_RESOURCE]; + flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + } else { + printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); + continue; + } + res->start = base; + res->end = base + size - 1; + res->flags = flags; + res->name = pci_name(dev); + fixup_resource(res, dev); + } +} + +static struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn) +{ + struct pci_dev *dev; + const char *type; + + dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL); + if (!dev) + return NULL; + type = get_property(node, "device_type", NULL); + if (type == NULL) + type = ""; + + memset(dev, 0, sizeof(struct pci_dev)); + dev->bus = bus; + dev->sysdata = node; + dev->dev.parent = bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->devfn = devfn; + dev->multifunction = 0; /* maybe a lie? */ + + dev->vendor = get_int_prop(node, "vendor-id", 0xffff); + dev->device = get_int_prop(node, "device-id", 0xffff); + dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); + dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); + + dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/ + + sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), + dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dev->class = get_int_prop(node, "class-code", 0); + + dev->current_state = 4; /* unknown power state */ + + if (!strcmp(type, "pci")) { + /* a PCI-PCI bridge */ + dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; + dev->rom_base_reg = PCI_ROM_ADDRESS1; + } else if (!strcmp(type, "cardbus")) { + dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; + } else { + dev->hdr_type = PCI_HEADER_TYPE_NORMAL; + dev->rom_base_reg = PCI_ROM_ADDRESS; + dev->irq = NO_IRQ; + if (node->n_intrs > 0) { + dev->irq = node->intrs[0].line; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } + } + + pci_parse_of_addrs(node, dev); + + pci_device_add(dev, bus); + + /* XXX pci_scan_msi_device(dev); */ + + return dev; +} + +static void of_scan_pci_bridge(struct device_node *node, struct pci_dev *dev); + +static void __devinit of_scan_bus(struct device_node *node, + struct pci_bus *bus) +{ + struct device_node *child = NULL; + u32 *reg; + int reglen, devfn; + struct pci_dev *dev; + + while ((child = of_get_next_child(node, child)) != NULL) { + reg = (u32 *) get_property(child, "reg", ®len); + if (reg == NULL || reglen < 20) + continue; + devfn = (reg[0] >> 8) & 0xff; + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(child, bus, devfn); + if (!dev) + continue; + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + of_scan_pci_bridge(child, dev); + } + + do_bus_setup(bus); +} + +static void __devinit of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev) +{ + struct pci_bus *bus; + u32 *busrange, *ranges; + int len, i, mode; + struct resource *res; + unsigned int flags; + u64 size; + + /* parse bus-range property */ + busrange = (u32 *) get_property(node, "bus-range", &len); + if (busrange == NULL || len != 8) { + printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n", + node->full_name); + return; + } + ranges = (u32 *) get_property(node, "ranges", &len); + if (ranges == NULL) { + printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n", + node->full_name); + return; + } + + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } + + bus->primary = dev->bus->number; + bus->subordinate = busrange[1]; + bus->bridge_ctl = 0; + bus->sysdata = node; + + /* parse ranges property */ + /* PCI #address-cells == 3 and #size-cells == 2 always */ + res = &dev->resource[PCI_BRIDGE_RESOURCES]; + for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { + res->flags = 0; + bus->resource[i] = res; + ++res; + } + i = 1; + for (; len >= 32; len -= 32, ranges += 8) { + flags = pci_parse_of_flags(ranges[0]); + size = GET_64BIT(ranges, 6); + if (flags == 0 || size == 0) + continue; + if (flags & IORESOURCE_IO) { + res = bus->resource[0]; + if (res->flags) { + printk(KERN_ERR "PCI: ignoring extra I/O range" + " for bridge %s\n", node->full_name); + continue; + } + } else { + if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { + printk(KERN_ERR "PCI: too many memory ranges" + " for bridge %s\n", node->full_name); + continue; + } + res = bus->resource[i]; + ++i; + } + res->start = GET_64BIT(ranges, 1); + res->end = res->start + size - 1; + res->flags = flags; + fixup_resource(res, dev); + } + sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), + bus->number); + + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + if (mode == PCI_PROBE_DEVTREE) + of_scan_bus(node, bus); + else if (mode == PCI_PROBE_NORMAL) + pci_scan_child_bus(bus); +} +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +static void __devinit scan_phb(struct pci_controller *hose) +{ + struct pci_bus *bus; + struct device_node *node = hose->arch_data; + int i, mode; + struct resource *res; + + bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node); + if (bus == NULL) { + printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", + hose->global_number); + return; + } + bus->secondary = hose->first_busno; + hose->bus = bus; + + bus->resource[0] = res = &hose->io_resource; + if (res->flags && request_resource(&ioport_resource, res)) + printk(KERN_ERR "Failed to request PCI IO region " + "on PCI domain %04x\n", hose->global_number); + + for (i = 0; i < 3; ++i) { + res = &hose->mem_resources[i]; + bus->resource[i+1] = res; + if (res->flags && request_resource(&iomem_resource, res)) + printk(KERN_ERR "Failed to request PCI memory region " + "on PCI domain %04x\n", hose->global_number); + } + + mode = PCI_PROBE_NORMAL; +#ifdef CONFIG_PPC_MULTIPLATFORM + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + if (mode == PCI_PROBE_DEVTREE) { + bus->subordinate = hose->last_busno; + of_scan_bus(node, bus); + } +#endif /* CONFIG_PPC_MULTIPLATFORM */ + if (mode == PCI_PROBE_NORMAL) + hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + pci_bus_add_devices(bus); +} + static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; - struct pci_bus *bus; /* For now, override phys_mem_access_prot. If we need it, * later, we may move that initialization to each ppc_md @@ -242,13 +523,8 @@ static int __init pcibios_init(void) printk("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - hose->last_busno = 0xff; - bus = pci_scan_bus(hose->first_busno, hose->ops, - hose->arch_data); - hose->bus = bus; - hose->last_busno = bus->subordinate; - } + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + scan_phb(hose); #ifndef CONFIG_PPC_ISERIES if (pci_probe_only) @@ -820,120 +1096,89 @@ void phbs_remap_io(void) /* * ppc64 can have multifunction devices that do not respond to function 0. * In this case we must scan all functions. + * XXX this can go now, we use the OF device tree in all the + * cases that caused problems. -- paulus */ int pcibios_scan_all_fns(struct pci_bus *bus, int devfn) { - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - if (busdn == NULL) - return 0; - - /* - * Check to see if there is any of the 8 functions are in the - * device tree. If they are then we need to scan all the - * functions of this slot. - */ - for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = dn->data; - if (pdn && (pdn->devfn >> 3) == (devfn >> 3)) - return 1; - } - return 0; } +static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long start, end, mask, offset; + + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + start = res->start += offset; + end = res->end += offset; + + /* Need to allow IO access to pages that are in the + ISA range */ + if (start < MAX_ISA_PORT) { + if (end > MAX_ISA_PORT) + end = MAX_ISA_PORT; + + start >>= PAGE_SHIFT; + end >>= PAGE_SHIFT; + + /* get the range of pages for the map */ + mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); + io_page_mask |= mask; + } + } else if (res->flags & IORESOURCE_MEM) { + res->start += hose->pci_mem_offset; + res->end += hose->pci_mem_offset; + } +} void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, - struct pci_bus *bus) + struct pci_bus *bus) { /* Update device resources. */ - struct pci_controller *hose = pci_bus_to_host(bus); int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - if (dev->resource[i].flags & IORESOURCE_IO) { - unsigned long offset = (unsigned long)hose->io_base_virt - - pci_io_base; - unsigned long start, end, mask; - - start = dev->resource[i].start += offset; - end = dev->resource[i].end += offset; - - /* Need to allow IO access to pages that are in the - ISA range */ - if (start < MAX_ISA_PORT) { - if (end > MAX_ISA_PORT) - end = MAX_ISA_PORT; - - start >>= PAGE_SHIFT; - end >>= PAGE_SHIFT; - - /* get the range of pages for the map */ - mask = ((1 << (end+1))-1) ^ ((1 << start)-1); - io_page_mask |= mask; - } - } - else if (dev->resource[i].flags & IORESOURCE_MEM) { - dev->resource[i].start += hose->pci_mem_offset; - dev->resource[i].end += hose->pci_mem_offset; - } - } + for (i = 0; i < PCI_NUM_RESOURCES; i++) + if (dev->resource[i].flags) + fixup_resource(&dev->resource[i], dev); } EXPORT_SYMBOL(pcibios_fixup_device_resources); -void __devinit pcibios_fixup_bus(struct pci_bus *bus) +static void __devinit do_bus_setup(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pci_dev *dev = bus->self; - struct resource *res; - int i; + struct pci_dev *dev; - if (!dev) { - /* Root bus. */ + ppc_md.iommu_bus_setup(bus); - hose->bus = bus; - bus->resource[0] = res = &hose->io_resource; + list_for_each_entry(dev, &bus->devices, bus_list) + ppc_md.iommu_dev_setup(dev); - if (res->flags && request_resource(&ioport_resource, res)) - printk(KERN_ERR "Failed to request IO on " - "PCI domain %d\n", pci_domain_nr(bus)); + if (ppc_md.irq_bus_setup) + ppc_md.irq_bus_setup(bus); +} - for (i = 0; i < 3; ++i) { - res = &hose->mem_resources[i]; - bus->resource[i+1] = res; - if (res->flags && request_resource(&iomem_resource, res)) - printk(KERN_ERR "Failed to request MEM on " - "PCI domain %d\n", - pci_domain_nr(bus)); - } - } else if (pci_probe_only && - (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + + if (dev && pci_probe_only && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { /* This is a subordinate bridge */ pci_read_bridge_bases(bus); pcibios_fixup_device_resources(dev, bus); } - ppc_md.iommu_bus_setup(bus); - - list_for_each_entry(dev, &bus->devices, bus_list) - ppc_md.iommu_dev_setup(dev); - - if (ppc_md.irq_bus_setup) - ppc_md.irq_bus_setup(bus); + do_bus_setup(bus); if (!pci_probe_only) return; - list_for_each_entry(dev, &bus->devices, bus_list) { + list_for_each_entry(dev, &bus->devices, bus_list) if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) pcibios_fixup_device_resources(dev, bus); - } } EXPORT_SYMBOL(pcibios_fixup_bus); diff --git a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c index d37bff2..dc40a0c 100644 --- a/arch/ppc64/kernel/pmac_pci.c +++ b/arch/ppc64/kernel/pmac_pci.c @@ -388,7 +388,7 @@ static void __init setup_u3_agp(struct pci_controller* hose) * the reg address cell, we shall fix that by killing struct * reg_property and using some accessor functions instead */ - hose->first_busno = 0xf0; + hose->first_busno = 0xf0; hose->last_busno = 0xff; has_uninorth = 1; hose->ops = ¯isc_pci_ops; @@ -473,7 +473,7 @@ static void __init setup_u3_ht(struct pci_controller* hose) continue; } cur++; - DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", + DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", cur-1, res->start - 1, cur, res->end + 1); hose->mem_resources[cur].name = np->full_name; hose->mem_resources[cur].flags = IORESOURCE_MEM; @@ -603,24 +603,24 @@ static int __init add_bridge(struct device_node *dev) char* disp_name; int *bus_range; int primary = 1; - struct property *of_prop; + struct property *of_prop; DBG("Adding PCI host bridge %s\n", dev->full_name); - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", - dev->full_name); - } + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } hose = alloc_bootmem(sizeof(struct pci_controller)); if (hose == NULL) return -ENOMEM; - pci_setup_pci_controller(hose); + pci_setup_pci_controller(hose); - hose->arch_data = dev; - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; of_prop = alloc_bootmem(sizeof(struct property) + sizeof(hose->global_number)); @@ -634,24 +634,24 @@ static int __init add_bridge(struct device_node *dev) } disp_name = NULL; - if (device_is_compatible(dev, "u3-agp")) { - setup_u3_agp(hose); - disp_name = "U3-AGP"; - primary = 0; - } else if (device_is_compatible(dev, "u3-ht")) { - setup_u3_ht(hose); - disp_name = "U3-HT"; - primary = 1; - } - printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pmac_process_bridge_OF_ranges(hose, dev, primary); - - /* Fixup "bus-range" OF property */ - fixup_bus_range(dev); + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pmac_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); return 0; } diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index e7f695d..325426c 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -477,6 +477,18 @@ static int __init pmac_probe(int platform) return 1; } +static int pmac_probe_mode(struct pci_bus *bus) +{ + struct device_node *node = bus->sysdata; + + /* We need to use normal PCI probing for the AGP bus, + since the device for the AGP bridge isn't in the tree. */ + if (bus->self == NULL && device_is_compatible(node, "u3-agp")) + return PCI_PROBE_NORMAL; + + return PCI_PROBE_DEVTREE; +} + struct machdep_calls __initdata pmac_md = { #ifdef CONFIG_HOTPLUG_CPU .cpu_die = generic_mach_cpu_die, @@ -488,6 +500,7 @@ struct machdep_calls __initdata pmac_md = { .init_IRQ = pmac_init_IRQ, .get_irq = mpic_get_irq, .pcibios_fixup = pmac_pcibios_fixup, + .pci_probe_mode = pmac_probe_mode, .restart = pmac_restart, .power_off = pmac_power_off, .halt = pmac_halt, diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index 7a7e027..8870053 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -54,6 +54,7 @@ #include <asm/sections.h> #include <asm/tlbflush.h> #include <asm/time.h> +#include <asm/plpar_wrappers.h> #ifndef CONFIG_SMP struct task_struct *last_task_used_math = NULL; @@ -163,7 +164,30 @@ int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs) #endif /* CONFIG_ALTIVEC */ +static void set_dabr_spr(unsigned long val) +{ + mtspr(SPRN_DABR, val); +} + +int set_dabr(unsigned long dabr) +{ + int ret = 0; + + if (firmware_has_feature(FW_FEATURE_XDABR)) { + /* We want to catch accesses from kernel and userspace */ + unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER; + ret = plpar_set_xdabr(dabr, flags); + } else if (firmware_has_feature(FW_FEATURE_DABR)) { + ret = plpar_set_dabr(dabr); + } else { + set_dabr_spr(dabr); + } + + return ret; +} + DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); +static DEFINE_PER_CPU(unsigned long, current_dabr); struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) @@ -198,6 +222,11 @@ struct task_struct *__switch_to(struct task_struct *prev, new->thread.regs->msr |= MSR_VEC; #endif /* CONFIG_ALTIVEC */ + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { + set_dabr(new->thread.dabr); + __get_cpu_var(current_dabr) = new->thread.dabr; + } + flush_tlb_pending(); new_thread = &new->thread; @@ -334,6 +363,11 @@ void flush_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + + if (current->thread.dabr) { + current->thread.dabr = 0; + set_dabr(0); + } } void diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 2993f10..85ed318 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -17,6 +17,7 @@ * this archive for more details. */ +#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -206,6 +207,19 @@ int sys_ptrace(long request, long pid, long addr, long data) break; } + case PTRACE_GET_DEBUGREG: { + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + break; + ret = put_user(child->thread.dabr, + (unsigned long __user *)data); + break; + } + + case PTRACE_SET_DEBUGREG: + ret = ptrace_set_debugreg(child, addr, data); + case PTRACE_DETACH: ret = ptrace_detach(child, data); break; @@ -274,6 +288,20 @@ int sys_ptrace(long request, long pid, long addr, long data) break; } +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + /* Get the child altivec register state. */ + flush_altivec_to_thread(child); + ret = get_vrregs((unsigned long __user *)data, child); + break; + + case PTRACE_SETVRREGS: + /* Set the child altivec register state. */ + flush_altivec_to_thread(child); + ret = set_vrregs(child, (unsigned long __user *)data); + break; +#endif + default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/ppc64/kernel/ptrace32.c b/arch/ppc64/kernel/ptrace32.c index 1643642..fb8c22d 100644 --- a/arch/ppc64/kernel/ptrace32.c +++ b/arch/ppc64/kernel/ptrace32.c @@ -17,6 +17,7 @@ * this archive for more details. */ +#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -337,6 +338,19 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) break; } + case PTRACE_GET_DEBUGREG: { + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + break; + ret = put_user(child->thread.dabr, (u32 __user *)data); + break; + } + + case PTRACE_SET_DEBUGREG: + ret = ptrace_set_debugreg(child, addr, data); + break; + case PTRACE_DETACH: ret = ptrace_detach(child, data); break; @@ -405,9 +419,23 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) break; } - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned int __user *) data); - break; + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; + +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + /* Get the child altivec register state. */ + flush_altivec_to_thread(child); + ret = get_vrregs((unsigned long __user *)data, child); + break; + + case PTRACE_SETVRREGS: + /* Set the child altivec register state. */ + flush_altivec_to_thread(child); + ret = set_vrregs(child, (unsigned long __user *)data); + break; +#endif default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/ppc64/kernel/ras.c b/arch/ppc64/kernel/ras.c index 3c00f7b..41b97dc 100644 --- a/arch/ppc64/kernel/ras.c +++ b/arch/ppc64/kernel/ras.c @@ -59,8 +59,6 @@ char mce_data_buf[RTAS_ERROR_LOG_MAX] /* This is true if we are using the firmware NMI handler (typically LPAR) */ extern int fwnmi_active; -extern void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr); - static int ras_get_sensor_state_token; static int ras_check_exception_token; diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index bfa8791c..5ac48bd 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -1064,8 +1064,6 @@ void __init setup_arch(char **cmdline_p) #define PPC64_LINUX_FUNCTION 0x0f000000 #define PPC64_IPL_MESSAGE 0xc0000000 #define PPC64_TERM_MESSAGE 0xb0000000 -#define PPC64_ATTN_MESSAGE 0xa0000000 -#define PPC64_DUMP_MESSAGE 0xd0000000 static void ppc64_do_msg(unsigned int src, const char *msg) { @@ -1093,20 +1091,6 @@ void ppc64_terminate_msg(unsigned int src, const char *msg) printk("[terminate]%04x %s\n", src, msg); } -/* Print something that needs attention (device error, etc) */ -void ppc64_attention_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_ATTN_MESSAGE|src, msg); - printk("[attention]%04x %s\n", src, msg); -} - -/* Print a dump progress message. */ -void ppc64_dump_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_DUMP_MESSAGE|src, msg); - printk("[dump]%04x %s\n", src, msg); -} - /* This should only be called on processor 0 during calibrate decr */ void __init setup_default_decr(void) { diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c index 49a79a5..347112c 100644 --- a/arch/ppc64/kernel/signal.c +++ b/arch/ppc64/kernel/signal.c @@ -550,6 +550,15 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) /* Whee! Actually deliver the signal. */ if (TRAP(regs) == 0x0C00) syscall_restart(regs, &ka); + + /* + * Reenable the DABR before delivering the signal to + * user space. The DABR will have been cleared if it + * triggered inside the kernel. + */ + if (current->thread.dabr) + set_dabr(current->thread.dabr); + return handle_signal(signr, &ka, &info, oldset, regs); } diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c index 46f4d6c..a8b7a5a 100644 --- a/arch/ppc64/kernel/signal32.c +++ b/arch/ppc64/kernel/signal32.c @@ -970,6 +970,14 @@ int do_signal32(sigset_t *oldset, struct pt_regs *regs) newsp = regs->gpr[1]; newsp &= ~0xfUL; + /* + * Reenable the DABR before delivering the signal to + * user space. The DABR will have been cleared if it + * triggered inside the kernel. + */ + if (current->thread.dabr) + set_dabr(current->thread.dabr); + /* Whee! Actually deliver the signal. */ if (ka.sa.sa_flags & SA_SIGINFO) ret = handle_rt_signal32(signr, &ka, &info, oldset, regs, newsp); diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c index d9dc6f2..daf9388 100644 --- a/arch/ppc64/kernel/xics.c +++ b/arch/ppc64/kernel/xics.c @@ -38,7 +38,7 @@ static void xics_mask_and_ack_irq(unsigned int irq); static void xics_end_irq(unsigned int irq); static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); -struct hw_interrupt_type xics_pic = { +static struct hw_interrupt_type xics_pic = { .typename = " XICS ", .startup = xics_startup, .enable = xics_enable_irq, @@ -48,7 +48,7 @@ struct hw_interrupt_type xics_pic = { .set_affinity = xics_set_affinity }; -struct hw_interrupt_type xics_8259_pic = { +static struct hw_interrupt_type xics_8259_pic = { .typename = " XICS/8259", .ack = xics_mask_and_ack_irq, }; @@ -89,9 +89,8 @@ static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; static int xics_irq_8259_cascade = 0; static int xics_irq_8259_cascade_real = 0; static unsigned int default_server = 0xFF; -/* also referenced in smp.c... */ -unsigned int default_distrib_server = 0; -unsigned int interrupt_server_size = 8; +static unsigned int default_distrib_server = 0; +static unsigned int interrupt_server_size = 8; /* * XICS only has a single IPI, so encode the messages per CPU @@ -99,10 +98,10 @@ unsigned int interrupt_server_size = 8; struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; /* RTAS service tokens */ -int ibm_get_xive; -int ibm_set_xive; -int ibm_int_on; -int ibm_int_off; +static int ibm_get_xive; +static int ibm_set_xive; +static int ibm_int_on; +static int ibm_int_off; typedef struct { int (*xirr_info_get)(int cpu); @@ -284,16 +283,17 @@ static void xics_enable_irq(unsigned int virq) call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, DEFAULT_PRIORITY); if (call_status != 0) { - printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_set_xive " - "returned %x\n", irq, call_status); + printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_set_xive " + "returned %d\n", irq, call_status); + printk("set_xive %x, server %x\n", ibm_set_xive, server); return; } /* Now unmask the interrupt (often a no-op) */ call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq); if (call_status != 0) { - printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_int_on " - "returned %x\n", irq, call_status); + printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_int_on " + "returned %d\n", irq, call_status); return; } } @@ -308,8 +308,8 @@ static void xics_disable_real_irq(unsigned int irq) call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq); if (call_status != 0) { - printk(KERN_ERR "xics_disable_real_irq: irq=%d: " - "ibm_int_off returned %x\n", irq, call_status); + printk(KERN_ERR "xics_disable_real_irq: irq=%u: " + "ibm_int_off returned %d\n", irq, call_status); return; } @@ -317,8 +317,8 @@ static void xics_disable_real_irq(unsigned int irq) /* Have to set XIVE to 0xff to be able to remove a slot */ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff); if (call_status != 0) { - printk(KERN_ERR "xics_disable_irq: irq=%d: ibm_set_xive(0xff)" - " returned %x\n", irq, call_status); + printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)" + " returned %d\n", irq, call_status); return; } } @@ -380,7 +380,7 @@ int xics_get_irq(struct pt_regs *regs) if (irq == NO_IRQ) irq = real_irq_to_virt_slowpath(vec); if (irq == NO_IRQ) { - printk(KERN_ERR "Interrupt %d (real) is invalid," + printk(KERN_ERR "Interrupt %u (real) is invalid," " disabling it.\n", vec); xics_disable_real_irq(vec); } else @@ -622,7 +622,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%d ibm,get-xive " + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " "returns %d\n", irq, status); return; } @@ -641,7 +641,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) irq, newmask, xics_status[1]); if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%d ibm,set-xive " + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " "returns %d\n", irq, status); return; } @@ -720,7 +720,7 @@ void xics_migrate_irqs_away(void) status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { - printk(KERN_ERR "migrate_irqs_away: irq=%d " + printk(KERN_ERR "migrate_irqs_away: irq=%u " "ibm,get-xive returns %d\n", virq, status); goto unlock; @@ -734,7 +734,7 @@ void xics_migrate_irqs_away(void) if (xics_status[0] != get_hard_smp_processor_id(cpu)) goto unlock; - printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n", + printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", virq, cpu); /* Reset affinity to all cpus */ diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c index 772f071..7fbc68b 100644 --- a/arch/ppc64/mm/fault.c +++ b/arch/ppc64/mm/fault.c @@ -77,6 +77,28 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } +static void do_dabr(struct pt_regs *regs, unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)regs->nip; + force_sig_info(SIGTRAP, &info, current); +} + /* * The error_code parameter is * - DSISR for a non-SLB data access fault, @@ -111,12 +133,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && (address >= TASK_SIZE)) return SIGSEGV; - if (error_code & DSISR_DABRMATCH) { - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return 0; - if (debugger_dabr_match(regs)) - return 0; + if (error_code & DSISR_DABRMATCH) { + do_dabr(regs, error_code); + return 0; } if (in_atomic() || mm == NULL) { diff --git a/arch/ppc64/xmon/privinst.h b/arch/ppc64/xmon/privinst.h index 183c3e4..02eb40d 100644 --- a/arch/ppc64/xmon/privinst.h +++ b/arch/ppc64/xmon/privinst.h @@ -46,7 +46,6 @@ GSETSPR(287, pvr) GSETSPR(1008, hid0) GSETSPR(1009, hid1) GSETSPR(1010, iabr) -GSETSPR(1013, dabr) GSETSPR(1023, pir) static inline void store_inst(void *p) diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c index 45908b1..74e63a8 100644 --- a/arch/ppc64/xmon/xmon.c +++ b/arch/ppc64/xmon/xmon.c @@ -586,6 +586,8 @@ int xmon_dabr_match(struct pt_regs *regs) { if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) return 0; + if (dabr.enabled == 0) + return 0; xmon_core(regs, 0); return 1; } @@ -628,20 +630,6 @@ int xmon_fault_handler(struct pt_regs *regs) return 0; } -/* On systems with a hypervisor, we can't set the DABR - (data address breakpoint register) directly. */ -static void set_controlled_dabr(unsigned long val) -{ -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { - int rc = plpar_hcall_norets(H_SET_DABR, val); - if (rc != H_Success) - xmon_printf("Warning: setting DABR failed (%d)\n", rc); - } else -#endif - set_dabr(val); -} - static struct bpt *at_breakpoint(unsigned long pc) { int i; @@ -728,7 +716,7 @@ static void insert_bpts(void) static void insert_cpu_bpts(void) { if (dabr.enabled) - set_controlled_dabr(dabr.address | (dabr.enabled & 7)); + set_dabr(dabr.address | (dabr.enabled & 7)); if (iabr && cpu_has_feature(CPU_FTR_IABR)) set_iabr(iabr->address | (iabr->enabled & (BP_IABR|BP_IABR_TE))); @@ -756,7 +744,7 @@ static void remove_bpts(void) static void remove_cpu_bpts(void) { - set_controlled_dabr(0); + set_dabr(0); if (cpu_has_feature(CPU_FTR_IABR)) set_iabr(0); } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 18610ce..ed877d0 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -678,7 +678,7 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, size ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index f4399c7..18c6e91 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile @@ -46,7 +46,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \ $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \ $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @echo 'Kernel: $@ is ready' + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index b38d5b8..0e10fd8 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c @@ -83,7 +83,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */ #endif #define SCREEN_INFO (*(struct screen_info *)(real_mode+0)) -extern char input_data[]; +extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; @@ -288,7 +288,7 @@ void setup_normal_output_buffer(void) #else if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ + output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -305,7 +305,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv) #else if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); #endif - mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; + mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; low_buffer_size = low_buffer_end - LOW_BUFFER_START; diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index bf57e23..f8db7e5 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,11 +1,12 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6-git3 -# Fri Aug 12 16:40:34 2005 +# Linux kernel version: 2.6.13-git11 +# Mon Sep 12 16:16:16 2005 # CONFIG_X86_64=y CONFIG_64BIT=y CONFIG_X86=y +CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_MMU=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y @@ -13,6 +14,7 @@ CONFIG_X86_CMPXCHG=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y # # Code maturity level options @@ -26,6 +28,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +40,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -102,6 +106,7 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_HAVE_DEC_LOCK=y CONFIG_NR_CPUS=32 @@ -122,6 +127,7 @@ CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y +CONFIG_GENERIC_PENDING_IRQ=y # # Power management options @@ -194,7 +200,6 @@ CONFIG_UNORDERED_IO=y # CONFIG_PCIEPORTBUS is not set CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY_PROC is not set -# CONFIG_PCI_NAMES is not set # CONFIG_PCI_DEBUG is not set # @@ -234,7 +239,10 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set @@ -244,8 +252,8 @@ CONFIG_IP_FIB_HASH=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y CONFIG_IPV6=y @@ -258,6 +266,11 @@ CONFIG_IPV6=y # CONFIG_NETFILTER is not set # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -280,9 +293,11 @@ CONFIG_IPV6=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NETFILTER_NETLINK is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -329,7 +344,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" CONFIG_LBD=y # CONFIG_CDROM_PKTCDVD is not set @@ -409,6 +423,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set @@ -432,7 +447,7 @@ CONFIG_BLK_DEV_SD=y # # SCSI Transport Attributes # -# CONFIG_SCSI_SPI_ATTRS is not set +CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set @@ -458,6 +473,7 @@ CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y +# CONFIG_SCSI_SATA_MV is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -537,6 +553,11 @@ CONFIG_TUN=y # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y @@ -586,6 +607,7 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -595,6 +617,7 @@ CONFIG_TIGON3=y # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set @@ -749,7 +772,6 @@ CONFIG_MAX_RAW_DEVS=256 # I2C support # # CONFIG_I2C is not set -# CONFIG_I2C_SENSOR is not set # # Dallas's 1-wire bus @@ -760,6 +782,7 @@ CONFIG_MAX_RAW_DEVS=256 # Hardware Monitoring support # CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -768,6 +791,10 @@ CONFIG_HWMON=y # CONFIG_IBM_ASM is not set # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -858,9 +885,8 @@ CONFIG_USB_UHCI_HCD=y # # USB Device Class drivers # -# CONFIG_USB_AUDIO is not set +# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_BLUETOOTH_TTY is not set -# CONFIG_USB_MIDI is not set # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y @@ -877,6 +903,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -893,6 +920,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set @@ -976,6 +1004,8 @@ CONFIG_USB_MON=y # Firmware Drivers # # CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +CONFIG_DCDBAS=m # # File systems @@ -1000,10 +1030,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y # CONFIG_REISERFS_FS_SECURITY is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -1012,6 +1038,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1037,12 +1064,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y -# CONFIG_TMPFS_XATTR is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1074,6 +1100,7 @@ CONFIG_NFSD_V3=y # CONFIG_NFSD_V3_ACL is not set # CONFIG_NFSD_V4 is not set CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y @@ -1086,6 +1113,7 @@ CONFIG_SUNRPC=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1150,6 +1178,7 @@ CONFIG_OPROFILE=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=18 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1157,6 +1186,7 @@ CONFIG_LOG_BUF_SHIFT=18 # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_FRAME_POINTER is not set CONFIG_INIT_DEBUG=y # CONFIG_IOMMU_DEBUG is not set CONFIG_KPROBES=y @@ -1180,5 +1210,6 @@ CONFIG_KPROBES=y # Library routines # # CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 5244f80..e0eb0c7 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -55,20 +55,34 @@ * with the int 0x80 path. */ ENTRY(ia32_sysenter_target) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rsp,rbp swapgs movq %gs:pda_kernelstack, %rsp addq $(PDA_STACKOFFSET),%rsp sti movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET ss,0*/ pushq %rbp + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rsp,0 pushfq + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET rflags,0*/ movl $VSYSCALL32_SYSEXIT, %r10d + CFI_REGISTER rip,r10 pushq $__USER32_CS + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax pushq %r10 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip,0 pushq %rax + CFI_ADJUST_CFA_OFFSET 8 cld SAVE_ARGS 0,0,1 /* no need to do an access_ok check here because rbp has been @@ -79,6 +93,7 @@ ENTRY(ia32_sysenter_target) .previous GET_THREAD_INFO(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + CFI_REMEMBER_STATE jnz sysenter_tracesys sysenter_do_call: cmpl $(IA32_NR_syscalls),%eax @@ -94,14 +109,20 @@ sysenter_do_call: andl $~0x200,EFLAGS-R11(%rsp) RESTORE_ARGS 1,24,1,1,1,1 popfq + CFI_ADJUST_CFA_OFFSET -8 + /*CFI_RESTORE rflags*/ popq %rcx /* User %esp */ + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rsp,rcx movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ + CFI_REGISTER rip,rdx swapgs sti /* sti only takes effect after the next instruction */ /* sysexit */ .byte 0xf, 0x35 sysenter_tracesys: + CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -140,21 +161,28 @@ sysenter_tracesys: * with the int 0x80 path. */ ENTRY(ia32_cstar_target) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ swapgs movl %esp,%r8d + CFI_REGISTER rsp,r8 movq %gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ movl %ebp,%ecx movq $__USER32_CS,CS-ARGOFFSET(%rsp) movq $__USER32_DS,SS-ARGOFFSET(%rsp) movq %r11,EFLAGS-ARGOFFSET(%rsp) + /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ movq %r8,RSP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rsp,RSP-ARGOFFSET /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ @@ -164,6 +192,7 @@ ENTRY(ia32_cstar_target) .previous GET_THREAD_INFO(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: cmpl $IA32_NR_syscalls,%eax @@ -177,12 +206,16 @@ cstar_do_call: jnz int_ret_from_sys_call RESTORE_ARGS 1,-ARG_SKIP,1,1,1 movl RIP-ARGOFFSET(%rsp),%ecx + CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d + /*CFI_REGISTER rflags,r11*/ movl RSP-ARGOFFSET(%rsp),%esp + CFI_RESTORE rsp swapgs sysretl cstar_tracesys: + CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -226,11 +259,18 @@ ia32_badarg: */ ENTRY(ia32_syscall) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-RIP + /*CFI_REL_OFFSET ss,SS-RIP*/ + CFI_REL_OFFSET rsp,RSP-RIP + /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ + /*CFI_REL_OFFSET cs,CS-RIP*/ + CFI_REL_OFFSET rip,RIP-RIP swapgs sti movl %eax,%eax pushq %rax + CFI_ADJUST_CFA_OFFSET 8 cld /* note the registers are not zero extended to the sf. this could be a problem. */ @@ -278,6 +318,8 @@ quiet_ni_syscall: jmp ia32_ptregs_common .endm + CFI_STARTPROC + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx @@ -290,8 +332,9 @@ quiet_ni_syscall: PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx ENTRY(ia32_ptregs_common) - CFI_STARTPROC popq %r11 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST call *%rax RESTORE_REST diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 04d8040..5389df6 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -751,7 +751,7 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 1579bdd..bcdd0a8 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -46,3 +46,4 @@ microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o +msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 148f6f7..867a0eb 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c @@ -34,7 +34,6 @@ #include <linux/slab.h> #include <linux/pci.h> #include <linux/bootmem.h> -#include <linux/irq.h> #include <linux/acpi.h> #include <asm/mpspec.h> #include <asm/io.h> diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index c9a6b81..962ad48 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -245,6 +245,8 @@ void __init iommu_hole_init(void) if (aper_alloc) { /* Got the aperture from the AGP bridge */ + } else if (swiotlb && !valid_agp) { + /* Do nothing */ } else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) || force_iommu || valid_agp || diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 375d369..b6e7715 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/bootmem.h> #include <linux/smp_lock.h> @@ -109,11 +108,8 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ - if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); } void __init connect_bsp_APIC(void) @@ -316,8 +312,6 @@ void __init init_bsp_APIC(void) */ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); } @@ -325,14 +319,6 @@ void __cpuinit setup_local_APIC (void) { unsigned int value, ver, maxlvt; - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } - value = apic_read(APIC_LVR); ver = GET_APIC_VERSION(value); @@ -430,15 +416,11 @@ void __cpuinit setup_local_APIC (void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); - if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ + { unsigned oldvalue; maxlvt = get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors apic_write_around(APIC_LVTERR, value); @@ -452,17 +434,6 @@ void __cpuinit setup_local_APIC (void) apic_printk(APIC_VERBOSE, "ESR value after enabling vector: %08x, after %08x\n", oldvalue, value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - apic_printk(APIC_DEBUG, "Leaving ESR disabled.\n"); - else - apic_printk(APIC_DEBUG, "No ESR for 82489DX.\n"); } nmi_watchdog_default(); @@ -650,8 +621,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_id == -1U) - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { @@ -693,8 +663,6 @@ static void __setup_APIC_LVTT(unsigned int clocks) ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - if (!APIC_INTEGRATED(ver)) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); /* @@ -1081,7 +1049,7 @@ int __init APIC_init_uniprocessor (void) connect_bsp_APIC(); - phys_cpu_present_map = physid_mask_of_physid(0); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); apic_write_around(APIC_ID, boot_cpu_id); setup_local_APIC(); diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index 35b4c3f..aaa6d38 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c @@ -39,7 +39,6 @@ int main(void) ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(pcurrent); - ENTRY(irqrsp); ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index d7fa424..535e044 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -11,7 +11,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/smp.h> -#include <linux/irq.h> #include <linux/reboot.h> #include <linux/kexec.h> diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index bb0ae18..eb7929e 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -131,7 +131,7 @@ void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned lon if (ei->type != E820_RAM || ei->addr+ei->size <= start || - ei->addr > end) + ei->addr >= end) continue; addr = round_up(ei->addr, PAGE_SIZE); diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 9631c74..9cd968d 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -5,6 +5,7 @@ #include <linux/tty.h> #include <asm/io.h> #include <asm/processor.h> +#include <asm/fcntl.h> /* Simple VGA output */ @@ -158,6 +159,47 @@ static struct console early_serial_console = { .index = -1, }; +/* Console interface to a host file on AMD's SimNow! */ + +static int simnow_fd; + +enum { + MAGIC1 = 0xBACCD00A, + MAGIC2 = 0xCA110000, + XOPEN = 5, + XWRITE = 4, +}; + +static noinline long simnow(long cmd, long a, long b, long c) +{ + long ret; + asm volatile("cpuid" : + "=a" (ret) : + "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); + return ret; +} + +void __init simnow_init(char *str) +{ + char *fn = "klog"; + if (*str == '=') + fn = ++str; + /* error ignored */ + simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); +} + +static void simnow_write(struct console *con, const char *s, unsigned n) +{ + simnow(XWRITE, simnow_fd, (unsigned long)s, n); +} + +static struct console simnow_console = { + .name = "simnow", + .write = simnow_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + /* Direct interface for emergencies */ struct console *early_console = &early_vga_console; static int early_console_initialized = 0; @@ -205,6 +247,10 @@ int __init setup_early_printk(char *opt) max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; early_console = &early_vga_console; + } else if (!strncmp(buf, "simnow", 6)) { + simnow_init(buf + 6); + early_console = &simnow_console; + keep_early = 1; } early_console_initialized = 1; register_console(early_console); diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 3620508..7937971 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -79,16 +79,19 @@ xorl %eax, %eax pushq %rax /* ss */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET ss,0*/ pushq %rax /* rsp */ CFI_ADJUST_CFA_OFFSET 8 - CFI_OFFSET rip,0 + CFI_REL_OFFSET rsp,0 pushq $(1<<9) /* eflags - interrupts on */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET rflags,0*/ pushq $__KERNEL_CS /* cs */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET cs,0*/ pushq \child_rip /* rip */ CFI_ADJUST_CFA_OFFSET 8 - CFI_OFFSET rip,0 + CFI_REL_OFFSET rip,0 pushq %rax /* orig rax */ CFI_ADJUST_CFA_OFFSET 8 .endm @@ -98,32 +101,39 @@ CFI_ADJUST_CFA_OFFSET -(6*8) .endm - .macro CFI_DEFAULT_STACK - CFI_ADJUST_CFA_OFFSET (SS) - CFI_OFFSET r15,R15-SS - CFI_OFFSET r14,R14-SS - CFI_OFFSET r13,R13-SS - CFI_OFFSET r12,R12-SS - CFI_OFFSET rbp,RBP-SS - CFI_OFFSET rbx,RBX-SS - CFI_OFFSET r11,R11-SS - CFI_OFFSET r10,R10-SS - CFI_OFFSET r9,R9-SS - CFI_OFFSET r8,R8-SS - CFI_OFFSET rax,RAX-SS - CFI_OFFSET rcx,RCX-SS - CFI_OFFSET rdx,RDX-SS - CFI_OFFSET rsi,RSI-SS - CFI_OFFSET rdi,RDI-SS - CFI_OFFSET rsp,RSP-SS - CFI_OFFSET rip,RIP-SS + .macro CFI_DEFAULT_STACK start=1 + .if \start + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8 + .else + CFI_DEF_CFA_OFFSET SS+8 + .endif + CFI_REL_OFFSET r15,R15 + CFI_REL_OFFSET r14,R14 + CFI_REL_OFFSET r13,R13 + CFI_REL_OFFSET r12,R12 + CFI_REL_OFFSET rbp,RBP + CFI_REL_OFFSET rbx,RBX + CFI_REL_OFFSET r11,R11 + CFI_REL_OFFSET r10,R10 + CFI_REL_OFFSET r9,R9 + CFI_REL_OFFSET r8,R8 + CFI_REL_OFFSET rax,RAX + CFI_REL_OFFSET rcx,RCX + CFI_REL_OFFSET rdx,RDX + CFI_REL_OFFSET rsi,RSI + CFI_REL_OFFSET rdi,RDI + CFI_REL_OFFSET rip,RIP + /*CFI_REL_OFFSET cs,CS*/ + /*CFI_REL_OFFSET rflags,EFLAGS*/ + CFI_REL_OFFSET rsp,RSP + /*CFI_REL_OFFSET ss,SS*/ .endm /* * A newly forked process directly context switches into this. */ /* rdi: prev */ ENTRY(ret_from_fork) - CFI_STARTPROC CFI_DEFAULT_STACK call schedule_tail GET_THREAD_INFO(%rcx) @@ -172,16 +182,21 @@ rff_trace: */ ENTRY(system_call) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ swapgs movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) + movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -201,9 +216,12 @@ sysret_check: cli movl threadinfo_flags(%rcx),%edx andl %edi,%edx + CFI_REMEMBER_STATE jnz sysret_careful movq RIP-ARGOFFSET(%rsp),%rcx + CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 + /*CFI_REGISTER rflags,r11*/ movq %gs:pda_oldrsp,%rsp swapgs sysretq @@ -211,12 +229,15 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: + CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 jmp sysret_check /* Handle a signal */ @@ -234,8 +255,13 @@ sysret_signal: 1: movl $_TIF_NEED_RESCHED,%edi jmp sysret_check +badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) + jmp ret_from_sys_call + /* Do syscall tracing */ tracesys: + CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -254,16 +280,29 @@ tracesys: RESTORE_TOP_OF_STACK %rbx RESTORE_REST jmp ret_from_sys_call + CFI_ENDPROC -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. */ -ENTRY(int_ret_from_sys_call) +ENTRY(int_ret_from_sys_call) + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-ARGOFFSET + /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ + CFI_REL_OFFSET rsp,RSP-ARGOFFSET + /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ + /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ + CFI_REL_OFFSET rip,RIP-ARGOFFSET + CFI_REL_OFFSET rdx,RDX-ARGOFFSET + CFI_REL_OFFSET rcx,RCX-ARGOFFSET + CFI_REL_OFFSET rax,RAX-ARGOFFSET + CFI_REL_OFFSET rdi,RDI-ARGOFFSET + CFI_REL_OFFSET rsi,RSI-ARGOFFSET + CFI_REL_OFFSET r8,R8-ARGOFFSET + CFI_REL_OFFSET r9,R9-ARGOFFSET + CFI_REL_OFFSET r10,R10-ARGOFFSET + CFI_REL_OFFSET r11,R11-ARGOFFSET cli testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args @@ -284,8 +323,10 @@ int_careful: jnc int_very_careful sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 cli jmp int_with_check @@ -297,9 +338,11 @@ int_very_careful: testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx jz int_signal pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 leaq 8(%rsp),%rdi # &ptregs -> arg1 call syscall_trace_leave popq %rdi + CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi cli jmp int_restore_rest @@ -329,6 +372,8 @@ int_restore_rest: jmp ptregscall_common .endm + CFI_STARTPROC + PTREGSCALL stub_clone, sys_clone, %r8 PTREGSCALL stub_fork, sys_fork, %rdi PTREGSCALL stub_vfork, sys_vfork, %rdi @@ -337,40 +382,49 @@ int_restore_rest: PTREGSCALL stub_iopl, sys_iopl, %rsi ENTRY(ptregscall_common) - CFI_STARTPROC popq %r11 - CFI_ADJUST_CFA_OFFSET -8 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST movq %r11, %r15 + CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call *%rax RESTORE_TOP_OF_STACK %r11 movq %r15, %r11 + CFI_REGISTER rip, r11 RESTORE_REST pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip, 0 ret CFI_ENDPROC ENTRY(stub_execve) CFI_STARTPROC popq %r11 - CFI_ADJUST_CFA_OFFSET -8 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST movq %r11, %r15 + CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call sys_execve GET_THREAD_INFO(%rcx) bt $TIF_IA32,threadinfo_flags(%rcx) + CFI_REMEMBER_STATE jc exec_32bit RESTORE_TOP_OF_STACK %r11 movq %r15, %r11 + CFI_REGISTER rip, r11 RESTORE_REST - push %r11 + pushq %r11 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip, 0 ret exec_32bit: - CFI_ADJUST_CFA_OFFSET REST_SKIP + CFI_RESTORE_STATE movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call @@ -382,7 +436,8 @@ exec_32bit: */ ENTRY(stub_rt_sigreturn) CFI_STARTPROC - addq $8, %rsp + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 SAVE_REST movq %rsp,%rdi FIXUP_TOP_OF_STACK %r11 @@ -392,6 +447,25 @@ ENTRY(stub_rt_sigreturn) jmp int_ret_from_sys_call CFI_ENDPROC +/* + * initial frame state for interrupts and exceptions + */ + .macro _frame ref + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-\ref + /*CFI_REL_OFFSET ss,SS-\ref*/ + CFI_REL_OFFSET rsp,RSP-\ref + /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ + /*CFI_REL_OFFSET cs,CS-\ref*/ + CFI_REL_OFFSET rip,RIP-\ref + .endm + +/* initial frame state for interrupts (and exceptions without error code) */ +#define INTR_FRAME _frame RIP +/* initial frame state for exceptions with error code (and interrupts with + vector already pushed) */ +#define XCPT_FRAME _frame ORIG_RAX + /* * Interrupt entry/exit. * @@ -402,10 +476,6 @@ ENTRY(stub_rt_sigreturn) /* 0(%rsp): interrupt number */ .macro interrupt func - CFI_STARTPROC simple - CFI_DEF_CFA rsp,(SS-RDI) - CFI_REL_OFFSET rsp,(RSP-ORIG_RAX) - CFI_REL_OFFSET rip,(RIP-ORIG_RAX) cld #ifdef CONFIG_DEBUG_INFO SAVE_ALL @@ -425,23 +495,27 @@ ENTRY(stub_rt_sigreturn) swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count movq %gs:pda_irqstackptr,%rax - cmoveq %rax,%rsp + cmoveq %rax,%rsp /*todo This needs CFI annotation! */ pushq %rdi # save old stack + CFI_ADJUST_CFA_OFFSET 8 call \func .endm ENTRY(common_interrupt) + XCPT_FRAME interrupt do_IRQ /* 0(%rsp): oldrsp-ARGOFFSET */ -ret_from_intr: +ret_from_intr: popq %rdi + CFI_ADJUST_CFA_OFFSET -8 cli decl %gs:pda_irqcount #ifdef CONFIG_DEBUG_INFO movq RBP(%rdi),%rbp + CFI_DEF_CFA_REGISTER rsp #endif - leaq ARGOFFSET(%rdi),%rsp -exit_intr: + leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */ +exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) je retint_kernel @@ -453,9 +527,10 @@ exit_intr: */ retint_with_reschedule: movl $_TIF_WORK_MASK,%edi -retint_check: +retint_check: movl threadinfo_flags(%rcx),%edx andl %edi,%edx + CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: swapgs @@ -476,14 +551,17 @@ bad_iret: jmp do_exit .previous - /* edi: workmask, edx: work */ + /* edi: workmask, edx: work */ retint_careful: + CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 GET_THREAD_INFO(%rcx) cli jmp retint_check @@ -523,7 +601,9 @@ retint_kernel: * APIC interrupts. */ .macro apicinterrupt num,func + INTR_FRAME pushq $\num-256 + CFI_ADJUST_CFA_OFFSET 8 interrupt \func jmp ret_from_intr CFI_ENDPROC @@ -536,8 +616,19 @@ ENTRY(thermal_interrupt) ENTRY(reschedule_interrupt) apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt -ENTRY(invalidate_interrupt) - apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt + .macro INVALIDATE_ENTRY num +ENTRY(invalidate_interrupt\num) + apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt + .endm + + INVALIDATE_ENTRY 0 + INVALIDATE_ENTRY 1 + INVALIDATE_ENTRY 2 + INVALIDATE_ENTRY 3 + INVALIDATE_ENTRY 4 + INVALIDATE_ENTRY 5 + INVALIDATE_ENTRY 6 + INVALIDATE_ENTRY 7 ENTRY(call_function_interrupt) apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt @@ -558,16 +649,23 @@ ENTRY(spurious_interrupt) * Exception entry points. */ .macro zeroentry sym + INTR_FRAME pushq $0 /* push error code/oldrax */ + CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* push real oldrax to the rdi slot */ + CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry + CFI_ENDPROC .endm .macro errorentry sym + XCPT_FRAME pushq %rax + CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry + CFI_ENDPROC .endm /* error code is on the stack already */ @@ -594,10 +692,7 @@ ENTRY(spurious_interrupt) * and the exception handler in %rax. */ ENTRY(error_entry) - CFI_STARTPROC simple - CFI_DEF_CFA rsp,(SS-RDI) - CFI_REL_OFFSET rsp,(RSP-RDI) - CFI_REL_OFFSET rip,(RIP-RDI) + _frame RDI /* rdi slot contains rax, oldrax contains error code */ cld subq $14*8,%rsp @@ -679,7 +774,9 @@ error_kernelspace: /* Reload gs selector with exception handling */ /* edi: new selector */ ENTRY(load_gs_index) + CFI_STARTPROC pushf + CFI_ADJUST_CFA_OFFSET 8 cli swapgs gs_change: @@ -687,7 +784,9 @@ gs_change: 2: mfence /* workaround */ swapgs popf + CFI_ADJUST_CFA_OFFSET -8 ret + CFI_ENDPROC .section __ex_table,"a" .align 8 @@ -799,7 +898,7 @@ ENTRY(device_not_available) /* runs on exception stack */ KPROBE_ENTRY(debug) - CFI_STARTPROC + INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug @@ -809,9 +908,9 @@ KPROBE_ENTRY(debug) /* runs on exception stack */ ENTRY(nmi) - CFI_STARTPROC + INTR_FRAME pushq $-1 - CFI_ADJUST_CFA_OFFSET 8 + CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_nmi /* * "Paranoid" exit path from exception stack. @@ -877,7 +976,7 @@ ENTRY(reserved) /* runs on exception stack */ ENTRY(double_fault) - CFI_STARTPROC + XCPT_FRAME paranoidentry do_double_fault jmp paranoid_exit CFI_ENDPROC @@ -890,7 +989,7 @@ ENTRY(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) - CFI_STARTPROC + XCPT_FRAME paranoidentry do_stack_segment jmp paranoid_exit CFI_ENDPROC @@ -911,7 +1010,7 @@ ENTRY(spurious_interrupt_bug) #ifdef CONFIG_X86_MCE /* runs on exception stack */ ENTRY(machine_check) - CFI_STARTPROC + INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check @@ -923,14 +1022,19 @@ ENTRY(call_debug) zeroentry do_call_debug ENTRY(call_softirq) + CFI_STARTPROC movq %gs:pda_irqstackptr,%rax pushq %r15 + CFI_ADJUST_CFA_OFFSET 8 movq %rsp,%r15 + CFI_DEF_CFA_REGISTER r15 incl %gs:pda_irqcount cmove %rax,%rsp call __do_softirq movq %r15,%rsp + CFI_DEF_CFA_REGISTER rsp decl %gs:pda_irqcount popq %r15 + CFI_ADJUST_CFA_OFFSET -8 ret - + CFI_ENDPROC diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index b1c144f..7a64ea1 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -45,7 +45,7 @@ void __init clustered_apic_check(void) u8 clusters, max_cluster; u8 id; u8 cluster_cnt[NUM_APIC_CLUSTERS]; - int num_cpus = 0; + int max_apic = 0; #if defined(CONFIG_ACPI) /* @@ -64,14 +64,15 @@ void __init clustered_apic_check(void) id = bios_cpu_apicid[i]; if (id == BAD_APICID) continue; - num_cpus++; + if (id > max_apic) + max_apic = id; cluster_cnt[APIC_CLUSTERID(id)]++; } /* Don't use clustered mode on AMD platforms. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { genapic = &apic_physflat; -#ifndef CONFIG_CPU_HOTPLUG +#ifndef CONFIG_HOTPLUG_CPU /* In the CPU hotplug case we cannot use broadcast mode because that opens a race when a CPU is removed. Stay at physflat mode in this case. @@ -79,7 +80,7 @@ void __init clustered_apic_check(void) we have ACPI platform support for CPU hotplug we should detect hotplug capablity from ACPI tables and only do this when really needed. -AK */ - if (num_cpus <= 8) + if (max_apic <= 8) genapic = &apic_flat; #endif goto print; @@ -103,9 +104,14 @@ void __init clustered_apic_check(void) * (We don't use lowest priority delivery + HW APIC IRQ steering, so * can ignore the clustered logical case and go straight to physical.) */ - if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) + if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { +#ifdef CONFIG_HOTPLUG_CPU + /* Don't use APIC shortcuts in CPU hotplug to avoid races */ + genapic = &apic_physflat; +#else genapic = &apic_flat; - else +#endif + } else genapic = &apic_cluster; print: diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index f6523dd..a472d62 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -51,10 +51,10 @@ static void cluster_init_apic_ldr(void) count = 3; id = my_cluster | (1UL << count); x86_cpu_to_log_apicid[smp_processor_id()] = id; - apic_write_around(APIC_DFR, APIC_DFR_CLUSTER); + apic_write(APIC_DFR, APIC_DFR_CLUSTER); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 6d57da9..9da3edb 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -38,10 +38,10 @@ static void flat_init_apic_ldr(void) num = smp_processor_id(); id = 1UL << num; x86_cpu_to_log_apicid[num] = id; - apic_write_around(APIC_DFR, APIC_DFR_FLAT); + apic_write(APIC_DFR, APIC_DFR_FLAT); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } static void flat_send_IPI_mask(cpumask_t cpumask, int vector) @@ -62,7 +62,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); + apic_write(APIC_ICR2, cfg); /* * program the ICR @@ -72,7 +72,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); local_irq_restore(flags); } @@ -177,9 +177,9 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", - .int_delivery_mode = dest_LowestPrio, + .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST, + .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED, .target_cpus = physflat_target_cpus, .apic_id_registered = flat_apic_id_registered, .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 98ff5eb..4592bf2 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -270,7 +270,7 @@ ENTRY(level3_kernel_pgt) .org 0x4000 ENTRY(level2_ident_pgt) /* 40MB for bootup. */ - .quad 0x0000000000000283 + .quad 0x0000000000000183 .quad 0x0000000000200183 .quad 0x0000000000400183 .quad 0x0000000000600183 diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index a891690..b2a238b 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -18,15 +18,12 @@ #include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> -#include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/pgtable.h> #include <asm/delay.h> #include <asm/desc.h> #include <asm/apic.h> -#include <linux/irq.h> - /* * Common place to define all x86 IRQ vectors * @@ -486,7 +483,14 @@ void spurious_interrupt(void); void error_interrupt(void); void reschedule_interrupt(void); void call_function_interrupt(void); -void invalidate_interrupt(void); +void invalidate_interrupt0(void); +void invalidate_interrupt1(void); +void invalidate_interrupt2(void); +void invalidate_interrupt3(void); +void invalidate_interrupt4(void); +void invalidate_interrupt5(void); +void invalidate_interrupt6(void); +void invalidate_interrupt7(void); void thermal_interrupt(void); void i8254_timer_resume(void); @@ -562,8 +566,15 @@ void __init init_IRQ(void) */ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c index c4dc914..e0ba5c1 100644 --- a/arch/x86_64/kernel/init_task.c +++ b/arch/x86_64/kernel/init_task.c @@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 5f1529b..522944a 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -21,7 +21,6 @@ */ #include <linux/mm.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> @@ -45,6 +44,8 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; +int disable_timer_pin_1 __initdata; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -298,6 +299,15 @@ void __init check_ioapic(void) #endif /* RED-PEN skip them on mptables too? */ return; + case PCI_VENDOR_ID_ATI: + /* All timer interrupts on atiixp + are doubled. Disable one. */ + if (disable_timer_pin_1 == 0) { + disable_timer_pin_1 = 1; + printk(KERN_INFO + "ATI board detected. Disabling timer pin 1.\n"); + } + return; } /* No multi-function device? */ @@ -1022,13 +1032,11 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); - } + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); v = apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); @@ -1048,12 +1056,8 @@ void __apicdebuginit print_local_APIC(void * dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); v = apic_read(APIC_ICR); printk(KERN_DEBUG "... APIC ICR: %08x\n", v); @@ -1665,6 +1669,8 @@ static inline void check_timer(void) setup_nmi(); enable_8259A_irq(0); } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); return; } clear_IO_APIC_pin(0, pin1); diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 849a20a..d6a04a8 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -99,7 +99,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) unsigned irq = regs->orig_rax & 0xff; irq_enter(); - BUG_ON(irq > 256); __do_IRQ(irq, regs); irq_exit(); diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 8aa5673..969365c 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/cpu.h> #include <linux/percpu.h> +#include <linux/ctype.h> #include <asm/processor.h> #include <asm/msr.h> #include <asm/mce.h> @@ -56,15 +57,19 @@ void mce_log(struct mce *mce) smp_wmb(); for (;;) { entry = rcu_dereference(mcelog.next); - /* When the buffer fills up discard new entries. Assume - that the earlier errors are the more interesting. */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); - return; + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, &mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } } - /* Old left over entry. Skip. */ - if (mcelog.entry[entry].finished) - continue; smp_rmb(); next = entry + 1; if (cmpxchg(&mcelog.next, entry, next) == entry) @@ -404,9 +409,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff } err = 0; - for (i = 0; i < next; i++) { - if (!mcelog.entry[i].finished) - continue; + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + while (!mcelog.entry[i].finished) { + if (!time_before(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + continue; + } + cpu_relax(); + } smp_rmb(); err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); buf += sizeof(struct mce); @@ -479,6 +490,7 @@ static int __init mcheck_disable(char *str) /* mce=off disables machine check. Note you can reenable it later using sysfs. + mce=TOLERANCELEVEL (number, see above) mce=bootlog Log MCEs from before booting. Disabled by default to work around buggy BIOS that leave bogus MCEs. */ static int __init mcheck_enable(char *str) @@ -489,6 +501,8 @@ static int __init mcheck_enable(char *str) mce_dont_init = 1; else if (!strcmp(str, "bootlog")) mce_bootlog = 1; + else if (isdigit(str[0])) + get_option(&str, &tolerant); else printk("mce= argument %s ignored. Please use /sys", str); return 0; @@ -501,10 +515,12 @@ __setup("mce", mcheck_enable); * Sysfs support */ -/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. */ +/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. + Only one CPU is active at this time, the others get readded later using + CPU hotplug. */ static int mce_resume(struct sys_device *dev) { - on_each_cpu(mce_init, NULL, 1, 1); + mce_init(NULL); return 0; } diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 8d8ed6a..f16d38d 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -14,7 +14,6 @@ */ #include <linux/mm.h> -#include <linux/irq.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/config.h> @@ -46,8 +45,6 @@ int acpi_found_madt; int apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -unsigned char pci_bus_to_node [256]; -EXPORT_SYMBOL(pci_bus_to_node); static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -705,7 +702,7 @@ void __init mp_register_lapic ( processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; - processor.mpc_apicver = 0x10; /* TBD: lapic version */ + processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | diff --git a/arch/x86_64/kernel/msr.c b/arch/x86_64/kernel/msr.c deleted file mode 100644 index 598953a..0000000 --- a/arch/x86_64/kernel/msr.c +++ /dev/null @@ -1,279 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * msr.c - * - * x86 MSR access device - * - * This device is accessed by lseek() to the appropriate register number - * and then read/write in chunks of 8 bytes. A larger size means multiple - * reads or writes of the same register. - * - * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include <linux/module.h> -#include <linux/config.h> - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/major.h> -#include <linux/fs.h> - -#include <asm/processor.h> -#include <asm/msr.h> -#include <asm/uaccess.h> -#include <asm/system.h> - -/* Note: "err" is handled in a funny way below. Otherwise one version - of gcc or another breaks. */ - -static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) -{ - int err; - - asm volatile ("1: wrmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" " .quad 1b,3b\n" ".previous":"=&bDS" (err) - :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0)); - - return err; -} - -static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) -{ - int err; - - asm volatile ("1: rdmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 1b,3b\n" - ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx) - :"c"(reg), "i"(-EIO), "0"(0)); - - return err; -} - -#ifdef CONFIG_SMP - -struct msr_command { - int cpu; - int err; - u32 reg; - u32 data[2]; -}; - -static void msr_smp_wrmsr(void *cmd_block) -{ - struct msr_command *cmd = (struct msr_command *)cmd_block; - - if (cmd->cpu == smp_processor_id()) - cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); -} - -static void msr_smp_rdmsr(void *cmd_block) -{ - struct msr_command *cmd = (struct msr_command *)cmd_block; - - if (cmd->cpu == smp_processor_id()) - cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); -} - -static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -{ - struct msr_command cmd; - int ret; - - preempt_disable(); - if (cpu == smp_processor_id()) { - ret = wrmsr_eio(reg, eax, edx); - } else { - cmd.cpu = cpu; - cmd.reg = reg; - cmd.data[0] = eax; - cmd.data[1] = edx; - - smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); - ret = cmd.err; - } - preempt_enable(); - return ret; -} - -static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) -{ - struct msr_command cmd; - int ret; - - preempt_disable(); - if (cpu == smp_processor_id()) { - ret = rdmsr_eio(reg, eax, edx); - } else { - cmd.cpu = cpu; - cmd.reg = reg; - - smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); - - *eax = cmd.data[0]; - *edx = cmd.data[1]; - - ret = cmd.err; - } - preempt_enable(); - return ret; -} - -#else /* ! CONFIG_SMP */ - -static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -{ - return wrmsr_eio(reg, eax, edx); -} - -static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) -{ - return rdmsr_eio(reg, eax, edx); -} - -#endif /* ! CONFIG_SMP */ - -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret = -EINVAL; - - lock_kernel(); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - } - unlock_kernel(); - return ret; -} - -static ssize_t msr_read(struct file *file, char __user * buf, - size_t count, loff_t * ppos) -{ - u32 __user *tmp = (u32 __user *) buf; - u32 data[2]; - size_t rv; - u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (rv = 0; count; count -= 8) { - err = do_rdmsr(cpu, reg, &data[0], &data[1]); - if (err) - return err; - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static ssize_t msr_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - const u32 __user *tmp = (const u32 __user *)buf; - u32 data[2]; - size_t rv; - u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (rv = 0; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; - err = do_wrmsr(cpu, reg, data[0], data[1]); - if (err) - return err; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static int msr_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ - - return 0; -} - -/* - * File operations we support - */ -static struct file_operations msr_fops = { - .owner = THIS_MODULE, - .llseek = msr_seek, - .read = msr_read, - .write = msr_write, - .open = msr_open, -}; - -static int __init msr_init(void) -{ - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { - printk(KERN_ERR "msr: unable to get major %d for msr\n", - MSR_MAJOR); - return -EBUSY; - } - - return 0; -} - -static void __exit msr_exit(void) -{ - unregister_chrdev(MSR_MAJOR, "cpu/msr"); -} - -module_init(msr_init); -module_exit(msr_exit) - -MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); -MODULE_DESCRIPTION("x86 generic MSR driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index caf1649..4388b8a5 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -14,7 +14,6 @@ #include <linux/config.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/bootmem.h> #include <linux/smp_lock.h> @@ -488,8 +487,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); return; - } - die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); + } + die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); } } else { __get_cpu_var(last_irq_sum) = sum; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 57f35c6..cf0a031 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -191,11 +191,9 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) { struct page *page; int node; - if (dev->bus == &pci_bus_type) { - cpumask_t mask; - mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - node = cpu_to_node(first_cpu(mask)); - } else + if (dev->bus == &pci_bus_type) + node = pcibus_to_node(to_pci_dev(dev)->bus); + else node = numa_node_id(); page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 8661f82..b5a89c0 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -32,7 +32,6 @@ #include <linux/a.out.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/irq.h> #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> @@ -123,6 +122,7 @@ static void poll_idle (void) : : "i" (_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + clear_thread_flag(TIF_POLLING_NRFLAG); } else { set_need_resched(); } @@ -271,8 +271,11 @@ void __show_regs(struct pt_regs * regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s\n", - current->pid, current->comm, print_tainted(), system_utsname.release); + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); printk_address(regs->rip); printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); @@ -483,33 +486,6 @@ out: } /* - * This function selects if the context switch from prev to next - * has to tweak the TSC disable bit in the cr4. - */ -static inline void disable_tsc(struct task_struct *prev_p, - struct task_struct *next_p) -{ - struct thread_info *prev, *next; - - /* - * gcc should eliminate the ->thread_info dereference if - * has_secure_computing returns 0 at compile time (SECCOMP=n). - */ - prev = prev_p->thread_info; - next = next_p->thread_info; - - if (has_secure_computing(prev) || has_secure_computing(next)) { - /* slow path here */ - if (has_secure_computing(prev) && - !has_secure_computing(next)) { - write_cr4(read_cr4() & ~X86_CR4_TSD); - } else if (!has_secure_computing(prev) && - has_secure_computing(next)) - write_cr4(read_cr4() | X86_CR4_TSD); - } -} - -/* * This special macro can be used to load a debugging register */ #define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) @@ -627,8 +603,6 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * } } - disable_tsc(prev_p, next_p); - return prev_p; } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9aec524..351d8d6 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -336,6 +336,11 @@ static __init void parse_cmdline_early (char ** cmdline_p) #endif #endif + if (!memcmp(from, "disable_timer_pin_1", 19)) + disable_timer_pin_1 = 1; + if (!memcmp(from, "enable_timer_pin_1", 18)) + disable_timer_pin_1 = -1; + if (!memcmp(from, "nolapic", 7) || !memcmp(from, "disableapic", 11)) disable_apic = 1; @@ -755,6 +760,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_NUMA +static int nearby_node(int apicid) +{ + int i; + for (i = apicid - 1; i >= 0; i--) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. * Assumes number of cores is a power of two. @@ -763,8 +786,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - int node = 0; unsigned bits; +#ifdef CONFIG_NUMA + int node = 0; + unsigned apicid = phys_proc_id[cpu]; +#endif bits = 0; while ((1 << bits) < c->x86_num_cores) @@ -776,20 +802,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) phys_proc_id[cpu] >>= bits; #ifdef CONFIG_NUMA - /* When an ACPI SRAT table is available use the mappings from SRAT - instead. */ - if (acpi_numa <= 0) { - node = phys_proc_id[cpu]; - if (!node_online(node)) - node = first_node(node_online_map); - cpu_to_node[cpu] = node; - } else { - node = cpu_to_node[cpu]; - } + node = phys_proc_id[cpu]; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + int ht_nodeid = apicid - (phys_proc_id[0] << bits); + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + cpu_to_node[cpu] = node; + + printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", + cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif - - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif } @@ -909,6 +947,25 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned apicid, node; + int cpu = smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + apicid = phys_proc_id[cpu]; + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE) + node = 0; + cpu_to_node[cpu] = node; + + if (acpi_numa > 0) + printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); +#endif +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ @@ -927,6 +984,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); c->x86_num_cores = intel_num_cpu_cores(c); + + srat_detect_node(); } static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index e3ffcac..bd33be2 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -94,7 +94,7 @@ void __init setup_per_cpu_areas(void) size = PERCPU_ENOUGH_ROOM; #endif - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu_mask (i, cpu_possible_map) { char *ptr; if (!NODE_DATA(cpu_to_node(i))) { @@ -119,7 +119,6 @@ void pda_init(int cpu) asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); wrmsrl(MSR_GS_BASE, cpu_pda + cpu); - pda->me = pda; pda->cpunumber = cpu; pda->irqcount = -1; pda->kernelstack = diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index e595822..9db9dda 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/spinlock.h> #include <linux/smp_lock.h> @@ -29,6 +28,8 @@ #include <asm/proto.h> #include <asm/apicdef.h> +#define __cpuinit __init + /* * Smarter SMP flushing macros. * c/o Linus Torvalds. @@ -37,19 +38,41 @@ * writing to user space from interrupts. (Its not allowed anyway). * * Optimizations Manfred Spraul <manfred@colorfullife.com> + * + * More scalable flush, from Andi Kleen + * + * To avoid global state use 8 different call vectors. + * Each CPU uses a specific vector to trigger flushes on other + * CPUs. Depending on the received vector the target CPUs look into + * the right per cpu variable for the flush data. + * + * With more than 8 CPUs they are hashed to the 8 available + * vectors. The limited global vector space forces us to this right now. + * In future when interrupts are split into per CPU domains this could be + * fixed, at the cost of triggering multiple IPIs in some cases. */ -static cpumask_t flush_cpumask; -static struct mm_struct * flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +union smp_flush_state { + struct { + cpumask_t flush_cpumask; + struct mm_struct *flush_mm; + unsigned long flush_va; #define FLUSH_ALL -1ULL + spinlock_t tlbstate_lock; + }; + char pad[SMP_CACHE_BYTES]; +} ____cacheline_aligned; + +/* State is put into the per CPU data section, but padded + to a full cache line because other CPUs can access it and we don't + want false sharing in the per cpu data segment. */ +static DEFINE_PER_CPU(union smp_flush_state, flush_state); /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. */ -static inline void leave_mm (unsigned long cpu) +static inline void leave_mm(int cpu) { if (read_pda(mmu_state) == TLBSTATE_OK) BUG(); @@ -101,15 +124,25 @@ static inline void leave_mm (unsigned long cpu) * * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt (void) +asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + int cpu; + int sender; + union smp_flush_state *f; - cpu = get_cpu(); + cpu = smp_processor_id(); + /* + * orig_rax contains the interrupt vector - 256. + * Use that to determine where the sender put the data. + */ + sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpu_isset(cpu, f->flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -120,64 +153,63 @@ asmlinkage void smp_invalidate_interrupt (void) * BUG(); */ - if (flush_mm == read_pda(active_mm)) { + if (f->flush_mm == read_pda(active_mm)) { if (read_pda(mmu_state) == TLBSTATE_OK) { - if (flush_va == FLUSH_ALL) + if (f->flush_va == FLUSH_ALL) local_flush_tlb(); else - __flush_tlb_one(flush_va); + __flush_tlb_one(f->flush_va); } else leave_mm(cpu); } out: ack_APIC_irq(); - cpu_clear(cpu, flush_cpumask); - put_cpu_no_resched(); + cpu_clear(cpu, f->flush_cpumask); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { - cpumask_t tmp; - /* - * A couple of (to be removed) sanity checks: - * - * - we do not send IPIs to not-yet booted CPUs. - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(tmp, cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - if (!mm) - BUG(); + int sender; + union smp_flush_state *f; - /* - * I'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * Temporarily this turns IRQs off, so that lockups are - * detected by the NMI watchdog. - */ - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); + /* Caller has disabled preemption */ + sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; + f = &per_cpu(flush_state, sender); + + /* Could avoid this lock when + num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is + probably not worth checking this for a cache-hot lock. */ + spin_lock(&f->tlbstate_lock); + + f->flush_mm = mm; + f->flush_va = va; + cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); /* * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + + while (!cpus_empty(f->flush_cpumask)) + cpu_relax(); - while (!cpus_empty(flush_cpumask)) - mb(); /* nothing. lockup detection does not belong here */; + f->flush_mm = NULL; + f->flush_va = 0; + spin_unlock(&f->tlbstate_lock); +} - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); +int __cpuinit init_smp_flush(void) +{ + int i; + for_each_cpu_mask(i, cpu_possible_map) { + spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i)); + } + return 0; } + +core_initcall(init_smp_flush); void flush_tlb_current_task(void) { @@ -295,8 +327,11 @@ void unlock_ipi_call_lock(void) /* * this function sends a 'generic call function' IPI to one other CPU * in the system. + * + * cpu is a standard Linux logical CPU number. */ -static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info, +static void +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, int nonatomic, int wait) { struct call_data_struct data; diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 4fb34b5..e12d7ba 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -44,7 +44,6 @@ #include <linux/mm.h> #include <linux/kernel_stat.h> #include <linux/smp_lock.h> -#include <linux/irq.h> #include <linux/bootmem.h> #include <linux/thread_info.h> #include <linux/module.h> @@ -58,6 +57,8 @@ #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm/nmi.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -413,8 +414,13 @@ void __cpuinit smp_callin(void) /* * Get our bogomips. + * + * Need to enable IRQs because it can take longer and then + * the NMI watchdog might kill us. */ + local_irq_enable(); calibrate_delay(); + local_irq_disable(); Dprintk("Stack at about %p\n",&cpuid); disable_APIC_timer(); @@ -540,8 +546,8 @@ static void inquire_remote_apic(int apicid) */ apic_wait_icr_idle(); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { @@ -574,12 +580,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); @@ -595,10 +601,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); timeout = 0; @@ -610,16 +616,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta atomic_set(&init_deasserted, 1); - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; + num_starts = 2; /* * Run STARTUP IPI loop. @@ -640,12 +637,11 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_rip >> 12)); + apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); /* * Give the other CPU some time to accept the IPI. @@ -864,17 +860,6 @@ static __cpuinit void smp_cleanup_boot(void) * Reset trampoline flag */ *((volatile int *) phys_to_virt(0x467)) = 0; - -#ifndef CONFIG_HOTPLUG_CPU - /* - * Free pages reserved for SMP bootup. - * When you add hotplug CPU support later remove this - * Note there is more work to be done for later CPU bootup. - */ - - free_page((unsigned long) __va(PAGE_SIZE)); - free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE)); -#endif } /* diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index 0612640..ebb9abf 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -8,23 +8,8 @@ */ #include <linux/config.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/poll.h> -#include <linux/delay.h> -#include <linux/sysrq.h> -#include <linux/proc_fs.h> -#include <linux/irq.h> -#include <linux/pm.h> -#include <linux/device.h> +#include <linux/smp.h> #include <linux/suspend.h> -#include <asm/uaccess.h> -#include <asm/acpi.h> -#include <asm/tlbflush.h> -#include <asm/io.h> #include <asm/proto.h> struct saved_context saved_context; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7b6abe0..2373cb8 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -18,7 +18,6 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/mc146818rtc.h> -#include <linux/irq.h> #include <linux/time.h> #include <linux/ioport.h> #include <linux/module.h> @@ -937,7 +936,6 @@ void __init time_init(void) vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; - vxtime.hz = vxtime_hz; rdtscll_sync(&vxtime.last_tsc); setup_irq(0, &irq0); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index f238d60..b5e09e6 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -47,9 +47,6 @@ #include <asm/proto.h> #include <asm/nmi.h> -#include <linux/irq.h> - - extern struct gate_struct idt_table[256]; asmlinkage void divide_error(void); @@ -324,13 +321,13 @@ void handle_BUG(struct pt_regs *regs) if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame))) return; - if ((unsigned long)f.filename < __PAGE_OFFSET || + if (f.filename >= 0 || f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) return; - if (__get_user(tmp, f.filename)) - f.filename = "unmapped filename"; + if (__get_user(tmp, (char *)(long)f.filename)) + f.filename = (int)(long)"unmapped filename"; printk("----------- [cut here ] --------- [please bite here ] ---------\n"); - printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line); + printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line); } #ifdef CONFIG_BUG @@ -343,30 +340,33 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; -void oops_begin(void) +unsigned long oops_begin(void) { - int cpu = safe_smp_processor_id(); - /* racy, but better than risking deadlock. */ - local_irq_disable(); + int cpu = safe_smp_processor_id(); + unsigned long flags; + + /* racy, but better than risking deadlock. */ + local_irq_save(flags); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + spin_lock(&die_lock); } - die_owner = cpu; + die_owner = cpu; console_verbose(); - bust_spinlocks(1); + bust_spinlocks(1); + return flags; } -void oops_end(void) +void oops_end(unsigned long flags) { die_owner = -1; - bust_spinlocks(0); - spin_unlock(&die_lock); + bust_spinlocks(0); + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) - panic("Oops"); -} + panic("Oops"); +} void __die(const char * str, struct pt_regs * regs, long err) { @@ -392,10 +392,11 @@ void __die(const char * str, struct pt_regs * regs, long err) void die(const char * str, struct pt_regs * regs, long err) { - oops_begin(); + unsigned long flags = oops_begin(); + handle_BUG(regs); __die(str, regs, err); - oops_end(); + oops_end(flags); do_exit(SIGSEGV); } static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) @@ -406,7 +407,8 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e void die_nmi(char *str, struct pt_regs *regs) { - oops_begin(); + unsigned long flags = oops_begin(); + /* * We are in trouble anyway, lets at least try * to get a message out. @@ -416,7 +418,7 @@ void die_nmi(char *str, struct pt_regs *regs) if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); - oops_end(); + oops_end(flags); do_exit(SIGSEGV); } @@ -790,13 +792,16 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) */ cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + switch (swd & ~cwd & 0x3f) { case 0x000: default: break; case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ info.si_code = FPE_FLTINV; break; case 0x002: /* Denormalize */ diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 2e57344..70a0bd1 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -107,7 +107,7 @@ static force_inline long time_syscall(long *t) return secs; } -static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) +int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) { if (unlikely(!__sysctl_vsyscall)) return gettimeofday(tv,tz); @@ -120,7 +120,7 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz /* This will break when the xtime seconds get inaccurate, but that is * unlikely */ -static time_t __vsyscall(1) vtime(time_t *t) +time_t __vsyscall(1) vtime(time_t *t) { if (unlikely(!__sysctl_vsyscall)) return time_syscall(t); @@ -129,12 +129,12 @@ static time_t __vsyscall(1) vtime(time_t *t) return __xtime.tv_sec; } -static long __vsyscall(2) venosys_0(void) +long __vsyscall(2) venosys_0(void) { return -ENOSYS; } -static long __vsyscall(3) venosys_1(void) +long __vsyscall(3) venosys_1(void) { return -ENOSYS; } diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 816732d..b75b872 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -221,12 +221,13 @@ int unhandled_signal(struct task_struct *tsk, int sig) static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { - oops_begin(); + unsigned long flags = oops_begin(); + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", current->comm, address); dump_pagetable(address); __die("Bad pagetable", regs, error_code); - oops_end(); + oops_end(flags); do_exit(SIGKILL); } @@ -304,6 +305,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long address; const struct exception_table_entry *fixup; int write; + unsigned long flags; siginfo_t info; #ifdef CONFIG_CHECKING @@ -521,7 +523,7 @@ no_context: * terminate things with extreme prejudice. */ - oops_begin(); + flags = oops_begin(); if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); @@ -534,7 +536,7 @@ no_context: __die("Oops", regs, error_code); /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(); + oops_end(flags); do_exit(SIGKILL); /* diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index aa4a518..e60a1a8 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -57,31 +57,31 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); void show_mem(void) { - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; + long i, total = 0, reserved = 0; + long shared = 0, cached = 0; pg_data_t *pgdat; struct page *page; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; } } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); + printk(KERN_INFO "%lu pages of RAM\n", total); + printk(KERN_INFO "%lu reserved pages\n",reserved); + printk(KERN_INFO "%lu pages shared\n",shared); + printk(KERN_INFO "%lu pages swap cached\n",cached); } /* References to section boundaries */ @@ -381,41 +381,14 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) __flush_tlb_all(); } -static inline int page_is_ram (unsigned long pagenr) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long addr, end; - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - /* - * !!!FIXME!!! Some BIOSen report areas as RAM that - * are not. Notably the 640->1Mb area. We need a sanity - * check here. - */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - -extern int swiotlb_force; - static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; + long codesize, reservedpages, datasize, initsize; #ifdef CONFIG_SWIOTLB - if (swiotlb_force) - swiotlb = 1; if (!iommu_aperture && (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) swiotlb = 1; @@ -436,25 +409,11 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ #ifdef CONFIG_NUMA - totalram_pages += numa_free_all_bootmem(); - tmp = 0; - /* should count reserved pages here for all nodes */ + totalram_pages = numa_free_all_bootmem(); #else - -#ifdef CONFIG_FLATMEM - max_mapnr = end_pfn; - if (!mem_map) BUG(); -#endif - - totalram_pages += free_all_bootmem(); - - for (tmp = 0; tmp < end_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; + totalram_pages = free_all_bootmem(); #endif + reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); after_bootmem = 1; @@ -471,7 +430,7 @@ void __init mem_init(void) kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START); - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", + printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), codesize >> 10, diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index ec35747..65417b0 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned long prevbase; struct node nodes[8]; int nodeid, i, nb; + unsigned char nodeids[8]; int found = 0; u32 reg; unsigned numnodes; nodemask_t nodes_parsed; + unsigned dualcore = 0; nodes_clear(nodes_parsed); @@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base,limit; - + u32 nodeid; + + /* Undefined before E stepping, but hopefully 0 */ + dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); nodeid = limit & 7; + nodeids[i] = nodeid; if ((base & 3) == 0) { if (i < numnodes) printk("Skipping disabled node %d\n", i); @@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { - /* assume 1:1 NODE:CPU */ - cpu_to_node[i] = i; + nodeid = nodeids[i]; + apicid_to_node[nodeid << dualcore] = i; + apicid_to_node[(nodeid << dualcore) + dualcore] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } } diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 04f7a33..80a49d9 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -28,8 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = NUMA_NO_NODE +}; +unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 8e3d097..4b2e844 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -20,14 +20,20 @@ static struct acpi_table_slit *acpi_slit; -/* Internal processor count */ -static unsigned int __initdata num_processors = 0; - static nodemask_t nodes_parsed __initdata; static nodemask_t nodes_found __initdata; static struct node nodes[MAX_NUMNODES] __initdata; static __u8 pxm2node[256] = { [0 ... 255] = 0xff }; +static int node_to_pxm(int n); + +int pxm_to_node(int pxm) +{ + if ((unsigned)pxm >= 256) + return 0; + return pxm2node[pxm]; +} + static __init int setup_node(int pxm) { unsigned node = pxm2node[pxm]; @@ -44,14 +50,14 @@ static __init int setup_node(int pxm) static __init int conflicting_nodes(unsigned long start, unsigned long end) { int i; - for_each_online_node(i) { + for_each_node_mask(i, nodes_parsed) { struct node *nd = &nodes[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return 1; + return i; if (nd->end == end && nd->start == start) - return 1; + return i; } return -1; } @@ -75,8 +81,11 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) static __init void bad_srat(void) { + int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; + for (i = 0; i < MAX_LOCAL_APIC; i++) + apicid_to_node[i] = NUMA_NO_NODE; } static __init inline int srat_disabled(void) @@ -104,18 +113,10 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) bad_srat(); return; } - if (num_processors >= NR_CPUS) { - printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n", - num_processors, pa->apic_id, NR_CPUS); - bad_srat(); - return; - } - cpu_to_node[num_processors] = node; + apicid_to_node[pa->apic_id] = node; acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n", - pxm, pa->apic_id, num_processors, node); - - num_processors++; + printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + pxm, pa->apic_id, node); } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -143,10 +144,15 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n", start, end); i = conflicting_nodes(start, end); - if (i >= 0) { + if (i == node) { + printk(KERN_WARNING + "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", + pxm, start, end, nodes[i].start, nodes[i].end); + } else if (i >= 0) { printk(KERN_ERR - "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n", - pxm, start, end, i, nodes[i].start, nodes[i].end); + "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n", + pxm, start, end, node_to_pxm(i), + nodes[i].start, nodes[i].end); bad_srat(); return; } @@ -174,6 +180,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) int i; if (acpi_numa <= 0) return -1; + + /* First clean up the node list */ + for_each_node_mask(i, nodes_parsed) { + cutoff_node(i, start, end); + if (nodes[i].start == nodes[i].end) + node_clear(i, nodes_parsed); + } + memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); if (memnode_shift < 0) { printk(KERN_ERR @@ -181,16 +195,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) bad_srat(); return -1; } - for (i = 0; i < MAX_NUMNODES; i++) { - if (!node_isset(i, nodes_parsed)) - continue; - cutoff_node(i, start, end); - if (nodes[i].start == nodes[i].end) { - node_clear(i, nodes_parsed); - continue; - } + + /* Finally register nodes */ + for_each_node_mask(i, nodes_parsed) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - } for (i = 0; i < NR_CPUS; i++) { if (cpu_to_node[i] == NUMA_NO_NODE) continue; @@ -201,7 +209,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return 0; } -int node_to_pxm(int n) +static int node_to_pxm(int n) { int i; if (pxm2node[n] == n) diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index d80c323..3acf60d 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -58,10 +58,16 @@ fill_mp_bus_to_cpumask(void) for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) { - int node = NODE_ID(nid); + struct pci_bus *bus; + long node = NODE_ID(nid); + /* Algorithm a bit dumb, but + it shouldn't matter here */ + bus = pci_find_bus(0, j); + if (!bus) + continue; if (!node_online(node)) node = 0; - pci_bus_to_node[j] = node; + bus->sysdata = (void *)node; } } } diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 657e88a..a0838c4 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -111,13 +111,6 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return 0; - /* Kludge for now. Don't use mmconfig on AMD systems because - those have some busses where mmconfig doesn't work, - and we don't parse ACPI MCFG well enough to handle that. - Remove when proper handling is added. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return 0; - /* RED-PEN i386 doesn't do _nocache right now */ pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { |