From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/a.out-core.h | 8 +++--- arch/x86/include/asm/debugreg.h | 29 +++++++++++++++++++ arch/x86/include/asm/hw_breakpoint.h | 55 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/processor.h | 8 +++--- 4 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 arch/x86/include/asm/hw_breakpoint.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e39..fc4685d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[0] = current->thread.debugreg[0]; + dump->u_debugreg[1] = current->thread.debugreg[1]; + dump->u_debugreg[2] = current->thread.debugreg[2]; + dump->u_debugreg[3] = current->thread.debugreg[3]; dump->u_debugreg[4] = 0; dump->u_debugreg[5] = 0; dump->u_debugreg[6] = current->thread.debugreg6; diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37..23439fb 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,30 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +/* For process management */ +extern void flush_thread_hw_breakpoint(struct task_struct *tsk); +extern int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags); + +/* For CPU management */ +extern void 
load_debug_registers(void); +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 0000000..1acb4d4 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,55 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include +#include + +/* Available HW breakpoint length encodings */ +#define HW_BREAKPOINT_LEN_1 0x40 +#define HW_BREAKPOINT_LEN_2 0x44 +#define HW_BREAKPOINT_LEN_4 0x4c +#define HW_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define HW_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define HW_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define HW_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define HW_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); +extern unsigned int hbp_user_refcount[HBP_NUM]; + +extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_uninstall_thread_hw_breakpoint(void); +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk); +extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); +extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_update_kernel_hw_breakpoint(void *); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b2fab0..448b34a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -29,6 +29,7 @@ struct mm_struct; #include #include +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -431,12 +432,11 @@ struct thread_struct { unsigned long fs; unsigned long gs; /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; + unsigned long debugreg[HBP_NUM]; unsigned long debugreg6; unsigned long debugreg7; + /* Hardware breakpoint info */ + struct hw_breakpoint *hbp[HBP_NUM]; /* Fault info: */ unsigned long cr2; unsigned long trap_no; -- cgit v1.1 From 5946fa3d5cdeb846a647a1900026af9f8b08c8b5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:44:24 +0100 Subject: x86, hpet: Simplify the HPET code On 64-bits, using unsigned long when unsigned int suffices needlessly creates larger code (due to the need for REX prefixes), and most of the logic in hpet.c really doesn't need 64-bit operations. At once this avoids the need for a couple of type casts. 
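As a side illustration (not part of the patch; the function names here are invented), the size win comes from instruction encoding: on x86-64, operations on 64-bit operands need a REX.W prefix while 32-bit operations use the default operand size, so the unsigned int variant of a sketch like this compiles to slightly smaller code:

unsigned int add_u32(unsigned int a, unsigned int b)
{
        return a + b;           /* 32-bit add, no REX.W prefix needed */
}

unsigned long add_u64(unsigned long a, unsigned long b)
{
        return a + b;           /* same add, but encoded with a REX.W prefix */
}
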
Signed-off-by: Jan Beulich Cc: Shaohua Li Cc: Venkatesh Pallipadi LKML-Reference: <4A8BC9780200007800010832@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hpet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1c22cb0..65847c5 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -69,7 +69,7 @@ extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); -extern unsigned long hpet_readl(unsigned long a); +extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); extern void hpet_msi_unmask(unsigned int irq); -- cgit v1.1 From eb13296cfaf6c699566473669a96a38a90562384 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:13 -0400 Subject: x86: Instruction decoder API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Acked-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. 
Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/inat.h | 188 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/inat_types.h | 29 ++++++ arch/x86/include/asm/insn.h | 143 +++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+) create mode 100644 arch/x86/include/asm/inat.h create mode 100644 arch/x86/include/asm/inat_types.h create mode 100644 arch/x86/include/asm/insn.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 0000000..2866fdd --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,188 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. 
+ */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy instruction prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ +#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ + +#define INAT_LPREFIX_MAX 3 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_REXPFX (1 << INAT_FLAG_OFFS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); + +/* Attribute checking functions */ +static inline int inat_is_prefix(insn_attr_t attr) +{ + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + 
+static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return attr & INAT_REXPFX; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 0000000..cb3c20c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 0000000..12b4e37 --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,143 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ -- cgit v1.1 From 
b1cf540f0e5278ecfe8532557e547d833ed269d7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:44 -0400 Subject: x86: Add pt_regs register and stack access APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add following APIs for accessing registers and stack entries from pt_regs. These APIs are required by kprobes-based event tracer on ftrace. Some other debugging tools might be able to use it too. - regs_query_register_offset(const char *name) Query the offset of "name" register. - regs_query_register_name(unsigned int offset) Query the name of register by its offset. - regs_get_register(struct pt_regs *regs, unsigned int offset) Get the value of a register by its offset. - regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) Check the address is in the kernel stack. - regs_get_kernel_stack_nth(struct pt_regs *reg, unsigned int nth) Get Nth entry of the kernel stack. (N >= 0) - regs_get_argument_nth(struct pt_regs *reg, unsigned int nth) Get Nth argument at function call. (N >= 0) Signed-off-by: Masami Hiramatsu Cc: linux-arch@vger.kernel.org Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203444.31965.26374.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/ptrace.h | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908..a3d49dd 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include +#include #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register whose offset from @regs + * is @offset. The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. 
+ */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ -- cgit v1.1 From c8bc6f3c806f1fcbfdbf0b1ff6c52dba59192d3b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Aug 2009 12:07:09 -0700 Subject: x86: arch specific support for remapping HPET MSIs x86 arch support for remapping HPET MSI's by associating the HPET timer block with the interrupt-remapping HW unit and setting up appropriate irq_chip Signed-off-by: Suresh Siddha Cc: Venkatesh Pallipadi Cc: David Woodhouse Cc: Jesse Barnes Cc: Jay Fenlason LKML-Reference: <20090804190729.630510000@intel.com> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hpet.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 65847c5..5d89fd2 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -65,6 +65,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; +extern u8 hpet_blockid; extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); @@ -78,9 +79,9 @@ extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); #ifdef CONFIG_PCI_MSI -extern int arch_setup_hpet_msi(unsigned int irq); +extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); #else -static inline int arch_setup_hpet_msi(unsigned int irq) +static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) { return -EINVAL; } -- cgit v1.1 From b8a4754147d61f5359a765a3afd3eb03012aa052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Jul 2009 11:10:02 +0200 Subject: x86, msr: Unify rdmsr_on_cpus/wrmsr_on_cpus Since rdmsr_on_cpus and wrmsr_on_cpus are almost identical, unify them into a common __rwmsr_on_cpus helper thus avoiding code duplication. While at it, convert cpumask_t's to const struct cpumask *. Signed-off-by: Borislav Petkov Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba..9a00219 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); -- cgit v1.1 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer point to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. 
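A minimal userspace sketch of the same idea (hypothetical names, not the kernel implementation): an inline wrapper refuses the copy when gcc can prove the destination is too small, and reduces to a plain memcpy() when it cannot.

#include <stdio.h>
#include <string.h>

static inline int checked_copy(void *to, const void *from, size_t n)
{
        size_t sz = __builtin_object_size(to, 0);  /* (size_t)-1 when unknown */

        if (sz != (size_t)-1 && sz < n) {
                fprintf(stderr, "buffer overflow detected!\n");
                return -1;
        }
        memcpy(to, from, n);
        return 0;
}

int main(void)
{
        char small[4];
        char big[16];
        static const char src[16] = "0123456789abcde";

        checked_copy(big, src, sizeof(src));    /* sz == 16, copy proceeds */
        checked_copy(small, src, 3);            /* sz == 4 >= 3, copy proceeds */
        /* checked_copy(small, src, sizeof(src)) would trip the check */
        return 0;
}
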
Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 19 ++++++++++++++++++- arch/x86/include/asm/uaccess_64.h | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44..582d6ae 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b21..ce6fec7 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { -- cgit v1.1 From ff60fab71bb3b4fdbf8caf57ff3739ffd0887396 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 28 Sep 2009 14:21:22 +0200 Subject: x86: Use __builtin_memset and __builtin_memcpy for memset/memcpy GCC provides reasonable memset/memcpy functions itself, with __builtin_memset and __builtin_memcpy. For the "unknown" cases, it'll fall back to our current existing functions, but for fixed size versions it'll inline something smart. Quite often that will be the same as we have now, but sometimes it can do something smarter (for example, if the code then sets the first member of a struct, it can do a shorter memset). In addition, and this is more important, gcc knows which registers and such are not clobbered (while for our asm version it pretty much acts like a compiler barrier), so for various cases it can avoid reloading values. 
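For illustration (a hypothetical struct and functions, not from the patch), a fixed-size case where gcc can expand the builtin inline instead of emitting a call:

#include <string.h>

struct point {
        int x, y;
};

void reset_point(struct point *p)
{
        __builtin_memset(p, 0, sizeof(*p));     /* constant size: typically expanded inline */
}

void clear_buffer(void *buf, size_t len)
{
        __builtin_memset(buf, 0, len);          /* unknown size: falls back to memset() */
}
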
The effect on codesize is shown below on my typical laptop .config: text data bss dec hex filename 5605675 2041100 6525148 14171923 d83f13 vmlinux.before 5595849 2041668 6525148 14162665 d81ae9 vmlinux.after Due to some not-so-good behavior in the gcc 3.x series, this change is only done for GCC 4.x and above. Signed-off-by: Arjan van de Ven LKML-Reference: <20090928142122.6fc57e9c@infradead.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/string_32.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e6..3d3e835 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none -- cgit v1.1 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. 
Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 582d6ae..952f9e7 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STACKOVERFLOW + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) @@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM else - WARN(1, "Buffer overflow detected!\n"); -#endif + copy_from_user_overflow(); + return ret; } -- cgit v1.1 From 7c68af6e32c73992bad24107311f3433c89016e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Sep 2009 09:40:22 +0300 Subject: core, x86: Add user return notifiers Add a general per-cpu notifier that is called whenever the kernel is about to return to userspace. The notifier uses a thread_info flag and existing checks, so there is no impact on user return or context switch fast paths. This will be used initially to speed up KVM task switching by lazily updating MSRs. Signed-off-by: Avi Kivity LKML-Reference: <1253342422-13811-1-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2..375c917 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,6 +108,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -142,13 +144,14 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + _TIF_USER_RETURN_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #define PREEMPT_ACTIVE 0x10000000 -- cgit v1.1 From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 2 Oct 2009 
07:50:50 -0700 Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For automated testing it is useful to have the option to turn the warnings on copy_from_user() etc checks into errors: In function ‘copy_from_user’, inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080, inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503: linux/arch/x86/include/asm/uaccess_32.h:213: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user buffer size is not provably correct Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20091002075050.4e9f7641@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 952f9e7..0c9825e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to, extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif ; -- cgit v1.1 From 04a705df47d1ea27ca2b066f24b1951c51792d0d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 6 Oct 2009 16:42:08 +0200 Subject: perf_events: Check for filters on fixed counter events Intel fixed counters do not support all the filters possible with a generic counter. Thus, if a fixed counter event is passed but with certain filters set, then the fixed_mode_idx() function must fail and the event must be measured in a generic counter instead. Reject filters are: inv, edge, cnt-mask. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1254840129-6198-2-git-send-email-eranian@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3f..8d9f854 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) -- cgit v1.1 From f3834b9ef68067199486740b31f691afb14dbdf5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 10:12:46 +0200 Subject: x86: Generate cmpxchg build failures Rework the x86 cmpxchg() implementation to generate build failures when used on improper types. 
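The trick, shown here as a standalone sketch with invented names (the real macros are in the diff below), is an extern function that is deliberately never defined: for supported sizes the dead default branch is optimized away, while an unsupported type leaves a reference to the missing symbol and the build fails at link time.

extern void __bad_size(void);           /* deliberately has no definition anywhere */

#define fetch_value(ptr)                                                \
({                                                                      \
        unsigned long __ret;                                            \
        switch (sizeof(*(ptr))) {                                       \
        case 4:                                                         \
                __ret = *(volatile unsigned int *)(ptr);                \
                break;                                                  \
        case 8:                                                         \
                __ret = *(volatile unsigned long *)(ptr);               \
                break;                                                  \
        default:                                                        \
                __bad_size();   /* unsupported size: link error */      \
                __ret = 0;                                              \
                break;                                                  \
        }                                                               \
        __ret;                                                          \
})

With an optimizing build, fetch_value() on an int or long pointer compiles cleanly, while using it on, say, a char pointer leaves an unresolved reference to __bad_size() and the link fails.
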
Signed-off-by: Peter Zijlstra Acked-by: Linus Torvalds LKML-Reference: <1254771187.21044.22.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cmpxchg_32.h | 218 ++++++++++++++--------------------- arch/x86/include/asm/cmpxchg_64.h | 234 +++++++++++++++----------------------- 2 files changed, 177 insertions(+), 275 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 82ceb78..5371174 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. 
*/ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. 
- */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e..485ae41 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. 
Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. 
- */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ -- cgit v1.1 From a6f05a6a0a1713d5b019f096799d49226807d3df Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Oct 2009 18:02:54 -0700 Subject: x86-64: make compat_start_thread() match start_thread() For no real good reason, compat_start_thread() was embedded inline in whereas the native start_thread() lives in process_*.c. Move compat_start_thread() to process_64.c, remove gratuitious differences, and fix a few items which mostly look like bit rot. In particular, compat_start_thread() didn't do free_thread_xstate(), which means it was hanging on to the xstate store area even when it was not needed. It was also not setting old_rsp, but it looks like that generally shouldn't matter for a 32-bit process. Note: compat_start_thread *has* to be a macro, since it is tested with start_thread_ia32() as the out of line function name. Signed-off-by: H. 
Peter Anvin Acked-by: Suresh Siddha --- arch/x86/include/asm/elf.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 456a304..8a024ba 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -157,19 +157,6 @@ do { \ #define compat_elf_check_arch(x) elf_check_arch_ia32(x) -static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) -{ - loadsegment(fs, 0); - loadsegment(ds, __USER32_DS); - loadsegment(es, __USER32_DS); - load_gs_index(0); - regs->ip = ip; - regs->sp = sp; - regs->flags = X86_EFLAGS_IF; - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; -} - static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) { @@ -191,11 +178,8 @@ do { \ #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -#define compat_start_thread(regs, ip, sp) \ -do { \ - start_ia32_thread(regs, ip, sp); \ - set_fs(USER_DS); \ -} while (0) +void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); +#define compat_start_thread start_thread_ia32 #define COMPAT_SET_PERSONALITY(ex) \ do { \ -- cgit v1.1 From fb2531953fd8855abdcf458459020fd382c5deca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 7 Oct 2009 13:20:38 +0200 Subject: mce, edac: Use an atomic notifier for MCEs decoding Add an atomic notifier which ensures proper locking when conveying MCE info to EDAC for decoding. The actual notifier call overrides a default, negative priority notifier. Note: make sure we register the default decoder only once since mcheck_init() runs on each CPU. Signed-off-by: Borislav Petkov LKML-Reference: <20091003065752.GA8935@liondog.tnic> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b7..227a72d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include @@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ -- cgit v1.1 From ae24ffe5ecec17c956ac25371d7c2e12b4b36e53 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 12 Oct 2009 10:18:23 -0400 Subject: x86, 64-bit: Move K8 B step iret fixup to fault entry asm Move the handling of truncated %rip from an iret fault to the fault entry path. This allows x86-64 to use the standard search_extable() function. 
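For reference, the generic lookup is conceptually just a binary search over sorted (faulting-instruction, fixup) address pairs; the following is a simplified sketch of that idea, not the actual kernel code:

#include <stddef.h>

struct extable_entry {
        unsigned long insn;     /* address of the instruction that may fault */
        unsigned long fixup;    /* address to resume at if it does */
};

static const struct extable_entry *
find_fixup(const struct extable_entry *first, const struct extable_entry *last,
           unsigned long addr)
{
        while (first <= last) {
                const struct extable_entry *mid = first + (last - first) / 2;

                if (mid->insn == addr)
                        return mid;             /* fixup found for this fault */
                if (mid->insn < addr)
                        first = mid + 1;
                else
                        last = mid - 1;
        }
        return NULL;                            /* no fixup: the fault is fatal */
}
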
Signed-off-by: Brian Gerst Cc: Linus Torvalds Cc: Jan Beulich LKML-Reference: <1255357103-5418-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c93..abd3e0e 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif -- cgit v1.1 From 8ee2debce32412118cf8c239e0026ace56ea1425 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:00 -0700 Subject: x86: Export k8 physical topology To eventually interleave emulated nodes over physical nodes, we need to know the physical topology of the machine without actually registering it. This does the k8 node setup in two parts: detection and registration. NUMA emulation can then used the physical topology detected to setup the address ranges of emulated nodes accordingly. If emulation isn't used, the k8 nodes are registered as normal. Two formals are added to the x86 NUMA setup functions: `acpi' and `k8'. These represent whether ACPI or K8 NUMA has been detected; both cannot be true at the same time. This specifies to the NUMA emulation code whether an underlying physical NUMA topology exists and which interface to use. This patch deals solely with separating the k8 setup path into Northbridge detection and registration steps and leaves the ACPI changes for a subsequent patch. The `acpi' formal is added here, however, to avoid touching all the header files again in the next patch. This approach also ensures emulated nodes will not span physical nodes so the true memory latency is not misrepresented. k8_get_nodes() may now be used to export the k8 physical topology of the machine for NUMA emulation. 
Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/k8.h | 4 +++- arch/x86/include/asm/page_types.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c2d1f3b..c092f72 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -10,7 +10,9 @@ extern struct pci_dev **k8_northbridges; extern int num_k8_northbridges; extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); -extern int k8_scan_nodes(unsigned long start, unsigned long end); +extern int k8_get_nodes(struct bootnode *nodes); +extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); +extern int k8_scan_nodes(void); #ifdef CONFIG_K8_NB static inline struct pci_dev *node_to_k8_nb_misc(int node) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 6473f5c..642fe34 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped; extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); -extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, + int acpi, int k8); extern void free_initmem(void); #endif /* !__ASSEMBLY__ */ -- cgit v1.1 From 8716273caef7f55f39fe4fc6c69c5f9f197f41f1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:04 -0700 Subject: x86: Export srat physical topology This is the counterpart to "x86: export k8 physical topology" for SRAT. It is not as invasive because the acpi code already seperates node setup into detection and registration steps, with the exception of registering e820 active regions in acpi_numa_memory_affinity_init(). This is now moved to acpi_scan_nodes() if NUMA emulation is disabled or deferred. acpi_numa_init() now returns a value which specifies whether an underlying SRAT was located. If so, that topology can be used by the emulation code to interleave emulated nodes over physical nodes or to register the nodes for ACPI. acpi_get_nodes() may now be used to export the srat physical topology of the machine for NUMA emulation. Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc5..e3d4a0d 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -158,6 +158,7 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; +extern int acpi_get_nodes(struct bootnode *physnodes); extern int acpi_scan_nodes(unsigned long start, unsigned long end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) extern void acpi_fake_nodes(const struct bootnode *fake_nodes, -- cgit v1.1 From a2e2725541fad72416326798c2d7fa4dafb7d337 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Oct 2009 23:40:10 -0700 Subject: net: Introduce recvmmsg socket syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meaning receive multiple messages, reducing the number of syscalls and net stack entry/exit operations. 
Next patches will introduce mechanisms where protocols that want to optimize this operation will provide an unlocked_recvmsg operation. This takes into account comments made by: . Paul Moore: sock_recvmsg is called only for the first datagram, sock_recvmsg_nosec is used for the rest. . Caitlin Bestler: recvmmsg now has a struct timespec timeout, that works in the same fashion as the ppoll one. If the underlying protocol returns a datagram with MSG_OOB set, this will make recvmmsg return right away with as many datagrams (+ the OOB one) it has received so far. . Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen datagrams and then recvmsg returns an error, recvmmsg will return the successfully received datagrams, store the error and return it in the next call. This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg, where we will be able to acquire the lock only at batch start and end, not at every underlying recvmsg call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- arch/x86/include/asm/unistd_32.h | 3 ++- arch/x86/include/asm/unistd_64.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6fb3c20..3baf379 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -342,10 +342,11 @@ #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 +#define __NR_recvmmsg 337 #ifdef __KERNEL__ -#define NR_syscalls 337 +#define NR_syscalls 338 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 8d3ad0a..4843f7b 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 298 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 299 +__SYSCALL(__NR_recvmmsg, sys_recvmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR -- cgit v1.1 From 9844ab11c763bfed9f054c82366b19dcda66aca9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:03 +0400 Subject: x86, apic: Introduce the NOOP apic driver Introduce NOOP APIC driver. We should use it in case if apic was disabled due to hardware of software/firmware problems (including user requested to disable it case). The driver is attempting to catch any inappropriate apic operation call with warning issue. Also it is possible to use some apic operation like IPI calls, read/write without checking for apic presence which should make callers code easier. 
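The catch-and-warn idea can be pictured with a hedged, simplified op (the function below is illustrative, not the driver's actual code):

	/* Sketch only: a no-op APIC write that flags unexpected use. */
	static void noop_apic_write_sketch(u32 reg, u32 val)
	{
		WARN_ON_ONCE(1);	/* APIC is disabled; the write is dropped */
	}
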
Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.534682104@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d..08a5f42 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; -- cgit v1.1 From 6c2c502910247d2820cb630e7b28fb6bdecdbf45 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 30 Sep 2009 11:02:59 -0500 Subject: x86: SGI UV: Fix irq affinity for hub based interrupts This patch fixes handling of uv hub irq affinity. IRQs with ALL or NODE affinity can be routed to cpus other than their originally assigned cpu. Those with CPU affinity cannot be rerouted. Signed-off-by: Dimitri Sivanich LKML-Reference: <20090930160259.GA7822@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_irq.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c..5397e12 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry { dest : 32; }; +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; + extern struct irq_chip uv_irq_chip; -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); +extern int +arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); extern void arch_disable_uv_irq(int, unsigned long); +extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ -- cgit v1.1 From 9338ad6ffb70eca97f335d93c54943828c8b209e Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 13 Oct 2009 15:32:36 -0500 Subject: x86, apic: Move SGI UV functionality out of generic IO-APIC code Move UV specific functionality out of the generic IO-APIC code. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091013203236.GD20543@sgi.com> [ Cleaned up the code some more in their new places. 
] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 29 +++++++++++++++++++++++------ arch/x86/include/asm/uv/uv_irq.h | 7 ------- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d9..56f0877 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 5397e12..d6b17c7 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -31,13 +31,6 @@ enum { UV_AFFINITY_CPU }; -extern struct irq_chip uv_irq_chip; - -extern int -arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); -extern void arch_disable_uv_irq(int, unsigned long); -extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); - extern int uv_irq_2_mmr_info(int, unsigned long *, int *); extern int uv_setup_irq(char *, int, int, unsigned long, int); extern void uv_teardown_irq(unsigned int); -- cgit v1.1 From 7ec13187ef48b04bb7f6dfa266c7271a52d009c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 Oct 2009 15:06:42 +0200 Subject: x86, apic: Fix prototype in hw_irq.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This warning: In file included from arch/x86/include/asm/ipi.h:23, from arch/x86/kernel/apic/apic_noop.c:27: arch/x86/include/asm/hw_irq.h:105: warning: ‘struct irq_desc’ declared inside parameter list arch/x86/include/asm/hw_irq.h:105: warning: its scope is only this definition or declaration, which is probably not what you want triggers because irq_desc is defined after hw_irq.h is included in irq.h. Since it's pointer reference only, a forward declaration of the type will solve the problem. 
LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 56f0877..1984ce9 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -102,6 +102,8 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); -- cgit v1.1 From 4a4de9c7d7111ce4caf422b856756125d8304f9d Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 14 Oct 2009 09:22:57 -0500 Subject: x86: UV RTC: Rename generic_interrupt to x86_platform_ipi Signed-off-by: Dimitri Sivanich LKML-Reference: <20091014142257.GE11048@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/hw_irq.h | 4 ++-- arch/x86/include/asm/irq.h | 2 +- arch/x86/include/asm/irq_vectors.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index f5693c8..8e8ec66 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -34,7 +34,7 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, smp_invalidate_interrupt) #endif -BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) +BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) /* * every pentium local APIC has two 'local interrupts', with a diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f..beaabd7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,7 +12,7 @@ typedef struct { unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq_spurious_count; #endif - unsigned int generic_irqs; /* arch dependent */ + unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_pending_irqs; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d9..95207ca 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -27,7 +27,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); -extern void generic_interrupt(void); +extern void x86_platform_ipi(void); extern void error_interrupt(void); extern void perf_pending_interrupt(void); @@ -101,7 +101,7 @@ extern void eisa_set_level_irq(unsigned int irq); /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); -extern void smp_generic_interrupt(struct pt_regs *); +extern void smp_x86_platform_ipi(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); #ifdef CONFIG_X86_IO_APIC extern asmlinkage void smp_irq_move_cleanup_interrupt(void); diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cb..fcbc6d1 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,7 +36,7 @@ static inline int irq_canonicalize(int irq) extern void fixup_irqs(void); #endif -extern void 
(*generic_interrupt_extension)(void); +extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5b21f0e..6a635bd 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,7 +106,7 @@ /* * Generic system vector for platform specific use */ -#define GENERIC_INTERRUPT_VECTOR 0xed +#define X86_PLATFORM_IPI_VECTOR 0xed /* * Performance monitoring pending work vector: -- cgit v1.1 From 5e09954a9acc3b435ffe318b95afd3c02fae069f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 12:31:32 +0200 Subject: x86, mce: Fix up MCE naming nomenclature Prefix global/setup routines with "mcheck_" thus differentiating from the internal facilities prefixed with "mce_". Also, prefix the per cpu calls with mcheck_cpu and rename them to reflect the MCE setup hierarchy of calls better. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Cc: Andi Kleen LKML-Reference: <1255689093-26921-1-git-send-email-borislav.petkov@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 227a72d..161485d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,9 +120,9 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE -- cgit v1.1 From 0e1227d356e9b2fe0500d6cc7084f752040a1e0e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 19 Oct 2009 11:53:06 +0900 Subject: crypto: ghash - Add PCLMULQDQ accelerated implementation PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, carry-less multiplication. More information about PCLMULQDQ can be found at: http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ Because PCLMULQDQ changes XMM state, its usage must be enclosed with kernel_fpu_begin/end, which can be used only in process context, the acceleration is implemented as crypto_ahash. That is, request in soft IRQ context will be defered to the cryptd kernel thread. 
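A hedged sketch of the process-context constraint described above (the context type and the clmul helper are assumed names; this is not the driver code):

	/* Sketch only: PCLMULQDQ clobbers XMM state, so bracket it with FPU save/restore. */
	static void ghash_do_update_sketch(struct my_ghash_ctx *ctx,
					   const u8 *src, unsigned int srclen)
	{
		kernel_fpu_begin();				/* process context only */
		clmul_ghash_update_asm(src, srclen, ctx);	/* assumed asm helper */
		kernel_fpu_end();
	}

Requests that arrive in softirq context cannot take this path directly, which is why they are bounced to the cryptd thread as described.
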
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b..613700f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v1.1 From 74e081797bd9d2a7d8005fe519e719df343a2ba8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 14 Oct 2009 14:46:56 -0700 Subject: x86-64: align RODATA kernel section to 2MB with CONFIG_DEBUG_RODATA CONFIG_DEBUG_RODATA chops the large pages spanning boundaries of kernel text/rodata/data to small 4KB pages as they are mapped with different attributes (text as RO, RODATA as RO and NX etc). On x86_64, preserve the large page mappings for kernel text/rodata/data boundaries when CONFIG_DEBUG_RODATA is enabled. This is done by allowing the RODATA section to be hugepage aligned and having same RWX attributes for the 2MB page boundaries Extra Memory pages padding the sections will be freed during the end of the boot and the kernel identity mappings will have different RWX permissions compared to the kernel text mappings. Kernel identity mappings to these physical pages will be mapped with smaller pages but large page mappings are still retained for kernel text,rodata,data mappings. Signed-off-by: Suresh Siddha LKML-Reference: <20091014220254.190119924@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sections.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 1b7ee5d..0a52424 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -2,7 +2,13 @@ #define _ASM_X86_SECTIONS_H #include +#include extern char __brk_base[], __brk_limit[]; +extern struct exception_table_entry __stop___ex_table[]; + +#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +extern char __end_rodata_hpage_align[]; +#endif #endif /* _ASM_X86_SECTIONS_H */ -- cgit v1.1 From 04d46c1b13b02e1e5c24eb270a01cf3f94ee4d04 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:11 -0400 Subject: x86: Merge INAT_REXPFX into INAT_PFX_* Merge INAT_REXPFX into INAT_PFX_* macro and rename it to INAT_PFX_REX. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204211.30545.58090.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/inat.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 2866fdd..c2487d2 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -30,10 +30,11 @@ #define INAT_OPCODE_TABLE_SIZE 256 #define INAT_GROUP_TABLE_SIZE 8 -/* Legacy instruction prefixes */ +/* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ #define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ #define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +/* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ #define INAT_PFX_DS 6 /* 0x3E */ @@ -42,8 +43,11 @@ #define INAT_PFX_GS 9 /* 0x65 */ #define INAT_PFX_SS 10 /* 0x36 */ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ -#define INAT_LPREFIX_MAX 3 +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 /* Immediate size */ #define INAT_IMM_BYTE 1 @@ -75,12 +79,11 @@ #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) /* Flags */ #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) -#define INAT_REXPFX (1 << INAT_FLAG_OFFS) -#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) -#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) -#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) -#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) -#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -97,9 +100,10 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_attr_t esc_attr); /* Attribute checking functions */ -static inline int inat_is_prefix(insn_attr_t attr) +static inline int inat_is_legacy_prefix(insn_attr_t attr) { - return attr & INAT_PFX_MASK; + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; } static inline int inat_is_address_size_prefix(insn_attr_t attr) @@ -112,9 +116,14 @@ static inline int inat_is_operand_size_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; } +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { - if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) return 0; else return attr & INAT_PFX_MASK; @@ -155,11 +164,6 @@ static inline int inat_immediate_size(insn_attr_t attr) return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; } -static inline int inat_is_rex_prefix(insn_attr_t attr) -{ - return attr & INAT_REXPFX; -} - static inline int inat_has_modrm(insn_attr_t attr) { return attr & INAT_MODRM; -- cgit v1.1 From e0e492e99b372c6990a5daca9e4683c341f1330e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:27 -0400 Subject: x86: AVX instruction set decoder support Add Intel AVX(Advanced Vector Extensions) instruction set support to x86 instruction decoder. 
This adds insn.vex_prefix field for storing VEX prefixes, and introduces some original tags for expressing opcodes attributes. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204226.30545.23451.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/inat.h | 32 ++++++++++++++++++++++++++++++-- arch/x86/include/asm/insn.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index c2487d2..205b063 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -32,8 +32,8 @@ /* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ -#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ -#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ /* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ @@ -45,6 +45,9 @@ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ /* x86-64 REX prefix */ #define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -84,6 +87,8 @@ #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -98,6 +103,9 @@ extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) @@ -129,6 +137,17 @@ static inline int inat_last_prefix_id(insn_attr_t attr) return attr & INAT_PFX_MASK; } +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; @@ -189,4 +208,13 @@ static inline int inat_has_variant(insn_attr_t attr) return attr & INAT_VARIANT; } +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 12b4e37..96c2e0a 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -39,6 +39,7 @@ struct insn { * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 @@ -80,6 +81,19 @@ struct insn { #define X86_REX_X(rex) ((rex) 
& 2) #define X86_REX_B(rex) ((rex) & 1) +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + /* The last prefix is needed for two-byte and three-byte opcodes */ static inline insn_byte_t insn_last_prefix(struct insn *insn) { @@ -114,15 +128,42 @@ static inline void kernel_insn_init(struct insn *insn, const void *kaddr) #endif } +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { return insn->prefixes.nbytes; } -static inline int insn_offset_opcode(struct insn *insn) +static inline int insn_offset_vex_prefix(struct insn *insn) { return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; } +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} static inline int insn_offset_modrm(struct insn *insn) { return insn_offset_opcode(insn) + insn->opcode.nbytes; -- cgit v1.1 From 23359a88e7eca3c4f402562b102f23014db3c2aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:33 -0800 Subject: x86: Remove move_cleanup_count from irq_cfg move_cleanup_count for each irq in irq_cfg is keeping track of the total number of cpus that need to free the corresponding vectors associated with the irq which has now been migrated to new destination. As long as this move_cleanup_count is non-zero (i.e., as long as we have n't freed the vector allocations on the old destinations) we were preventing the irq's further migration. This cleanup count is unnecessary and it is enough to not allow the irq migration till we send the cleanup vector to the previous irq destination, for which we already have irq_cfg's move_in_progress. All we need to make sure is that we free the vector at the old desintation but we don't need to wait till that gets freed. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. 
Biederman LKML-Reference: <20091026230001.752968906@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1984ce9..6e12426 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,7 +94,6 @@ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; - unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; }; -- cgit v1.1 From a5e74b841930bec78a4684ab9f208b2ddfe7c736 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:34 -0800 Subject: x86: Force irq complete move during cpu offline When a cpu goes offline, fixup_irqs() try to move irq's currently destined to the offline cpu to a new cpu. But this attempt will fail if the irq is recently moved to this cpu and the irq still hasn't arrived at this cpu (for non intr-remapping platforms this is when we free the vector allocation at the previous destination) that is about to go offline. This will endup with the interrupt subsystem still pointing the irq to the offline cpu, causing that irq to not work any more. Fix this by forcing the irq to complete its move (its been a long time we moved the irq to this cpu which we are offlining now) and then move this irq to a new cpu before this cpu goes offline. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.848830905@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cb..ffd700f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); -- cgit v1.1 From 502f660466ba7a66711ffdf414b1f7f1131dcbf7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 28 Oct 2009 18:46:56 -0800 Subject: x86, cpa: Fix kernel text RO checks in static_protection() Steven Rostedt reported that we are unconditionally making the kernel text mapping as read-only. i.e., if someone does cpa() to the kernel text area for setting/clearing any page table attribute, we unconditionally clear the read-write attribute for the kernel text mapping that is set at compile time. We should delay (to forbid the write attribute) and enforce only after the kernel has mapped the text as read-only. 
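The delayed enforcement can be sketched as a guard in the protection check; kernel_set_to_readonly comes from this patch, while the snippet itself is a simplified illustration rather than the verbatim change:

	/* Sketch only: forbid RW on kernel text just once it has actually been marked RO. */
	if (kernel_set_to_readonly &&
	    within(address, (unsigned long)_text, (unsigned long)_etext))
		pgprot_val(forbidden) |= _PAGE_RW;
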
Reported-by: Steven Rostedt Signed-off-by: Suresh Siddha Acked-by: Steven Rostedt Tested-by: Steven Rostedt LKML-Reference: <20091029024820.996634347@sbs-t61.sc.intel.com> [ marked kernel_set_to_readonly as __read_mostly ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6af..eebb2cd 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -176,6 +176,7 @@ void clflush_cache_range(void *addr, unsigned int size); #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); extern const int rodata_test_data; +extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else -- cgit v1.1 From 3b0d65969b549b796abc6f0230f6142fed365d49 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Nov 2009 09:11:15 -0500 Subject: crypto: ghash-intel - Add PSHUFB macros Add PSHUFB macros instead of repeating byte sequences, suggested by Ingo. Signed-off-by: Herbert Xu Acked-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb..ebfb8a9 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_I387_H #define _ASM_X86_I387_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } +#endif /* __ASSEMBLY__ */ + +#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 +#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + #endif /* _ASM_X86_I387_H */ -- cgit v1.1 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/hw_breakpoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d4..3cfca8e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,7 +12,7 @@ struct arch_hw_breakpoint { }; #include -#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 -- cgit v1.1 From c3359fbce4b65d542d02c30aa5174c8e4838da2d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 00:59:52 -0700 Subject: sysctl: x86 Use the compat_sys_sysctl Now that we have a generic 32bit compatibility implementation there is no need for x86 to implement it's own. Cc: Thomas Gleixner Cc: Ingo Molnar Acked-by: H. Peter Anvin Signed-off-by: Eric W. 
Biederman --- arch/x86/include/asm/sys_ia32.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 72a6dcd..9af9dec 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -51,11 +51,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32; -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); -#endif - asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); -- cgit v1.1 From c12a229bc5971534537a7d0e49e44f9f1f5d0336 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Nov 2009 11:03:59 -0500 Subject: x86: Remove unused thread_return label from switch_to() Remove unused thread_return label from switch_to() macro on x86-64. Since this symbol cuts into schedule(), backtrace at the latter half of schedule() was always shown as thread_return(). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20091105160359.5181.26225.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f973..1a953e2 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ -- cgit v1.1 From 0d0fbbddcc27c062815732b38c44b544e656c799 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Nov 2009 22:45:41 +1030 Subject: x86, msr, cpumask: Use struct cpumask rather than the deprecated cpumask_t This makes the declarations match the definitions, which already use 'struct cpumask'. Signed-off-by: Rusty Russell Acked-by: Borislav Petkov LKML-Reference: <200911052245.41803.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 9a00219..5bef931 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); -- cgit v1.1 From 0420101c075530c65ba00b6fe7291b126fbfc5d2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2009 16:09:55 -0700 Subject: x86: k8.h: Add struct bootnode k8.h uses struct bootnode but does not #include a header file for it, so provide a simple declaration for it. 
arch/x86/include/asm/k8.h:13: warning: 'struct bootnode' declared inside parameter list arch/x86/include/asm/k8.h:13: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Randy Dunlap Acked-by: David Rientjes Cc: Stephen Rothwell LKML-Reference: <20091028160955.d27ccb16.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/k8.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c092f72..f70e600 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -4,6 +4,7 @@ #include extern struct pci_device_id k8_nb_ids[]; +struct bootnode; extern int early_is_k8_nb(u32 value); extern struct pci_dev **k8_northbridges; -- cgit v1.1 From 338bac527ed0e35b4cb50390972f15d3cbce92ca Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 27 Oct 2009 16:34:44 +0900 Subject: x86: Use x86_platform for iommu_shutdown This patch cleans up pci_iommu_shutdown() a bit to use x86_platform (similar to how IA64 initializes an IOMMU driver). This adds iommu_shutdown() to x86_platform to avoid calling every IOMMUs' shutdown functions in pci_iommu_shutdown() in order. The IOMMU shutdown functions are platform specific (we don't have multiple different IOMMU hardware) so the current way is pointless. An IOMMU driver sets x86_platform.iommu_shutdown to the shutdown function if necessary. Signed-off-by: FUJITA Tomonori Cc: joerg.roedel@amd.com LKML-Reference: <20091027163358F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_iommu.h | 2 -- arch/x86/include/asm/gart.h | 4 ---- arch/x86/include/asm/iommu.h | 2 +- arch/x86/include/asm/x86_init.h | 1 + 4 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b18089..3604669 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,12 +30,10 @@ extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa..4fdd5b3 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -36,7 +36,6 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -51,9 +50,6 @@ static inline void early_gart_iommu_check(void) static inline void gart_iommu_init(void) { } -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21b..878b307 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); +static inline void iommu_shutdown_noop(void) {} extern void no_iommu_init(void); 
extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd..66008ed 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -121,6 +121,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; -- cgit v1.1 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. 
- Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/debugreg.h | 11 ++++--- arch/x86/include/asm/hw_breakpoint.h | 58 +++++++++++++++++++++++------------- arch/x86/include/asm/processor.h | 12 ++++---- 4 files changed, 50 insertions(+), 32 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80c..9f828f8 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fb..9a3333c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,8 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); +DECLARE_PER_CPU(unsigned long, dr7); -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ @@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +#ifdef CONFIG_KVM +extern void hw_breakpoint_restore(void); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 3cfca8e..0675a7c 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -4,6 +4,11 @@ #ifdef __KERNEL__ #define __ARCH_HW_BREAKPOINT_H +/* + * The name should probably be something dealt in + * a higher level. 
While dealing with the user + * (display/resolving) + */ struct arch_hw_breakpoint { char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; @@ -12,44 +17,57 @@ struct arch_hw_breakpoint { }; #include -#include +#include +#include /* Available HW breakpoint length encodings */ -#define HW_BREAKPOINT_LEN_1 0x40 -#define HW_BREAKPOINT_LEN_2 0x44 -#define HW_BREAKPOINT_LEN_4 0x4c -#define HW_BREAKPOINT_LEN_EXECUTE 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 -#define HW_BREAKPOINT_LEN_8 0x48 +#define X86_BREAKPOINT_LEN_8 0x48 #endif /* Available HW breakpoint type encodings */ /* trigger on instruction execute */ -#define HW_BREAKPOINT_EXECUTE 0x80 +#define X86_BREAKPOINT_EXECUTE 0x80 /* trigger on memory write */ -#define HW_BREAKPOINT_WRITE 0x81 +#define X86_BREAKPOINT_WRITE 0x81 /* trigger on memory read or write */ -#define HW_BREAKPOINT_RW 0x83 +#define X86_BREAKPOINT_RW 0x83 /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; +struct perf_event; +struct pmu; -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 61aafb7..820f300 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -444,12 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug 
status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; -- cgit v1.1 From fd650a6394b3242edf125ba9c4d500349a6d7178 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 9 Nov 2009 13:52:26 -0500 Subject: x86: Generate .byte code for some new instructions via gas macro It will take some time for binutils (gas) to support some newly added instructions, such as SSE4.1 instructions or the AES-NI instructions found in upcoming Intel CPU. To make the source code can be compiled by old binutils, .byte code is used instead of the assembly instruction. But the readability and flexibility of raw .byte code is not good. This patch solves the issue of raw .byte code via generating it via assembly instruction like gas macro. The syntax is as close as possible to real assembly instruction. Some helper macros such as MODRM is not a full feature implementation. It can be extended when necessary. Signed-off-by: Huang Ying Acked-by: H. Peter Anvin Signed-off-by: Herbert Xu --- arch/x86/include/asm/inst.h | 150 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 arch/x86/include/asm/inst.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 0000000..14cf526 --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,150 @@ +/* + * Generate .byte code for some instructions not supported by old + * binutils. + */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + + .macro XMM_NUM opd xmm + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 + .if (\opd1 | \opd2) & 8 + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PSHUFB_XMM xmm1 xmm2 + XMM_NUM pshufb_opd1 \xmm1 + XMM_NUM pshufb_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX pshufb_opd1 pshufb_opd2 + .byte 0x0f, 0x38, 0x00 + MODRM 0xc0 pshufb_opd1 pshufb_opd2 + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + .macro AESKEYGENASSIST rcon xmm1 xmm2 + XMM_NUM aeskeygen_opd1 \xmm1 + XMM_NUM aeskeygen_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aeskeygen_opd1 aeskeygen_opd2 + .byte 0x0f, 0x3a, 0xdf + MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 + .byte \rcon + .endm + + .macro AESIMC xmm1 xmm2 + XMM_NUM aesimc_opd1 \xmm1 + XMM_NUM aesimc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesimc_opd1 aesimc_opd2 + .byte 0x0f, 0x38, 0xdb + MODRM 0xc0 aesimc_opd1 aesimc_opd2 + .endm + + .macro AESENC xmm1 xmm2 + XMM_NUM aesenc_opd1 \xmm1 + XMM_NUM aesenc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenc_opd1 aesenc_opd2 + .byte 0x0f, 0x38, 0xdc + MODRM 0xc0 
aesenc_opd1 aesenc_opd2 + .endm + + .macro AESENCLAST xmm1 xmm2 + XMM_NUM aesenclast_opd1 \xmm1 + XMM_NUM aesenclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenclast_opd1 aesenclast_opd2 + .byte 0x0f, 0x38, 0xdd + MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 + .endm + + .macro AESDEC xmm1 xmm2 + XMM_NUM aesdec_opd1 \xmm1 + XMM_NUM aesdec_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdec_opd1 aesdec_opd2 + .byte 0x0f, 0x38, 0xde + MODRM 0xc0 aesdec_opd1 aesdec_opd2 + .endm + + .macro AESDECLAST xmm1 xmm2 + XMM_NUM aesdeclast_opd1 \xmm1 + XMM_NUM aesdeclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdeclast_opd1 aesdeclast_opd2 + .byte 0x0f, 0x38, 0xdf + MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 + .endm +#endif + +#endif -- cgit v1.1 From 7abc07531383ac7f727cc9d44e1360a829f2082e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 10 Nov 2009 01:06:59 +0300 Subject: x86: apic: Do not use stacked physid_mask_t We should not use physid_mask_t as a stack based variable in apic code. This type depends on MAX_APICS parameter which may be huge enough. Especially it became a problem with apic NOOP driver which is portable between 32 bit and 64 bit environment (where we have really huge MAX_APICS). So apic driver should operate with pointers and a caller in turn should aware of allocation physid_mask_t variable. As a side (but positive) effect -- we may use already implemented physid_set_mask_of_physid function eliminating default_apicid_to_cpu_present completely. Note that physids_coerce and physids_promote turned into static inline from macro (since macro hides the fact that parameter is being interpreted as unsigned long, make it explicit). Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki Cc: Stephen Rothwell LKML-Reference: <20091109220659.GA5568@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 19 +++++++------------ arch/x86/include/asm/mpspec.h | 16 +++++++++------- 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08a5f42..b4ac2cd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -534,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long 
default_check_apicid_present(int bit) @@ -544,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -585,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c9450..61d90b1 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ -- cgit v1.1 From a2202aa29289db64ca7988b12343158b67b27f10 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 10 Nov 2009 09:38:24 +0800 Subject: x86: Under BIOS control, restore AP's APIC_LVTTHMR to the BSP value On platforms where the BIOS handles the thermal monitor interrupt, APIC_LVTTHMR on each logical CPU is programmed to generate a SMI and OS must not touch it. Unfortunately AP bringup sequence using INIT-SIPI-SIPI clears all the LVT entries except the mask bit. Essentially this results in all LVT entries including the thermal monitoring interrupt set to masked (clearing the bios programmed value for APIC_LVTTHMR). And this leads to kernel take over the thermal monitoring interrupt on AP's but not on BSP (leaving the bios programmed value only on BSP). As a result of this, we have seen system hangs when the thermal monitoring interrupt is generated. Fix this by reading the initial value of thermal LVT entry on BSP and if bios has taken over the control, then program the same value on all AP's and leave the thermal monitoring interrupt control on all the logical cpu's to the bios. 
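For illustration only (a hedged sketch, not the hunk applied by this patch; the static holder and its use below are assumptions about the implementation), the idea is to latch the BSP's LVTTHMR value once, early in boot, and let intel_init_thermal() replay it on the APs when that value shows SMI delivery, i.e. BIOS ownership:

    /* assumed holder for the BIOS-programmed value seen on the BSP */
    static u32 lvtthmr_init;

    void __init mcheck_intel_therm_init(void)
    {
            /* read before any AP is brought up via INIT-SIPI-SIPI */
            lvtthmr_init = apic_read(APIC_LVTTHMR);
    }
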
Signed-off-by: Yong Wang Reviewed-by: Suresh Siddha Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <20091110013824.GA24940@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar Cc: stable@kernel.org --- arch/x86/include/asm/mce.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 161485d..858baa0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,8 +120,10 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE +int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); #else +static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif @@ -215,5 +217,12 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); + +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ -- cgit v1.1 From 9f6b3c2c30cfbb1166ce7e74a8f9fd93ae19d2de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Nov 2009 21:03:43 +0100 Subject: hw-breakpoints: Fix broken a.out format dump Fix the broken a.out format dump. For now we only dump the ptrace breakpoints. TODO: Dump every perf breakpoints for the current thread, not only ptrace based ones. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- arch/x86/include/asm/a.out-core.h | 10 ++-------- arch/x86/include/asm/debugreg.h | 2 ++ 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index fc4685d..7a15588 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg[0]; - dump->u_debugreg[1] = current->thread.debugreg[1]; - dump->u_debugreg[2] = current->thread.debugreg[2]; - dump->u_debugreg[3] = current->thread.debugreg[3]; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9a3333c..f1b673f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,8 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +extern void aout_dump_debugregs(struct user *dump); + #ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); #endif -- cgit v1.1 From 59d8eb53ea9947db7cad8ebc31b0fb54f23a9851 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 11:03:12 +0100 Subject: hw-breakpoints: Wrap in the KVM breakpoint active state check Wrap in the cpu dr7 check that tells if we have active breakpoints that need to be restored in the cpu. This wrapper makes the check more self-explainable and also reusable for any further other uses. 
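A minimal usage sketch (illustrative; the exact KVM hunk is not part of this patch): callers restore the debug registers only when this CPU actually has breakpoints armed.

    /* e.g. on the VM-exit path, before touching the debug registers */
    if (hw_breakpoint_active())
            hw_breakpoint_restore();
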
Reported-by: Jan Kiszka Signed-off-by: Frederic Weisbecker Cc: Avi Kivity Cc: "K. Prasad" --- arch/x86/include/asm/debugreg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index f1b673f..0f6e92a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,11 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; +} + extern void aout_dump_debugregs(struct user *dump); #ifdef CONFIG_KVM -- cgit v1.1 From d1c84f79a6ba992dc01e312c44a21496303874d6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Nov 2009 12:07:23 +0100 Subject: x86: ucode-amd: Load ucode-patches once and not separately of each CPU This also implies that corresponding log messages, e.g. platform microcode: firmware: requesting amd-ucode/microcode_amd.bin show up only once on module load and not when ucode is updated for each CPU. Signed-off-by: Andreas Herrmann Cc: dimm LKML-Reference: <20091110110723.GH30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b50..c24ca9a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,6 +12,8 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { + void (*init)(struct device *device); + void (*fini)(void); enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); -- cgit v1.1 From d07c1be0693e0902d743160b8b638585b808f8ac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:12 +0900 Subject: x86: Add iommu_init to x86_init_ops We call the detections functions of all the IOMMUs then all their initialization functions. The latter is pointless since we don't detect multiple different IOMMUs. What we need to do is calling the initialization function of the detected IOMMU. This adds iommu_init hook to x86_init_ops so if an IOMMU detection function can set its initialization function to the hook. 
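For illustration, a detection routine is then expected to register its own initializer and leave everything else alone (hedged sketch; detect_example_iommu(), example_iommu_present() and example_iommu_init() are hypothetical names):

    void __init detect_example_iommu(void)
    {
            if (!example_iommu_present())   /* hypothetical probe */
                    return;

            /*
             * Only the detected IOMMU's init routine runs later;
             * example_iommu_init() is an assumed int (*)(void) initializer.
             */
            x86_init.iommu.iommu_init = example_iommu_init;
    }

The follow-up patches in this series convert Calgary, GART and AMD IOMMU detection to exactly this pattern.
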
Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 66008ed..d8e7145 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -91,6 +91,14 @@ struct x86_init_timers { }; /** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** -- cgit v1.1 From d7b9f7be216b04ff9d108f856bc03d96e7b3439c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:13 +0900 Subject: x86: Calgary: Convert detect_calgary() to use iommu_init hook This changes detect_calgary() to set init_calgary() to iommu_init hook if detect_calgary() finds the Calgary IOMMU. We can kill the code to check if we found the IOMMU in init_calgary() since detect_calgary() sets init_calgary() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com LKML-Reference: <1257849980-22640-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calgary.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb..0918654 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif -- cgit v1.1 From de957628ce7c84764ff41331111036b3ae5bad0f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:14 +0900 Subject: x86: GART: Convert gart_iommu_hole_init() to use iommu_init hook This changes gart_iommu_hole_init() to set gart_iommu_init() to iommu_init hook if gart_iommu_hole_init() finds the GART IOMMU. We can kill the code to check if we found the IOMMU in gart_iommu_init() since gart_iommu_hole_init() sets gart_iommu_init() only when it found the IOMMU. 
Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/gart.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4fdd5b3..4ac5b0f 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,7 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -47,9 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} static inline void gart_parse_options(char *options) { } -- cgit v1.1 From ea1b0d3945c7374849235b6ecaea1191ee1d9d50 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:15 +0900 Subject: x86: amd_iommu: Convert amd_iommu_detect() to use iommu_init hook This changes amd_iommu_detect() to set amd_iommu_init to iommu_init hook if amd_iommu_detect() finds the AMD IOMMU. We can kill the code to check if we found the IOMMU in amd_iommu_init() since amd_iommu_detect() sets amd_iommu_init() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-5-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 3604669..b8ef2ee 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,7 +23,6 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); @@ -32,7 +31,6 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else -static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } #endif -- cgit v1.1 From 75f1cdf1dda92cae037ec848ae63690d91913eac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:20 +0900 Subject: x86: Handle HW IOMMU initialization failure gracefully If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. It doesn't work for the majority since nowadays we have more than 4GB memory so we must use swiotlb instead of nommu. The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier from bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2 The current x86 IOMMU initialization sequence is too complicated and handling the above issue makes it more hacky. This patch changes x86 IOMMU initialization sequence to handle the above issue cleanly. The new x86 IOMMU initialization sequence are: 1. 
we initialize the swiotlb (and setting swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. if swiotlb usage is forced by the boot option, we finish here. 2. we call the detection functions of all the IOMMUs 3. the detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 4. if the IOMMU initialization function doesn't need to swiotlb then sets swiotlb to zero (e.g. the initialization is sucessful). 5. if we find that swiotlb is set to zero, we free swiotlb resource. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 878b307..df42a71 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,7 +2,6 @@ #define _ASM_X86_IOMMU_H static inline void iommu_shutdown_noop(void) {} -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -- cgit v1.1 From 200a9ae2801bc725f2c41ab13f6e0fb1610d2fb6 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 10 Nov 2009 13:58:35 -0600 Subject: x86: Remove asm/apicnum.h arch/x86/include/asm/apicnum.h is not referenced anywhere anymore. Its definitions appear in apicdef.h. Remove it. Signed-off-by: Dimitri Sivanich Acked-by: Cyrill Gorcunov Acked-by: Mike Travis LKML-Reference: <20091110195835.GA4393@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicnum.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 arch/x86/include/asm/apicnum.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c..0000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ -- cgit v1.1 From b18485e7acfe1a634615d1c628ef644c0d58d472 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Nov 2009 00:03:28 +0900 Subject: swiotlb: Remove the swiotlb variable usage POWERPC doesn't expect it to be used. 
This fixes the linux-next build failure reported by Stephen Rothwell: lib/swiotlb.c: In function 'setup_io_tlb_npages': lib/swiotlb.c:114: error: 'swiotlb' undeclared (first use in this function) Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Cc: peterz@infradead.org LKML-Reference: <20091112000258F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/swiotlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20..940f13a 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -9,11 +9,12 @@ extern int swiotlb_force; #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int pci_swiotlb_init(void); #else #define swiotlb 0 -static inline void pci_swiotlb_init(void) +static inline int pci_swiotlb_init(void) { + return 0; } #endif -- cgit v1.1 From 196cf0d67acad70ebb2572da489d5cc7066cdd05 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 10 Nov 2009 18:27:23 -0800 Subject: x86: Make sure wakeup trampoline code is below 1MB Instead of using bootmem, try find_e820_area()/reserve_early(), and call acpi_reserve_memory() early, to allocate the wakeup trampoline code area below 1M. This is more reliable, and it also removes a dependency on bootmem. -v2: change function name to acpi_reserve_wakeup_memory(), as suggested by Rafael. Signed-off-by: Yinghai Lu Acked-by: H. Peter Anvin Acked-by: Rafael J. Wysocki Cc: pm list Cc: Len Brown Cc: Linus Torvalds LKML-Reference: <4AFA210B.3020207@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index e3d4a0d..60d2b2d 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void); extern unsigned long acpi_wakeup_address; /* early initialization routine */ -extern void acpi_reserve_bootmem(void); +extern void acpi_reserve_wakeup_memory(void); /* * Check if the CPU can handle C2 and deeper -- cgit v1.1 From 68efa37df779b3e04280598e8b5b3a1919b65fee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:35:29 +0100 Subject: hw-breakpoints, x86: Fix modular KVM build This build error: arch/x86/kvm/x86.c:3655: error: implicit declaration of function 'hw_breakpoint_restore' Happens because in the CONFIG_KVM=m case there's no 'CONFIG_KVM' define in the kernel - it's CONFIG_KVM_MODULE in that case. Make the prototype available unconditionally. 
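For reference, the Kconfig behaviour behind this (illustrative): a tristate symbol set to 'm' emits CONFIG_KVM_MODULE into autoconf.h instead of CONFIG_KVM, so the old guard hid the prototype from a modular build:

    #ifdef CONFIG_KVM       /* not defined when CONFIG_KVM=m ... */
    extern void hw_breakpoint_restore(void);
    #endif                  /* ... so kvm/x86.c built as a module saw no prototype */
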
Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/debugreg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 0f6e92a..fdabd84 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -96,9 +96,7 @@ static inline int hw_breakpoint_active(void) extern void aout_dump_debugregs(struct user *dump); -#ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); -#endif #endif /* __KERNEL__ */ -- cgit v1.1 From 94a15564ac63af6bb2ff8d4d04f86d5e7ee0278a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:37 +0900 Subject: x86: Move iommu_shutdown_noop to x86_init.c iommu_init_noop() is in arch/x86/kernel/x86_init.c but iommu_shutdown_noop() in arch/x86/include/asm/iommu.h. This moves iommu_shutdown_noop() to x86_init.c for consistency. Signed-off-by: FUJITA Tomonori LKML-Reference: <1258199198-16657-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index df42a71..345c99c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -static inline void iommu_shutdown_noop(void) {} extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -- cgit v1.1 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/swiotlb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 940f13a..87ffcb1 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,10 +3,6 @@ #include -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern int pci_swiotlb_init(void); -- cgit v1.1 From 14722485830fe6baba738b91d96f06fbd6cf7a18 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:56:24 +0000 Subject: x86-64: __copy_from_user_inatomic() adjustments This v2.6.26 commit: ad2fc2c: x86: fix copy_user on x86 rendered __copy_from_user_inatomic() identical to copy_user_generic(), yet didn't make the former just call the latter from an inline function. Furthermore, this v2.6.19 commit: b885808: [PATCH] Add proper sparse __user casts to __copy_to_user_inatomic converted the return type of __copy_to_user_inatomic() from unsigned long to int, but didn't do the same to __copy_from_user_inatomic(). 
Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Alexander Viro Cc: Arjan van de Ven Cc: Andi Kleen Cc: LKML-Reference: <4AFD5778020000780001F8F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index ce6fec7..7adebac 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -193,8 +193,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) -- cgit v1.1 From 3c93ca00eeeb774c7dd666cc7286a9e90c53e998 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Nov 2009 15:42:18 +0100 Subject: x86: Add missing might_fault() checks to copy_{to,from}_user() On x86-64, copy_[to|from]_user() rely on assembly routines that never call might_fault(), making us missing various lockdep checks. This doesn't apply to __copy_from,to_user() that explicitly handle these calls, neither is it a problem in x86-32 where copy_to,from_user() rely on the "__" prefixed versions that also call might_fault(). Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra LKML-Reference: <1258382538-30979-1-git-send-email-fweisbec@gmail.com> [ v2: fix module export ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 7adebac..46324c6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,7 +19,7 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long @@ -32,6 +32,7 @@ static inline unsigned long __must_check copy_from_user(void *to, int sz = __compiletime_object_size(to); int ret = -EFAULT; + might_fault(); if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); #ifdef CONFIG_DEBUG_VM @@ -41,6 +42,13 @@ static inline unsigned long __must_check copy_from_user(void *to, return ret; } +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) -- cgit v1.1 From e79c65a97c01d5da4317f44f9f98b3814e091a43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Nov 2009 18:14:26 +0300 Subject: x86: io-apic: IO-APIC MMIO should not fail on resource insertion If IO-APIC base address is 1K aligned we should not fail on 
resourse insertion procedure. For this sake we define IO_APIC_SLOT_SIZE constant which should cover all IO-APIC direct accessible registers. An example of a such configuration is there http://marc.info/?l=linux-kernel&m=118114792006520 | | Quoting the message | | IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23 | IOAPIC[1]: apic_id 3, version 32, address 0xfec80000, GSI 24-47 | IOAPIC[2]: apic_id 4, version 32, address 0xfec80400, GSI 48-71 | IOAPIC[3]: apic_id 5, version 32, address 0xfec84000, GSI 72-95 | IOAPIC[4]: apic_id 8, version 32, address 0xfec84400, GSI 96-119 | Reported-by: "Maciej W. Rozycki" Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu LKML-Reference: <20091116151426.GC5653@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da9..7fe3b30 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 -- cgit v1.1 From 4763ed4d45522b876c97e1f7f4b659d211f75571 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:16 -0800 Subject: x86, mm: Clean up and simplify NX enablement The 32- and 64-bit code used very different mechanisms for enabling NX, but even the 32-bit code was enabling NX in head_32.S if it is available. Furthermore, we had a bewildering collection of tests for the available of NX. This patch: a) merges the 32-bit set_nx() and the 64-bit check_efer() function into a single x86_configure_nx() function. EFER control is left to the head code. b) eliminates the nx_enabled variable entirely. Things that need to test for NX enablement can verify __supported_pte_mask directly, and cpu_has_nx gives the supported status of NX. Signed-off-by: H. Peter Anvin Cc: Tejun Heo Cc: Brian Gerst Cc: Yinghai Lu Cc: Pekka Enberg Cc: Vegard Nossum Cc: Jeremy Fitzhardinge Cc: Chris Wright LKML-Reference: <1258154897-6770-5-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/include/asm/proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 621f56d..add7f18 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -16,7 +16,7 @@ extern void ia32_sysenter_target(void); extern void syscall32_cpu_init(void); -extern void check_efer(void); +extern void x86_configure_nx(void); extern int reboot_force; -- cgit v1.1 From 4b0f3b81eb33ef18283aa71440cccfede1753ae0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Nov 2009 15:28:17 -0800 Subject: x86, mm: Report state of NX protections during boot It is possible for x86_64 systems to lack the NX bit either due to the hardware lacking support or the BIOS having turned off the CPU capability, so NX status should be reported. Additionally, anyone booting NX-capable CPUs in 32bit mode without PAE will lack NX functionality, so this change provides feedback for that case as well. Signed-off-by: Kees Cook Signed-off-by: H. 
Peter Anvin LKML-Reference: <1258154897-6770-6-git-send-email-hpa@zytor.com> --- arch/x86/include/asm/proto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index add7f18..450c56b 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -17,6 +17,7 @@ extern void ia32_sysenter_target(void); extern void syscall32_cpu_init(void); extern void x86_configure_nx(void); +extern void x86_report_nx(void); extern int reboot_force; -- cgit v1.1 From 5bd085b5fbd8b0b8685a2173cb9263798fc2a44e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 16 Nov 2009 13:55:31 -0800 Subject: x86: remove "extern" from function prototypes in Function prototypes don't need "extern", and it is generally frowned upon to have them. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/proto.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 450c56b..4009f65 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -5,19 +5,19 @@ /* misc architecture specific prototypes */ -extern void early_idt_handler(void); +void early_idt_handler(void); -extern void system_call(void); -extern void syscall_init(void); +void system_call(void); +void syscall_init(void); -extern void ia32_syscall(void); -extern void ia32_cstar_target(void); -extern void ia32_sysenter_target(void); +void ia32_syscall(void); +void ia32_cstar_target(void); +void ia32_sysenter_target(void); -extern void syscall32_cpu_init(void); +void syscall32_cpu_init(void); -extern void x86_configure_nx(void); -extern void x86_report_nx(void); +void x86_configure_nx(void); +void x86_report_nx(void); extern int reboot_force; -- cgit v1.1 From 8fd524b355daef0945692227e726fb444cebcd4f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:53 +0900 Subject: x86: Kill bad_dma_address variable This kills bad_dma_address variable, the old mechanism to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. bad_dma_address variable was introduced to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. However, it can't handle systems that use both swiotlb and HW IOMMU. SO we introduced dma_map_ops->mapping_error to solve that case. Intel VT-d, GART, and swiotlb already use dma_map_ops->mapping_error. Calgary, AMD IOMMU, and nommu use zero for an error dma address. This adds DMA_ERROR_CODE and converts them to use it (as SPARC and POWER does). 
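Driver-side usage does not change; as a generic illustration (not taken from this patch), mapping failures are still detected through dma_mapping_error() rather than by comparing against any particular magic address:

    dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
    if (dma_mapping_error(dev, addr)) {
            /* mapping failed -- never hand 'addr' to the device */
            return -ENOMEM;
    }
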
Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d..0f6c02f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -- cgit v1.1 From 350f8f5631922c7848ec4b530c111cb8c2ff7caa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:54:40 +0000 Subject: x86: Eliminate redundant/contradicting cache line size config options Rather than having X86_L1_CACHE_BYTES and X86_L1_CACHE_SHIFT (with inconsistent defaults), just having the latter suffices as the former can be easily calculated from it. To be consistent, also change X86_INTERNODE_CACHE_BYTES to X86_INTERNODE_CACHE_SHIFT, and set it to 7 (128 bytes) for NUMA to account for last level cache line size (which here matters more than L1 cache line size). Finally, make sure the default value for X86_L1_CACHE_SHIFT, when X86_GENERIC is selected, is being seen before that for the individual CPU model options (other than on x86-64, where GENERIC_CPU is part of the choice construct, X86_GENERIC is a separate option on ix86). Signed-off-by: Jan Beulich Acked-by: Ravikiran Thirumalai Acked-by: Nick Piggin LKML-Reference: <4AFD5710020000780001F8F0@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cache.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 549860d..2f9047c 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -9,12 +9,13 @@ #define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT +#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) + #ifdef CONFIG_X86_VSMP -/* vSMP Internode cacheline shift */ -#define INTERNODE_CACHE_SHIFT (12) #ifdef CONFIG_SMP #define __cacheline_aligned_in_smp \ - __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ + __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ __page_aligned_data #endif #endif -- cgit v1.1 From 0444c9bd0cf4e0eb946a7fcaf34765accfa9404a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 20 Nov 2009 14:03:05 +0000 Subject: x86: Tighten conditionals on MCE related statistics irq_thermal_count is only being maintained when X86_THERMAL_VECTOR, and both X86_THERMAL_VECTOR and X86_MCE_THRESHOLD don't need extra wrapping in X86_MCE conditionals. 
Signed-off-by: Jan Beulich Cc: Hidetoshi Seto Cc: Yong Wang Cc: Suresh Siddha Cc: Andi Kleen Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <4B06AFA902000078000211F8@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f..108eb6f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; -- cgit v1.1 From fd12a0d69aee6d90fa9b9890db24368a897f8423 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 19 Nov 2009 14:23:41 -0600 Subject: x86: UV SGI: Don't track GRU space in PAT GRU space is always mapped as WB in the page table. There is no need to track the mappings in the PAT. This also eliminates the "freeing invalid memtype" messages when the GRU space is unmapped. Signed-off-by: Jack Steiner LKML-Reference: <20091119202341.GA4420@sgi.com> [ v2: fix build failure ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 2 ++ arch/x86/include/asm/x86_init.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index e2c1668..4c35dd0 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -24,4 +24,6 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, void io_free_memtype(resource_size_t start, resource_size_t end); +int default_is_untracked_pat_range(u64 start, u64 end); + #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd..8112ed7 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -113,11 +113,13 @@ struct x86_cpuinit_ops { /** * struct x86_platform_ops - platform specific runtime functions + * @is_untracked_pat_range exclude from PAT logic * @calibrate_tsc: calibrate TSC * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock */ struct x86_platform_ops { + int (*is_untracked_pat_range)(u64 start, u64 end); unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); -- cgit v1.1 From 55a6ca25472ee01574bfc24d23b7f5fa09cc38cf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Nov 2009 15:12:07 -0800 Subject: x86, mm: Call is_untracked_pat_range() rather than is_ISA_range() Checkin fd12a0d69aee6d90fa9b9890db24368a897f8423 made the PAT untracked range a platform configurable, but missed on occurrence of is_ISA_range() which still refers to PAT-untracked memory, and therefore should be using the configurable. Signed-off-by: H. 
Peter Anvin Cc: Jack Steiner Cc: Suresh Siddha LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/include/asm/pgtable.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd36..1de2094 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -16,6 +16,8 @@ #ifndef __ASSEMBLY__ +#include + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, unsigned long new_flags) { /* - * PAT type is always WB for ISA. So no need to check. + * PAT type is always WB for untracked ranges, so no need to check. */ - if (is_ISA_range(paddr, paddr + size - 1)) + if (x86_platform.is_untracked_pat_range(paddr, paddr + size - 1)) return 1; /* -- cgit v1.1 From 8a27138924f64d2f30c1022f909f74480046bc3f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Nov 2009 14:49:20 -0800 Subject: x86, mm: is_untracked_pat_range() takes a normal semiclosed range is_untracked_pat_range() -- like its components, is_ISA_range() and is_GRU_range(), takes a normal semiclosed interval (>=, <) whereas the PAT code called it as if it took a closed range (>=, <=). Fix. Although this is a bug, I believe it is non-manifest, simply because none of the callers will call this with non-page-aligned addresses. Signed-off-by: H. Peter Anvin Acked-by: Thomas Gleixner Acked-by: Suresh Siddha LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1de2094..a34c785 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -274,7 +274,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, /* * PAT type is always WB for untracked ranges, so no need to check. */ - if (x86_platform.is_untracked_pat_range(paddr, paddr + size - 1)) + if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) return 1; /* -- cgit v1.1 From 65f116f5f16dc3371fce24fb24bc4843b5380ba5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Nov 2009 14:44:39 -0800 Subject: x86: Change is_ISA_range() into an inline function Change is_ISA_range() from a macro to an inline function. This makes it type safe, and also allows it to be assigned to a function pointer if necessary. Signed-off-by: H. 
Peter Anvin Acked-by: Thomas Gleixner LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/include/asm/e820.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e61..68b4e0e 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -61,6 +61,12 @@ struct e820map { struct e820entry map[E820_X_MAX]; }; +#define ISA_START_ADDRESS 0xa0000 +#define ISA_END_ADDRESS 0x100000 + +#define BIOS_BEGIN 0x000a0000 +#define BIOS_END 0x00100000 + #ifdef __KERNEL__ /* see comment in arch/x86/kernel/e820.c */ extern struct e820map e820; @@ -126,15 +132,14 @@ extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -#endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ -#define ISA_START_ADDRESS 0xa0000 -#define ISA_END_ADDRESS 0x100000 -#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) +static inline bool is_ISA_range(u64 s, u64 e) +{ + return s >= ISA_START_ADDRESS && e < ISA_END_ADDRESS; +} -#define BIOS_BEGIN 0x000a0000 -#define BIOS_END 0x00100000 +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ #include -- cgit v1.1 From eb41c8be89dbe079f49202774e04a79ccac48a09 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Nov 2009 14:46:07 -0800 Subject: x86, platform: Change is_untracked_pat_range() to bool; cleanup init - Change is_untracked_pat_range() to return bool. - Clean up the initialization of is_untracked_pat_range() -- by default, we simply point it at is_ISA_range() directly. - Move is_untracked_pat_range to the end of struct x86_platform, since it is the newest field. Signed-off-by: H. Peter Anvin Acked-by: Thomas Gleixner Cc: Jack Steiner LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/include/asm/pat.h | 2 -- arch/x86/include/asm/x86_init.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 4c35dd0..e2c1668 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -24,6 +24,4 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, void io_free_memtype(resource_size_t start, resource_size_t end); -int default_is_untracked_pat_range(u64 start, u64 end); - #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8112ed7..024cf3c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -113,16 +113,16 @@ struct x86_cpuinit_ops { /** * struct x86_platform_ops - platform specific runtime functions - * @is_untracked_pat_range exclude from PAT logic * @calibrate_tsc: calibrate TSC * @get_wallclock: get time from HW clock like RTC etc. 
* @set_wallclock: set time back to HW clock + * @is_untracked_pat_range exclude from PAT logic */ struct x86_platform_ops { - int (*is_untracked_pat_range)(u64 start, u64 end); unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + bool (*is_untracked_pat_range)(u64 start, u64 end); }; extern struct x86_init_ops x86_init; -- cgit v1.1 From b24c2a925a9837cccf54d50aeac22ba0cbc15455 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 02:48:18 -0800 Subject: x86: Move find_smp_config() earlier and avoid bootmem usage Move the find_smp_config() call to before bootmem is initialized. Use reserve_early() instead of reserve_bootmem() in it. This simplifies the code, we only need to call find_smp_config() once and can remove the now unneeded reserve parameter from x86_init_mpparse::find_smp_config. We thus also reduce x86's dependency on bootmem allocations. Signed-off-by: Yinghai Lu LKML-Reference: <4B0BB9F2.70907@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 11 +++-------- arch/x86/include/asm/x86_init.h | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c9450..644cf1a 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -71,12 +71,7 @@ static inline void early_get_smp_config(void) static inline void find_smp_config(void) { - x86_init.mpparse.find_smp_config(1); -} - -static inline void early_find_smp_config(void) -{ - x86_init.mpparse.find_smp_config(0); + x86_init.mpparse.find_smp_config(); } #ifdef CONFIG_X86_MPPARSE @@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); # else # define default_mpc_oem_bus_info NULL # endif -extern void default_find_smp_config(unsigned int reserve); +extern void default_find_smp_config(void); extern void default_get_smp_config(unsigned int early); #else static inline void early_reserve_e820_mpc_new(void) { } @@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { } #define default_mpc_apic_id NULL #define default_smp_read_mpc_oem NULL #define default_mpc_oem_bus_info NULL -#define default_find_smp_config x86_init_uint_noop +#define default_find_smp_config x86_init_noop #define default_get_smp_config x86_init_uint_noop #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 024cf3c..97e5fb4 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -26,7 +26,7 @@ struct x86_init_mpparse { void (*smp_read_mpc_oem)(struct mpc_table *mpc); void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); - void (*find_smp_config)(unsigned int reserve); + void (*find_smp_config)(void); void (*get_smp_config)(unsigned int early); }; -- cgit v1.1 From 28b4e0d86acf59ae3bc422921138a4958458326e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Nov 2009 22:24:44 +0900 Subject: x86: Rename global percpu symbol dr7 to cpu_dr7 Percpu symbols now occupy the same namespace as other global symbols and as such short global symbols without subsystem prefix tend to collide with local variables. dr7 percpu variable used by x86 was hit by this. Rename it to cpu_dr7. The rename also makes it more consistent with its fellow cpu_debugreg percpu variable. 
Signed-off-by: Tejun Heo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Rusty Russell Cc: Christoph Lameter Cc: Linus Torvalds , Cc: Andrew Morton LKML-Reference: <20091125115856.GA17856@elte.hu> Signed-off-by: Ingo Molnar Reported-by: Stephen Rothwell --- arch/x86/include/asm/debugreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index fdabd84..8240f76 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,7 +75,7 @@ */ #ifdef __KERNEL__ -DECLARE_PER_CPU(unsigned long, dr7); +DECLARE_PER_CPU(unsigned long, cpu_dr7); static inline void hw_breakpoint_disable(void) { @@ -91,7 +91,7 @@ static inline void hw_breakpoint_disable(void) static inline int hw_breakpoint_active(void) { - return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } extern void aout_dump_debugregs(struct user *dump); -- cgit v1.1 From 2d4dc890b5c8fabd818a8586607e6843c4375e62 Mon Sep 17 00:00:00 2001 From: Ilya Loginov Date: Thu, 26 Nov 2009 09:16:19 +0100 Subject: block: add helpers to run flush_dcache_page() against a bio and a request's pages Mtdblock driver doesn't call flush_dcache_page for pages in request. So, this causes problems on architectures where the icache doesn't fill from the dcache or with dcache aliases. The patch fixes this. The ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE symbol was introduced to avoid pointless empty cache-thrashing loops on architectures for which flush_dcache_page() is a no-op. Every architecture was provided with this flush pages on architectires where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is equal 1 or do nothing otherwise. See "fix mtd_blkdevs problem with caches on some architectures" discussion on LKML for more information. Signed-off-by: Ilya Loginov Cc: Ingo Molnar Cc: David Woodhouse Cc: Peter Horton Cc: "Ed L. Cashin" Signed-off-by: Jens Axboe --- arch/x86/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6af..9076add 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { } +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 static inline void flush_dcache_page(struct page *page) { } static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } -- cgit v1.1 From 79b0379cee09b00ef309384aff652e328e438c79 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 25 Nov 2009 14:18:26 -0500 Subject: x86: Optimize loadsegment() Zero the input register in the exception handler instead of using an extra register to pass in a zero value. 
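Illustrative call site (not part of the patch): the macro is used exactly as before; on a faulting selector the fixup now zeroes the input operand and retries, leaving the segment loaded with the null selector.

    unsigned short sel = new_ds_sel;    /* 'new_ds_sel' is an assumed value */
    loadsegment(ds, sel);               /* faults fall back to the null selector */
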
Signed-off-by: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 1a953e2..537395a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -156,18 +156,19 @@ extern void native_load_gs_index(unsigned); * segment if something goes wrong.. */ #define loadsegment(seg, value) \ +do { \ + unsigned short __val = value; \ asm volatile("\n" \ "1:\t" \ "movl %k0,%%" #seg "\n" \ - "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ + "2:\t" \ + "xorl %k0,%k0\n\t" \ + "jmp 1b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - + _ASM_EXTABLE(1b, 2b) \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away -- cgit v1.1 From 64b028b22616946a05bf9580f7f7f7ee2ac070b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 10:37:55 +0100 Subject: x86: Clean up the loadsegment() macro Make it readable in the source too, not just in the assembly output. No change in functionality. Cc: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 537395a..022a843 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -155,19 +155,21 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ -do { \ - unsigned short __val = value; \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - ".section .fixup,\"ax\"\n" \ - "2:\t" \ - "xorl %k0,%k0\n\t" \ - "jmp 1b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 2b) \ - : "+r" (__val) : : "memory"); \ +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ } while (0) /* -- cgit v1.1 From 6a9401a7ac13e62ef2baf4d46e022d303edc3050 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:22:21 +0100 Subject: x86/amd-iommu: Separate internal interface definitions This patch moves all function declarations which are only used inside the driver code to a seperate header file. 
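Illustrative usage (hedged): files internal to the driver are expected to include both headers, while the rest of the kernel keeps seeing only the public one:

    #include <asm/amd_iommu.h>          /* public: amd_iommu_detect() */
    #include <asm/amd_iommu_proto.h>    /* driver-internal prototypes */
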
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 10 ++++----- arch/x86/include/asm/amd_iommu_proto.h | 38 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/amd_iommu_types.h | 5 ----- 3 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/amd_iommu_proto.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index b8ef2ee..0891338 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,15 +23,13 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); + extern void amd_iommu_detect(void); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_apply_erratum_63(u16 devid); + #else + static inline void amd_iommu_detect(void) { } + #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 0000000..84786fb --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +struct amd_iommu; + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a..27db7f9 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -462,11 +462,6 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) -static inline void amd_iommu_stats_init(void) { } - #endif /* CONFIG_AMD_IOMMU_STATS */ -/* some function prototypes */ -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ -- cgit v1.1 From bf3118c1276d27fe9e84aa42382da25ee0750777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:39:19 +0100 Subject: x86/amd-iommu: Update copyright headers This patch updates the copyright headers in the relevant AMD IOMMU driver files to match the date of the latest changes. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 2 +- arch/x86/include/asm/amd_iommu_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 0891338..5af2982 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 27db7f9..df5e9c8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * -- cgit v1.1 From bb52777ec4d736c2d7c4f037b32d4eeeb172ed89 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:31:51 +0100 Subject: x86/amd-iommu: Add an index field to struct amd_iommu This patch adds an index field to struct amd_iommu which can be used to lookup it up in an array. This index will be used in struct protection_domain to keep track which protection domain has devices behind which IOMMU. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index df5e9c8..ab3e7bf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -25,6 +25,11 @@ #include /* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 @@ -291,6 +296,9 @@ struct dma_ops_domain { struct amd_iommu { struct list_head list; + /* Index within the IOMMU array */ + int index; + /* locks the accesses to the hardware */ spinlock_t lock; @@ -357,6 +365,15 @@ struct amd_iommu { extern struct list_head amd_iommu_list; /* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { -- cgit v1.1 From c459611424d8b8396060eb766e23bd0c70c993bc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:57:32 +0100 Subject: x86/amd-iommu: Add per IOMMU reference counting This patch adds reference counting for protection domains per IOMMU. This allows a smarter TLB flushing strategy. 
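To make the per-IOMMU counters concrete, here is a small stand-alone C sketch. It is not the driver code; struct my_domain, domain_attach_iommu(), domain_detach_iommu() and domain_flush_needed() are invented names, and only the dev_iommu[MAX_IOMMUS] counter idea comes from the description above. The point is that a domain only needs its TLB flushed on IOMMUs whose counter is non-zero.

#include <stdio.h>

#define MAX_IOMMUS 32   /* mirrors the constant introduced above */

/* Hypothetical, much-reduced stand-in for struct protection_domain */
struct my_domain {
        unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};

/* A device behind IOMMU 'idx' is attached to the domain */
static void domain_attach_iommu(struct my_domain *d, int idx)
{
        d->dev_iommu[idx]++;
}

/* ... and detached again */
static void domain_detach_iommu(struct my_domain *d, int idx)
{
        if (d->dev_iommu[idx])
                d->dev_iommu[idx]--;
}

/* Only IOMMUs that actually serve devices of this domain need a flush */
static int domain_flush_needed(struct my_domain *d, int idx)
{
        return d->dev_iommu[idx] != 0;
}

int main(void)
{
        struct my_domain dom = { { 0 } };
        int i;

        domain_attach_iommu(&dom, 3);
        for (i = 0; i < MAX_IOMMUS; i++)
                if (domain_flush_needed(&dom, i))
                        printf("flush domain TLB on IOMMU %d\n", i);
        domain_detach_iommu(&dom, 3);
        return 0;
}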
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ab3e7bf..e68b148 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -238,7 +238,9 @@ struct protection_domain { unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + }; /* -- cgit v1.1 From aeb26f55337d4310840c8adc3ec7d6aebb714472 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:44:01 +0100 Subject: x86/amd-iommu: Implement protection domain list This patch adds code to keep a global list of all protection domains. This allows to simplify the resume code. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e68b148..b332b7f 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -231,6 +231,7 @@ extern bool amd_iommu_dump; * independent of their use. */ struct protection_domain { + struct list_head list; /* for list of all protection domains */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -376,6 +377,12 @@ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; extern int amd_iommus_present; /* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { -- cgit v1.1 From 318afd41d2eca3224de3fd85a3b9a27a3010a98d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:32:38 +0100 Subject: x86/amd-iommu: Make np-cache a global flag The non-present cache flag was IOMMU local until now which doesn't make sense. Make this a global flag so we can remove the lase user of 'struct iommu' in the map/unmap path. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index b332b7f..4899f78 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -211,6 +211,9 @@ extern bool amd_iommu_dump; printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; + /* * Make iterating over all IOMMUs easier */ -- cgit v1.1 From 8793abeb783c12cc37f92f6133fd6468152b98df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 11:40:33 +0100 Subject: x86/amd-iommu: Remove support for domain sharing This patch makes device isolation mandatory and removes support for the amd_iommu=share option. This simplifies the code in several places. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4899f78..02b6a0f 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -451,9 +451,6 @@ extern struct protection_domain **amd_iommu_pd_table; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* will be 1 if device isolation is enabled */ -extern bool amd_iommu_isolate; - /* * If true, the addresses will be flushed on unmap time, not when * they are reused -- cgit v1.1 From 657cbb6b6cba0f9c98c5299e0c803b2c0e67ea0a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 15:26:46 +0100 Subject: x86/amd-iommu: Use dev->arch->iommu to store iommu related information This patch changes IOMMU code to use dev->archdata->iommu to store information about the alias device and the domain the device is attached to. This allows the driver to get rid of the amd_iommu_pd_table in the future. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 8 ++++++++ arch/x86/include/asm/device.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 02b6a0f..9eaa27b 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -248,6 +248,14 @@ struct protection_domain { }; /* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ +}; + +/* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9..029f230 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; -- cgit v1.1 From 241000556f751dacd332df6ab2e903a23746e51e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 25 Nov 2009 15:59:57 +0100 Subject: x86/amd-iommu: Add device bind reference counting This patch adds a reference count to each device to count how often the device was bound to that domain. This is important for single devices that act as an alias for a number of others. These devices must stay bound to their domains until all devices that alias to it are unbound from the same domain. 
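The rule "an alias device stays bound until the last device relying on it is unbound" amounts to a plain reference count. Below is a minimal sketch in ordinary C; struct my_dev_data, bind_domain() and unbind_domain() are made-up names standing in for the real attach/detach paths, and only the counting rule itself is taken from the patch description.

#include <assert.h>

/* Hypothetical, simplified stand-in for struct iommu_dev_data */
struct my_dev_data {
        int bound;      /* is the (alias) device attached to a domain? */
        int bind;       /* how many devices rely on this binding */
};

static void bind_domain(struct my_dev_data *alias)
{
        if (alias->bind++ == 0)
                alias->bound = 1;       /* first user: attach the alias */
}

static void unbind_domain(struct my_dev_data *alias)
{
        assert(alias->bind > 0);
        if (--alias->bind == 0)
                alias->bound = 0;       /* last user gone: detach the alias */
}

int main(void)
{
        struct my_dev_data alias = { 0, 0 };

        bind_domain(&alias);    /* device A, aliased to 'alias' */
        bind_domain(&alias);    /* device B, same alias */
        unbind_domain(&alias);  /* A goes away, alias must stay bound */
        assert(alias.bound == 1);
        unbind_domain(&alias);  /* B goes away, now the alias can be detached */
        assert(alias.bound == 0);
        return 0;
}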
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 9eaa27b..434e90e 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { */ struct iommu_dev_data { struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ }; /* -- cgit v1.1 From 7c392cbe984d904f7c89a6a75b2ac245254e8da5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 11:13:32 +0100 Subject: x86/amd-iommu: Keep devices per domain in a list This patch introduces a list to each protection domain which keeps all devices associated with the domain. This can be used later to optimize certain functions and to completely remove the amd_iommu_pd_table. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 434e90e..93953d1 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -235,6 +235,7 @@ extern bool amd_iommu_np_cache; */ struct protection_domain { struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -251,6 +252,7 @@ struct protection_domain { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ -- cgit v1.1 From b00d3bcff4d996f65e337d404b0df5dc201a01ab Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:35:33 +0100 Subject: x86/amd-iommu: Cleanup DTE flushing code This patch cleans up the code to flush device table entries in the IOMMU. With this change the driver can get rid of the iommu_queue_inv_dev_entry() function. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 93953d1..f92d1b3 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { */ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ -- cgit v1.1 From 492667dacc0ac9763969155482b1261b34ccf450 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 13:25:47 +0100 Subject: x86/amd-iommu: Remove amd_iommu_pd_table The data that was stored in this table is now available in dev->archdata.iommu. So this table is no longer necessary. This patch removes the remaining uses of that variable and removes it from the code.
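As a rough illustration of the data-structure change, here is a tiny stand-alone C sketch: instead of a global table indexed by device id, the per-device data hangs off a pointer in the device itself. struct my_device, my_get_dev_data() and the iommu member are invented for the sketch; the real code keeps the pointer in dev->archdata.iommu as described above.

#include <stdio.h>

/* Hypothetical device-private data, playing the role of iommu_dev_data */
struct my_dev_data {
        int domain_id;
};

/* Hypothetical stand-in for struct device with an archdata-like hook */
struct my_device {
        void *iommu;    /* plays the role of dev->archdata.iommu */
};

/* Lookup no longer goes through a global table indexed by device id */
static struct my_dev_data *my_get_dev_data(struct my_device *dev)
{
        return dev->iommu;
}

int main(void)
{
        struct my_dev_data data = { .domain_id = 42 };
        struct my_device dev;

        dev.iommu = &data;      /* set up once at attach time */
        printf("device is in domain %d\n", my_get_dev_data(&dev)->domain_id);
        return 0;
}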
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index f92d1b3..ba19ad4 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -457,9 +457,6 @@ extern unsigned amd_iommu_aperture_order; /* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; -/* data structures for protection domain handling */ -extern struct protection_domain **amd_iommu_pd_table; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -- cgit v1.1 From ccef086454d4c97e7b722e9303390207d681cb4c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 30 Nov 2009 21:33:51 -0800 Subject: x86, mm: Correct the implementation of is_untracked_pat_range() The semantics the PAT code expect of is_untracked_pat_range() is "is this range completely contained inside the untracked region." This means that checkin 8a27138924f64d2f30c1022f909f74480046bc3f was technically wrong, because the implementation needlessly confusing. The sane interface is for it to take a semiclosed range like just about everything else (as evidenced by the sheer number of "- 1"'s removed by that patch) so change the actual implementation to match. Reported-by: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Jack Steiner Signed-off-by: H. Peter Anvin LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/include/asm/e820.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 68b4e0e..761249e 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -133,9 +133,13 @@ extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); +/* + * Returns true iff the specified range [s,e) is completely contained inside + * the ISA region. + */ static inline bool is_ISA_range(u64 s, u64 e) { - return s >= ISA_START_ADDRESS && e < ISA_END_ADDRESS; + return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; } #endif /* __KERNEL__ */ -- cgit v1.1 From e859cf8656043f158b4004ccc8cbbf1ba4f97177 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:02:22 -0500 Subject: x86: Fix comments of register/stack access functions Fix typos and some redundant comments of register/stack access functions in asm/ptrace.h. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Roland McGrath Cc: Oleg Nesterov Cc: Wenji Huang Cc: Mahesh J Salgaonkar LKML-Reference: <20091201000222.7669.7477.stgit@harusame> Signed-off-by: Ingo Molnar Suggested-by: Wenji Huang --- arch/x86/include/asm/ptrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index a3d49dd..3d11fd0 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -227,8 +227,8 @@ extern const char *regs_query_register_name(unsigned int offset); * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * - * regs_get_register returns the value of a register whose offset from @regs - * is @offset. The @offset is the offset of the register in struct pt_regs. 
+ * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, @@ -244,7 +244,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * - * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, @@ -260,7 +260,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack, * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, -- cgit v1.1 From 01be50a308be466e122c3a8b3d535f1b673ecbd2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:04:58 +0000 Subject: x86/alternatives: Check replacementlen <= instrlen at build time Having run into the run-(boot-)time check a couple of times lately, I finally took time to find a build-time check so that one doesn't need to analyze the register/stack dump and resolve this (through manual lookup in vmlinux) to the offending construct. The assembler will emit a message like "Error: value of too large for field of 1 bytes at ", which while not pointing out the source location still makes analysis quite a bit easier. Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8AA0200007800022703@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc..69b74a7 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ -- cgit v1.1 From 99063c0bcebcc913165a5d168050326eba3e0996 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:06:16 +0000 Subject: x86/alternatives: No need for alternatives-asm.h to re-invent stuff already in asm.h This at once also gets the alignment specification right for x86-64. 
Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8F80200007800022708@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d3..b97f786 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else -- cgit v1.1 From 851ba6922ac575b749f63dee0ae072808163ba6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Aug 2009 11:10:17 +0300 Subject: KVM: Don't pass kvm_run arguments They're just copies of vcpu->run, which is readily accessible. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d838922..0b113f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -506,8 +506,8 @@ struct kvm_x86_ops { void (*tlb_flush)(struct kvm_vcpu *vcpu); - void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); @@ -568,7 +568,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, +int emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -585,9 +585,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, int size, unsigned long count, int down, gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); -- cgit v1.1 From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:19 +0300 Subject: KVM: Move irq sharing information to irqchip level This removes assumptions that max GSIs is smaller than number of pins. Sharing is tracked on pin level not GSI level. 
[avi: no PIC on ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b113f2..35d3236 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,7 +410,6 @@ struct kvm_arch{ gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; u64 vm_init_tsc; }; -- cgit v1.1 From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:21 +0300 Subject: KVM: Maintain back mapping from irqchip/pin to gsi Maintain back mapping from irqchip/pin to gsi to speedup interrupt acknowledgment notifications. [avi: build fix on non-x86/ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe91..f02e87a 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { -- cgit v1.1 From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:23 +0300 Subject: KVM: Move irq ack notifier list to arch independent code Mask irq notifier list is already there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35d3236..a46e2dd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -397,7 +397,6 @@ struct kvm_arch{ struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; unsigned int tss_addr; -- cgit v1.1 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. 
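Reduced to its core, on-demand activation is a usage counter around hardware enable/disable. The sketch below is plain C with invented names (vm_usage_count, my_vm_created(), my_vm_destroyed()); the real implementation additionally has to run the enable/disable on every CPU and handle failures.

#include <stdio.h>

static int vm_usage_count;      /* number of VMs currently in existence */

static void hardware_enable_all(void)  { printf("VMX/SVM enabled\n");  }
static void hardware_disable_all(void) { printf("VMX/SVM disabled\n"); }

/* Called when a VM is created: enable virtualization for the first VM only */
static void my_vm_created(void)
{
        if (vm_usage_count++ == 0)
                hardware_enable_all();
}

/* Called when a VM is destroyed: disable again after the last VM is gone */
static void my_vm_destroyed(void)
{
        if (--vm_usage_count == 0)
                hardware_disable_all();
}

int main(void)
{
        my_vm_created();        /* first VM: extensions switched on */
        my_vm_created();        /* second VM: nothing to do */
        my_vm_destroyed();
        my_vm_destroyed();      /* last VM: extensions switched off */
        return 0;
}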
Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd..295c7c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ -- cgit v1.1 From 355be0b9300579e02275d7d19374806a974ce622 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 3 Oct 2009 00:31:21 +0200 Subject: KVM: x86: Refactor guest debug IOCTL handling Much of the so far vendor-specific code for setting up guest debug can actually be handled by the generic code. This also fixes a minor deficit in the SVM part /wrt processing KVM_GUESTDBG_ENABLE. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 295c7c4..e7f8708 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -475,8 +475,8 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); -- cgit v1.1 From 91586a3b7d79432772a3cdcb81473cd08a237c79 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 5 Oct 2009 13:07:21 +0200 Subject: KVM: x86: Rework guest single-step flag injection and filtering Push TF and RF injection and filtering on guest single-stepping into the vendor get/set_rflags callbacks. This makes the whole mechanism more robust wrt user space IOCTL order and instruction emulations.
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e7f8708..179a919 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); + void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, -- cgit v1.1 From 4b8d54f9726f1159330201c5ed2ea30bce7e63ea Mon Sep 17 00:00:00 2001 From: "Zhai, Edwin" Date: Fri, 9 Oct 2009 18:03:20 +0800 Subject: KVM: VMX: Add support for Pause-Loop Exiting New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution control fields: PLE_Gap - upper bound on the amount of time between two successive executions of PAUSE in a loop. PLE_Window - upper bound on the amount of time a guest is allowed to execute in a PAUSE loop If the time between this execution of PAUSE and the previous one exceeds the PLE_Gap, the processor considers this PAUSE to belong to a new loop. Otherwise, the processor determines the total execution time of this loop (since the 1st PAUSE in this loop), and triggers a VM exit if the total time exceeds the PLE_Window. * Refer to SDM volume 3b, sections 21.6.13 & 22.1.3. Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP is sched-out after holding a spinlock, then other VPs for the same lock are sched-in to waste CPU time. Our tests indicate that most spinlocks are held for less than 212 cycles. Performance tests show that with 2X LP over-commitment we can get +2% perf improvement for kernel build (even more perf gain with more LPs).
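What the hardware does with PLE_Gap and PLE_Window can be modelled in a few lines of C. This is only a sketch of the behaviour described above, with invented names (struct ple_state, ple_should_exit()) and cycle counts supplied by the caller rather than read from the TSC.

#include <stdbool.h>
#include <stdio.h>

struct ple_state {
        unsigned long long loop_start;  /* time of the first PAUSE in the loop */
        unsigned long long last_pause;  /* time of the previous PAUSE */
};

/* Called for every PAUSE executed by the guest at time 'now' (in cycles) */
static bool ple_should_exit(struct ple_state *s, unsigned long long now,
                            unsigned long long gap, unsigned long long window)
{
        if (now - s->last_pause > gap)  /* too far apart: treat as a new loop */
                s->loop_start = now;
        s->last_pause = now;

        return now - s->loop_start > window;    /* spinning too long: VM exit */
}

int main(void)
{
        struct ple_state s = { 0, 0 };
        unsigned long long t;

        for (t = 100; t < 6000; t += 50)        /* a tight PAUSE loop */
                if (ple_should_exit(&s, t, 128, 4096)) {
                        printf("PLE exit at cycle %llu\n", t);
                        break;
                }
        return 0;
}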
Signed-off-by: Zhai Edwin Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c..2b49454 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -56,6 +56,7 @@ #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -144,6 +145,8 @@ enum vmcs_field { VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, TPR_THRESHOLD = 0x0000401c, SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -248,6 +251,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 -- cgit v1.1 From 565d0998ecac8373b9a9ecd5991abe74318cd235 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 6 Oct 2009 14:25:02 -0500 Subject: KVM: SVM: Support Pause Filter in AMD processors New AMD processors (Family 0x10 models 8+) support the Pause Filter Feature. This feature creates a new field in the VMCB called Pause Filter Count. If Pause Filter Count is greater than 0 and intercepting PAUSEs is enabled, the processor will increment an internal counter when a PAUSE instruction occurs instead of intercepting. When the internal counter reaches the Pause Filter Count value, a PAUSE intercept will occur. This feature can be used to detect contended spinlocks, especially when the lock holding VCPU is not scheduled. Rescheduling another VCPU prevents the VCPU seeking the lock from wasting its quantum by spinning idly. Experimental results show that most spinlocks are held for less than 1000 PAUSE cycles or more than a few thousand. Default the Pause Filter Counter to 3000 to detect the contended spinlocks. Processor support for this feature is indicated by a CPUID bit. On a 24 core system running 4 guests each with 16 VCPUs, this patch improved overall performance of each guest's 32 job kernbench by approximately 3-5% when combined with a scheduler algorithm thati caused the VCPU to sleep for a brief period. Further performance improvement may be possible with a more sophisticated yield algorithm. 
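The SVM pause filter can be sketched the same way: an internal counter incremented on every PAUSE, with an intercept once it reaches pause_filter_count. This is only a model with invented names (pause_intercepted()), not the real VMCB handling; the 3000 default is the one quoted in the description.

#include <stdbool.h>
#include <stdio.h>

static unsigned int pause_filter_count = 3000;  /* default from the patch */
static unsigned int internal_counter;

/* Model of what the CPU does on each guest PAUSE instruction */
static bool pause_intercepted(void)
{
        if (++internal_counter < pause_filter_count)
                return false;   /* PAUSE runs without a #VMEXIT */
        internal_counter = 0;
        return true;            /* contended spinlock suspected: intercept */
}

int main(void)
{
        unsigned long pauses = 0;

        while (!pause_intercepted())
                pauses++;
        printf("intercept after %lu uninterrupted PAUSEs\n", pauses);
        return 0;
}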
Signed-off-by: Mark Langsdorf Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7..1fecb7e 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u16 intercept_dr_write; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[44]; + u8 reserved_1[42]; + u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; u64 tsc_offset; -- cgit v1.1 From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Thu, 15 Oct 2009 15:21:43 -0700 Subject: KVM: Xen PV-on-HVM guest support Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. [jan: fix i386 build warning] [avi: future proof abi with a flags field] Signed-off-by: Ed Swierk Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f02e87a..ef9b4b7 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -19,6 +19,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919..36f3b53 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,8 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + + struct kvm_xen_hvm_config xen_hvm_config; }; struct kvm_vm_stat { -- cgit v1.1 From 94fe45da48f921d01d8ff02a0ad54ee9c326d7f0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:44 +0200 Subject: KVM: x86: Fix guest single-stepping while interruptible Commit 705c5323 opened the doors of hell by unconditionally injecting single-step flags as long as guest_debug signaled this. This doesn't work when the guest branches into some interrupt or exception handler and triggers a vmexit with flag reloading. Fix it by saving cs:rip when user space requests single-stepping and restricting the trace flag injection to this guest code position. 
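The fix boils down to remembering where the guest was when single-stepping was requested and only injecting TF while it is still there. Below is a stand-alone C model with invented names (struct my_vcpu, set_singlestep(), guest_rflags()); the real code does this inside the vendor rflags callbacks.

#include <stdio.h>

#define X86_EFLAGS_TF 0x100UL

/* Hypothetical, much-reduced vcpu state */
struct my_vcpu {
        unsigned long cs, rip;                          /* current guest position */
        unsigned long singlestep_cs, singlestep_rip;    /* position at request time */
        int singlestep;
};

/* User space requested single-stepping: remember cs:rip of that moment */
static void set_singlestep(struct my_vcpu *v)
{
        v->singlestep = 1;
        v->singlestep_cs = v->cs;
        v->singlestep_rip = v->rip;
}

/* Only add TF while the guest still sits at the remembered position */
static unsigned long guest_rflags(struct my_vcpu *v, unsigned long rflags)
{
        if (v->singlestep &&
            v->cs == v->singlestep_cs && v->rip == v->singlestep_rip)
                rflags |= X86_EFLAGS_TF;
        return rflags;
}

int main(void)
{
        struct my_vcpu v = { .cs = 0x10, .rip = 0x400000 };

        set_singlestep(&v);
        printf("rflags at request point: %#lx\n", guest_rflags(&v, 0x2));

        v.rip = 0xffff0000;     /* guest branched into an interrupt handler */
        printf("rflags elsewhere:        %#lx\n", guest_rflags(&v, 0x2));
        return 0;
}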
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 36f3b53..2536fbd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -371,6 +371,10 @@ struct kvm_vcpu_arch { u64 mcg_status; u64 mcg_ctl; u64 *mce_banks; + + /* used for guest single stepping over the given code position */ + u16 singlestep_cs; + unsigned long singlestep_rip; }; struct kvm_mem_alias { -- cgit v1.1 From 6be7d3062b59af891be7e40c6802350de5f78cef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:54 +0200 Subject: KVM: SVM: Cleanup NMI singlestep Push the NMI-related singlestep variable into vcpu_svm. It's dealing with an AMD-specific deficit, nothing generic for x86. Acked-by: Gleb Natapov Signed-off-by: Jan Kiszka arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2536fbd..4d994ad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -354,7 +354,6 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; - bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; -- cgit v1.1 From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Oct 2009 15:28:36 -0400 Subject: KVM: allow userspace to adjust kvmclock offset When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [marcelo: future-proof abi with a flags field] [jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it] Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d994ad..0558ff8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; }; -- cgit v1.1 From 18863bdd60f895f3b3ba16b15e8331aee781e8ec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Sep 2009 11:12:18 +0300 Subject: KVM: x86 shared msr infrastructure The various syscall-related MSRs are fairly expensive to switch. 
Currently we switch them on every vcpu preemption, which is far too often: - if we're switching to a kernel thread (idle task, threaded interrupt, kernel-mode virtio server (vhost-net), for example) and back, then there's no need to switch those MSRs since kernel threasd won't be exiting to userspace. - if we're switching to another guest running an identical OS, most likely those MSRs will have the same value, so there's little point in reloading them. - if we're running the same OS on the guest and host, the MSRs will have identical values and reloading is unnecessary. This patch uses the new user return notifiers to implement last-minute switching, and checks the msr values to avoid unnecessary reloading. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0558ff8..26a74b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -809,4 +809,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_define_shared_msr(unsigned index, u32 msr); +void kvm_set_shared_msr(unsigned index, u64 val); + #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 28 ++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b7..950df43 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7..06e0856 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); -- cgit v1.1 From eb3c79e64a70fb8f7473e30fa07e89c1ecc2c9bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 24 Nov 2009 15:20:15 +0200 Subject: KVM: x86 emulator: limit instructions to 15 bytes While we are never normally passed an instruction that exceeds 15 bytes, smp games can cause us to attempt to interpret one, which will cause large latencies in non-preempt hosts. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c4..7c18e12 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -129,7 +129,7 @@ struct decode_cache { u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; - unsigned long eip; + unsigned long eip, eip_orig; /* modrm */ u8 modrm; u8 modrm_mod; -- cgit v1.1 From d5696725b2a4c59503f5e0bc33adeee7f30cd45b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 Dec 2009 12:28:47 +0200 Subject: KVM: VMX: Fix comparison of guest efer with stale host value update_transition_efer() masks out some efer bits when deciding whether to switch the msr during guest entry; for example, NX is emulated using the mmu so we don't need to disable it, and LMA/LME are handled by the hardware. However, with shared msrs, the comparison is made against a stale value; at the time of the guest switch we may be running with another guest's efer. Fix by deferring the mask/compare to the actual point of guest entry. Noted by Marcelo. 
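The underlying idea, comparing against the value that is loaded right now at guest-entry time instead of a value cached earlier, can be shown with a toy version of the shared-MSR update. wrmsr() is a stub and my_set_shared_msr() an invented name; only the compare-under-mask-then-skip logic reflects the fix.

#include <stdio.h>

static unsigned long long loaded_value; /* what the (one) MSR currently holds */

/* Stand-in for the real wrmsr: just records and reports the write */
static void wrmsr(unsigned long long val)
{
        loaded_value = val;
        printf("wrmsr -> %#llx\n", val);
}

/* Defer the mask/compare to the moment of guest entry */
static void my_set_shared_msr(unsigned long long val, unsigned long long mask)
{
        if (((val ^ loaded_value) & mask) == 0)
                return;         /* relevant bits unchanged: skip the wrmsr */
        wrmsr(val);
}

int main(void)
{
        loaded_value = 0xd01;                   /* left behind by another guest */
        my_set_shared_msr(0x501, 0xfff);        /* differs -> reload */
        my_set_shared_msr(0x501, 0xfff);        /* same    -> no reload */
        return 0;
}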
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06e0856..4f865e8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -812,6 +812,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val); +void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- arch/x86/include/asm/desc_defs.h | 4 ++-- arch/x86/include/asm/mmzone_32.h | 2 +- arch/x86/include/asm/uv/uv_bau.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d66848..278441f 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -12,9 +12,9 @@ #include /* - * FIXME: Acessing the desc_struct through its fields is more elegant, + * FIXME: Accessing the desc_struct through its fields is more elegant, * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b acessors, and doing this allow us to do it + * still touches the a and b accessors, and doing this allow us to do it * incrementally. 
We keep the signature as a struct, rather than an union, * so we can get rid of it transparently in the future -- glommer */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ede6998..91df7c5 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 64Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contiguous chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 80e2984..b414d2b 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -55,7 +55,7 @@ #define DESC_STATUS_SOURCE_TIMEOUT 3 /* - * source side threshholds at which message retries print a warning + * source side thresholds at which message retries print a warning */ #define SOURCE_TIMEOUT_LIMIT 20 #define DESTINATION_TIMEOUT_LIMIT 20 -- cgit v1.1 From 6070d81eb5f2d4943223c96e7609a53cdc984364 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 4 Dec 2009 15:47:01 -0500 Subject: tree-wide: fix misspelling of "definition" in comments "Definition" is misspelled "defintion" in several comments; this patch fixes them. No code changes. Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- arch/x86/include/asm/sigcontext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 72e5a44..04459d2 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -124,7 +124,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ @@ -219,7 +219,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ -- cgit v1.1 From a5fc5eba4dfcc284e6adcd7fdcd5b43182230d2b Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:51 -0800 Subject: x86: Convert BUG() to use unreachable() Use the new unreachable() macro instead of for(;;);. When allyesconfig is built with a GCC-4.5 snapshot on i686 the size of the text segment is reduced by 3987 bytes (from 6827019 to 6823032). Signed-off-by: David Daney Acked-by: "H. 
Peter Anvin" CC: Thomas Gleixner CC: Ingo Molnar CC: x86@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd..f654d1b 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,14 +22,14 @@ do { \ ".popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (sizeof(struct bug_entry))); \ - for (;;) ; \ + unreachable(); \ } while (0) #else #define BUG() \ do { \ asm volatile("ud2"); \ - for (;;) ; \ + unreachable(); \ } while (0) #endif -- cgit v1.1
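A small user-space sketch of what unreachable() buys here: it tells the compiler that control does not continue past the trap, so no dead code is kept after BUG() and no "control reaches end of non-void function" warning appears. MY_BUG() and pick() are invented for the example; __builtin_unreachable() is the GCC builtin behind the kernel macro on sufficiently new compilers.

#include <stdio.h>

/* Simplified BUG(): trap, then promise the compiler we never return */
#define MY_BUG()                                        \
do {                                                    \
        __asm__ volatile("ud2");                        \
        __builtin_unreachable();                        \
} while (0)

static int pick(int x)
{
        switch (x) {
        case 0: return 10;
        case 1: return 20;
        default:
                MY_BUG();       /* no return needed here thanks to unreachable() */
        }
}

int main(void)
{
        printf("%d\n", pick(1));
        return 0;
}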