From 334ef7a7ab8f80b689a2be95d5e62d2167900865 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:13 +0200
Subject: x86: use performance variant for_each_cpu_mask_nr

Change references from for_each_cpu_mask to for_each_cpu_mask_nr
where appropriate

Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 2d474871e2fb092eb46a0930aba5442e10eb96cc
Author: Mike Travis <travis@sgi.com>
Date:   Mon May 12 21:21:13 2008 +0200
---
 include/asm-x86/ipi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index ecc80f3..5f7310a 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -121,7 +121,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
-- 
cgit v1.1


From 46a7fa270afbe5fddc6042a598cfe22977b0e989 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:42 +0900
Subject: x86: make only GART code include gart.h

gart.h has only GART-specific stuff. Only GART code needs it. Other
IOMMU stuff should include iommu.h instead of gart.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/gart.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 33b9aee..3f62a83 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -2,7 +2,6 @@
 #define _ASM_X8664_GART_H 1
 
 #include <asm/e820.h>
-#include <asm/iommu.h>
 
 extern void set_up_gart_resume(u32, u32);
 
-- 
cgit v1.1


From ac7ded2adb2e43152fe7385ddd53bf45f5c92285 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:43 +0900
Subject: x86: remove ifdef CONFIG_GART_IOMMU in pci-dma.c

Our way to handle gart_* functions for CONFIG_GART_IOMMU and
!CONFIG_GART_IOMMU cases is inconsistent.

We have some dummy gart_* functions in !CONFIG_GART_IOMMU case and
also use ifdef CONFIG_GART_IOMMU tricks in pci-dma.c to call some
gart_* functions in only CONFIG_GART_IOMMU case.

This patch removes ifdef CONFIG_GART_IOMMU in pci-dma.c and always use
dummy gart_* functions in iommu.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/iommu.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 068c9a4..d63166f 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -25,10 +25,18 @@ extern void gart_iommu_hole_init(void);
 static inline void early_gart_iommu_check(void)
 {
 }
-
+static inline void gart_iommu_init(void)
+{
+}
 static inline void gart_iommu_shutdown(void)
 {
 }
+static inline void gart_parse_options(char *options)
+{
+}
+static inline void gart_iommu_hole_init(void)
+{
+}
 #endif
 
 #endif
-- 
cgit v1.1


From be54f9d1c8df93c4998e134a306652caaa58f67f Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:45 +0900
Subject: x86: remove ifdef CONFIG_SWIOTLB in pci-dma.c

As other IOMMUs do, this puts dummy pci_swiotlb_init() in swiotlb.h
and remove ifdef CONFIG_SWIOTLB in pci-dma.c.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/swiotlb.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index f5d9e74..c706a74 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -45,12 +45,14 @@ extern int swiotlb_force;
 
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
+extern void pci_swiotlb_init(void);
 #else
 #define swiotlb 0
+static inline void pci_swiotlb_init(void)
+{
+}
 #endif
 
-extern void pci_swiotlb_init(void);
-
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
 #endif /* _ASM_SWIOTLB_H */
-- 
cgit v1.1


From 5694703f14b1f6219fce42a27229b0c7d2c23edd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:20 +0200
Subject: x86, AMD IOMMU: add comments to amd_iommu_types.h

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/amd_iommu_types.h | 104 +++++++++++++++++++++++++++++++++++---
 1 file changed, 98 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index 7bfcb47..945fd49 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -158,78 +158,170 @@
 
 #define MAX_DOMAIN_ID 65536
 
+/*
+ * This structure contains generic data for  IOMMU protection domains
+ * independent of their use.
+ */
 struct protection_domain {
-	spinlock_t lock;
-	u16 id;
-	int mode;
-	u64 *pt_root;
-	void *priv;
+	spinlock_t lock; /* mostly used to lock the page table*/
+	u16 id;		 /* the domain id written to the device table */
+	int mode;	 /* paging mode (0-6 levels) */
+	u64 *pt_root;	 /* page table root pointer */
+	void *priv;	 /* private data */
 };
 
+/*
+ * Data container for a dma_ops specific protection domain
+ */
 struct dma_ops_domain {
 	struct list_head list;
+
+	/* generic protection domain information */
 	struct protection_domain domain;
+
+	/* size of the aperture for the mappings */
 	unsigned long aperture_size;
+
+	/* address we start to search for free addresses */
 	unsigned long next_bit;
+
+	/* address allocation bitmap */
 	unsigned long *bitmap;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
 	u64 **pte_pages;
 };
 
+/*
+ * Structure where we save information about one hardware AMD IOMMU in the
+ * system.
+ */
 struct amd_iommu {
 	struct list_head list;
+
+	/* locks the accesses to the hardware */
 	spinlock_t lock;
 
+	/* device id of this IOMMU */
 	u16 devid;
+	/*
+	 * Capability pointer. There could be more than one IOMMU per PCI
+	 * device function if there are more than one AMD IOMMU capability
+	 * pointers.
+	 */
 	u16 cap_ptr;
 
+	/* physical address of MMIO space */
 	u64 mmio_phys;
+	/* virtual address of MMIO space */
 	u8 *mmio_base;
+
+	/* capabilities of that IOMMU read from ACPI */
 	u32 cap;
+
+	/* first device this IOMMU handles. read from PCI */
 	u16 first_device;
+	/* last device this IOMMU handles. read from PCI */
 	u16 last_device;
+
+	/* start of exclusion range of that IOMMU */
 	u64 exclusion_start;
+	/* length of exclusion range of that IOMMU */
 	u64 exclusion_length;
 
+	/* command buffer virtual address */
 	u8 *cmd_buf;
+	/* size of command buffer */
 	u32 cmd_buf_size;
 
+	/* if one, we need to send a completion wait command */
 	int need_sync;
 
+	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 };
 
+/*
+ * List with all IOMMUs in the system. This list is not locked because it is
+ * only written and read at driver initialization or suspend time
+ */
 extern struct list_head amd_iommu_list;
 
+/*
+ * Structure defining one entry in the device table
+ */
 struct dev_table_entry {
 	u32 data[8];
 };
 
+/*
+ * One entry for unity mappings parsed out of the ACPI table.
+ */
 struct unity_map_entry {
 	struct list_head list;
+
+	/* starting device id this entry is used for (including) */
 	u16 devid_start;
+	/* end device id this entry is used for (including) */
 	u16 devid_end;
+
+	/* start address to unity map (including) */
 	u64 address_start;
+	/* end address to unity map (including) */
 	u64 address_end;
+
+	/* required protection */
 	int prot;
 };
 
+/*
+ * List of all unity mappings. It is not locked because as runtime it is only
+ * read. It is created at ACPI table parsing time.
+ */
 extern struct list_head amd_iommu_unity_map;
 
-/* data structures for device handling */
+/*
+ * Data structures for device handling
+ */
+
+/*
+ * Device table used by hardware. Read and write accesses by software are
+ * locked with the amd_iommu_pd_table lock.
+ */
 extern struct dev_table_entry *amd_iommu_dev_table;
+
+/*
+ * Alias table to find requestor ids to device ids. Not locked because only
+ * read on runtime.
+ */
 extern u16 *amd_iommu_alias_table;
+
+/*
+ * Reverse lookup table to find the IOMMU which translates a specific device.
+ */
 extern struct amd_iommu **amd_iommu_rlookup_table;
 
+/* size of the dma_ops aperture as power of 2 */
 extern unsigned amd_iommu_aperture_order;
 
+/* largest PCI device id we expect translation requests for */
 extern u16 amd_iommu_last_bdf;
 
 /* data structures for protection domain handling */
 extern struct protection_domain **amd_iommu_pd_table;
+
+/* allocation bitmap for domain ids */
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
+/* will be 1 if device isolation is enabled */
 extern int amd_iommu_isolate;
 
+/* takes a PCI device id and prints it out in a readable form */
 static inline void print_devid(u16 devid, int nl)
 {
 	int bus = devid >> 8;
-- 
cgit v1.1


From 8ea80d783efd0c50577ec8d69757ae54c408eacd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:23 +0200
Subject: x86, AMD IOMMU: replace HIGH_U32 macro with upper_32_bits function

Removes a driver specific macro and replaces it with a generic function already
available in Linux.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/amd_iommu_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index 945fd49..14aaffe 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -33,7 +33,6 @@
 
 /* helper macros */
 #define LOW_U32(x) ((x) & ((1ULL << 32)-1))
-#define HIGH_U32(x) (LOW_U32((x) >> 32))
 
 /* Length of the MMIO region for the AMD IOMMU */
 #define MMIO_REGION_LENGTH       0x4000
-- 
cgit v1.1


From 83f5aac18ccd02170f4a61e7289ceabd5101c1a0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:34 +0200
Subject: x86, AMD IOMMU: fix device table entry size

A device table entry is actually only 256 *bits* large. Not 256 bytes.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/amd_iommu_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index 14aaffe..2e8601b 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -27,7 +27,7 @@
 /*
  * some size calculation constants
  */
-#define DEV_TABLE_ENTRY_SIZE		256
+#define DEV_TABLE_ENTRY_SIZE		32
 #define ALIAS_TABLE_ENTRY_SIZE		2
 #define RLOOKUP_TABLE_ENTRY_SIZE	(sizeof(void *))
 
-- 
cgit v1.1


From d591b0a3ae25f587d0c4da1e1d1a425143590790 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:35 +0200
Subject: x86, AMD IOMMU: replace DEVID macro with a function

This patch replaces the DEVID macro with a function and uses them where
apropriate (also in the core code).

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/amd_iommu_types.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index 2e8601b..22aa58c 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -332,4 +332,11 @@ static inline void print_devid(u16 devid, int nl)
 		printk("\n");
 }
 
+/* takes bus and device/function and returns the device id
+ * FIXME: should that be in generic PCI code? */
+static inline u16 calc_devid(u8 bus, u8 devfn)
+{
+	return (((u16)bus) << 8) | devfn;
+}
+
 #endif
-- 
cgit v1.1


From a312b37b2a212fd2e227d1d6321f903b91b65ec7 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:06:23 -0700
Subject: x86/paravirt: call paravirt_pagetable_setup_{start, done}

Call paravirt_pagetable_setup_{start,done}

These paravirt_ops functions were not being called on x86_64.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable.h    | 18 ++++++++++++++++++
 include/asm-x86/pgtable_32.h | 15 ---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 49cbd76..96aa76e 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -302,6 +302,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+#ifdef CONFIG_X86_32
+extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_done(pgd_t *base);
+#else
+static inline void native_pagetable_setup_start(pgd_t *base) {}
+static inline void native_pagetable_setup_done(pgd_t *base) {}
+#endif
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -333,6 +341,16 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
+
+static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
+{
+	native_pagetable_setup_start(base);
+}
+
+static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
+{
+	native_pagetable_setup_done(base);
+}
 #endif	/* CONFIG_PARAVIRT */
 
 #endif	/* __ASSEMBLY__ */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index ec871c4..0611abf 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -171,21 +171,6 @@ do {						\
  */
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-
-#ifndef CONFIG_PARAVIRT
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
-	native_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
-	native_pagetable_setup_done(base);
-}
-#endif	/* !CONFIG_PARAVIRT */
-
 #endif /* !__ASSEMBLY__ */
 
 /*
-- 
cgit v1.1


From 7c33b1e6ee26d67551109aca04d46544d0ce55b1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:29 -0700
Subject: x86_64: unstatic get_local_pda

This allows Xen's xen_cpu_up() to allocate a pda for the new CPU.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/smp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index c2784b3..3c877f7 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -25,6 +25,8 @@ extern cpumask_t cpu_callin_map;
 extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);
 
+extern int __cpuinit get_local_pda(int cpu);
+
 extern int smp_num_siblings;
 extern unsigned int num_processors;
 extern cpumask_t cpu_initialized;
-- 
cgit v1.1


From 48b5db20621388582ca11ac3c61d3403966dbe51 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:34 -0700
Subject: xen64: define asm/xen/interface for 64-bit

Copy 64-bit definitions of various interface structures into place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/interface.h    | 139 ++++++++++++--------------------
 include/asm-x86/xen/interface_32.h |  97 ++++++++++++++++++++++
 include/asm-x86/xen/interface_64.h | 159 +++++++++++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+), 90 deletions(-)
 create mode 100644 include/asm-x86/xen/interface_32.h
 create mode 100644 include/asm-x86/xen/interface_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 6227000..9d810f2 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -1,13 +1,13 @@
 /******************************************************************************
  * arch-x86_32.h
  *
- * Guest OS interface to x86 32-bit Xen.
+ * Guest OS interface to x86 Xen.
  *
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __XEN_PUBLIC_ARCH_X86_32_H__
-#define __XEN_PUBLIC_ARCH_X86_32_H__
+#ifndef __ASM_X86_XEN_INTERFACE_H
+#define __ASM_X86_XEN_INTERFACE_H
 
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
@@ -57,6 +57,17 @@ DEFINE_GUEST_HANDLE(long);
 DEFINE_GUEST_HANDLE(void);
 #endif
 
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+
 /*
  * SEGMENT DESCRIPTOR TABLES
  */
@@ -71,58 +82,21 @@ DEFINE_GUEST_HANDLE(void);
 #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
 
 /*
- * These flat segments are in the Xen-private section of every GDT. Since these
- * are also present in the initial GDT, many OSes will be able to avoid
- * installing their own GDT.
- */
-#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
-#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
-#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
-#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
-#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
-#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
-
-#define FLAT_KERNEL_CS FLAT_RING1_CS
-#define FLAT_KERNEL_DS FLAT_RING1_DS
-#define FLAT_KERNEL_SS FLAT_RING1_SS
-#define FLAT_USER_CS    FLAT_RING3_CS
-#define FLAT_USER_DS    FLAT_RING3_DS
-#define FLAT_USER_SS    FLAT_RING3_SS
-
-/* And the trap vector is... */
-#define TRAP_INSTR "int $0x82"
-
-/*
- * Virtual addresses beyond this are not modifiable by guest OSes. The
- * machine->physical mapping table starts at this address, read-only.
- */
-#ifdef CONFIG_X86_PAE
-#define __HYPERVISOR_VIRT_START 0xF5800000
-#else
-#define __HYPERVISOR_VIRT_START 0xFC000000
-#endif
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#endif
-
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
-
-/* Maximum number of virtual CPUs in multi-processor guests. */
-#define MAX_VIRT_CPUS 32
-
-#ifndef __ASSEMBLY__
-
-/*
  * Send an array of these to HYPERVISOR_set_trap_table()
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: Noone may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
  */
 #define TI_GET_DPL(_ti)		((_ti)->flags & 3)
 #define TI_GET_IF(_ti)		((_ti)->flags & 4)
 #define TI_SET_DPL(_ti, _dpl)	((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti, _if)	((_ti)->flags |= ((!!(_if))<<2))
 
+#ifndef __ASSEMBLY__
 struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
@@ -131,32 +105,21 @@ struct trap_info {
 };
 DEFINE_GUEST_HANDLE_STRUCT(trap_info);
 
-struct cpu_user_regs {
-    uint32_t ebx;
-    uint32_t ecx;
-    uint32_t edx;
-    uint32_t esi;
-    uint32_t edi;
-    uint32_t ebp;
-    uint32_t eax;
-    uint16_t error_code;    /* private */
-    uint16_t entry_vector;  /* private */
-    uint32_t eip;
-    uint16_t cs;
-    uint8_t  saved_upcall_mask;
-    uint8_t  _pad0;
-    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
-    uint32_t esp;
-    uint16_t ss, _pad1;
-    uint16_t es, _pad2;
-    uint16_t ds, _pad3;
-    uint16_t fs, _pad4;
-    uint16_t gs, _pad5;
+struct arch_shared_info {
+    unsigned long max_pfn;                  /* max pfn that appears in table */
+    /* Frame containing list of mfns containing list of mfns containing p2m. */
+    unsigned long pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
 };
-DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+#endif	/* !__ASSEMBLY__ */
 
-typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+#ifdef CONFIG_X86_32
+#include "interface_32.h"
+#else
+#include "interface_64.h"
+#endif
 
+#ifndef __ASSEMBLY__
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -173,33 +136,29 @@ struct vcpu_guest_context {
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
     unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
     unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
     unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
+#ifdef __i386__
     unsigned long event_callback_cs;        /* CS:EIP of event callback     */
     unsigned long event_callback_eip;
     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
     unsigned long failsafe_callback_eip;
+#else
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+    unsigned long syscall_callback_eip;
+#endif
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses. */
+    uint64_t      fs_base;
+    uint64_t      gs_base_kernel;
+    uint64_t      gs_base_user;
+#endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
-
-struct arch_shared_info {
-    unsigned long max_pfn;                  /* max pfn that appears in table */
-    /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
-    unsigned long nmi_reason;
-};
-
-struct arch_vcpu_info {
-    unsigned long cr2;
-    unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
-};
-
-struct xen_callback {
-	unsigned long cs;
-	unsigned long eip;
-};
-#endif /* !__ASSEMBLY__ */
+#endif	/* !__ASSEMBLY__ */
 
 /*
  * Prefix forces emulation of some non-trapping instructions.
@@ -213,4 +172,4 @@ struct xen_callback {
 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
 #endif
 
-#endif
+#endif	/* __ASM_X86_XEN_INTERFACE_H */
diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h
new file mode 100644
index 0000000..d8ac41d
--- /dev/null
+++ b/include/asm-x86/xen/interface_32.h
@@ -0,0 +1,97 @@
+/******************************************************************************
+ * arch-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __ASM_X86_XEN_INTERFACE_32_H
+#define __ASM_X86_XEN_INTERFACE_32_H
+
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS    FLAT_RING3_CS
+#define FLAT_USER_DS    FLAT_RING3_DS
+#define FLAT_USER_SS    FLAT_RING3_SS
+
+/* And the trap vector is... */
+#define TRAP_INSTR "int $0x82"
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes. The
+ * machine->physical mapping table starts at this address, read-only.
+ */
+#define __HYPERVISOR_VIRT_START 0xF5800000
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+    uint32_t ebx;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t ebp;
+    uint32_t eax;
+    uint16_t error_code;    /* private */
+    uint16_t entry_vector;  /* private */
+    uint32_t eip;
+    uint16_t cs;
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad0;
+    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
+    uint32_t esp;
+    uint16_t ss, _pad1;
+    uint16_t es, _pad2;
+    uint16_t ds, _pad3;
+    uint16_t fs, _pad4;
+    uint16_t gs, _pad5;
+};
+DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
+};
+
+struct xen_callback {
+	unsigned long cs;
+	unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#define XEN_CALLBACK(__cs, __eip)				\
+	((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) })
+#endif /* !__ASSEMBLY__ */
+
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ *
+ * Note that Xen is using the fact that the pagetable base is always
+ * page-aligned, and putting the 12 MSB of the address into the 12 LSB
+ * of cr3.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+#endif	/* __ASM_X86_XEN_INTERFACE_32_H */
diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h
new file mode 100644
index 0000000..842266c
--- /dev/null
+++ b/include/asm-x86/xen/interface_64.h
@@ -0,0 +1,159 @@
+#ifndef __ASM_X86_XEN_INTERFACE_64_H
+#define __ASM_X86_XEN_INTERFACE_64_H
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033  /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b  /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS   FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS   FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS   FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS          0
+#define SEGBASE_GS_USER     1
+#define SEGBASE_GS_KERNEL   2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to iteself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL  VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+    uint64_t r15;
+    uint64_t r14;
+    uint64_t r13;
+    uint64_t r12;
+    __DECL_REG(bp);
+    __DECL_REG(bx);
+    uint64_t r11;
+    uint64_t r10;
+    uint64_t r9;
+    uint64_t r8;
+    __DECL_REG(ax);
+    __DECL_REG(cx);
+    __DECL_REG(dx);
+    __DECL_REG(si);
+    __DECL_REG(di);
+    uint32_t error_code;    /* private */
+    uint32_t entry_vector;  /* private */
+    __DECL_REG(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad1[3];
+    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
+    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+
+#undef __DECL_REG
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+typedef unsigned long xen_callback_t;
+
+#define XEN_CALLBACK(__cs, __rip)				\
+	((unsigned long)(__rip))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#endif	/* __ASM_X86_XEN_INTERFACE_64_H */
-- 
cgit v1.1


From ca15f20f1126f897500ade892a2d598a08da1b56 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:36 -0700
Subject: xen: fix 64-bit hypercall variants

64-bit guests can pass 64-bit quantities in a single argument,
so fix up the hypercalls.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/hypercall.h | 60 +++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 2a4f9b4..0551b9d 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -223,12 +223,12 @@ static inline int
 HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
 			     unsigned long flags)
 {
-	unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
-	pte_hi = new_val.pte_high;
-#endif
-	return _hypercall4(int, update_va_mapping, va,
-			   new_val.pte_low, pte_hi, flags);
+	if (sizeof(new_val) == sizeof(long))
+		return _hypercall3(int, update_va_mapping, va,
+				   new_val.pte, flags);
+	else
+		return _hypercall4(int, update_va_mapping, va,
+				   new_val.pte, new_val.pte >> 32, flags);
 }
 
 static inline int
@@ -281,12 +281,13 @@ static inline int
 HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
 					 unsigned long flags, domid_t domid)
 {
-	unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
-	pte_hi = new_val.pte_high;
-#endif
-	return _hypercall5(int, update_va_mapping_otherdomain, va,
-			   new_val.pte_low, pte_hi, flags, domid);
+	if (sizeof(new_val) == sizeof(long))
+		return _hypercall4(int, update_va_mapping_otherdomain, va,
+				   new_val.pte, flags, domid);
+	else
+		return _hypercall5(int, update_va_mapping_otherdomain, va,
+				   new_val.pte, new_val.pte >> 32,
+				   flags, domid);
 }
 
 static inline int
@@ -327,14 +328,14 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
 {
 	mcl->op = __HYPERVISOR_update_va_mapping;
 	mcl->args[0] = va;
-#ifdef CONFIG_X86_PAE
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = new_val.pte_high;
-#else
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = 0;
-#endif
-	mcl->args[3] = flags;
+	if (sizeof(new_val) == sizeof(long)) {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = flags;
+	} else {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = new_val.pte >> 32;
+		mcl->args[3] = flags;
+	}
 }
 
 static inline void
@@ -354,15 +355,16 @@ MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long v
 {
 	mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
 	mcl->args[0] = va;
-#ifdef CONFIG_X86_PAE
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = new_val.pte_high;
-#else
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = 0;
-#endif
-	mcl->args[3] = flags;
-	mcl->args[4] = domid;
+	if (sizeof(new_val) == sizeof(long)) {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = flags;
+		mcl->args[3] = domid;
+	} else {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = new_val.pte >> 32;
+		mcl->args[3] = flags;
+		mcl->args[4] = domid;
+	}
 }
 
 static inline void
-- 
cgit v1.1


From e74359028d5489a281fb2c379a47b1d3cb14526e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:37 -0700
Subject: xen64: fix calls into hypercall page

The 64-bit calling convention for hypercalls uses different registers
from 32-bit.  Annoyingly, gcc's asm syntax doesn't have a way to
specify one of the extra numeric reigisters in a constraint, so we
must use explicitly placed register variables.  Given that we have to
do it for some args, may as well do it for all.

Also fix syntax gcc generates for the call instruction itself.  We
need a plain direct call, but the asm expansion which works on 32-bit
generates a rip-relative addressing mode in 64-bit, which is treated
as an indirect call.  The alternative is to pass the hypercall page
offset into the asm, and have it add it to the hypercall page start
address to generate the call.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/hypercall.h | 170 ++++++++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 48 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 0551b9d..d0c5ded 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -40,83 +40,157 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 
+/*
+ * The hypercall asms have to meet several constraints:
+ * - Work on 32- and 64-bit.
+ *    The two architectures put their arguments in different sets of
+ *    registers.
+ *
+ * - Work around asm syntax quirks
+ *    It isn't possible to specify one of the rNN registers in a
+ *    constraint, so we use explicit register variables to get the
+ *    args into the right place.
+ *
+ * - Mark all registers as potentially clobbered
+ *    Even unused parameters can be clobbered by the hypervisor, so we
+ *    need to make sure gcc knows it.
+ *
+ * - Avoid compiler bugs.
+ *    This is the tricky part.  Because x86_32 has such a constrained
+ *    register set, gcc versions below 4.3 have trouble generating
+ *    code when all the arg registers and memory are trashed by the
+ *    asm.  There are syntactically simpler ways of achieving the
+ *    semantics below, but they cause the compiler to crash.
+ *
+ *    The only combination I found which works is:
+ *     - assign the __argX variables first
+ *     - list all actually used parameters as "+r" (__argX)
+ *     - clobber the rest
+ *
+ * The result certainly isn't pretty, and it really shows up cpp's
+ * weakness as as macro language.  Sorry.  (But let's just give thanks
+ * there aren't more than 5 arguments...)
+ */
+
 extern struct { char _entry[32]; } hypercall_page[];
 
+#define __HYPERCALL		"call hypercall_page+%c[offset]"
+#define __HYPERCALL_ENTRY(x)						\
+	[offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+
+#ifdef CONFIG_X86_32
+#define __HYPERCALL_RETREG	"eax"
+#define __HYPERCALL_ARG1REG	"ebx"
+#define __HYPERCALL_ARG2REG	"ecx"
+#define __HYPERCALL_ARG3REG	"edx"
+#define __HYPERCALL_ARG4REG	"esi"
+#define __HYPERCALL_ARG5REG	"edi"
+#else
+#define __HYPERCALL_RETREG	"rax"
+#define __HYPERCALL_ARG1REG	"rdi"
+#define __HYPERCALL_ARG2REG	"rsi"
+#define __HYPERCALL_ARG3REG	"rdx"
+#define __HYPERCALL_ARG4REG	"r10"
+#define __HYPERCALL_ARG5REG	"r8"
+#endif
+
+#define __HYPERCALL_DECLS						\
+	register unsigned long __res  asm(__HYPERCALL_RETREG);		\
+	register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
+	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
+	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
+	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
+
+#define __HYPERCALL_0PARAM	"=r" (__res)
+#define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
+#define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
+#define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
+#define __HYPERCALL_4PARAM	__HYPERCALL_3PARAM, "+r" (__arg4)
+#define __HYPERCALL_5PARAM	__HYPERCALL_4PARAM, "+r" (__arg5)
+
+#define __HYPERCALL_0ARG()
+#define __HYPERCALL_1ARG(a1)						\
+	__HYPERCALL_0ARG()		__arg1 = (unsigned long)(a1);
+#define __HYPERCALL_2ARG(a1,a2)						\
+	__HYPERCALL_1ARG(a1)		__arg2 = (unsigned long)(a2);
+#define __HYPERCALL_3ARG(a1,a2,a3)					\
+	__HYPERCALL_2ARG(a1,a2)		__arg3 = (unsigned long)(a3);
+#define __HYPERCALL_4ARG(a1,a2,a3,a4)					\
+	__HYPERCALL_3ARG(a1,a2,a3)	__arg4 = (unsigned long)(a4);
+#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5)				\
+	__HYPERCALL_4ARG(a1,a2,a3,a4)	__arg5 = (unsigned long)(a5);
+
+#define __HYPERCALL_CLOBBER5	"memory"
+#define __HYPERCALL_CLOBBER4	__HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
+#define __HYPERCALL_CLOBBER3	__HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG
+#define __HYPERCALL_CLOBBER2	__HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG
+#define __HYPERCALL_CLOBBER1	__HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
+#define __HYPERCALL_CLOBBER0	__HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG
+
 #define _hypercall0(type, name)						\
 ({									\
-	long __res;							\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res)						\
-		: [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_0ARG();						\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_0PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER0);				\
 	(type)__res;							\
 })
 
 #define _hypercall1(type, name, a1)					\
 ({									\
-	long __res, __ign1;						\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1)				\
-		: "1" ((long)(a1)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_1ARG(a1);						\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_1PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER1);				\
 	(type)__res;							\
 })
 
 #define _hypercall2(type, name, a1, a2)					\
 ({									\
-	long __res, __ign1, __ign2;					\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2)		\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_2ARG(a1, a2);					\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_2PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER2);				\
 	(type)__res;							\
 })
 
 #define _hypercall3(type, name, a1, a2, a3)				\
 ({									\
-	long __res, __ign1, __ign2, __ign3;				\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3)						\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_3ARG(a1, a2, a3);					\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_3PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER3);				\
 	(type)__res;							\
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)				\
 ({									\
-	long __res, __ign1, __ign2, __ign3, __ign4;			\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3), "=S" (__ign4)				\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)), "4" ((long)(a4)),			\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_4ARG(a1, a2, a3, a4);				\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_4PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER4);				\
 	(type)__res;							\
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)			\
 ({									\
-	long __res, __ign1, __ign2, __ign3, __ign4, __ign5;		\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3), "=S" (__ign4), "=D" (__ign5)		\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)), "4" ((long)(a4)),			\
-		  "5" ((long)(a5)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);				\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_5PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER5);				\
 	(type)__res;							\
 })
 
-- 
cgit v1.1


From f6e587325b3bc7e5c829a407ddc25b52c1e73851 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:38 -0700
Subject: xen64: add extra pv_mmu_ops

We need extra pv_mmu_ops for 64-bit, to deal with the extra level of
pagetable.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/page.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 377c045..a40be65 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -148,7 +148,11 @@ static inline pte_t __pte_ma(pteval_t x)
 }
 
 #define pmd_val_ma(v) ((v).pmd)
+#ifdef __PAGETABLE_PUD_FOLDED
 #define pud_val_ma(v) ((v).pgd.pgd)
+#else
+#define pud_val_ma(v) ((v).pud)
+#endif
 #define __pmd_ma(x)	((pmd_t) { (x) } )
 
 #define pgd_val_ma(x)	((x).pgd)
-- 
cgit v1.1


From 5b09b2876ed1a8e34a0da8f069575fc6174e2077 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:42 -0700
Subject: x86_64: add workaround for no %gs-based percpu

As a stopgap until Mike Travis's x86-64 gs-based percpu patches are
ready, provide workaround functions for x86_read/write_percpu for
Xen's use.

Specifically, this means that we can't really make use of vcpu
placement, because we can't use a single gs-based memory access to get
to vcpu fields.  So disable all that for now.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/percpu.h | 26 ++++++++++++++++++++++++++
 include/asm-x86/setup.h  |  1 +
 2 files changed, 27 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index 912a3a1..4e91ee1 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -22,6 +22,32 @@
 
 DECLARE_PER_CPU(struct x8664_pda, pda);
 
+/*
+ * These are supposed to be implemented as a single instruction which
+ * operates on the per-cpu data base segment.  x86-64 doesn't have
+ * that yet, so this is a fairly inefficient workaround for the
+ * meantime.  The single instruction is atomic with respect to
+ * preemption and interrupts, so we need to explicitly disable
+ * interrupts here to achieve the same effect.  However, because it
+ * can be used from within interrupt-disable/enable, we can't actually
+ * disable interrupts; disabling preemption is enough.
+ */
+#define x86_read_percpu(var)						\
+	({								\
+		typeof(per_cpu_var(var)) __tmp;				\
+		preempt_disable();					\
+		__tmp = __get_cpu_var(var);				\
+		preempt_enable();					\
+		__tmp;							\
+	})
+
+#define x86_write_percpu(var, val)					\
+	do {								\
+		preempt_disable();					\
+		__get_cpu_var(var) = (val);				\
+		preempt_enable();					\
+	} while(0)
+
 #else /* CONFIG_X86_64 */
 
 #ifdef __ASSEMBLY__
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 90ab222..6594926 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -76,6 +76,7 @@ extern unsigned long init_pg_tables_start;
 extern unsigned long init_pg_tables_end;
 
 #else
+void __init x86_64_init_pda(void);
 void __init x86_64_start_kernel(char *real_mode);
 void __init x86_64_start_reservations(char *real_mode_data);
 
-- 
cgit v1.1


From 084a2a4e7656209ea93aac9778defa03213ca31d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:50 -0700
Subject: xen64: early mapping setup

Set up the initial pagetables to map the kernel mapping into the
physical mapping space.  This makes __va() usable, since it requires
physical mappings.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable_64.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index fa7208b..805d312 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -16,6 +16,8 @@
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
+extern pmd_t level2_fixmap_pgt[512];
+extern pmd_t level2_ident_pgt[512];
 extern pgd_t init_level4_pgt[];
 
 #define swapper_pg_dir init_level4_pgt
-- 
cgit v1.1


From ce803e705f1cbdd2703e83061622089b5b4a5417 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:55 -0700
Subject: xen64: use arbitrary_virt_to_machine for xen_set_pmd

When building initial pagetables in 64-bit kernel the pud/pmd pointer may
be in ioremap/fixmap space, so we need to walk the pagetable to look up the
physical address.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index a40be65..05e678a 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -158,7 +158,7 @@ static inline pte_t __pte_ma(pteval_t x)
 #define pgd_val_ma(x)	((x).pgd)
 
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address);
+xmaddr_t arbitrary_virt_to_machine(void *address);
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
-- 
cgit v1.1


From 88459d4c7eb68c4a15609e00e5d100e2a305f040 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:02 -0700
Subject: xen64: register callbacks in arch-independent way

Use callback_op hypercall to register callbacks in a 32/64-bit
independent way (64-bit doesn't need a code segment, but that detail
is hidden in XEN_CALLBACK).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/hypercall.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index d0c5ded..2536664 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -226,6 +226,7 @@ HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
 	return _hypercall2(int, stack_switch, ss, esp);
 }
 
+#ifdef CONFIG_X86_32
 static inline int
 HYPERVISOR_set_callbacks(unsigned long event_selector,
 			 unsigned long event_address,
@@ -236,6 +237,17 @@ HYPERVISOR_set_callbacks(unsigned long event_selector,
 			   event_selector, event_address,
 			   failsafe_selector, failsafe_address);
 }
+#else  /* CONFIG_X86_64 */
+static inline int
+HYPERVISOR_set_callbacks(unsigned long event_address,
+			unsigned long failsafe_address,
+			unsigned long syscall_address)
+{
+	return _hypercall3(int, set_callbacks,
+			   event_address, failsafe_address,
+			   syscall_address);
+}
+#endif  /* CONFIG_X86_{32,64} */
 
 static inline int
 HYPERVISOR_callback_op(int cmd, void *arg)
-- 
cgit v1.1


From 45eb0d889862c813dfc98c95549c25acbfc99ab8 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:07:04 -0700
Subject: Xen64: HYPERVISOR_set_segment_base() implementation

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/hypercall.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 2536664..d9e4cf7 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -388,6 +388,14 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
 	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
 }
 
+#ifdef CONFIG_X86_64
+static inline int
+HYPERVISOR_set_segment_base(int reg, unsigned long value)
+{
+	return _hypercall2(int, set_segment_base, reg, value);
+}
+#endif
+
 static inline int
 HYPERVISOR_suspend(unsigned long srec)
 {
-- 
cgit v1.1


From c05f1cfaba846dfbd4a67e348087d32326288fe0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:11 -0700
Subject: xen64: implement 64-bit update_descriptor

64-bit hypercall interface can pass a maddr in one argument rather
than splitting it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/hypercall.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index d9e4cf7..91cb7fd 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -466,10 +466,15 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
 	mcl->op = __HYPERVISOR_update_descriptor;
-	mcl->args[0] = maddr;
-	mcl->args[1] = maddr >> 32;
-	mcl->args[2] = desc.a;
-	mcl->args[3] = desc.b;
+	if (sizeof(maddr) == sizeof(long)) {
+		mcl->args[0] = maddr;
+		mcl->args[1] = *(unsigned long *)&desc;
+	} else {
+		mcl->args[0] = maddr;
+		mcl->args[1] = maddr >> 32;
+		mcl->args[2] = desc.a;
+		mcl->args[3] = desc.b;
+	}
 }
 
 static inline void
-- 
cgit v1.1


From c24481e9da2c7bc8aafab46e0bc64821244a24a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:12 -0700
Subject: xen64: save lots of registers

The Xen hypercall interface is allowed to trash any or all of the
argument registers, so we need to be careful that the kernel state
isn't damaged.  On 32-bit kernels, the hypercall parameter registers
same as a regparm function call, so we've got away without explicit
clobbering so far.  The 64-bit ABI defines lots of caller-save
registers, so save them all for safety.  We can trim this set later by
re-distributing the responsibility for saving all these registers.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index ef5e8ec..eef8095 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1396,8 +1396,8 @@ extern struct paravirt_patch_site __parainstructions[],
  * caller saved registers but the argument parameter */
 #define PV_SAVE_REGS "pushq %%rdi;"
 #define PV_RESTORE_REGS "popq %%rdi;"
-#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
-#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
+#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
 #define PV_FLAGS_ARG "D"
 #endif
 
@@ -1489,8 +1489,26 @@ static inline unsigned long __raw_local_irq_save(void)
 
 
 #ifdef CONFIG_X86_64
-#define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
-#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+#define PV_SAVE_REGS				\
+	push %rax;				\
+	push %rcx;				\
+	push %rdx;				\
+	push %rsi;				\
+	push %rdi;				\
+	push %r8;				\
+	push %r9;				\
+	push %r10;				\
+	push %r11
+#define PV_RESTORE_REGS				\
+	pop %r11;				\
+	pop %r10;				\
+	pop %r9;				\
+	pop %r8;				\
+	pop %rdi;				\
+	pop %rsi;				\
+	pop %rdx;				\
+	pop %rcx;				\
+	pop %rax
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
 #define PARA_INDIRECT(addr)	*addr(%rip)
-- 
cgit v1.1


From 6a52e4b1cddd90fbfde8fb67021657936ee74b07 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sat, 12 Jul 2008 02:22:00 -0700
Subject: x86_64: further cleanup of 32-bit compat syscall mechanisms

AMD only supports "syscall" from 32-bit compat usermode.
Intel and Centaur(?) only support "sysenter" from 32-bit compat usermode.

Set the X86 feature bits accordingly, and set up the vdso in
accordance with those bits.  On the offchance we run on in a 64-bit
environment which supports neither syscall nor sysenter from 32-bit
mode, then fall back to the int $0x80 vdso.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/vdso.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
index 86e085e..8e18fb8 100644
--- a/include/asm-x86/vdso.h
+++ b/include/asm-x86/vdso.h
@@ -36,4 +36,12 @@ extern const char VDSO32_PRELINK[];
 extern void __user __kernel_sigreturn;
 extern void __user __kernel_rt_sigreturn;
 
+/*
+ * These symbols are defined by vdso32.S to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vdso32_int80_start, vdso32_int80_end;
+extern const char vdso32_syscall_start, vdso32_syscall_end;
+extern const char vdso32_sysenter_start, vdso32_sysenter_end;
+
 #endif	/* asm-x86/vdso.h */
-- 
cgit v1.1


From 74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:50 -0700
Subject: x86/paravirt: add hooks for spinlock operations

Ticket spinlocks have absolutely ghastly worst-case performance
characteristics in a virtual environment.  If there is any contention
for physical CPUs (ie, there are more runnable vcpus than cpus), then
ticket locks can cause the system to end up spending 90+% of its time
spinning.

The problem is that (v)cpus waiting on a ticket spinlock will be
granted access to the lock in strict order they got their tickets.  If
the hypervisor scheduler doesn't give the vcpus time in that order,
they will burn timeslices waiting for the scheduler to give the right
vcpu some time.  In the worst case it could take O(n^2) vcpu scheduler
timeslices for everyone waiting on the lock to get it, not counting
new cpus trying to take the lock while the log-jam is sorted out.

These hooks allow a paravirt backend to replace the spinlock
implementation.

At the very least, this could revert the implementation back to the
old lock algorithm, which allows the next scheduled vcpu to take the
lock, and has basically fairly good performance.

It also allows the spinlocks to take advantages of the hypervisor
features to make locks more efficient (spin and block, for example).

The cost to native execution is an extra direct call when using a
spinlock function.  There's no overhead if CONFIG_PARAVIRT is turned
off.

The lock structure is fixed at a single "unsigned int", initialized to
zero, but the spinlock implementation can use it as it wishes.

Thanks to Thomas Friebel's Xen Summit talk "Preventing Guests from
Spinning Around" for pointing out this problem.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h       | 37 +++++++++++++++++++++++++++
 include/asm-x86/spinlock.h       | 55 +++++++++++++++++++++++++++++-----------
 include/asm-x86/spinlock_types.h |  2 +-
 3 files changed, 78 insertions(+), 16 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index eef8095..feb6bb6 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -326,6 +326,15 @@ struct pv_mmu_ops {
 			   unsigned long phys, pgprot_t flags);
 };
 
+struct raw_spinlock;
+struct pv_lock_ops {
+	int (*spin_is_locked)(struct raw_spinlock *lock);
+	int (*spin_is_contended)(struct raw_spinlock *lock);
+	void (*spin_lock)(struct raw_spinlock *lock);
+	int (*spin_trylock)(struct raw_spinlock *lock);
+	void (*spin_unlock)(struct raw_spinlock *lock);
+};
+
 /* This contains all the paravirt structures: we get a convenient
  * number for each function using the offset which we use to indicate
  * what to patch. */
@@ -336,6 +345,7 @@ struct paravirt_patch_template {
 	struct pv_irq_ops pv_irq_ops;
 	struct pv_apic_ops pv_apic_ops;
 	struct pv_mmu_ops pv_mmu_ops;
+	struct pv_lock_ops pv_lock_ops;
 };
 
 extern struct pv_info pv_info;
@@ -345,6 +355,7 @@ extern struct pv_cpu_ops pv_cpu_ops;
 extern struct pv_irq_ops pv_irq_ops;
 extern struct pv_apic_ops pv_apic_ops;
 extern struct pv_mmu_ops pv_mmu_ops;
+extern struct pv_lock_ops pv_lock_ops;
 
 #define PARAVIRT_PATCH(x)					\
 	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -1374,6 +1385,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
 
+static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+}
+
+static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
+}
+
+static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+}
+
+static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+}
+
+static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+}
+
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
 	u8 *instr; 		/* original instructions */
@@ -1458,6 +1494,7 @@ static inline unsigned long __raw_local_irq_save(void)
 	return f;
 }
 
+
 /* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef __PVOP_CALL
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 21e89bf..9726144 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -6,7 +6,7 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
-
+#include <asm/paravirt.h>
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  *
@@ -54,21 +54,21 @@
  * much between them in performance though, especially as locks are out of line.
  */
 #if (NR_CPUS < 256)
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
 }
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	short inc = 0x0100;
 
@@ -87,9 +87,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
 		: "memory", "cc");
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp;
 	short new;
@@ -110,7 +108,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
 		     : "+m" (lock->slock)
@@ -118,21 +116,21 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 #else
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1;
 }
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	int inc = 0x00010000;
 	int tmp;
@@ -153,9 +151,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -177,7 +173,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
 		     : "+m" (lock->slock)
@@ -186,6 +182,35 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+#ifndef CONFIG_PARAVIRT
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+#endif	/* CONFIG_PARAVIRT */
+
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 9029cf7..06c071c 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -5,7 +5,7 @@
 # error "please don't include this file directly"
 #endif
 
-typedef struct {
+typedef struct raw_spinlock {
 	unsigned int slock;
 } raw_spinlock_t;
 
-- 
cgit v1.1


From 8efcbab674de2bee45a2e4cdf97de16b8e609ac8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:51 -0700
Subject: paravirt: introduce a "lock-byte" spinlock implementation

Implement a version of the old spinlock algorithm, in which everyone
spins waiting for a lock byte.  In order to be compatible with the
ticket-lock's use of a zero initializer, this uses the convention of
'0' for unlocked and '1' for locked.

This algorithm is much better than ticket locks in a virtual
envionment, because it doesn't interact badly with the vcpu scheduler.
If there are multiple vcpus spinning on a lock and the lock is
released, the next vcpu to be scheduled will take the lock, rather
than cycling around until the next ticketed vcpu gets it.

To use this, you must call paravirt_use_bytelocks() very early, before
any spinlocks have been taken.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h |  2 ++
 include/asm-x86/spinlock.h | 65 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index feb6bb6..65ed02c 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1385,6 +1385,8 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
 
+void paravirt_use_bytelocks(void);
+
 static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 9726144..4f9a986 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -184,7 +184,70 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
-#ifndef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT
+/*
+ * Define virtualization-friendly old-style lock byte lock, for use in
+ * pv_lock_ops if desired.
+ *
+ * This differs from the pre-2.6.24 spinlock by always using xchgb
+ * rather than decb to take the lock; this allows it to use a
+ * zero-initialized lock structure.  It also maintains a 1-byte
+ * contention counter, so that we can implement
+ * __byte_spin_is_contended.
+ */
+struct __byte_spinlock {
+	s8 lock;
+	s8 spinners;
+};
+
+static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	return bl->lock != 0;
+}
+
+static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	return bl->spinners != 0;
+}
+
+static inline void __byte_spin_lock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	s8 val = 1;
+
+	asm("1: xchgb %1, %0\n"
+	    "   test %1,%1\n"
+	    "   jz 3f\n"
+	    "   " LOCK_PREFIX "incb %2\n"
+	    "2: rep;nop\n"
+	    "   cmpb $1, %0\n"
+	    "   je 2b\n"
+	    "   " LOCK_PREFIX "decb %2\n"
+	    "   jmp 1b\n"
+	    "3:"
+	    : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
+}
+
+static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	u8 old = 1;
+
+	asm("xchgb %1,%0"
+	    : "+m" (bl->lock), "+q" (old) : : "memory");
+
+	return old == 0;
+}
+
+static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	smp_wmb();
+	bl->lock = 0;
+}
+#else  /* !CONFIG_PARAVIRT */
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
-- 
cgit v1.1


From 2d9e1e2f58b5612aa4eab0ab54c84308a29dbd79 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:53 -0700
Subject: xen: implement Xen-specific spinlocks

The standard ticket spinlocks are very expensive in a virtual
environment, because their performance depends on Xen's scheduler
giving vcpus time in the order that they're supposed to take the
spinlock.

This implements a Xen-specific spinlock, which should be much more
efficient.

The fast-path is essentially the old Linux-x86 locks, using a single
lock byte.  The locker decrements the byte; if the result is 0, then
they have the lock.  If the lock is negative, then locker must spin
until the lock is positive again.

When there's contention, the locker spin for 2^16[*] iterations waiting
to get the lock.  If it fails to get the lock in that time, it adds
itself to the contention count in the lock and blocks on a per-cpu
event channel.

When unlocking the spinlock, the locker looks to see if there's anyone
blocked waiting for the lock by checking for a non-zero waiter count.
If there's a waiter, it traverses the per-cpu "lock_spinners"
variable, which contains which lock each CPU is waiting on.  It picks
one CPU waiting on the lock and sends it an event to wake it up.

This allows efficient fast-path spinlock operation, while allowing
spinning vcpus to give up their processor time while waiting for a
contended lock.

[*] 2^16 iterations is threshold at which 98% locks have been taken
according to Thomas Friebel's Xen Summit talk "Preventing Guests from
Spinning Around".  Therefore, we'd expect the lock and unlock slow
paths will only be entered 2% of the time.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/xen/events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index f8d57ea..8ded747 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -5,6 +5,7 @@ enum ipi_vector {
 	XEN_RESCHEDULE_VECTOR,
 	XEN_CALL_FUNCTION_VECTOR,
 	XEN_CALL_FUNCTION_SINGLE_VECTOR,
+	XEN_SPIN_UNLOCK_VECTOR,
 
 	XEN_NR_IPIS,
 };
-- 
cgit v1.1


From 4bb689eee12ceb6d669a0c9a519037c049a8af38 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 14:33:33 +0200
Subject: x86: paravirt spinlocks, !CONFIG_SMP build fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 65ed02c..b2aba8f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1387,6 +1387,8 @@ void _paravirt_nop(void);
 
 void paravirt_use_bytelocks(void);
 
+#ifdef CONFIG_SMP
+
 static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
@@ -1412,6 +1414,8 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
 	return PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
 }
 
+#endif
+
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
 	u8 *instr; 		/* original instructions */
-- 
cgit v1.1


From 64f097331928b01d704047c1dbc738bb6d2a9bf9 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 01:33:14 -0700
Subject: x86 ptrace: unify TIF_SINGLESTEP

This unifies the treatment of TIF_SINGLESTEP on i386 and x86_64.
The bit is now excluded from _TIF_WORK_MASK on i386 as it has been
on x86_64.  This means the do_notify_resume() path using it is never
used, so TIF_SINGLESTEP is not cleared on returning to user mode.

Both now leave TIF_SINGLESTEP set when returning to user, so that
it's already set on an int $0x80 system call entry.  This removes
the need for testing TF on the system_call path.  Doing it this way
fixes the regression for PTRACE_SINGLESTEP into a sigreturn syscall,
introduced by commit 1e2e99f0e4aa6363e8515ed17011c210c8f1b52a.

The clear_TF_reenable case that sets TIF_SINGLESTEP can only happen
on a non-exception kernel entry, i.e. sysenter/syscall instruction.
That will always get to the syscall exit tracing path.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 include/asm-x86/thread_info.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 895339d..fb8d3cd 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -124,7 +124,7 @@ struct thread_info {
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
 	(0x0000FFFF &							\
-	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|	\
+	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|			\
 	 _TIF_SECCOMP|_TIF_SYSCALL_EMU))
 
 /* work to do on any return to user space */
@@ -132,7 +132,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-- 
cgit v1.1


From d4d67150165df8bf1cc05e532f6efca96f907cab Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 02:38:07 -0700
Subject: x86 ptrace: unify syscall tracing

This unifies and cleans up the syscall tracing code on i386 and x86_64.

Using a single function for entry and exit tracing on 32-bit made the
do_syscall_trace() into some terrible spaghetti.  The logic is clear and
simple using separate syscall_trace_enter() and syscall_trace_leave()
functions as on 64-bit.

The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks.  It behaves just like 32-bit.

Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 include/asm-x86/calling.h     |  6 ++++--
 include/asm-x86/ptrace-abi.h  |  6 +++---
 include/asm-x86/thread_info.h | 17 ++++++++++-------
 3 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h
index f13e62e..2bc162e 100644
--- a/include/asm-x86/calling.h
+++ b/include/asm-x86/calling.h
@@ -104,7 +104,7 @@
 	.endif
 	.endm
 
-	.macro LOAD_ARGS offset
+	.macro LOAD_ARGS offset, skiprax=0
 	movq \offset(%rsp),    %r11
 	movq \offset+8(%rsp),  %r10
 	movq \offset+16(%rsp), %r9
@@ -113,7 +113,10 @@
 	movq \offset+48(%rsp), %rdx
 	movq \offset+56(%rsp), %rsi
 	movq \offset+64(%rsp), %rdi
+	.if \skiprax
+	.else
 	movq \offset+72(%rsp), %rax
+	.endif
 	.endm
 
 #define REST_SKIP	6*8
@@ -165,4 +168,3 @@
 	.macro icebp
 	.byte 0xf1
 	.endm
-
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index f224eb3..72e7b9d 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -73,11 +73,11 @@
 
 #ifdef __x86_64__
 # define PTRACE_ARCH_PRCTL	  30
-#else
-# define PTRACE_SYSEMU		  31
-# define PTRACE_SYSEMU_SINGLESTEP 32
 #endif
 
+#define PTRACE_SYSEMU		  31
+#define PTRACE_SYSEMU_SINGLESTEP  32
+
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
 #ifndef __ASSEMBLY__
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index fb8d3cd..b2702a1 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -75,9 +75,7 @@ struct thread_info {
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
 #define TIF_IRET		5	/* force IRET */
-#ifdef CONFIG_X86_32
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
-#endif
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
@@ -100,11 +98,7 @@ struct thread_info {
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_IRET		(1 << TIF_IRET)
-#ifdef CONFIG_X86_32
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
-#else
-#define _TIF_SYSCALL_EMU	0
-#endif
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
@@ -121,11 +115,20 @@ struct thread_info {
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
 
+/* work to do in syscall_trace_enter() */
+#define _TIF_WORK_SYSCALL_ENTRY	\
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+	 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+
+/* work to do in syscall_trace_leave() */
+#define _TIF_WORK_SYSCALL_EXIT	\
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
 	(0x0000FFFF &							\
 	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|			\
-	 _TIF_SECCOMP|_TIF_SYSCALL_EMU))
+	   _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
-- 
cgit v1.1


From 380fdd7585a4c2f41b48925eba85c0654b7b858b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 02:39:29 -0700
Subject: x86 ptrace: user-sets-TF nits

This closes some arcane holes in single-step handling that can arise
only when user programs set TF directly (via popf or sigreturn) and
then use vDSO (syscall/sysenter) system call entry.  In those entry
paths, the clear_TF_reenable case hits and we must check TIF_SINGLESTEP
to be sure our bookkeeping stays correct wrt the user's view of TF.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 include/asm-x86/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index b2702a1..0a8f27d 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -118,7 +118,7 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
-	 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+	 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	\
-- 
cgit v1.1


From 4fdf08b5bf8d449cc9897395895157c6ff8ddc41 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 17 Jul 2008 11:29:24 -0700
Subject: x86: unify and correct the GDT_ENTRY() macro

Merge the GDT_ENTRY() macro between arch/x86/boot/pm.c and
arch/x86/kernel/acpi/sleep.c and put the new one in
<asm-x86/segment.h>.

While we're at it, correct the bitmasks for the limit and flags.  The
new version relies on using ULL constants in order to cause type
promotion rather than explicit casts; this avoids having to include
<linux/types.h> in <asm-x86/segments.h>.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/segment.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index dfc8601..646452e 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,6 +1,15 @@
 #ifndef _ASM_X86_SEGMENT_H_
 #define _ASM_X86_SEGMENT_H_
 
+/* Constructor for a conventional segment GDT (or LDT) entry */
+/* This is a macro so it can be used in initializers */
+#define GDT_ENTRY(flags, base, limit)			\
+	((((base)  & 0xff000000ULL) << (56-24)) |	\
+	 (((flags) & 0x0000f0ffULL) << 40) |		\
+	 (((limit) & 0x000f0000ULL) << (48-16)) |	\
+	 (((base)  & 0x00ffffffULL) << 16) |		\
+	 (((limit) & 0x0000ffffULL)))
+
 /* Simple and small GDT entries for booting only */
 
 #define GDT_ENTRY_BOOT_CS	2
-- 
cgit v1.1


From 593f4a788e5d09e9f00182561437461b0b564de4 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 16 Jul 2008 19:15:30 +0100
Subject: x86: APIC: remove apic_write_around(); use alternatives

Use alternatives to select the workaround for the 11AP Pentium erratum
for the affected steppings on the fly rather than build time.  Remove the
X86_GOOD_APIC configuration option and replace all the calls to
apic_write_around() with plain apic_write(), protecting accesses to the
ESR as appropriate due to the 3AP Pentium erratum.  Remove
apic_read_around() and all its invocations altogether as not needed.
Remove apic_write_atomic() and all its implementing backends.  The use of
ASM_OUTPUT2() is not strictly needed for input constraints, but I have
used it for readability's sake.

I had the feeling no one else was brave enough to do it, so I went ahead
and here it is.  Verified by checking the generated assembly and tested
with both a 32-bit and a 64-bit configuration, also with the 11AP
"feature" forced on and verified with gdb on /proc/kcore to work as
expected (as an 11AP machines are quite hard to get hands on these days).
Some script complained about the use of "volatile", but apic_write() needs
it for the same reason and is effectively a replacement for writel(), so I
have disregarded it.

I am not sure what the policy wrt defconfig files is, they are generated
and there is risk of a conflict resulting from an unrelated change, so I
have left changes to them out.  The option will get removed from them at
the next run.

Some testing with machines other than mine will be needed to avoid some
stupid mistake, but despite its volume, the change is not really that
intrusive, so I am fairly confident that because it works for me, it will
everywhere.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apic.h                   | 24 +++++++-----------------
 include/asm-x86/cpufeature.h             |  1 +
 include/asm-x86/mach-bigsmp/mach_apic.h  |  4 ++--
 include/asm-x86/mach-default/mach_apic.h |  4 ++--
 include/asm-x86/mach-es7000/mach_apic.h  |  4 ++--
 include/asm-x86/mach-summit/mach_apic.h  |  4 ++--
 include/asm-x86/paravirt.h               |  6 ------
 7 files changed, 16 insertions(+), 31 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 4e2c1e5..ea866ba 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -3,6 +3,8 @@
 
 #include <linux/pm.h>
 #include <linux/delay.h>
+
+#include <asm/alternative.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <asm/processor.h>
@@ -48,7 +50,6 @@ extern int disable_apic;
 #include <asm/paravirt.h>
 #else
 #define apic_write native_apic_write
-#define apic_write_atomic native_apic_write_atomic
 #define apic_read native_apic_read
 #define setup_boot_clock setup_boot_APIC_clock
 #define setup_secondary_clock setup_secondary_APIC_clock
@@ -58,12 +59,11 @@ extern int is_vsmp_box(void);
 
 static inline void native_apic_write(unsigned long reg, u32 v)
 {
-	*((volatile u32 *)(APIC_BASE + reg)) = v;
-}
+	volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-static inline void native_apic_write_atomic(unsigned long reg, u32 v)
-{
-	(void)xchg((u32 *)(APIC_BASE + reg), v);
+	alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP,
+		       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
+		       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
 
 static inline u32 native_apic_read(unsigned long reg)
@@ -75,16 +75,6 @@ extern void apic_wait_icr_idle(void);
 extern u32 safe_apic_wait_icr_idle(void);
 extern int get_physical_broadcast(void);
 
-#ifdef CONFIG_X86_GOOD_APIC
-# define FORCE_READ_AROUND_WRITE 0
-# define apic_read_around(x)
-# define apic_write_around(x, y) apic_write((x), (y))
-#else
-# define FORCE_READ_AROUND_WRITE 1
-# define apic_read_around(x) apic_read(x)
-# define apic_write_around(x, y) apic_write_atomic((x), (y))
-#endif
-
 static inline void ack_APIC_irq(void)
 {
 	/*
@@ -95,7 +85,7 @@ static inline void ack_APIC_irq(void)
 	 */
 
 	/* Docs say use 0 for future compatibility */
-	apic_write_around(APIC_EOI, 0);
+	apic_write(APIC_EOI, 0);
 }
 
 extern int lapic_get_maxlvt(void);
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 75ef959..2f5a792 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -79,6 +79,7 @@
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
+#define X86_FEATURE_11AP	(3*32+19)  /* Bad local APIC aka 11AP */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index 017c8c1..c3b9dc6 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -63,9 +63,9 @@ static inline void init_apic_ldr(void)
 	unsigned long val;
 	int cpu = smp_processor_id();
 
-	apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
 	val = calculate_ldr(cpu);
-	apic_write_around(APIC_LDR, val);
+	apic_write(APIC_LDR, val);
 }
 
 static inline void setup_apic_routing(void)
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 0b2cde5..f3226b9 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -46,10 +46,10 @@ static inline void init_apic_ldr(void)
 {
 	unsigned long val;
 
-	apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
 	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
 	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-	apic_write_around(APIC_LDR, val);
+	apic_write(APIC_LDR, val);
 }
 
 static inline int apic_id_registered(void)
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index fbc8ad2..0a3fdf9 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -66,9 +66,9 @@ static inline void init_apic_ldr(void)
 	unsigned long val;
 	int cpu = smp_processor_id();
 
-	apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
 	val = calculate_ldr(cpu);
-	apic_write_around(APIC_LDR, val);
+	apic_write(APIC_LDR, val);
 }
 
 #ifndef CONFIG_X86_GENERICARCH
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index 1f76c2e..75d2c95 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h
@@ -63,10 +63,10 @@ static inline void init_apic_ldr(void)
 	 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
 	BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
 	id = my_cluster | (1UL << count);
-	apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
 	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
 	val |= SET_APIC_LOGICAL_ID(id);
-	apic_write_around(APIC_LDR, val);
+	apic_write(APIC_LDR, val);
 }
 
 static inline int multi_timer_check(int apic, int irq)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index ef5e8ec..719d959 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -205,7 +205,6 @@ struct pv_apic_ops {
 	 * these shouldn't be in this interface.
 	 */
 	void (*apic_write)(unsigned long reg, u32 v);
-	void (*apic_write_atomic)(unsigned long reg, u32 v);
 	u32 (*apic_read)(unsigned long reg);
 	void (*setup_boot_clock)(void);
 	void (*setup_secondary_clock)(void);
@@ -896,11 +895,6 @@ static inline void apic_write(unsigned long reg, u32 v)
 	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
 }
 
-static inline void apic_write_atomic(unsigned long reg, u32 v)
-{
-	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
-}
-
 static inline u32 apic_read(unsigned long reg)
 {
 	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
-- 
cgit v1.1


From 2b7207a6b53bd07be53b4753a3ea5ecb8d180048 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Jul 2008 23:31:17 +0200
Subject: ftrace: copy + paste typo in asm/ftrace.h

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <srostedt@redhat.co>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index c184441..5c68b32 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -1,5 +1,5 @@
 #ifndef _ASM_X86_FTRACE
-#define _ASM_SPARC64_FTRACE
+#define _ASM_X86_FTRACE
 
 #ifdef CONFIG_FTRACE
 #define MCOUNT_ADDR		((long)(mcount))
-- 
cgit v1.1


From 32172561889868c0ea422ea8570f0413963a815f Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 17 Jul 2008 14:22:34 -0700
Subject: x86: suppress sparse returning void warnings

include/asm/paravirt.h:1404:2: warning: returning void-valued expression
include/asm/paravirt.h:1414:2: warning: returning void-valued expression

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index b2aba8f..27c9f22 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1401,7 +1401,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
 
 static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
 {
-	return PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
 }
 
 static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
@@ -1411,7 +1411,7 @@ static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
 
 static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
 {
-	return PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
 }
 
 #endif
-- 
cgit v1.1


From 1f067167a83d1c7f80437fd1d32b55508aaca009 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 15 Jul 2008 00:02:28 -0700
Subject: x86: seperate memtest from init_64.c

it's separate functionality that deserves its own file.

This also prepares 32-bit memtest support.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/e820.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 06633b0..16a31e2 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -90,6 +90,14 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
+#ifdef CONFIG_MEMTEST
+extern void early_memtest(unsigned long start, unsigned long end);
+#else
+static inline void early_memtest(unsigned long start, unsigned long end)
+{
+}
+#endif
+
 extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
-- 
cgit v1.1


From baa1318841d4bc95d783e6c15219b264720002c8 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Mon, 14 Jul 2008 18:44:51 +0100
Subject: x86: APIC: Make apic_verbosity unsigned

As a microoptimisation, make apic_verbosity unsigned.  This will make
apic_printk(APIC_QUIET, ...) expand into just printk(...) with the
surrounding condition and a reference to apic_verbosity removed.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index ea866ba..a3dd4c3 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -37,7 +37,7 @@ extern void generic_apic_probe(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-extern int apic_verbosity;
+extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
 extern int ioapic_force;
-- 
cgit v1.1


From 35b680557f95564f70f21a8d3f5c72e101fab260 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Fri, 18 Jul 2008 01:47:44 +0100
Subject: x86: more apic debugging

[ mingo@elte.hu: picked up this patch from Maciej, lets make apic=debug
                 print out more info - we had a lot of APIC changes ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index a3dd4c3..b96460a 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -12,7 +12,7 @@
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
-#define Dprintk(x...)
+#define Dprintk printk
 
 /*
  * Debugging macros
-- 
cgit v1.1


From 8450e85399031a192ffb34f0f9ac981173db6a31 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sat, 5 Jul 2008 19:53:46 +0200
Subject: x86, cleanup: fix description of __fls(): __fls(0) is undefined

Ricardo M. Correia spotted that the use of __fls() in fls64() did
not seem to make sense. In fact fls64()'s implementation is fine,
but the description of __fls() was wrong. Fix that.

Reported-by: "Ricardo M. Correia" <Ricardo.M.Correia@Sun.COM>
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 96b1829..cfb2b64 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -356,7 +356,7 @@ static inline unsigned long ffz(unsigned long word)
  * __fls: find last set bit in word
  * @word: The word to search
  *
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Undefined if no set bit exists, so code should check against 0 first.
  */
 static inline unsigned long __fls(unsigned long word)
 {
-- 
cgit v1.1


From 7019cc2dd6fafcdc6b104005482dc910dcdbb797 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Wed, 9 Jul 2008 15:27:19 -0500
Subject: x86 BIOS interface for RTC on SGI UV

Real-time code needs to know the number of cycles per second
on SGI UV.  The information is provided via a run time BIOS
call.  This patch provides the linux side of that interface.
This is the first of several run time BIOS calls to be defined
in uv/bios.h and bios_uv.c.

Note that BIOS_CALL() is just a stub for now.  The bios
side is being worked on.

Signed-off-by: Russ Anderson <rja@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uv/bios.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 include/asm-x86/uv/bios.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
new file mode 100644
index 0000000..aa73362
--- /dev/null
+++ b/include/asm-x86/uv/bios.h
@@ -0,0 +1,68 @@
+#ifndef _ASM_X86_BIOS_H
+#define _ASM_X86_BIOS_H
+
+/*
+ * BIOS layer definitions.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/rtc.h>
+
+#define BIOS_FREQ_BASE			0x01000001
+
+enum {
+	BIOS_FREQ_BASE_PLATFORM = 0,
+	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+};
+
+# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)		\
+	do {								\
+		/* XXX - the real call goes here */			\
+		result.status = BIOS_STATUS_UNIMPLEMENTED;		\
+		isrv.v0 = 0;						\
+		isrv.v1 = 0;						\
+	} while (0)
+
+enum {
+	BIOS_STATUS_SUCCESS		=  0,
+	BIOS_STATUS_UNIMPLEMENTED	= -1,
+	BIOS_STATUS_EINVAL		= -2,
+	BIOS_STATUS_ERROR		= -3
+};
+
+struct uv_bios_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	s64 status;
+	u64 v0;
+	u64 v1;
+	u64 v2;
+};
+
+extern long
+x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
+		   unsigned long *drift_info);
+extern const char *x86_bios_strerror(long status);
+
+#endif /* _ASM_X86_BIOS_H */
-- 
cgit v1.1


From 723edb5060855ef36ddeca51a070784b0e0d16df Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Mon, 14 Jul 2008 17:40:23 -0300
Subject: Fix typos from signal_32/64.h merge

Fallout from commit 33185c504f8e521b398536b5a8d415779a24593c ("x86:
merge signal_32/64.h")

Thanks to Dick Streefland who provided an useful testcase on
http://lkml.org/lkml/2008/3/17/205 (only applicable to 2.6.24.x), that
helped a lot as a deterministic way to bisect an issue that leaded to
this fix.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Luiz Fernando N. Capitulino <lcapitulino@mandriva.com.br>
Cc: Roland McGrath <roland@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/signal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index f15186d..6dac493 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h
@@ -181,12 +181,12 @@ typedef struct sigaltstack {
 #ifdef __KERNEL__
 #include <asm/sigcontext.h>
 
-#ifdef __386__
+#ifdef __i386__
 
 #define __HAVE_ARCH_SIG_BITOPS
 
 #define sigaddset(set,sig)		    \
-	(__builtin_constantp(sig)	    \
+	(__builtin_constant_p(sig)	    \
 	 ? __const_sigaddset((set), (sig))  \
 	 : __gen_sigaddset((set), (sig)))
 
-- 
cgit v1.1


From 9781f39fd209cd93ab98b669814191acc67f32fd Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 10 Jul 2008 17:13:19 +0200
Subject: x86: consolidate the definition of the force_mwait variable

The force_mwait variable iss defined either in
arch/x86/kernel/cpu/amd.c or in arch/x86/kernel/setup_64.c, but it is
only initialized and used in arch/x86/kernel/process.c. This patch
moves the declaration to arch/x86/kernel/process.c.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: michael@free-electrons.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 55402d2..15cb82a 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -722,8 +722,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
 
-extern int			force_mwait;
-
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long		boot_option_idle_override;
-- 
cgit v1.1


From 6ac8d51f01d345af5ea4209004a9ea29b2f20891 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 15 Jul 2008 21:09:13 +0530
Subject: x86: introducing asm-x86/traps.h

Declaring x86 traps under one hood.
Declaring x86 do_traps before defining them.

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/traps.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/asm-x86/traps.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
new file mode 100644
index 0000000..a4b65a7
--- /dev/null
+++ b/include/asm-x86/traps.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_X86_TRAPS_H
+#define _ASM_X86_TRAPS_H
+
+/* Common in X86_32 and X86_64 */
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void machine_check(void);
+#endif /* CONFIG_X86_MCE */
+
+void do_divide_error(struct pt_regs *, long);
+void do_overflow(struct pt_regs *, long);
+void do_bounds(struct pt_regs *, long);
+void do_coprocessor_segment_overrun(struct pt_regs *, long);
+void do_invalid_TSS(struct pt_regs *, long);
+void do_segment_not_present(struct pt_regs *, long);
+void do_stack_segment(struct pt_regs *, long);
+void do_alignment_check(struct pt_regs *, long);
+void do_invalid_op(struct pt_regs *, long);
+void do_general_protection(struct pt_regs *, long);
+void do_nmi(struct pt_regs *, long);
+
+extern int panic_on_unrecovered_nmi;
+extern int kstack_depth_to_print;
+
+#ifdef CONFIG_X86_32
+
+void do_iret_error(struct pt_regs *, long);
+void do_int3(struct pt_regs *, long);
+void do_debug(struct pt_regs *, long);
+void math_error(void __user *);
+void do_coprocessor_error(struct pt_regs *, long);
+void do_simd_coprocessor_error(struct pt_regs *, long);
+void do_spurious_interrupt_bug(struct pt_regs *, long);
+unsigned long patch_espfix_desc(unsigned long, unsigned long);
+asmlinkage void math_emulate(long);
+
+#else /* CONFIG_X86_32 */
+
+asmlinkage void double_fault(void);
+
+asmlinkage void do_int3(struct pt_regs *, long);
+asmlinkage void do_stack_segment(struct pt_regs *, long);
+asmlinkage void do_debug(struct pt_regs *, unsigned long);
+asmlinkage void do_coprocessor_error(struct pt_regs *);
+asmlinkage void do_simd_coprocessor_error(struct pt_regs *);
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *);
+
+#endif /* CONFIG_X86_32 */
+#endif /* _ASM_X86_TRAPS_H */
-- 
cgit v1.1


From 08e1a13e7d14ba5d6a22bf4b8c6e11128d3bcdfe Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:44:16 +0100
Subject: x86: reduce forbid_dac's visibility

It's not used anywhere outside its declaring file.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/dma-mapping.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index a1a4dc7..c2ddd3d 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -14,7 +14,6 @@ extern dma_addr_t bad_dma_address;
 extern int iommu_merge;
 extern struct device fallback_dev;
 extern int panic_on_overflow;
-extern int forbid_dac;
 extern int force_iommu;
 
 struct dma_mapping_ops {
-- 
cgit v1.1


From 08ad8afaa0f7343e9c64eec5dbbb178e390e03a2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:45:20 +0100
Subject: x86: reduce force_mwait visibility

It's not used anywhere outside its single referencing file.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/processor.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 55402d2..15cb82a 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -722,8 +722,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
 
-extern int			force_mwait;
-
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long		boot_option_idle_override;
-- 
cgit v1.1


From 48fe4a76e27dc64b47f3d2a2af2b6bbf2b2f5b6b Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:29:00 +0100
Subject: x86: i386: reduce boot fixmap space

As 256 entries are needed, aligning to a 256-entry boundary is
sufficient and still guarantees the single pte table requirement.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/fixmap_32.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index aae2f05..f1ac2b2 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -90,13 +90,13 @@ enum fixed_addresses {
 	 * 256 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
 	 *
-	 * We round it up to the next 512 pages boundary so that we
+	 * We round it up to the next 256 pages boundary so that we
 	 * can have a single pgd entry and a single pte table:
 	 */
 #define NR_FIX_BTMAPS		64
 #define FIX_BTMAPS_NESTING	4
-	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
-			(__end_of_permanent_fixed_addresses & 511),
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
+			(__end_of_permanent_fixed_addresses & 255),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	FIX_WP_TEST,
 #ifdef CONFIG_ACPI
-- 
cgit v1.1


From 3c9cb6de1e5ad37d1558fdb0d9d2bed5a7bac0d9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 02:07:25 -0700
Subject: x86: introduce x86_quirks

introduce x86_quirks array of boot-time quirk methods.

No change in functionality intended.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/setup.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 90ab222..66191d0 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -19,13 +19,17 @@ static inline int is_visws_box(void) { return 0; }
 /*
  * Any setup quirks to be performed?
  */
-extern int (*arch_time_init_quirk)(void);
-extern int (*arch_pre_intr_init_quirk)(void);
-extern int (*arch_intr_init_quirk)(void);
-extern int (*arch_trap_init_quirk)(void);
-extern char * (*arch_memory_setup_quirk)(void);
-extern int (*mach_get_smp_config_quirk)(unsigned int early);
-extern int (*mach_find_smp_config_quirk)(unsigned int reserve);
+struct x86_quirks {
+	int (*arch_time_init)(void);
+	int (*arch_pre_intr_init)(void);
+	int (*arch_intr_init)(void);
+	int (*arch_trap_init)(void);
+	char * (*arch_memory_setup)(void);
+	int (*mach_get_smp_config)(unsigned int early);
+	int (*mach_find_smp_config)(unsigned int reserve);
+};
+
+extern struct x86_quirks *x86_quirks;
 
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
-- 
cgit v1.1


From 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 18:01:16 -0700
Subject: x86: extend and use x86_quirks to clean up NUMAQ code

add these new x86_quirks methods:

	int *mpc_record;
	int (*mpc_apic_id)(struct mpc_config_processor *m);
	void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
	void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                    unsigned short oemsize);

... and move NUMAQ related mps table handling to numaq_32.c.

also move the call to smp_read_mpc_oem() to smp_read_mpc() directly.

Should not change functionality, albeit it would be nice to get it
tested on real NUMAQ as well ...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-generic/mach_mpspec.h |  2 ++
 include/asm-x86/setup.h                    | 10 ++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h
index 9ef0b94..c83c120 100644
--- a/include/asm-x86/mach-generic/mach_mpspec.h
+++ b/include/asm-x86/mach-generic/mach_mpspec.h
@@ -7,4 +7,6 @@
 /* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
 #define MAX_MP_BUSSES 260
 
+extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+				char *productid);
 #endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 66191d0..2585075 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -19,6 +19,9 @@ static inline int is_visws_box(void) { return 0; }
 /*
  * Any setup quirks to be performed?
  */
+struct mpc_config_processor;
+struct mpc_config_bus;
+struct mp_config_oemtable;
 struct x86_quirks {
 	int (*arch_time_init)(void);
 	int (*arch_pre_intr_init)(void);
@@ -27,6 +30,13 @@ struct x86_quirks {
 	char * (*arch_memory_setup)(void);
 	int (*mach_get_smp_config)(unsigned int early);
 	int (*mach_find_smp_config)(unsigned int reserve);
+
+	int *mpc_record;
+	int (*mpc_apic_id)(struct mpc_config_processor *m);
+	void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
+	void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
+	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
+                                    unsigned short oemsize);
 };
 
 extern struct x86_quirks *x86_quirks;
-- 
cgit v1.1


From 63b5d7af2556a7de6bf72c5dd0b85a32fb4c3767 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 18:02:26 -0700
Subject: x86: add ->pre_time_init to x86_quirks

so NUMAQ can use that to call numaq_pre_time_init()

This allows us to remove a NUMAQ special from arch/x86/kernel/setup.c.

(and paves the way to remove the NUMAQ subarch)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/arch_hooks.h | 1 +
 include/asm-x86/setup.h      | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 768aee8..8411750 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -21,6 +21,7 @@ extern void intr_init_hook(void);
 extern void pre_intr_init_hook(void);
 extern void pre_setup_arch_hook(void);
 extern void trap_init_hook(void);
+extern void pre_time_init_hook(void);
 extern void time_init_hook(void);
 extern void mca_nmi_hook(void);
 
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 2585075..f003cea 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -23,6 +23,7 @@ struct mpc_config_processor;
 struct mpc_config_bus;
 struct mp_config_oemtable;
 struct x86_quirks {
+	int (*arch_pre_time_init)(void);
 	int (*arch_time_init)(void);
 	int (*arch_pre_intr_init)(void);
 	int (*arch_intr_init)(void);
-- 
cgit v1.1


From c4dc59ae7af8c1c116d2cb4dffba337f032a6bee Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 20 Jul 2008 09:31:24 +0200
Subject: x86, VisWS: turn into generic arch, eliminate leftover files

remove unused leftovers.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/entry_arch.h    | 5 -----
 include/asm-x86/mach-visws/mach_apic.h     | 1 -
 include/asm-x86/mach-visws/mach_apicdef.h  | 1 -
 include/asm-x86/mach-visws/setup_arch.h    | 1 -
 include/asm-x86/mach-visws/smpboot_hooks.h | 1 -
 5 files changed, 9 deletions(-)
 delete mode 100644 include/asm-x86/mach-visws/entry_arch.h
 delete mode 100644 include/asm-x86/mach-visws/mach_apic.h
 delete mode 100644 include/asm-x86/mach-visws/mach_apicdef.h
 delete mode 100644 include/asm-x86/mach-visws/setup_arch.h
 delete mode 100644 include/asm-x86/mach-visws/smpboot_hooks.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/entry_arch.h b/include/asm-x86/mach-visws/entry_arch.h
deleted file mode 100644
index 86be554..0000000
--- a/include/asm-x86/mach-visws/entry_arch.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/*
- * VISWS uses the standard Linux entry points:
- */
-
-#include "../mach-default/entry_arch.h"
diff --git a/include/asm-x86/mach-visws/mach_apic.h b/include/asm-x86/mach-visws/mach_apic.h
deleted file mode 100644
index 6943e7a..0000000
--- a/include/asm-x86/mach-visws/mach_apic.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../mach-default/mach_apic.h"
diff --git a/include/asm-x86/mach-visws/mach_apicdef.h b/include/asm-x86/mach-visws/mach_apicdef.h
deleted file mode 100644
index 42711d1..0000000
--- a/include/asm-x86/mach-visws/mach_apicdef.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../mach-default/mach_apicdef.h"
diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h
deleted file mode 100644
index fa4766c..0000000
--- a/include/asm-x86/mach-visws/setup_arch.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../mach-default/setup_arch.h"
diff --git a/include/asm-x86/mach-visws/smpboot_hooks.h b/include/asm-x86/mach-visws/smpboot_hooks.h
deleted file mode 100644
index e4433ca..0000000
--- a/include/asm-x86/mach-visws/smpboot_hooks.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../mach-default/smpboot_hooks.h"
-- 
cgit v1.1


From 94a1e869c7b96a9d30e260084866383a145fd8ae Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 18 Jul 2008 18:11:31 -0700
Subject: NR_CPUS: Replace per_cpu(..., smp_processor_id()) with __get_cpu_var

  * Slight optimization when getting one's own cpu_info percpu data.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 55402d2..f92315e 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -134,7 +134,7 @@ extern __u32			cleared_cpu_caps[NCAPINTS];
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
 #define cpu_data(cpu)		per_cpu(cpu_info, cpu)
-#define current_cpu_data	cpu_data(smp_processor_id())
+#define current_cpu_data	__get_cpu_var(cpu_info)
 #else
 #define cpu_data(cpu)		boot_cpu_data
 #define current_cpu_data	boot_cpu_data
-- 
cgit v1.1


From 31656519e132f6612584815f128c83976a9aaaef Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 18 Jul 2008 18:01:23 +0200
Subject: sched, x86: clean up hrtick implementation

random uvesafb failures were reported against Gentoo:

  http://bugs.gentoo.org/show_bug.cgi?id=222799

and Mihai Moldovan bisected it back to:

> 8f4d37ec073c17e2d4aa8851df5837d798606d6f is first bad commit
> commit 8f4d37ec073c17e2d4aa8851df5837d798606d6f
> Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date:   Fri Jan 25 21:08:29 2008 +0100
>
>    sched: high-res preemption tick

Linus suspected it to be hrtick + vm86 interaction and observed:

> Btw, Peter, Ingo: I think that commit is doing bad things. They aren't
> _incorrect_ per se, but they are definitely bad.
>
> Why?
>
> Using random _TIF_WORK_MASK flags is really impolite for doing
> "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S
> special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of
> vm86 mode unnecessarily.
>
> See the "work_notifysig_v86" label, and how it does that
> "save_v86_state()" thing etc etc.

Right, I never liked having to fiddle with those TIF flags. Initially I
needed it because the hrtimer base lock could not nest in the rq lock.
That however is fixed these days.

Currently the only reason left to fiddle with the TIF flags is remote
wakeups. We cannot program a remote cpu's hrtimer. I've been thinking
about using the new and improved IPI function call stuff to implement
hrtimer_start_on().

However that does require that smp_call_function_single(.wait=0) works
from interrupt context - /me looks at the latest series from Jens - Yes
that does seem to be supported, good.

Here's a stab at cleaning this stuff up ...

Mihai reported test success as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Mihai Moldovan <ionic@ionic.de>
Cc: Michal Januszewski <spock@gentoo.org>
Cc: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/thread_info.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 895339d..d701263 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -81,7 +81,6 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
-#define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -108,7 +107,6 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
-#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -132,7 +130,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
+	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-- 
cgit v1.1


From d2ebb4103ff349af6dac14955bf93e57487a6694 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:04 +0200
Subject: KVM: SVM: add tracing support for TDP page faults

To distinguish between real page faults and nested page faults they should be
traced as different events. This is implemented by this patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 80eefef..6f18408 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -228,5 +228,6 @@ struct kvm_pit_state {
 #define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
 #define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
 #define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
+#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
 
 #endif
-- 
cgit v1.1


From 1b7fcd3263e5f12dba43d27b64e1578bec070c28 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 15 May 2008 13:51:35 +0300
Subject: KVM: MMU: Fix false flooding when a pte points to page table

The KVM MMU tries to detect when a speculative pte update is not actually
used by demand fault, by checking the accessed bit of the shadow pte.  If
the shadow pte has not been accessed, we deem that page table flooded and
remove the shadow page table, allowing further pte updates to proceed
without emulation.

However, if the pte itself points at a page table and only used for write
operations, the accessed bit will never be set since all access will happen
through the emulator.

This is exactly what happens with kscand on old (2.4.x) HIGHMEM kernels.
The kernel points a kmap_atomic() pte at a page table, and then
proceeds with read-modify-write operations to look at the dirty and accessed
bits.  We get a false flood trigger on the kmap ptes, which results in the
mmu spending all its time setting up and tearing down shadows.

Fix by setting the shadow accessed bit on emulated accesses.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 844f2a8..c2d066e 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -243,6 +243,7 @@ struct kvm_vcpu_arch {
 	gfn_t last_pt_write_gfn;
 	int   last_pt_write_count;
 	u64  *last_pte_updated;
+	gfn_t last_pte_gfn;
 
 	struct {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
-- 
cgit v1.1


From 4ecac3fd6dc2629ad76a658a486f081c44aef10e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 13 May 2008 13:23:38 +0300
Subject: KVM: Handle virtualization instruction #UD faults during reboot

KVM turns off hardware virtualization extensions during reboot, in order
to disassociate the memory used by the virtualization extensions from the
processor, and in order to have the system in a consistent state.
Unfortunately virtual machines may still be running while this goes on,
and once virtualization extensions are turned off, any virtulization
instruction will #UD on execution.

Fix by adding an exception handler to virtualization instructions; if we get
an exception during reboot, we simply spin waiting for the reset to complete.
If it's a true exception, BUG() so we can have our stack trace.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c2d066e..0df9d5f 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -692,4 +692,28 @@ enum {
 	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
 						vcpu, 0, 0, 0, 0, 0, 0)
 
+#ifdef CONFIG_64BIT
+#define KVM_EX_ENTRY ".quad"
+#else
+#define KVM_EX_ENTRY ".long"
+#endif
+
+/*
+ * Hardware virtualization extension instructions may fault if a
+ * reboot turns off virtualization while processes are running.
+ * Trap the fault and ignore the instruction if that happens.
+ */
+asmlinkage void kvm_handle_fault_on_reboot(void);
+
+#define __kvm_handle_fault_on_reboot(insn) \
+	"666: " insn "\n\t" \
+	".pushsection .text.fixup, \"ax\" \n" \
+	"667: \n\t" \
+	"push $666b \n\t" \
+	"jmp kvm_handle_fault_on_reboot \n\t" \
+	".popsection \n\t" \
+	".pushsection __ex_table, \"a\" \n\t" \
+	KVM_EX_ENTRY " 666b, 667b \n\t" \
+	".popsection"
+
 #endif
-- 
cgit v1.1


From 7cc8883074b040aa8c1ebd3a17463b0ea3a9ef16 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 13 May 2008 16:29:20 +0300
Subject: KVM: Remove decache_vcpus_on_cpu() and related callbacks

Obsoleted by the vmx-specific per-cpu list.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 0df9d5f..4bcdc7d 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -380,7 +380,6 @@ struct kvm_x86_ops {
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
-	void (*vcpu_decache)(struct kvm_vcpu *vcpu);
 
 	int (*set_guest_debug)(struct kvm_vcpu *vcpu,
 			       struct kvm_debug_guest *dbg);
-- 
cgit v1.1


From 3419ffc8e45a5344abc87684cbca6cdc5c9c8a01 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Thu, 15 May 2008 09:52:48 +0800
Subject: KVM: IOAPIC/LAPIC: Enable NMI support

[avi: fix ia64 build breakage]

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 4bcdc7d..b666219 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -288,6 +288,8 @@ struct kvm_vcpu_arch {
 	unsigned int hv_clock_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
+
+	bool nmi_pending;
 };
 
 struct kvm_mem_alias {
@@ -515,6 +517,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 
+void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+
 void fx_init(struct kvm_vcpu *vcpu);
 
 int emulator_read_std(unsigned long addr,
-- 
cgit v1.1


From f08864b42a45581a64558aa5b6b673c77b97ee5d Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Thu, 15 May 2008 18:23:25 +0800
Subject: KVM: VMX: Enable NMI with in-kernel irqchip

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index b666219..bacb1e2 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -347,6 +347,7 @@ struct kvm_vcpu_stat {
 	u32 mmio_exits;
 	u32 signal_exits;
 	u32 irq_window_exits;
+	u32 nmi_window_exits;
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
-- 
cgit v1.1


From 81609e3e26508840a1b51414376f2541dd191483 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 27 May 2008 16:26:01 +0300
Subject: KVM: Order segment register constants in the same way as cpu operand
 encoding

This can be used to simplify the x86 instruction decoder.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index bacb1e2..075598b 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -109,12 +109,12 @@ enum {
 };
 
 enum {
+	VCPU_SREG_ES,
 	VCPU_SREG_CS,
+	VCPU_SREG_SS,
 	VCPU_SREG_DS,
-	VCPU_SREG_ES,
 	VCPU_SREG_FS,
 	VCPU_SREG_GS,
-	VCPU_SREG_SS,
 	VCPU_SREG_TR,
 	VCPU_SREG_LDTR,
 };
-- 
cgit v1.1


From 9ba075a664dff836fd6fb93f90fcc827f7683d91 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 26 May 2008 20:06:35 +0300
Subject: KVM: MTRR support

Add emulation for the memory type range registers, needed by VMware esx 3.5,
and by pci device assignment.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 075598b..fc72bad 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -79,6 +79,7 @@
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_VAR_MTRR 8
 
 extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
@@ -290,6 +291,8 @@ struct kvm_vcpu_arch {
 	struct page *time_page;
 
 	bool nmi_pending;
+
+	u64 mtrr[0x100];
 };
 
 struct kvm_mem_alias {
-- 
cgit v1.1


From 3e6e0aab1ba1e8b354ce01f5659336f9aee69437 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 10:18:46 +0200
Subject: KVM: Prefixes segment functions that will be exported with "kvm_"

Prefixes functions that will be exported with kvm_.
We also prefixed set_segment() even if it still static
to be coherent.

signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index fc72bad..cd6a4bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -503,6 +503,10 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
 		    unsigned long value);
 
+void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+				int type_bits, int seg);
+
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
 
 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-- 
cgit v1.1


From 542472b53ea9e0add0ba23976018210191d84754 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Fri, 30 May 2008 16:05:55 +0200
Subject: KVM: Add coalesced MMIO support (x86 part)

This patch enables coalesced MMIO for x86 architecture.
It defines KVM_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO.
It enables the compilation of coalesced_mmio.c.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index cd6a4bb..c64d124 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
-- 
cgit v1.1


From f5b4edcd52e78556800f90d08bfc9126416ac82f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 22:09:11 -0700
Subject: KVM: x86 emulator: simplify rip relative decoding

rip relative decoding is relative to the instruction pointer of the next
instruction; by moving address adjustment until after decoding is complete,
we remove the need to determine the instruction size.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_x86_emulate.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
index b877bbd..9fda4b3 100644
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h
@@ -134,6 +134,7 @@ struct decode_cache {
 	u8 modrm_reg;
 	u8 modrm_rm;
 	u8 use_modrm_ea;
+	bool rip_relative;
 	unsigned long modrm_ea;
 	void *modrm_ptr;
 	unsigned long modrm_val;
-- 
cgit v1.1


From 7a5b56dfd3a682a51fc84682290d5147872a8e99 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 22 Jun 2008 16:22:51 +0300
Subject: KVM: x86 emulator: lazily evaluate segment registers

Instead of prefetching all segment bases before emulation, read them at the
last moment.  Since most of them are unneeded, we save some cycles on
Intel machines where this is a bit expensive.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_x86_emulate.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
index 9fda4b3..4e8c1e4 100644
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h
@@ -124,7 +124,8 @@ struct decode_cache {
 	u8 rex_prefix;
 	struct operand src;
 	struct operand dst;
-	unsigned long *override_base;
+	bool has_seg_override;
+	u8 seg_override;
 	unsigned int d;
 	unsigned long regs[NR_VCPU_REGS];
 	unsigned long eip;
@@ -151,12 +152,7 @@ struct x86_emulate_ctxt {
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
 
-	unsigned long cs_base;
-	unsigned long ds_base;
-	unsigned long es_base;
-	unsigned long ss_base;
-	unsigned long gs_base;
-	unsigned long fs_base;
+	u32 cs_base;
 
 	/* decode cache */
 
-- 
cgit v1.1


From d6e88aec07aa8f6c7e4024f5734ec659fd7c5a40 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 10 Jul 2008 16:53:33 +0300
Subject: KVM: Prefix some x86 low level function with kvm_, to avoid namespace
 issues

Fixes compilation with CONFIG_VMI enabled.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c64d124..f995783 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -567,55 +567,53 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 	return (struct kvm_mmu_page *)page_private(page);
 }
 
-static inline u16 read_fs(void)
+static inline u16 kvm_read_fs(void)
 {
 	u16 seg;
 	asm("mov %%fs, %0" : "=g"(seg));
 	return seg;
 }
 
-static inline u16 read_gs(void)
+static inline u16 kvm_read_gs(void)
 {
 	u16 seg;
 	asm("mov %%gs, %0" : "=g"(seg));
 	return seg;
 }
 
-static inline u16 read_ldt(void)
+static inline u16 kvm_read_ldt(void)
 {
 	u16 ldt;
 	asm("sldt %0" : "=g"(ldt));
 	return ldt;
 }
 
-static inline void load_fs(u16 sel)
+static inline void kvm_load_fs(u16 sel)
 {
 	asm("mov %0, %%fs" : : "rm"(sel));
 }
 
-static inline void load_gs(u16 sel)
+static inline void kvm_load_gs(u16 sel)
 {
 	asm("mov %0, %%gs" : : "rm"(sel));
 }
 
-#ifndef load_ldt
-static inline void load_ldt(u16 sel)
+static inline void kvm_load_ldt(u16 sel)
 {
 	asm("lldt %0" : : "rm"(sel));
 }
-#endif
 
-static inline void get_idt(struct descriptor_table *table)
+static inline void kvm_get_idt(struct descriptor_table *table)
 {
 	asm("sidt %0" : "=m"(*table));
 }
 
-static inline void get_gdt(struct descriptor_table *table)
+static inline void kvm_get_gdt(struct descriptor_table *table)
 {
 	asm("sgdt %0" : "=m"(*table));
 }
 
-static inline unsigned long read_tr_base(void)
+static inline unsigned long kvm_read_tr_base(void)
 {
 	u16 tr;
 	asm("str %0" : "=g"(tr));
@@ -632,17 +630,17 @@ static inline unsigned long read_msr(unsigned long msr)
 }
 #endif
 
-static inline void fx_save(struct i387_fxsave_struct *image)
+static inline void kvm_fx_save(struct i387_fxsave_struct *image)
 {
 	asm("fxsave (%0)":: "r" (image));
 }
 
-static inline void fx_restore(struct i387_fxsave_struct *image)
+static inline void kvm_fx_restore(struct i387_fxsave_struct *image)
 {
 	asm("fxrstor (%0)":: "r" (image));
 }
 
-static inline void fx_finit(void)
+static inline void kvm_fx_finit(void)
 {
 	asm("finit");
 }
-- 
cgit v1.1


From 33a37eb411d193851c334060780ab834ba534292 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Jul 2008 10:57:15 +0200
Subject: KVM: fix exception entry / build bug, on 64-bit

-tip testing found this build bug:

 arch/x86/kvm/built-in.o:(.text.fixup+0x1): relocation truncated to fit: R_X86_64_32 against `.text'
 arch/x86/kvm/built-in.o:(.text.fixup+0xb): relocation truncated to fit: R_X86_64_32 against `.text'
 arch/x86/kvm/built-in.o:(.text.fixup+0x15): relocation truncated to fit: R_X86_64_32 against `.text'
 arch/x86/kvm/built-in.o:(.text.fixup+0x1f): relocation truncated to fit: R_X86_64_32 against `.text'
 arch/x86/kvm/built-in.o:(.text.fixup+0x29): relocation truncated to fit: R_X86_64_32 against `.text'

Introduced by commit 4ecac3fd. The problem is that 'push' will default
to 32-bit, which is not wide enough as a fixup address. (and which would
crash on any real fixup event even if it was wide enough)

Introduce KVM_EX_PUSH to get the proper address push width on 64-bit too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/kvm_host.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index f995783..fdde0be 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -703,9 +703,11 @@ enum {
 						vcpu, 0, 0, 0, 0, 0, 0)
 
 #ifdef CONFIG_64BIT
-#define KVM_EX_ENTRY ".quad"
+# define KVM_EX_ENTRY ".quad"
+# define KVM_EX_PUSH "pushq"
 #else
-#define KVM_EX_ENTRY ".long"
+# define KVM_EX_ENTRY ".long"
+# define KVM_EX_PUSH "pushl"
 #endif
 
 /*
@@ -719,7 +721,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 	"666: " insn "\n\t" \
 	".pushsection .text.fixup, \"ax\" \n" \
 	"667: \n\t" \
-	"push $666b \n\t" \
+	KVM_EX_PUSH " $666b \n\t" \
 	"jmp kvm_handle_fault_on_reboot \n\t" \
 	".popsection \n\t" \
 	".pushsection __ex_table, \"a\" \n\t" \
-- 
cgit v1.1


From cfc1b9a6a683c835a20d5b565ade55baf639f72f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 21 Jul 2008 21:35:38 +0200
Subject: x86: convert Dprintk to pr_debug

There are a couple of places where (P)Dprintk is used which is an old
compile time enabled printk wrapper. Convert it to the generic
pr_debug().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/apic.h                       | 2 --
 include/asm-x86/mach-default/smpboot_hooks.h | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index b96460a..133c998 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -12,8 +12,6 @@
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
-#define Dprintk printk
-
 /*
  * Debugging macros
  */
diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h
index 56d001b..dbab36d 100644
--- a/include/asm-x86/mach-default/smpboot_hooks.h
+++ b/include/asm-x86/mach-default/smpboot_hooks.h
@@ -12,11 +12,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	CMOS_WRITE(0xa, 0xf);
 	local_flush_tlb();
-	Dprintk("1.\n");
+	pr_debug("1.\n");
 	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
-	Dprintk("2.\n");
+	pr_debug("2.\n");
 	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
-	Dprintk("3.\n");
+	pr_debug("3.\n");
 }
 
 static inline void smpboot_restore_warm_reset_vector(void)
-- 
cgit v1.1


From c2e3277f875b83e5adc34e96989d6d87ec5f80f7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 22 Jul 2008 15:40:46 +1000
Subject: x86: fix pte_flags() to only return flags, fix lguest (updated)

(Jeremy said:
	rusty: use PTE_MASK
	rusty: use PTE_MASK
	rusty: use PTE_MASK
 When I asked:
	jsgf: does that include the NX flag?
 He responded eloquently:
	rusty: use PTE_MASK
	rusty: use PTE_MASK
	yes, it's the official constant of masking flags out of ptes
)

Change a15af1c9ea2750a9ff01e51615c45950bad8221b 'x86/paravirt: add
pte_flags to just get pte flags' removed lguest's private pte_flags()
in favor of a generic one.

Unfortunately, the generic one doesn't filter out the non-flags bits:
this results in lguest creating corrupt shadow page tables and blowing
up host memory.

Since noone is supposed to use the pfn part of pte_flags(), it seems
safest to always do the filtering.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-and-morning-tea-spilled-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page.h     | 7 ++++++-
 include/asm-x86/paravirt.h | 3 +++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 28d7b45..05d9bea 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -144,6 +144,11 @@ static inline pteval_t native_pte_val(pte_t pte)
 	return pte.pte;
 }
 
+static inline pteval_t native_pte_flags(pte_t pte)
+{
+	return native_pte_val(pte) & ~PTE_MASK;
+}
+
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
@@ -165,7 +170,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 #endif
 
 #define pte_val(x)	native_pte_val(x)
-#define pte_flags(x)	native_pte_val(x)
+#define pte_flags(x)	native_pte_flags(x)
 #define __pte(x)	native_make_pte(x)
 
 #endif	/* CONFIG_PARAVIRT */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index aec9767..5ca4639 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1088,6 +1088,9 @@ static inline pteval_t pte_flags(pte_t pte)
 		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
 				 pte.pte);
 
+#ifdef CONFIG_PARAVIRT_DEBUG
+	BUG_ON(ret & PTE_MASK);
+#endif
 	return ret;
 }
 
-- 
cgit v1.1


From 59438c9fc4f7a92c808c9049bc6b396f98bf954c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 21 Jul 2008 22:59:42 -0700
Subject: x86: rename PTE_MASK to PTE_PFN_MASK

Rusty, in his peevish way, complained that macros defining constants
should have a name which somewhat accurately reflects the actual
purpose of the constant.

Aside from the fact that PTE_MASK gives no clue as to what's actually
being masked, and is misleadingly similar to the functionally entirely
different PMD_MASK, PUD_MASK and PGD_MASK, I don't really see what the
problem is.

But if this patch silences the incessent noise, then it will have
achieved its goal (TODO: write test-case).

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page.h           |  6 +++---
 include/asm-x86/paravirt.h       |  2 +-
 include/asm-x86/pgtable-3level.h |  8 ++++----
 include/asm-x86/pgtable.h        |  4 ++--
 include/asm-x86/pgtable_32.h     |  4 ++--
 include/asm-x86/pgtable_64.h     | 10 +++++-----
 include/asm-x86/xen/page.h       |  2 +-
 7 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 05d9bea..e99fb9f 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -18,8 +18,8 @@
    (ie, 32-bit PAE). */
 #define PHYSICAL_PAGE_MASK	(((signed long)PAGE_MASK) & __PHYSICAL_MASK)
 
-/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
-#define PTE_MASK		((pteval_t)PHYSICAL_PAGE_MASK)
+/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+#define PTE_PFN_MASK		((pteval_t)PHYSICAL_PAGE_MASK)
 
 #define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
@@ -146,7 +146,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 
 static inline pteval_t native_pte_flags(pte_t pte)
 {
-	return native_pte_val(pte) & ~PTE_MASK;
+	return native_pte_val(pte) & ~PTE_PFN_MASK;
 }
 
 #define pgprot_val(x)	((x).pgprot)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 5ca4639..fbbde93 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1089,7 +1089,7 @@ static inline pteval_t pte_flags(pte_t pte)
 				 pte.pte);
 
 #ifdef CONFIG_PARAVIRT_DEBUG
-	BUG_ON(ret & PTE_MASK);
+	BUG_ON(ret & PTE_PFN_MASK);
 #endif
 	return ret;
 }
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index c93dbb6..105057f 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -25,7 +25,7 @@ static inline int pud_none(pud_t pud)
 
 static inline int pud_bad(pud_t pud)
 {
-	return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
+	return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
 }
 
 static inline int pud_present(pud_t pud)
@@ -120,9 +120,9 @@ static inline void pud_clear(pud_t *pudp)
 		write_cr3(pgd);
 }
 
-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
+#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
 
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
 
 
 /* Find an entry in the second-level page table.. */
@@ -160,7 +160,7 @@ static inline int pte_none(pte_t pte)
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_MASK) >> PAGE_SHIFT;
+	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 /*
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 96aa76e..2b1746c 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -53,7 +53,7 @@
 			 _PAGE_DIRTY)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_PCD | _PAGE_PWT |		\
+#define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
 
 #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
@@ -286,7 +286,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	return __pgprot(preservebits | addbits);
 }
 
-#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_PFN_MASK)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 0611abf..525b53e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -88,7 +88,7 @@ extern unsigned long pg0[];
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val((x)))
 #define pmd_present(x)	(pmd_val((x)) & _PAGE_PRESENT)
-#define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pmd_bad(x) ((pmd_val(x) & (~PTE_PFN_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
@@ -139,7 +139,7 @@ static inline int pud_large(pud_t pud) { return 0; }
 #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
 
 #define pmd_page_vaddr(pmd)					\
-	((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
+	((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK))
 
 #if defined(CONFIG_HIGHPTE)
 #define pte_offset_map(dir, address)					\
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 805d312..ac5fff4 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -158,17 +158,17 @@ static inline void native_pgd_clear(pgd_t *pgd)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+	return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
 }
 
 static inline int pud_bad(pud_t pud)
 {
-	return (pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+	return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
 }
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	return (pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+	return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
 }
 
 #define pte_none(x)	(!pte_val((x)))
@@ -193,7 +193,7 @@ static inline int pmd_bad(pmd_t pmd)
  * Level 4 access.
  */
 #define pgd_page_vaddr(pgd)						\
-	((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
+	((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
 #define pgd_page(pgd)		(pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
 #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
 static inline int pgd_large(pgd_t pgd) { return 0; }
@@ -216,7 +216,7 @@ static inline int pud_large(pud_t pte)
 }
 
 /* PMD  - Level 2 access */
-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
 #define pmd_page(pmd)		(pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
 
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 05e678a..7b3835d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -124,7 +124,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 
 static inline unsigned long pte_mfn(pte_t pte)
 {
-	return (pte.pte & PTE_MASK) >> PAGE_SHIFT;
+	return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
-- 
cgit v1.1


From 77be1fabd024b37423d12f832b1fbdb95dbdf494 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 21 Jul 2008 22:59:56 -0700
Subject: x86: add PTE_FLAGS_MASK

PTE_PFN_MASK was getting lonely, so I made it a friend.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page.h       | 5 ++++-
 include/asm-x86/pgtable.h    | 2 +-
 include/asm-x86/pgtable_32.h | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index e99fb9f..6c84622 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -21,6 +21,9 @@
 /* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
 #define PTE_PFN_MASK		((pteval_t)PHYSICAL_PAGE_MASK)
 
+/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+#define PTE_FLAGS_MASK		(~PTE_PFN_MASK)
+
 #define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
 
@@ -146,7 +149,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 
 static inline pteval_t native_pte_flags(pte_t pte)
 {
-	return native_pte_val(pte) & ~PTE_PFN_MASK;
+	return native_pte_val(pte) & PTE_FLAGS_MASK;
 }
 
 #define pgprot_val(x)	((x).pgprot)
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 2b1746c..3e5dbc4 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -286,7 +286,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	return __pgprot(preservebits | addbits);
 }
 
-#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_PFN_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 525b53e..5c3b265 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -88,7 +88,7 @@ extern unsigned long pg0[];
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val((x)))
 #define pmd_present(x)	(pmd_val((x)) & _PAGE_PRESENT)
-#define pmd_bad(x) ((pmd_val(x) & (~PTE_PFN_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
-- 
cgit v1.1


From 04bbe430f73c6c31bbd067349c029e907e153a8d Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Mon, 21 Jul 2008 15:06:35 +0200
Subject: x86: fix header export, asm-x86/processor-flags.h, CONFIG_* leaks

Apparently,

commit 6330a30a76c1e62d4b4ec238368957f8febf9113
Author: Vegard Nossum <vegard.nossum@gmail.com>
Date:   Wed May 28 09:46:19 2008 +0200

    x86: break mutual header inclusion

introduced some CONFIG names to processor-flags.h, which was exported in

commit 6093015db2bd9e70cf20cdd23be1a50733baafdd
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Mar 30 11:45:23 2008 +0200

    x86: cleanup replace most vm86 flags with flags from processor-flags.h, fix

Fix it by wrapping the CONFIG parts in __KERNEL__.

Reported-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Adrian Bunk <bunk@kernel.org>
Cc: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor-flags.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 092b39b..eff2ecd 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -88,10 +88,12 @@
 #define CX86_ARR_BASE	0xc4
 #define CX86_RCR_BASE	0xdc
 
+#ifdef __KERNEL__
 #ifdef CONFIG_VM86
 #define X86_VM_MASK	X86_EFLAGS_VM
 #else
 #define X86_VM_MASK	0 /* No VM86 support */
 #endif
+#endif
 
 #endif	/* __ASM_I386_PROCESSOR_FLAGS_H */
-- 
cgit v1.1


From 2351ec533ed0dd56052ab96988d2161d5ecc8ed9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 24 Jul 2008 08:09:32 -0400
Subject: Remove asm/semaphore.h

All users have now been converted to linux/semaphore.h and we don't need
to keep these files around any longer.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 include/asm-x86/semaphore.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/asm-x86/semaphore.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/semaphore.h b/include/asm-x86/semaphore.h
deleted file mode 100644
index d9b2034..0000000
--- a/include/asm-x86/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
-- 
cgit v1.1


From 28b2ee20c7cba812b6f2ccf6d722cf86d00a84dc Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 23 Jul 2008 21:27:05 -0700
Subject: access_process_vm device memory infrastructure

In order to be able to debug things like the X server and programs using
the PPC Cell SPUs, the debugger needs to be able to access device memory
through ptrace and /proc/pid/mem.

This patch:

Add the generic_access_phys access function and put the hooks in place
to allow access_process_vm to access device or PPC Cell SPU memory.

[riel@redhat.com: Add documentation for the vm_ops->access function]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Benjamin Herrensmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/io_32.h | 2 ++
 include/asm-x86/io_64.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 4df44ed..e876d89 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address)
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index ddd8058..22995c5 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size);
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
-- 
cgit v1.1


From 42b7772812d15b86543a23b82bd6070eef9a08b1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 23 Jul 2008 21:27:10 -0700
Subject: mm: remove double indirection on tlb parameter to free_pgd_range() &
 Co

The double indirection here is not needed anywhere and hence (at least)
confusing.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/hugetlb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 14171a4..7eed6e0 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) {
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
-- 
cgit v1.1


From a5516438959d90b071ff0a484ce4f3f523dc3152 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:41 -0700
Subject: hugetlb: modular state for hugetlb page size

The goal of this patchset is to support multiple hugetlb page sizes.  This
is achieved by introducing a new struct hstate structure, which
encapsulates the important hugetlb state and constants (eg.  huge page
size, number of huge pages currently allocated, etc).

The hstate structure is then passed around the code which requires these
fields, they will do the right thing regardless of the exact hstate they
are operating on.

This patch adds the hstate structure, with a single global instance of it
(default_hstate), and does the basic work of converting hugetlb to use the
hstate.

Future patches will add more hstate structures to allow for different
hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/hugetlb.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 7eed6e0..439a9ac 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
-- 
cgit v1.1


From b4718e628dbf68a2dee23b5709e2aa3190409c56 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:51 -0700
Subject: x86: add hugepagesz option on 64-bit

Add an hugepagesz=...  option similar to IA64, PPC etc.  to x86-64.

This finally allows to select GB pages for hugetlbfs in x86 now that all
the infrastructure is in place.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/page.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 6c84622..6e02098 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -32,6 +32,8 @@
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
+#define HUGE_MAX_HSTATE 2
+
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
-- 
cgit v1.1


From 27ac792ca0b0a1e7e65f20342260650516c95864 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Wed, 23 Jul 2008 21:28:13 -0700
Subject: PAGE_ALIGN(): correctly handle 64-bit values on 32-bit architectures

On 32-bit architectures PAGE_ALIGN() truncates 64-bit values to the 32-bit
boundary. For example:

	u64 val = PAGE_ALIGN(size);

always returns a value < 4GB even if size is greater than 4GB.

The problem resides in PAGE_MASK definition (from include/asm-x86/page.h for
example):

#define PAGE_SHIFT      12
#define PAGE_SIZE       (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE-1))
...
#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)

The "~" is performed on a 32-bit value, so everything in "and" with
PAGE_MASK greater than 4GB will be truncated to the 32-bit boundary.
Using the ALIGN() macro seems to be the right way, because it uses
typeof(addr) for the mask.

Also move the PAGE_ALIGN() definitions out of include/asm-*/page.h in
include/linux/mm.h.

See also lkml discussion: http://lkml.org/lkml/2008/6/11/237

[akpm@linux-foundation.org: fix drivers/media/video/uvc/uvc_queue.c]
[akpm@linux-foundation.org: fix v850]
[akpm@linux-foundation.org: fix powerpc]
[akpm@linux-foundation.org: fix arm]
[akpm@linux-foundation.org: fix mips]
[akpm@linux-foundation.org: fix drivers/media/video/pvrusb2/pvrusb2-dvb.c]
[akpm@linux-foundation.org: fix drivers/mtd/maps/uclinux.c]
[akpm@linux-foundation.org: fix powerpc]
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/page.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 6e02098..4998211 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -34,9 +34,6 @@
 
 #define HUGE_MAX_HSTATE 2
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 #endif
-- 
cgit v1.1


From aaca0bdca573f3f51ea03139f9c7289541e7bca3 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:20 -0700
Subject: flag parameters: paccept

This patch is by far the most complex in the series.  It adds a new syscall
paccept.  This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:

- a signal mask
- a flags value

The flags parameter can be used to set flag like SOCK_CLOEXEC.  This is
imlpemented here as well.  Some people argued that this is a property which
should be inherited from the file desriptor for the server but this is against
POSIX.  Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc).  So an interface change in inevitable.

The flag value is the same as for socket and socketpair.  I think diverging
here will only create confusion.  Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.

The signal mask is handled as for pselect etc.  The mask is temporarily
installed for the thread and removed before the call returns.  I modeled the
code after pselect.  If there is a problem it's likely also in pselect.

For architectures which use socketcall I maintained this interface instead of
adding a system call.  The symmetry shouldn't be broken.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_CLOEXEC O_CLOEXEC

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  sleep (2);
  pthread_kill ((pthread_t) arg, SIGUSR1);

  return NULL;
}

static void
handler (int s)
{
}

int
main (void)
{
  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  int coe = fcntl (s2, F_GETFD);
  if (coe & FD_CLOEXEC)
    {
      puts ("paccept(0) set close-on-exec-flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_CLOEXEC) failed");
      return 1;
    }

  coe = fcntl (s2, F_GETFD);
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  struct sigaction sa;
  sa.sa_handler = handler;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGUSR1, &sa, NULL);

  sigset_t ss;
  pthread_sigmask (SIG_SETMASK, NULL, &ss);
  sigaddset (&ss, SIGUSR1);
  pthread_sigmask (SIG_SETMASK, &ss, NULL);

  sigdelset (&ss, SIGUSR1);
  alarm (4);
  pthread_barrier_wait (&b);

  errno = 0 ;
  s2 = paccept (s, NULL, 0, &ss, 0);
  if (s2 != -1 || errno != EINTR)
    {
      puts ("paccept did not fail with EINTR");
      return 1;
    }

  close (s);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_64.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 9c1a4a3..e323994 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -639,6 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+#define __NR_paccept				288
+__SYSCALL(__NR_paccept, sys_paccept)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From 9deb27baedb79759c3ab9435a7d8b841842d56e9 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:24 -0700
Subject: flag parameters: signalfd

This patch adds the new signalfd4 syscall.  It extends the old signalfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is SFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name SFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_signalfd4
# ifdef __x86_64__
#  define __NR_signalfd4 289
# elif defined __i386__
#  define __NR_signalfd4 327
# else
#  error "need __NR_signalfd4"
# endif
#endif

#define SFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  sigset_t ss;
  sigemptyset (&ss);
  sigaddset (&ss, SIGUSR1);
  int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
  if (fd == -1)
    {
      puts ("signalfd4(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("signalfd4(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("signalfd4(SFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("signalfd4(SFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 8317d94..c310371 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -332,6 +332,7 @@
 #define __NR_fallocate		324
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
+#define __NR_signalfd4		327
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e323994..e0a9b45 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -641,6 +641,8 @@ __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 #define __NR_paccept				288
 __SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_signalfd4				289
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From b087498eb5605673b0f260a7620d91818cd72304 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:25 -0700
Subject: flag parameters: eventfd

This patch adds the new eventfd2 syscall.  It extends the old eventfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is EFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_eventfd2
# ifdef __x86_64__
#  define __NR_eventfd2 290
# elif defined __i386__
#  define __NR_eventfd2 328
# else
#  error "need __NR_eventfd2"
# endif
#endif

#define EFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_eventfd2, 1, 0);
  if (fd == -1)
    {
      puts ("eventfd2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("eventfd2(0) sets close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_eventfd2, 1, EFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("eventfd2(EFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("eventfd2(EFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index c310371..edbd872 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -333,6 +333,7 @@
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
+#define __NR_eventfd2		328
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e0a9b45..fb059a6 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -643,6 +643,8 @@ __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 __SYSCALL(__NR_paccept, sys_paccept)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
+#define __NR_eventfd2				290
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From a0998b50c3f0b8fdd265c63e0032f86ebe377dbf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:27 -0700
Subject: flag parameters: epoll_create

This patch adds the new epoll_create2 syscall.  It extends the old epoll_create
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is EPOLL_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EPOLL_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_epoll_create2, 1, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_epoll_create2, 1, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index edbd872..a37d6b0 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,6 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
+#define __NR_epoll_create2	329
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index fb059a6..a1a4a5b 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,6 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create2			291
+__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From 336dd1f70ff62d7dd8655228caed4c5bfc818c56 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:29 -0700
Subject: flag parameters: dup2

This patch adds the new dup3 syscall.  It extends the old dup2 syscall by one
parameter which is meant to hold a flag value.  Support for the O_CLOEXEC flag
is added in this patch.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_dup3
# ifdef __x86_64__
#  define __NR_dup3 292
# elif defined __i386__
#  define __NR_dup3 330
# else
#  error "need __NR_dup3"
# endif
#endif

int
main (void)
{
  int fd = syscall (__NR_dup3, 1, 4, 0);
  if (fd == -1)
    {
      puts ("dup3(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("dup3(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_dup3, 1, 4, O_CLOEXEC);
  if (fd == -1)
    {
      puts ("dup3(O_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("dup3(O_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a37d6b0..a1f6383 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -335,6 +335,7 @@
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
+#define __NR_dup3		330
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index a1a4a5b..f0fb2bd 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -647,6 +647,8 @@ __SYSCALL(__NR_signalfd4, sys_signalfd4)
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
 #define __NR_epoll_create2			291
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_dup3				292
+__SYSCALL(__NR_dup3, sys_dup3)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From ed8cae8ba01348bfd83333f4648dd807b04d7f08 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:30 -0700
Subject: flag parameters: pipe

This patch introduces the new syscall pipe2 which is like pipe but it also
takes an additional parameter which takes a flag value.  This patch implements
the handling of O_CLOEXEC for the flag.  I did not add support for the new
syscall for the architectures which have a special sys_pipe implementation.  I
think the maintainers of those archs have the chance to go with the unified
implementation but that's up to them.

The implementation introduces do_pipe_flags.  I did that instead of changing
all callers of do_pipe because some of the callers are written in assembler.
I would probably screw up changing the assembly code.  To avoid breaking code
do_pipe is now a small wrapper around do_pipe_flags.  Once all callers are
changed over to do_pipe_flags the old do_pipe function can be removed.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pipe2
# ifdef __x86_64__
#  define __NR_pipe2 293
# elif defined __i386__
#  define __NR_pipe2 331
# else
#  error "need __NR_pipe2"
# endif
#endif

int
main (void)
{
  int fd[2];
  if (syscall (__NR_pipe2, fd, 0) != 0)
    {
      puts ("pipe2(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (coe & FD_CLOEXEC)
        {
          printf ("pipe2(0) set close-on-exit for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  if (syscall (__NR_pipe2, fd, O_CLOEXEC) != 0)
    {
      puts ("pipe2(O_CLOEXEC) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((coe & FD_CLOEXEC) == 0)
        {
          printf ("pipe2(O_CLOEXEC) does not set close-on-exit for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a1f6383..748a05c 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -336,6 +336,7 @@
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
+#define __NR_pipe2		331
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index f0fb2bd..d2284b43 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -649,6 +649,8 @@ __SYSCALL(__NR_eventfd2, sys_eventfd2)
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2				293
+__SYSCALL(__NR_pipe2, sys_pipe2)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From 4006553b06306b34054529477b06b68a1c66249b Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:32 -0700
Subject: flag parameters: inotify_init

This patch introduces the new syscall inotify_init1 (note: the 1 stands for
the one parameter the syscall takes, as opposed to no parameter before).  The
values accepted for this parameter are function-specific and defined in the
inotify.h header.  Here the values must match the O_* flags, though.  In this
patch CLOEXEC support is introduced.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_inotify_init1
# ifdef __x86_64__
#  define __NR_inotify_init1 294
# elif defined __i386__
#  define __NR_inotify_init1 332
# else
#  error "need __NR_inotify_init1"
# endif
#endif

#define IN_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd;
  fd = syscall (__NR_inotify_init1, 0);
  if (fd == -1)
    {
      puts ("inotify_init1(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("inotify_init1(0) set close-on-exit");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_inotify_init1, IN_CLOEXEC);
  if (fd == -1)
    {
      puts ("inotify_init1(IN_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("inotify_init1(O_CLOEXEC) does not set close-on-exit");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 1 +
 include/asm-x86/unistd_64.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 748a05c..b3daf50 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -337,6 +337,7 @@
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
+#define __NR_inotify_init1	332
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index d2284b43..c8cb88d 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -651,6 +651,8 @@ __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
 __SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1			294
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 
 
 #ifndef __NO_STUBS
-- 
cgit v1.1


From 9fe5ad9c8cef9ad5873d8ee55d1cf00d9b607df0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:43 -0700
Subject: flag parameters add-on: remove epoll_create size param

Remove the size parameter from the new epoll_create syscall and renames the
syscall itself.  The updated test program follows.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_epoll_create2, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_epoll_create2, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/unistd_32.h | 2 +-
 include/asm-x86/unistd_64.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index b3daf50..d739467 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,7 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
-#define __NR_epoll_create2	329
+#define __NR_epoll_create1	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index c8cb88d..3a341d7 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,8 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
-#define __NR_epoll_create2			291
-__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_epoll_create1			291
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
-- 
cgit v1.1


From b6cd7da5be2522b62bbc48d41b36c828b88e02fe Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:28 +0200
Subject: ide-generic: remove "no_pci_devices()" quirk from
 ide_default_io_base()

Since the decision to probe for ISA ide2-6 is now left to the user
"no_pci_devices()" quirk is no longer needed and may be removed.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-x86/ide.h | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index cf9c98e..3405074 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -36,23 +36,13 @@ static __inline__ int ide_default_irq(unsigned long base)
 
 static __inline__ unsigned long ide_default_io_base(int index)
 {
-	/*
-	 *	If PCI is present then it is not safe to poke around
-	 *	the other legacy IDE ports. Only 0x1f0 and 0x170 are
-	 *	defined compatibility mode ports for PCI. A user can 
-	 *	override this using ide= but we must default safe.
-	 */
-	if (no_pci_devices()) {
-		switch(index) {
-			case 2: return 0x1e8;
-			case 3: return 0x168;
-			case 4: return 0x1e0;
-			case 5: return 0x160;
-		}
-	}
 	switch (index) {
 		case 0:	return 0x1f0;
 		case 1:	return 0x170;
+		case 2: return 0x1e8;
+		case 3: return 0x168;
+		case 4: return 0x1e0;
+		case 5: return 0x160;
 		default:
 			return 0;
 	}
-- 
cgit v1.1


From d83b8b85cd56a083d30df73f3fd5e4714591b910 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:30 +0200
Subject: ide: define MAX_HWIFS in <linux/ide.h>

* Now that ide_hwif_t instances are allocated dynamically
  the difference between MAX_HWIFS == 2 and MAX_HWIFS == 10
  is ~100 bytes (x86-32) so use MAX_HWIFS == 10 on all archs
  except these ones that use MAX_HWIFS == 1.

* Define MAX_HWIFS in <linux/ide.h> instead of <asm/ide.h>.

[ Please note that avr32/cris/v850 have no <asm/ide.h>
  and alpha/ia64/sh always define CONFIG_IDE_MAX_HWIFS. ]

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-x86/ide.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index 3405074..bc54879 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -11,15 +11,6 @@
 
 #ifdef __KERNEL__
 
-
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	6
-# endif
-#endif
-
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
-- 
cgit v1.1


From f01d35d87f39ab794ddcdefadb79c11054bcbfbc Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: ide-generic: remove ide_default_{io_base,irq}() inlines (take 3)

Replace ide_default_{io_base,irq}() inlines by legacy_{bases,irqs}[].

v2:
Add missing zero-ing of hws[] (caught during testing by Borislav Petkov).

v3:
Fix zero-oing of hws[] for _real_ this time.

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-x86/ide.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index bc54879..0289baf 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -11,34 +11,6 @@
 
 #ifdef __KERNEL__
 
-static __inline__ int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-		case 0x1f0: return 14;
-		case 0x170: return 15;
-		case 0x1e8: return 11;
-		case 0x168: return 10;
-		case 0x1e0: return 8;
-		case 0x160: return 12;
-		default:
-			return 0;
-	}
-}
-
-static __inline__ unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-		case 0:	return 0x1f0;
-		case 1:	return 0x170;
-		case 2: return 0x1e8;
-		case 3: return 0x168;
-		case 4: return 0x1e0;
-		case 5: return 0x160;
-		default:
-			return 0;
-	}
-}
-
 #include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
-- 
cgit v1.1


From 2a8f7450f828eaee49d66f41f99ac2e54f1160a6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: ide: remove <asm/ide.h> for some archs

* Remove <linux/irq.h> include from <asm-ia64.h> (<linux/ide.h> includes
  <linux/interrupt.h> which is enough).

* Remove <asm/ide.h> for alpha/blackfin/h8300/ia64/m32r/sh/x86/xtensa
  (this leaves us with arm/frv/m68k/mips/mn10300/parisc/powerpc/sparc[64]).

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-x86/ide.h | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 include/asm-x86/ide.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
deleted file mode 100644
index 0289baf..0000000
--- a/include/asm-x86/ide.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- */
-
-#ifndef __ASMi386_IDE_H
-#define __ASMi386_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMi386_IDE_H */
-- 
cgit v1.1


From b30f3ae50cd03ef2ff433a5030fbf88dd8323528 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 24 Jul 2008 15:43:44 -0700
Subject: x86-64: Clean up 'save/restore_i387()' usage

Suresh Siddha wants to fix a possible FPU leakage in error conditions,
but the fact that save/restore_i387() are inlines in a header file makes
that harder to do than necessary.  So start off with an obvious cleanup.

This just moves the x86-64 version of save/restore_i387() out of the
header file, and moves it to the only file that it is actually used in:
arch/x86/kernel/signal_64.c.  So exposing it in a header file was wrong
to begin with.

[ Side note: I'd like to fix up some of the games we play with the
  32-bit version of these functions too, but that's a separate
  matter.  The 32-bit versions are shared - under different names
  at that! - by both the native x86-32 code and the x86-64 32-bit
  compatibility code ]

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/i387.h | 54 --------------------------------------------------
 1 file changed, 54 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 37672f7..96fa844 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -137,60 +137,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.xstate->fxsave));
-
-	if ((unsigned long)buf % 16)
-		printk("save_i387: bad fpstate %p\n", buf);
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)
-					 buf);
-		if (err)
-			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err;
-
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
-
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
-	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-}
-
 #else  /* CONFIG_X86_32 */
 
 extern void finit(void);
-- 
cgit v1.1


From b69c49b78457f681ecfb3147bd968434ee6559c1 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Jul 2008 01:45:40 -0700
Subject: clean up duplicated alloc/free_thread_info

We duplicate alloc/free_thread_info defines on many platforms (the
majority uses __get_free_pages/free_pages).  This patch defines common
defines and removes these duplicated defines.
__HAVE_ARCH_THREAD_INFO_ALLOCATOR is introduced for platforms that do
something different.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/thread_info.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 3f2de10..da0a675 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -152,6 +152,8 @@ struct thread_info {
 #define THREAD_FLAGS GFP_KERNEL
 #endif
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #define alloc_thread_info(tsk)						\
 	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
-- 
cgit v1.1


From 7444a72effa632fcd8edc566f880d96fe213c73b Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 25 Jul 2008 01:46:11 -0700
Subject: gpiolib: allow user-selection

This patch adds functionality to the gpio-lib subsystem to make it
possible to enable the gpio-lib code even if the architecture code didn't
request to get it built in.

The archtitecture code does still need to implement the gpiolib accessor
functions in its asm/gpio.h file.  This patch adds the implementations for
x86 and PPC.

With these changes it is possible to run generic GPIO expansion cards on
every architecture that implements the trivial wrapper functions.  Support
for more architectures can easily be added.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: David Brownell <david-b@pacbell.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Samuel Ortiz <sameo@openedhand.com>
Cc: Kumar Gala <galak@gate.crashing.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/gpio.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index ff87fca..116e914 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -1,6 +1,62 @@
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
 #ifndef _ASM_I386_GPIO_H
 #define _ASM_I386_GPIO_H
 
+#ifdef CONFIG_X86_RDC321X
 #include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented, yet.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
 
 #endif /* _ASM_I386_GPIO_H */
-- 
cgit v1.1


From f22ab814a24e654b1de24db0c5f8b57b5ab2026a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:47:34 -0700
Subject: include/asm/ptrace.h userspace headers cleanup

This patch contains the following cleanups for the asm/ptrace.h
userspace headers:

- include/asm-generic/Kbuild.asm already lists ptrace.h, remove
  the superfluous listings in the Kbuild files of the following
  architectures:
  - cris
  - frv
  - powerpc
  - x86
- don't expose function prototypes and macros to userspace:
  - arm
  - blackfin
  - cris
  - mn10300
  - parisc
- remove #ifdef CONFIG_'s around #define's:
  - blackfin
  - m68knommu
- sh: AFAIK __SH5__ should work in both kernel and userspace,
      no need to leak CONFIG_SUPERH64 to userspace
- xtensa: cosmetical change to remove empty
            #ifndef __ASSEMBLY__ #else #endif
          from the userspace headers

Not changed by this patch is the fact that the following architectures
have a different struct pt_regs depending on CONFIG_ variables:
- h8300
- m68knommu
- mips

This does not work in userspace.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: <linux-arch@vger.kernel.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Chris Zankel <chris@zankel.net>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e35545..00473f7 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -19,7 +19,6 @@ unifdef-y += msr.h
 unifdef-y += mtrr.h
 unifdef-y += posix_types_32.h
 unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
 unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
 unifdef-y += vm86.h
-- 
cgit v1.1


From 7dcf2a9fced59e58e4694cdcf15850c01fdba89b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 1 Jul 2008 19:27:16 +0300
Subject: remove dummy asm/kvm.h files

This patch removes the dummy asm/kvm.h files on architectures not (yet)
supporting KVM and uses the same conditional headers installation as
already used for a.out.h .

Also removed are superfluous install rules in the s390 and x86 Kbuild
files (they are already in Kbuild.asm).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/asm-x86/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e35545..811e982 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm
 header-y += boot.h
 header-y += bootparam.h
 header-y += debugreg.h
-header-y += kvm.h
 header-y += ldt.h
 header-y += msr-index.h
 header-y += prctl.h
-- 
cgit v1.1


From 071375bc76ee86e58592f4682030c81d410ddfd9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 26 Jul 2008 14:52:26 +0200
Subject: x86, RDC321x: remove gpio.h complications

Remove the include/asm-x86/gpio.h specials, just use the generic
version.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/gpio.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index 116e914..c4c91b3 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -16,10 +16,6 @@
 #ifndef _ASM_I386_GPIO_H
 #define _ASM_I386_GPIO_H
 
-#ifdef CONFIG_X86_RDC321X
-#include <gpio.h>
-#else /* CONFIG_X86_RDC321X */
-
 #include <asm-generic/gpio.h>
 
 #ifdef CONFIG_GPIOLIB
@@ -57,6 +53,4 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* CONFIG_X86_RDC321X */
-
 #endif /* _ASM_I386_GPIO_H */
-- 
cgit v1.1


From 1ca9fda4b2f3413e4bc748b983f5585629ca0560 Mon Sep 17 00:00:00 2001
From: Chris McDermott <lcm@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 19:06:09 -0700
Subject: x86: fix IBM Summit based systems' phys_cpu_present_map on 32-bit
 kernels

x86 kernels on IBM Summit based systems will only online 1 CPU because the
phys_cpu_present_map is not set up correctly. Patch below applied to
2.6.26-git10.

Signed-off-by: Chris McDermott <lcm@linux.vnet.ibm.com>
Tested-by: Tim Pepper <lnxninga@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-summit/mach_apic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index 75d2c95..c47e2ab 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h
@@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
 
 static inline physid_mask_t apicid_to_cpu_present(int apicid)
 {
-	return physid_mask_of_physid(0);
+	return physid_mask_of_physid(apicid);
 }
 
 static inline void setup_portio_remap(void)
-- 
cgit v1.1


From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Jul 2008 19:44:49 -0700
Subject: dma-mapping: add the device argument to dma_mapping_error()

Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER
architecture does:

This enables us to cleanly fix the Calgary IOMMU issue that some devices
are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423).

I think that per-device dma_mapping_ops support would be also helpful for
KVM people to support PCI passthrough but Andi thinks that this makes it
difficult to support the PCI passthrough (see the above thread).  So I
CC'ed this to KVM camp.  Comments are appreciated.

A pointer to dma_mapping_ops to struct dev_archdata is added.  If the
pointer is non NULL, DMA operations in asm/dma-mapping.h use it.  If it's
NULL, the system-wide dma_ops pointer is used as before.

If it's useful for KVM people, I plan to implement a mechanism to register
a hook called when a new pci (or dma capable) device is created (it works
with hot plugging).  It enables IOMMUs to set up an appropriate
dma_mapping_ops per device.

The major obstacle is that dma_mapping_error doesn't take a pointer to the
device unlike other DMA operations.  So x86 can't have dma_mapping_ops per
device.  Note all the POWER IOMMUs use the same dma_mapping_error function
so this is not a problem for POWER but x86 IOMMUs use different
dma_mapping_error functions.

The first patch adds the device argument to dma_mapping_error.  The patch
is trivial but large since it touches lots of drivers and dma-mapping.h in
all the architecture.

This patch:

dma_mapping_error() doesn't take a pointer to the device unlike other DMA
operations.  So we can't have dma_mapping_ops per device.

Note that POWER already has dma_mapping_ops per device but all the POWER
IOMMUs use the same dma_mapping_error function.  x86 IOMMUs use device
argument.

[akpm@linux-foundation.org: fix sge]
[akpm@linux-foundation.org: fix svc_rdma]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix bnx2x]
[akpm@linux-foundation.org: fix s2io]
[akpm@linux-foundation.org: fix pasemi_mac]
[akpm@linux-foundation.org: fix sdhci]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix sparc]
[akpm@linux-foundation.org: fix ibmvscsi]
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Avi Kivity <avi@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/device.h      |  3 ++
 include/asm-x86/dma-mapping.h | 99 +++++++++++++++++++++++++++++--------------
 include/asm-x86/swiotlb.h     |  2 +-
 3 files changed, 72 insertions(+), 32 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h
index 87a7153..3c034f4 100644
--- a/include/asm-x86/device.h
+++ b/include/asm-x86/device.h
@@ -5,6 +5,9 @@ struct dev_archdata {
 #ifdef CONFIG_ACPI
 	void	*acpi_handle;
 #endif
+#ifdef CONFIG_X86_64
+struct dma_mapping_ops *dma_ops;
+#endif
 #ifdef CONFIG_DMAR
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index c2ddd3d..0eaa9bf 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -17,7 +17,8 @@ extern int panic_on_overflow;
 extern int force_iommu;
 
 struct dma_mapping_ops {
-	int             (*mapping_error)(dma_addr_t dma_addr);
+	int             (*mapping_error)(struct device *dev,
+					 dma_addr_t dma_addr);
 	void*           (*alloc_coherent)(struct device *dev, size_t size,
 				dma_addr_t *dma_handle, gfp_t gfp);
 	void            (*free_coherent)(struct device *dev, size_t size,
@@ -56,14 +57,32 @@ struct dma_mapping_ops {
 	int		is_phys;
 };
 
-extern const struct dma_mapping_ops *dma_ops;
+extern struct dma_mapping_ops *dma_ops;
 
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 {
-	if (dma_ops->mapping_error)
-		return dma_ops->mapping_error(dma_addr);
+#ifdef CONFIG_X86_32
+	return dma_ops;
+#else
+	if (unlikely(!dev) || !dev->archdata.dma_ops)
+		return dma_ops;
+	else
+		return dev->archdata.dma_ops;
+#endif
+}
+
+/* Make sure we keep the same behaviour */
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+#ifdef CONFIG_X86_32
+	return 0;
+#else
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+	if (ops->mapping_error)
+		return ops->mapping_error(dev, dma_addr);
 
 	return (dma_addr == bad_dma_address);
+#endif
 }
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
@@ -83,44 +102,53 @@ static inline dma_addr_t
 dma_map_single(struct device *hwdev, void *ptr, size_t size,
 	       int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+	return ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
 }
 
 static inline void
 dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
 		 int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->unmap_single)
-		dma_ops->unmap_single(dev, addr, size, direction);
+	if (ops->unmap_single)
+		ops->unmap_single(dev, addr, size, direction);
 }
 
 static inline int
 dma_map_sg(struct device *hwdev, struct scatterlist *sg,
 	   int nents, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_sg(hwdev, sg, nents, direction);
+	return ops->map_sg(hwdev, sg, nents, direction);
 }
 
 static inline void
 dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
 	     int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->unmap_sg)
-		dma_ops->unmap_sg(hwdev, sg, nents, direction);
+	if (ops->unmap_sg)
+		ops->unmap_sg(hwdev, sg, nents, direction);
 }
 
 static inline void
 dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
 			size_t size, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_for_cpu)
-		dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
-					     direction);
+	if (ops->sync_single_for_cpu)
+		ops->sync_single_for_cpu(hwdev, dma_handle, size, direction);
 	flush_write_buffers();
 }
 
@@ -128,10 +156,11 @@ static inline void
 dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
 			   size_t size, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_for_device)
-		dma_ops->sync_single_for_device(hwdev, dma_handle, size,
-						direction);
+	if (ops->sync_single_for_device)
+		ops->sync_single_for_device(hwdev, dma_handle, size, direction);
 	flush_write_buffers();
 }
 
@@ -139,11 +168,12 @@ static inline void
 dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
 			      unsigned long offset, size_t size, int direction)
 {
-	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_range_for_cpu)
-		dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
-						   size, direction);
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
 
+	BUG_ON(!valid_dma_direction(direction));
+	if (ops->sync_single_range_for_cpu)
+		ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
+					       size, direction);
 	flush_write_buffers();
 }
 
@@ -152,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
 				 unsigned long offset, size_t size,
 				 int direction)
 {
-	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_range_for_device)
-		dma_ops->sync_single_range_for_device(hwdev, dma_handle,
-						      offset, size, direction);
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
 
+	BUG_ON(!valid_dma_direction(direction));
+	if (ops->sync_single_range_for_device)
+		ops->sync_single_range_for_device(hwdev, dma_handle,
+						  offset, size, direction);
 	flush_write_buffers();
 }
 
@@ -164,9 +195,11 @@ static inline void
 dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
 		    int nelems, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_sg_for_cpu)
-		dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
+	if (ops->sync_sg_for_cpu)
+		ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
 	flush_write_buffers();
 }
 
@@ -174,9 +207,11 @@ static inline void
 dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 		       int nelems, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_sg_for_device)
-		dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+	if (ops->sync_sg_for_device)
+		ops->sync_sg_for_device(hwdev, sg, nelems, direction);
 
 	flush_write_buffers();
 }
@@ -185,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_single(dev, page_to_phys(page)+offset,
-				   size, direction);
+	return ops->map_single(dev, page_to_phys(page) + offset,
+			       size, direction);
 }
 
 static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index c706a74..2730b35 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
 			  int nents, int direction);
 extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
 			     int nents, int direction);
-extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
 extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
 				  void *vaddr, dma_addr_t dma_handle);
 extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
-- 
cgit v1.1


From 1956a96de488feb05e95c08c9d5e80f63a4be2b1 Mon Sep 17 00:00:00 2001
From: Alexis Bruemmer <alexisb@us.ibm.com>
Date: Fri, 25 Jul 2008 19:44:51 -0700
Subject: x86 calgary: fix handling of devices that aren't behind the Calgary

The calgary code can give drivers addresses above 4GB which is very bad
for hardware that is only 32bit DMA addressable.

With this patch, the calgary code sets the global dma_ops to swiotlb or
nommu properly, and the dma_ops of devices behind the Calgary/CalIOC2
to calgary_dma_ops.  So the calgary code can handle devices safely that
aren't behind the Calgary/CalIOC2.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexis Bruemmer <alexisb@us.ibm.com>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index d63166f..ecc8061 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -3,6 +3,7 @@
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
+extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
-- 
cgit v1.1


From 3ab83521378268044a448113c6aa9a9e245f4d2f Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 25 Jul 2008 19:45:07 -0700
Subject: kexec jump

This patch provides an enhancement to kexec/kdump.  It implements the
following features:

- Backup/restore memory used by the original kernel before/after
  kexec.

- Save/restore CPU state before/after kexec.

The features of this patch can be used as a general method to call program in
physical mode (paging turning off).  This can be used to call BIOS code under
Linux.

kexec-tools needs to be patched to support kexec jump. The patches and
the precompiled kexec can be download from the following URL:

       source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2
       patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2
       binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10

Usage example of calling some physical mode code and return:

1. Compile and install patched kernel with following options selected:

CONFIG_X86_32=y
CONFIG_KEXEC=y
CONFIG_PM=y
CONFIG_KEXEC_JUMP=y

2. Build patched kexec-tool or download the pre-built one.

3. Build some physical mode executable named such as "phy_mode"

4. Boot kernel compiled in step 1.

5. Load physical mode executable with /sbin/kexec. The shell command
   line can be as follow:

   /sbin/kexec --load-preserve-context --args-none phy_mode

6. Call physical mode executable with following shell command line:

   /sbin/kexec -e

Implementation point:

To support jumping without reserving memory.  One shadow backup page (source
page) is allocated for each page used by kexeced code image (destination
page).  When do kexec_load, the image of kexeced code is loaded into source
pages, and before executing, the destination pages and the source pages are
swapped, so the contents of destination pages are backupped.  Before jumping
to the kexeced code image and after jumping back to the original kernel, the
destination pages and the source pages are swapped too.

C ABI (calling convention) is used as communication protocol between
kernel and called code.

A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to
indicate that the loaded kernel image is used for jumping back.

Now, only the i386 architecture is supported.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/kexec.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 8f855a1..c0e52a1 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -10,14 +10,15 @@
 # define VA_PTE_0		5
 # define PA_PTE_1		6
 # define VA_PTE_1		7
+# define PA_SWAP_PAGE		8
 # ifdef CONFIG_X86_PAE
-#  define PA_PMD_0		8
-#  define VA_PMD_0		9
-#  define PA_PMD_1		10
-#  define VA_PMD_1		11
-#  define PAGES_NR		12
+#  define PA_PMD_0		9
+#  define VA_PMD_0		10
+#  define PA_PMD_1		11
+#  define VA_PMD_1		12
+#  define PAGES_NR		13
 # else
-#  define PAGES_NR		8
+#  define PAGES_NR		9
 # endif
 #else
 # define PA_CONTROL_PAGE	0
@@ -152,11 +153,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 }
 
 #ifdef CONFIG_X86_32
-asmlinkage NORET_TYPE void
+asmlinkage unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long control_page,
 		unsigned long start_address,
-		unsigned int has_pae) ATTRIB_NORET;
+		unsigned int has_pae,
+		unsigned int preserve_context);
 #else
 NORET_TYPE void
 relocate_kernel(unsigned long indirection_page,
-- 
cgit v1.1


From a0a8f5364a5ad248aec6cb705e0092ff563edc2f Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Fri, 25 Jul 2008 19:45:20 -0700
Subject: x86: implement pte_special

Implement the pte_special bit for x86.  This is required to support
lockless get_user_pages, because we need to know whether or not we can
refcount a particular page given only its pte (and no vma).

[hugh@veritas.com: fix a BUG]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/pgtable.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 3e5dbc4..04caa2f 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -18,6 +18,7 @@
 #define _PAGE_BIT_UNUSED2	10
 #define _PAGE_BIT_UNUSED3	11
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
+#define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
 #define _PAGE_PRESENT	(_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
@@ -34,6 +35,8 @@
 #define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+#define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
+#define __HAVE_ARCH_PTE_SPECIAL
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
@@ -54,7 +57,7 @@
 
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
-			 _PAGE_ACCESSED | _PAGE_DIRTY)
+			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
 #define _PAGE_CACHE_WB		(0)
@@ -180,7 +183,7 @@ static inline int pte_exec(pte_t pte)
 
 static inline int pte_special(pte_t pte)
 {
-	return 0;
+	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
 static inline int pmd_large(pmd_t pte)
@@ -246,7 +249,7 @@ static inline pte_t pte_clrglobal(pte_t pte)
 
 static inline pte_t pte_mkspecial(pte_t pte)
 {
-	return pte;
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
 }
 
 extern pteval_t __supported_pte_mask;
-- 
cgit v1.1


From 8174c430e445a93016ef18f717fe570214fa38bf Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Fri, 25 Jul 2008 19:45:24 -0700
Subject: x86: lockless get_user_pages_fast()

Implement get_user_pages_fast without locking in the fastpath on x86.

Do an optimistic lockless pagetable walk, without taking mmap_sem or any
page table locks or even mmap_sem.  Page table existence is guaranteed by
turning interrupts off (combined with the fact that we're always looking
up the current mm, means we can do the lockless page table walk within the
constraints of the TLB shootdown design).  Basically we can do this
lockless pagetable walk in a similar manner to the way the CPU's pagetable
walker does not have to take any locks to find present ptes.

This patch (combined with the subsequent ones to convert direct IO to use
it) was found to give about 10% performance improvement on a 2 socket 8
core Intel Xeon system running an OLTP workload on DB2 v9.5

 "To test the effects of the patch, an OLTP workload was run on an IBM
  x3850 M2 server with 2 processors (quad-core Intel Xeon processors at
  2.93 GHz) using IBM DB2 v9.5 running Linux 2.6.24rc7 kernel.  Comparing
  runs with and without the patch resulted in an overall performance
  benefit of ~9.8%.  Correspondingly, oprofiles showed that samples from
  __up_read and __down_read routines that is seen during thread contention
  for system resources was reduced from 2.8% down to .05%.  Monitoring the
  /proc/vmstat output from the patched run showed that the counter for
  fast_gup contained a very high number while the fast_gup_slow value was
  zero."

(fast_gup is the old name for get_user_pages_fast, fast_gup_slow is a
counter we had for the number of times the slowpath was invoked).

The main reason for the improvement is that DB2 has multiple threads each
issuing direct-IO.  Direct-IO uses get_user_pages, and thus the threads
contend the mmap_sem cacheline, and can also contend on page table locks.

I would anticipate larger performance gains on larger systems, however I
think DB2 uses an adaptive mix of threads and processes, so it could be
that thread contention remains pretty constant as machine size increases.
In which case, we stuck with "only" a 10% gain.

The downside of using get_user_pages_fast is that if there is not a pte
with the correct permissions for the access, we end up falling back to
get_user_pages and so the get_user_pages_fast is a bit of extra work.
However this should not be the common case in most performance critical
code.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: Kconfig fix]
[akpm@linux-foundation.org: Makefile fix/cleanup]
[akpm@linux-foundation.org: warning fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/uaccess.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index f6fa4d8..5f702d1 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -451,3 +451,4 @@ extern struct movsl_mask {
 #endif
 
 #endif
+
-- 
cgit v1.1


From 7f2da1e7d0330395e5e9e350b879b98a1ea495df Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 May 2008 20:44:54 -0400
Subject: [PATCH] kill altroot

long overdue...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/asm-x86/namei.h | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 include/asm-x86/namei.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/namei.h b/include/asm-x86/namei.h
deleted file mode 100644
index 415ef5d..0000000
--- a/include/asm-x86/namei.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_X86_NAMEI_H
-#define _ASM_X86_NAMEI_H
-
-/* This dummy routine maybe changed to something useful
- * for /usr/gnemul/ emulation stuff.
- * Look at asm-sparc/namei.h for details.
- */
-
-#define __emul_prefix() NULL
-
-#endif /* _ASM_X86_NAMEI_H */
-- 
cgit v1.1


From 5f4cb662a0a2533b45656607471571460310a5ca Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 14 Jul 2008 20:36:36 +0200
Subject: KVM: SVM: allow enabling/disabling NPT by reloading only the
 architecture module

If NPT is enabled after loading both KVM modules on AMD and it should be
disabled, both KVM modules must be reloaded. If only the architecture module is
reloaded the behavior is undefined. With this patch it is possible to disable
NPT only by reloading the kvm_amd module.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index fdde0be..bc34dc2 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -556,6 +556,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
 
 void kvm_enable_tdp(void);
+void kvm_disable_tdp(void);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
-- 
cgit v1.1