aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorCarsten Otte <cotte@de.ibm.com>2008-03-25 18:47:10 +0100
committerAvi Kivity <avi@qumranet.com>2008-04-27 12:00:40 +0300
commit402b08622d9ac6e32e25289573272e0f21bb58a7 (patch)
tree40d7386154cef85c9bfd2bd862db025933820776 /arch
parent37817f2982d0f559f90cecc66e150dd9d2c2df05 (diff)
downloadkernel_samsung_espresso10-402b08622d9ac6e32e25289573272e0f21bb58a7.zip
kernel_samsung_espresso10-402b08622d9ac6e32e25289573272e0f21bb58a7.tar.gz
kernel_samsung_espresso10-402b08622d9ac6e32e25289573272e0f21bb58a7.tar.bz2
s390: KVM preparation: provide hook to enable pgstes in user pagetable
The SIE instruction on s390 uses the 2nd half of the page table page to virtualize the storage keys of a guest. This patch offers the s390_enable_sie function, which reorganizes the page tables of a single-threaded process to reserve space in the page table: s390_enable_sie makes sure that the process is single threaded and then uses dup_mm to create a new mm with reorganized page tables. The old mm is freed and the process has now a page status extended field after every page table. Code that wants to exploit pgstes should SELECT CONFIG_PGSTE. This patch has a small common code hit, namely making dup_mm non-static. Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's review feedback. Now we do have the prototype for dup_mm in include/linux/sched.h. Following Martin's suggestion, s390_enable_sie() does now call task_lock() to prevent race against ptrace modification of mm_users. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/Kconfig4
-rw-r--r--arch/s390/kernel/setup.c4
-rw-r--r--arch/s390/mm/pgtable.c65
3 files changed, 70 insertions, 3 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e1..513a058 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
+config PGSTE
+ bool
+ default y if KVM
+
mainmenu "Linux Kernel Configuration"
config S390
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147..2f35133 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
early_param("ipldelay", early_parse_ipldelay);
#ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
unsigned int switch_amode = 0;
+#endif
EXPORT_SYMBOL_GPL(switch_amode);
static void set_amode_and_uaccess(unsigned long user_amode,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd07201..5c1aea9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
#define TABLES_PER_PAGE 4
#define FRAG_MASK 15UL
#define SECOND_HALVES 10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 256, 0, PAGE_SIZE/4);
+ clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 768, 0, PAGE_SIZE/4);
+}
+
#else
#define ALLOC_ORDER 2
#define TABLES_PER_PAGE 2
#define FRAG_MASK 3UL
#define SECOND_HALVES 2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ memset(table + 256, 0, PAGE_SIZE/2);
+}
+
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
unsigned long *table;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ if (mm->context.pgstes)
+ clear_table_pgstes(table);
+ else
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock);
list_add(&page->lru, &mm->context.pgtable_list);
}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
mm->context.noexec = 0;
update_mm(mm, tsk);
}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+ int rc;
+
+ task_lock(tsk);
+
+ rc = 0;
+ if (tsk->mm->context.pgstes)
+ goto unlock;
+
+ rc = -EINVAL;
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+ tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+ goto unlock;
+
+ tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
+ mm = dup_mm(tsk);
+ tsk->mm->context.pgstes = 0;
+
+ rc = -ENOMEM;
+ if (!mm)
+ goto unlock;
+ mmput(tsk->mm);
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ preempt_enable();
+ rc = 0;
+unlock:
+ task_unlock(tsk);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);