diff options
58 files changed, 895 insertions, 1284 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a7f140..ff2d237 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -230,17 +230,6 @@ config SYSVIPC_COMPAT config AUDIT_ARCH def_bool y -config S390_EXEC_PROTECT - def_bool y - prompt "Data execute protection" - help - This option allows to enable a buffer overflow protection for user - space programs and it also selects the addressing mode option above. - The kernel parameter noexec=on will enable this feature and also - switch the addressing modes, default is disabled. Enabling this (via - kernel parameter) on machines earlier than IBM System z9 this will - reduce system performance. - comment "Code generation options" choice diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5c91995..24bff4f 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -130,9 +130,7 @@ static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; - int i; - i = 0; get_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 7488e52..81d7908 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -167,7 +167,6 @@ static inline unsigned long __cmpxchg(void *ptr, unsigned long old, #ifdef CONFIG_64BIT #define cmpxchg64(ptr, o, n) \ ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) #else /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 10c029c..64b61bf 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -196,18 +196,6 @@ do { \ } while (0) #endif /* __s390x__ */ -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec(ex, executable_stack) \ -({ \ - if (current->mm->context.noexec && \ - executable_stack != EXSTACK_DISABLE_X) \ - disable_noexec(current->mm, current); \ - current->mm->context.noexec == 0; \ -}) - #define STACK_RND_MASK 0x7ffUL #define ARCH_DLINFO \ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index b56403c..799ed0f 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -111,21 +111,10 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, { pmd_t *pmdp = (pmd_t *) ptep; - if (!MACHINE_HAS_IDTE) { - __pmd_csp(pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_csp(pmdp); - } - return; - } - - __pmd_idte(address, pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); + if (MACHINE_HAS_IDTE) __pmd_idte(address, pmdp); - } - return; + else + __pmd_csp(pmdp); } #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index db14a31..1544b90 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -15,6 +15,7 @@ enum interruption_class { EXTINT_VRT, EXTINT_SCP, EXTINT_IUC, + EXTINT_CPM, IOINT_QAI, IOINT_QDI, IOINT_DAS, diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 65e172f..228cf0b 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u32 user_exec_asce; /* 0x02b4 */ + __u32 current_pid; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u64 user_exec_asce; /* 0x0320 */ + __u64 current_pid; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 78522cd..82d0847 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,19 +5,18 @@ typedef struct { atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; - struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int noexec; - int has_pgste; /* The mmu context has extended page tables */ - int alloc_pgste; /* cloned contexts will have extended page tables */ + /* Cloned contexts will be created with extended page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8c277ca..5682f16 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,11 +35,9 @@ static inline int init_new_context(struct task_struct *tsk, * and if has_pgste is set, it will create extended page * tables. */ - mm->context.noexec = 0; mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -63,10 +61,8 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ - pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; - S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); asm volatile(LCTL_OPCODE" 1,1,%0\n" - : : "m" (S390_lowcore.user_exec_asce) ); + : : "m" (S390_lowcore.user_asce) ); } else /* Load home space page table origin. */ asm volatile(LCTL_OPCODE" 13,13,%0" diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9..accb372 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from) */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; @@ -97,18 +98,21 @@ typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) +#define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,15 +121,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index f7ad871..5325c89 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,6 +1,9 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ +#include <linux/preempt.h> +#include <asm/cmpxchg.h> + /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. @@ -16,6 +19,71 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif +#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +do { \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + prev__ = cmpxchg64(ptr__, old__, new__); \ + break; \ + default: \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } \ + } while (prev__ != old__); \ + preempt_enable(); \ +} while (0) + +#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) + +#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) + +#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) + +#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) + +#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + ret__ = cmpxchg64(ptr__, oval, nval); \ + break; \ + default: \ + ret__ = cmpxchg(ptr__, oval, nval); \ + } \ + preempt_enable(); \ + ret__; \ +}) + +#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) + #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 082eb4e..f6314af 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,14 +19,13 @@ #define check_pgt_cache() do {} while (0) -unsigned long *crst_table_alloc(struct mm_struct *, int); +unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mm_struct *, unsigned long *); -void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { @@ -50,9 +49,6 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { clear_table(crst, entry, sizeof(unsigned long)*2048); - crst = get_shadow_table(crst); - if (crst) - clear_table(crst, entry, sizeof(unsigned long)*2048); } #ifndef __s390x__ @@ -69,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() -#define pgd_populate_kernel(mm, pgd, pud) BUG() - #define pud_populate(mm, pud, pmd) BUG() -#define pud_populate_kernel(mm, pud, pmd) BUG() #else /* __s390x__ */ @@ -90,7 +83,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; @@ -99,43 +92,21 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _SEGMENT_ENTRY_EMPTY); return (pmd_t *) table; } #define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) -static inline void pgd_populate_kernel(struct mm_struct *mm, - pgd_t *pgd, pud_t *pud) -{ - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); -} - static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_populate_kernel(mm, pgd, pud); - if (mm->context.noexec) { - pgd = get_shadow_table(pgd); - pud = get_shadow_table(pud); - pgd_populate_kernel(mm, pgd, pud); - } -} - -static inline void pud_populate_kernel(struct mm_struct *mm, - pud_t *pud, pmd_t *pmd) -{ - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_populate_kernel(mm, pud, pmd); - if (mm->context.noexec) { - pud = get_shadow_table(pud); - pmd = get_shadow_table(pmd); - pud_populate_kernel(mm, pud, pmd); - } + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); } #endif /* __s390x__ */ @@ -143,29 +114,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) - crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); + return (pgd_t *) crst_table_alloc(mm); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) -static inline void pmd_populate_kernel(struct mm_struct *mm, - pmd_t *pmd, pte_t *pte) -{ - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_populate_kernel(mm, pmd, pte); - if (mm->context.noexec) { - pmd = get_shadow_table(pmd); - pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE); - } + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); } +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + #define pmd_pgtable(pmd) \ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace34..c4773a2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -31,9 +31,8 @@ #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/mm_types.h> -#include <asm/bitops.h> #include <asm/bug.h> -#include <asm/processor.h> +#include <asm/page.h> extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); @@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ +#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -256,8 +257,6 @@ extern unsigned long VMALLOC_START; #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ #define _PAGE_TYPE_RO 0x200 #define _PAGE_TYPE_RW 0x000 -#define _PAGE_TYPE_EX_RO 0x202 -#define _PAGE_TYPE_EX_RW 0x002 /* * Only four types for huge pages, using the invalid bit and protection bit @@ -287,8 +286,6 @@ extern unsigned long VMALLOC_START; * _PAGE_TYPE_FILE 11?1 -> 11?1 * _PAGE_TYPE_RO 0100 -> 1100 * _PAGE_TYPE_RW 0000 -> 1000 - * _PAGE_TYPE_EX_RO 0110 -> 1110 - * _PAGE_TYPE_EX_RW 0010 -> 1010 * * pte_none is true for bits combinations 1000, 1010, 1100, 1110 * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 @@ -297,14 +294,17 @@ extern unsigned long VMALLOC_START; */ /* Page status table bits for virtualization */ -#define RCP_PCL_BIT 55 -#define RCP_HR_BIT 54 -#define RCP_HC_BIT 53 -#define RCP_GR_BIT 50 -#define RCP_GC_BIT 49 - -/* User dirty bit for KVM's migration feature */ -#define KVM_UD_BIT 47 +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL #ifndef __s390x__ @@ -377,85 +377,54 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. */ #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) #define PAGE_RO __pgprot(_PAGE_TYPE_RO) #define PAGE_RW __pgprot(_PAGE_TYPE_RW) -#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) -#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) #define PAGE_KERNEL PAGE_RW #define PAGE_COPY PAGE_RO /* - * Dependent on the EXEC_PROTECT option s390 can do execute protection. - * Write permission always implies read permission. In theory with a - * primary/secondary page table execute only can be implemented but - * it would cost an additional bit in the pte to distinguish all the - * different pte types. To avoid that execute permission currently - * implies read permission as well. + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. */ /*xwr*/ #define __P000 PAGE_NONE #define __P001 PAGE_RO #define __P010 PAGE_RO #define __P011 PAGE_RO -#define __P100 PAGE_EX_RO -#define __P101 PAGE_EX_RO -#define __P110 PAGE_EX_RO -#define __P111 PAGE_EX_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO #define __S000 PAGE_NONE #define __S001 PAGE_RO #define __S010 PAGE_RW #define __S011 PAGE_RW -#define __S100 PAGE_EX_RO -#define __S101 PAGE_EX_RO -#define __S110 PAGE_EX_RW -#define __S111 PAGE_EX_RW - -#ifndef __s390x__ -# define PxD_SHADOW_SHIFT 1 -#else /* __s390x__ */ -# define PxD_SHADOW_SHIFT 2 -#endif /* __s390x__ */ +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW -static inline void *get_shadow_table(void *table) +static inline int mm_exclusive(struct mm_struct *mm) { - unsigned long addr, offset; - struct page *page; - - addr = (unsigned long) table; - offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); - page = virt_to_page((void *)(addr ^ offset)); - return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); + return likely(mm == current->active_mm && + atomic_read(&mm->context.attach_count) <= 1); } -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +static inline int mm_has_pgste(struct mm_struct *mm) { - *ptep = entry; - if (mm->context.noexec) { - if (!(pte_val(entry) & _PAGE_INVALID) && - (pte_val(entry) & _PAGE_SWX)) - pte_val(entry) |= _PAGE_RO; - else - pte_val(entry) = _PAGE_TYPE_EMPTY; - ptep[PTRS_PER_PTE] = entry; - } +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; } - /* * pgd/pmd/pte query functions */ @@ -568,52 +537,127 @@ static inline int pte_special(pte_t pte) } #define __HAVE_ARCH_PTE_SAME -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} -static inline void rcp_lock(pte_t *ptep) +static inline pgste_t pgste_get_lock(pte_t *ptep) { + unsigned long new = 0; #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + unsigned long old; + preempt_disable(); - while (test_and_set_bit(RCP_PCL_BIT, pgste)) - ; + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ + " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc"); #endif + return __pgste(new); } -static inline void rcp_unlock(pte_t *ptep) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - clear_bit(RCP_PCL_BIT, pgste); + asm( + " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); preempt_enable(); #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); -#include <linux/page-flags.h> - -static inline void ptep_rcp_copy(pte_t *ptep) +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - struct page *page = virt_to_page(pte_val(*ptep)); - unsigned int skey; - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + unsigned long pfn, bits; + unsigned char skey; + + pfn = pte_val(*ptep) >> PAGE_SHIFT; + skey = page_get_storage_key(pfn); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Clear page changed & referenced bit in the storage key */ + if (bits) { + skey ^= bits; + page_set_storage_key(pfn, skey, 1); } - if (skey & _PAGE_REFERENCED) - set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) - SetPageReferenced(page); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + /* Get host changed & referenced bits from pgste */ + bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + /* Clear host bits in pgste. */ + pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) |= + (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + /* Transfer changed and referenced to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Transfer changed & referenced to pte sofware bits */ + pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ #endif + return pgste; + +} + +static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + int young; + + young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Transfer page referenced bit to pte software bit (host view) */ + if (young || (pgste_val(pgste) & RCP_HR_BIT)) + pte_val(*ptep) |= _PAGE_SWR; + /* Clear host referenced bit in pgste. */ + pgste_val(pgste) &= ~RCP_HR_BIT; + /* Transfer page referenced bit to guest bit in pgste */ + pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ +#endif + return pgste; + +} + +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + unsigned long pfn; + unsigned long okey, nkey; + + pfn = pte_val(*ptep) >> PAGE_SHIFT; + okey = nkey = page_get_storage_key(pfn); + nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set page access key and fetch protection bit from pgste */ + nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + if (okey != nkey) + page_set_storage_key(pfn, nkey, 1); +#endif +} + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_set_pte(ptep, pgste); + *ptep = entry; + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; } /* @@ -627,19 +671,19 @@ static inline int pte_write(pte_t pte) static inline int pte_dirty(pte_t pte) { - /* A pte is neither clean nor dirty on s/390. The dirty bit - * is in the storage key. See page_test_and_clear_dirty for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWC) + return 1; +#endif return 0; } static inline int pte_young(pte_t pte) { - /* A pte is neither young nor old on s/390. The young bit - * is in the storage key. See page_test_and_clear_young for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWR) + return 1; +#endif return 0; } @@ -647,64 +691,30 @@ static inline int pte_young(pte_t pte) * pgd/pmd/pte modification functions */ -#ifndef __s390x__ - -#define pgd_clear(pgd) do { } while (0) -#define pud_clear(pud) do { } while (0) - -#else /* __s390x__ */ - -static inline void pgd_clear_kernel(pgd_t * pgd) +static inline void pgd_clear(pgd_t *pgd) { +#ifdef __s390x__ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif } -static inline void pgd_clear(pgd_t * pgd) -{ - pgd_t *shadow = get_shadow_table(pgd); - - pgd_clear_kernel(pgd); - if (shadow) - pgd_clear_kernel(shadow); -} - -static inline void pud_clear_kernel(pud_t *pud) +static inline void pud_clear(pud_t *pud) { +#ifdef __s390x__ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } -static inline void pud_clear(pud_t *pud) -{ - pud_t *shadow = get_shadow_table(pud); - - pud_clear_kernel(pud); - if (shadow) - pud_clear_kernel(shadow); -} - -#endif /* __s390x__ */ - -static inline void pmd_clear_kernel(pmd_t * pmdp) +static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; } -static inline void pmd_clear(pmd_t *pmd) -{ - pmd_t *shadow = get_shadow_table(pmd); - - pmd_clear_kernel(pmd); - if (shadow) - pmd_clear_kernel(shadow); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) - pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; } /* @@ -734,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - /* The only user of pte_mkclean is the fork() code. - We must *not* clear the *physical* page dirty bit - just because fork() wants to clear the dirty bit in - *one* of the page's mappings. So we just do nothing. */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWC; +#endif return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - /* We do not explicitly set the dirty bit because the - * sske instruction is slow. It is faster to let the - * next instruction set the dirty bit. - */ return pte; } static inline pte_t pte_mkold(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in clearing the real referenced bit. - */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWR; +#endif return pte; } static inline pte_t pte_mkyoung(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in setting the real referenced bit. - */ return pte; } @@ -800,62 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifdef CONFIG_PGSTE /* - * Get (and clear) the user dirty bit for a PTE. + * Get (and clear) the user dirty bit for a pte. */ -static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, - pte_t *ptep) +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + pte_t *ptep) { - int dirty; - unsigned long *pgste; - struct page *page; - unsigned int skey; - - if (!mm->context.has_pgste) - return -EINVAL; - rcp_lock(ptep); - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + pgste_t pgste; + int dirty = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_all(ptep, pgste); + dirty = !!(pgste_val(pgste) & KVM_UC_BIT); + pgste_val(pgste) &= ~KVM_UC_BIT; + pgste_set_unlock(ptep, pgste); + return dirty; } - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); - if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); - rcp_unlock(ptep); return dirty; } -#endif + +/* + * Get (and clear) the user referenced bit for a pte. + */ +static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int young = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + young = !!(pgste_val(pgste) & KVM_UR_BIT); + pgste_val(pgste) &= ~KVM_UR_BIT; + pgste_set_unlock(ptep, pgste); + } + return young; +} #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PGSTE - unsigned long physpage; - int young; - unsigned long *pgste; + pgste_t pgste; + pte_t pte; - if (!vma->vm_mm->context.has_pgste) - return 0; - physpage = pte_val(*ptep) & PAGE_MASK; - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); - rcp_lock(ptep); - if (young) - set_bit_simple(RCP_GR_BIT, pgste); - young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); - rcp_unlock(ptep); - return young; -#endif + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + pte = *ptep; + *ptep = pte_mkold(pte); + pgste_set_unlock(ptep, pgste); + return pte_young(pte); + } return 0; } @@ -867,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * On s390 reference bits are in storage key and never in TLB * With virtualization we handle the reference bit, without we * we can simply return */ -#ifdef CONFIG_PGSTE return ptep_test_and_clear_young(vma, address, ptep); -#endif - return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -890,25 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) } } -static inline void ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - if (mm->context.has_pgste) { - rcp_lock(ptep); - __ptep_ipte(address, ptep); - ptep_rcp_copy(ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - rcp_unlock(ptep); - return; - } - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) { - __ptep_ipte(address, ptep + PTRS_PER_PTE); - pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; - } -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -923,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm, * is a nop. */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __address, __ptep); \ - else \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); +} #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; - ptep_invalidate(vma->vm_mm, address, ptep); + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } @@ -953,76 +979,67 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long addr, + unsigned long address, pte_t *ptep, int full) { - pte_t pte = *ptep; + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!full) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (full) - pte_clear(mm, addr, ptep); - else - ptep_invalidate(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __addr, __ptep); \ - set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) + if (pte_write(pte)) { + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + *ptep = pte_wrprotect(pte); -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, pgste); + } + return pte; } -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) { - unsigned long physpage = page_to_phys(page); - int ccode; - - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + __ptep_ipte(address, ptep); + *ptep = entry; + + if (mm_has_pgste(vma->vm_mm)) + pgste_set_unlock(ptep, pgste); + return 1; } /* diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2c79b64..1300c30 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,6 +84,7 @@ struct thread_struct { struct per_event per_event; /* Cause of the last PER trap */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; + struct list_head list; }; typedef struct thread_struct thread_struct; diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 29d5d6d..b7a4f2e 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -50,7 +50,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) /* * If the process only ran on the local cpu, do a local flush. */ - local_cpumask = cpumask_of_cpu(smp_processor_id()); + cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) __tlb_flush_local(); else @@ -80,16 +80,11 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * on all cpus instead of doing a local flush if the mm * only ran on the local cpu. */ - if (MACHINE_HAS_IDTE) { - if (mm->context.noexec) - __tlb_flush_idte((unsigned long) - get_shadow_table(mm->pgd) | - mm->context.asce_bits); + if (MACHINE_HAS_IDTE) __tlb_flush_idte((unsigned long) mm->pgd | mm->context.asce_bits); - return; - } - __tlb_flush_full(mm); + else + __tlb_flush_full(mm); } static inline void __tlb_flush_mm_cond(struct mm_struct * mm) diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index e821525..9208e69 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -385,6 +385,7 @@ /* Ignore system calls that are also reachable via sys_socket */ #define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fe03c14..edfbd17 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -124,13 +124,11 @@ int main(void) DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); - DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); - DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1b67fc6..0476174 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -212,6 +212,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task st %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next st %r5,__LC_THREAD_INFO # store thread info of next ahi %r5,STACK_SIZE # end of kernel stack of next st %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9fd8645..d61967e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -220,6 +220,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task stg %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next stg %r5,__LC_THREAD_INFO # store thread info of next aghi %r5,STACK_SIZE # end of kernel stack of next stg %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ea5099c..e204f95 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -32,6 +32,7 @@ static const struct irq_class intrclass_names[] = { {.name = "VRT", .desc = "[EXT] Virtio" }, {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, + {.name = "CPM", .desc = "[EXT] CPU Measurement" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a895e69..541a750 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -9,41 +9,26 @@ #include <linux/compiler.h> #include <linux/cpu.h> -#include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/fs.h> #include <linux/smp.h> -#include <linux/stddef.h> #include <linux/slab.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/vmalloc.h> -#include <linux/user.h> #include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/notifier.h> #include <linux/tick.h> -#include <linux/elfcore.h> -#include <linux/kernel_stat.h> #include <linux/personality.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> -#include <asm/compat.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> #include <asm/nmi.h> +#include <asm/compat.h> #include <asm/smp.h> #include "entry.h" diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f5434d1..0c35dee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,8 +305,7 @@ static int set_amode_and_uaccess(unsigned long user_amode, */ static int __init early_parse_switch_amode(char *p) { - if (user_mode != SECONDARY_SPACE_MODE) - user_mode = PRIMARY_SPACE_MODE; + user_mode = PRIMARY_SPACE_MODE; return 0; } early_param("switch_amode", early_parse_switch_amode); @@ -315,10 +314,6 @@ static int __init early_parse_user_mode(char *p) { if (p && strcmp(p, "primary") == 0) user_mode = PRIMARY_SPACE_MODE; -#ifdef CONFIG_S390_EXEC_PROTECT - else if (p && strcmp(p, "secondary") == 0) - user_mode = SECONDARY_SPACE_MODE; -#endif else if (!p || strcmp(p, "home") == 0) user_mode = HOME_SPACE_MODE; else @@ -327,31 +322,9 @@ static int __init early_parse_user_mode(char *p) } early_param("user_mode", early_parse_user_mode); -#ifdef CONFIG_S390_EXEC_PROTECT -/* - * Enable execute protection? - */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - user_mode = SECONDARY_SPACE_MODE; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - static void setup_addressing_mode(void) { - if (user_mode == SECONDARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_SECONDARY, - PSW32_ASC_SECONDARY)) - pr_info("Execute protection active, " - "mvcos available\n"); - else - pr_info("Execute protection active, " - "mvcos not available\n"); - } else if (user_mode == PRIMARY_SPACE_MODE) { + if (user_mode == PRIMARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) pr_info("Address spaces switched, " "mvcos available\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63c7d9f..f8e85ec 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -335,7 +335,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; if (!cpu_stopped(logical_cpu)) continue; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; logical_cpu = cpumask_next(logical_cpu, &avail); if (logical_cpu >= nr_cpu_ids) @@ -367,7 +367,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) continue; __cpu_logical_map[logical_cpu] = cpu_id; smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); if (cpu >= info->configured) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; else @@ -385,7 +385,7 @@ static int __smp_rescan_cpus(void) { cpumask_t avail; - cpus_xor(avail, cpu_possible_map, cpu_present_map); + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); if (smp_use_sigp_detection) return smp_rescan_cpus_sigp(avail); else @@ -467,7 +467,7 @@ int __cpuinit start_secondary(void *cpuvoid) notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ ipi_call_lock(); - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); /* Switch on interrupts */ local_irq_enable(); @@ -644,7 +644,7 @@ int __cpu_disable(void) struct ec_creg_mask_parms cr_parms; int cpu = smp_processor_id(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); /* Disable pfault pseudo page faults on this cpu. */ pfault_fini(); @@ -654,8 +654,8 @@ int __cpu_disable(void) /* disable all external interrupts */ cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 | - 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4); + cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | + 1 << 10 | 1 << 9 | 1 << 6 | 1 << 4); /* disable all I/O interrupts */ cr_parms.orvals[6] = 0; cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | @@ -681,7 +681,7 @@ void __cpu_die(unsigned int cpu) atomic_dec(&init_mm.context.attach_count); } -void cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) @@ -738,8 +738,8 @@ void __init smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); current_thread_info()->cpu = 0; - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_online_map); + set_cpu_present(0, true); + set_cpu_online(0, true); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; smp_cpu_state[0] = CPU_STATE_CONFIGURED; @@ -1016,21 +1016,21 @@ int __ref smp_rescan_cpus(void) get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - newcpus = cpu_present_map; + cpumask_copy(&newcpus, cpu_present_mask); rc = __smp_rescan_cpus(); if (rc) goto out; - cpus_andnot(newcpus, cpu_present_map, newcpus); - for_each_cpu_mask(cpu, newcpus) { + cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); + for_each_cpu(cpu, &newcpus) { rc = smp_add_present_cpu(cpu); if (rc) - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); } rc = 0; out: mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpus_empty(newcpus)) + if (!cpumask_empty(&newcpus)) topology_schedule_update(); return rc; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 87be655..a59557f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -810,7 +810,7 @@ static int etr_sync_clock_stop(struct etr_aib *aib, int port) etr_sync.etr_port = port; get_online_cpus(); atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); put_online_cpus(); return rc; } @@ -1579,7 +1579,7 @@ static void stp_work_fn(struct work_struct *work) memset(&stp_sync, 0, sizeof(stp_sync)); get_online_cpus(); atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); put_online_cpus(); if (!check_sync_clock()) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 94b06c3..2eafb8c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -52,20 +52,20 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; - cpus_clear(mask); + cpumask_clear(&mask); if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) { cpumask_copy(&mask, cpumask_of(cpu)); return mask; } while (info) { - if (cpu_isset(cpu, info->mask)) { + if (cpumask_test_cpu(cpu, &info->mask)) { mask = info->mask; break; } info = info->next; } - if (cpus_empty(mask)) - mask = cpumask_of_cpu(cpu); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); return mask; } @@ -85,10 +85,10 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, if (cpu_logical_map(lcpu) != rcpu) continue; #ifdef CONFIG_SCHED_BOOK - cpu_set(lcpu, book->mask); + cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; #endif - cpu_set(lcpu, core->mask); + cpumask_set_cpu(lcpu, &core->mask); cpu_core_id[lcpu] = core->id; smp_cpu_polarization[lcpu] = tl_cpu->pp; } @@ -101,13 +101,13 @@ static void clear_masks(void) info = &core_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #endif diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index d13e875..8ad2b34 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,6 +22,9 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 449352d..2a8ddfd 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -22,6 +22,9 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 3cc95dd..075ddad 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -412,6 +412,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long struct dcss_segment *seg; int rc, diag_cc; + start_addr = end_addr = 0; seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; @@ -573,6 +574,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long start_addr, end_addr, dummy; int rc, diag_cc; + start_addr = end_addr = 0; mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { @@ -681,8 +683,6 @@ void segment_save(char *name) { struct dcss_segment *seg; - int startpfn = 0; - int endpfn = 0; char cmd1[160]; char cmd2[80]; int i, response; @@ -698,8 +698,6 @@ segment_save(char *name) goto out; } - startpfn = seg->start_addr >> PAGE_SHIFT; - endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab98813..a0f9e73 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -225,33 +225,6 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, force_sig_info(SIGBUS, &si, tsk); } -#ifdef CONFIG_S390_EXEC_PROTECT -static noinline int signal_return(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) -{ - u16 instruction; - int rc; - - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - - if (!rc && instruction == 0x0a77) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_sigreturn(); - else - sys_sigreturn(); - } else if (!rc && instruction == 0x0aad) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_rt_sigreturn(); - else - sys_rt_sigreturn(); - } else - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); - return 0; -} -#endif /* CONFIG_S390_EXEC_PROTECT */ - static noinline void do_fault_error(struct pt_regs *regs, long int_code, unsigned long trans_exc_code, int fault) { @@ -259,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, switch (fault) { case VM_FAULT_BADACCESS: -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) { - signal_return(regs, int_code, trans_exc_code); - break; - } -#endif /* CONFIG_S390_EXEC_PROTECT */ case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (regs->psw.mask & PSW_MASK_PSTATE) { @@ -414,11 +380,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) - access = VM_EXEC; -#endif fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); @@ -491,22 +452,28 @@ static int __init nopfault(char *str) __setup("nopfault", nopfault); -typedef struct { - __u16 refdiagc; - __u16 reffcode; - __u16 refdwlen; - __u16 refversn; - __u64 refgaddr; - __u64 refselmk; - __u64 refcmpmk; - __u64 reserved; -} __attribute__ ((packed, aligned(8))) pfault_refbk_t; +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); int pfault_init(void) { - pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, - __PF_RES_FIELD }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; int rc; if (!MACHINE_IS_VM || pfault_disable) @@ -524,8 +491,12 @@ int pfault_init(void) void pfault_fini(void) { - pfault_refbk_t refbk = - { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; if (!MACHINE_IS_VM || pfault_disable) return; @@ -537,11 +508,15 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + static void pfault_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; + pid_t pid; /* * Get the external interruption subcode & pfault @@ -553,44 +528,79 @@ static void pfault_interrupt(unsigned int ext_int_code, if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; - - /* - * Get the token (= address of the task structure of the affected task). - */ -#ifdef CONFIG_64BIT - tsk = (struct task_struct *) param64; -#else - tsk = (struct task_struct *) param32; -#endif - + if (subcode & 0x0080) { + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + } else { + tsk = current; + } + spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ - if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults. */ tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); wake_up_process(tsk); - put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. */ + tsk->thread.pfault_wait = -1; } + put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - get_task_struct(tsk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial - * interrupt (swapped in a -1 for pfault_wait). Set - * pfault_wait back to zero and exit. This can be - * done safely because tsk is running in kernel - * mode and can't produce new pfaults. */ + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ tsk->thread.pfault_wait = 0; - set_task_state(tsk, TASK_RUNNING); - put_task_struct(tsk); - } else + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. */ + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); + } + } + spin_unlock(&pfault_lock); +} + +static int __cpuinit pfault_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; } + return NOTIFY_OK; } static int __init pfault_irq_init(void) @@ -599,22 +609,21 @@ static int __init pfault_irq_init(void) if (!MACHINE_IS_VM) return 0; - /* - * Try to get pfault pseudo page faults going. - */ rc = register_external_interrupt(0x2603, pfault_interrupt); - if (rc) { - pfault_disable = 1; - return rc; - } - if (pfault_init() == 0) - return 0; + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + hotcpu_notifier(pfault_cpu_notify, 0); + return 0; - /* Tough luck, no pfault. */ - pfault_disable = 1; +out_pfault: unregister_external_interrupt(0x2603, pfault_interrupt); - return 0; +out_extint: + pfault_disable = 1; + return rc; } early_initcall(pfault_irq_init); -#endif +#endif /* CONFIG_PFAULT */ diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 639cd21..a4d856d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -13,7 +13,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *pteptr, pte_t pteval) { pmd_t *pmdp = (pmd_t *) pteptr; - pte_t shadow_pteval = pteval; unsigned long mask; if (!MACHINE_HAS_HPAGE) { @@ -21,18 +20,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, mask = pte_val(pteval) & (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - if (mm->context.noexec) { - pteptr += PTRS_PER_PTE; - pte_val(shadow_pteval) = - (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } } pmd_val(*pmdp) = pte_val(pteval); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - pmd_val(*pmdp) = pte_val(shadow_pteval); - } } int arch_prepare_hugepage(struct page *page) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index bb40933..dfefc21 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -175,7 +175,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); if (!enable) { - ptep_invalidate(&init_mm, address, pte); + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_TYPE_EMPTY; continue; } *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f05edcc..d013ed3 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -28,7 +28,7 @@ static void change_page_attr(unsigned long addr, int numpages, pte = *ptep; pte = set(pte); - ptep_invalidate(&init_mm, addr, ptep); + __ptep_ipte(addr, ptep); *ptep = pte; addr += PAGE_SIZE; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e1850c2..8d43306 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -40,7 +40,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); static void __page_table_free(struct mm_struct *mm, unsigned long *table); -static void __crst_table_free(struct mm_struct *mm, unsigned long *table); static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) { @@ -67,7 +66,7 @@ static void rcu_table_freelist_callback(struct rcu_head *head) while (batch->pgt_index > 0) __page_table_free(batch->mm, batch->table[--batch->pgt_index]); while (batch->crst_index < RCU_FREELIST_SIZE) - __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + crst_table_free(batch->mm, batch->table[batch->crst_index++]); free_page((unsigned long) batch); } @@ -125,63 +124,33 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) +unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) return NULL; - page->index = 0; - if (noexec) { - struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - if (!shadow) { - __free_pages(page, ALLOC_ORDER); - return NULL; - } - page->index = page_to_phys(shadow); - } - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.crst_list); - spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -static void __crst_table_free(struct mm_struct *mm, unsigned long *table) -{ - unsigned long *shadow = get_shadow_table(table); - - if (shadow) - free_pages((unsigned long) shadow, ALLOC_ORDER); - free_pages((unsigned long) table, ALLOC_ORDER); -} - void crst_table_free(struct mm_struct *mm, unsigned long *table) { - struct page *page = virt_to_page(table); - - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - __crst_table_free(mm, table); + free_pages((unsigned long) table, ALLOC_ORDER); } void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) { struct rcu_table_freelist *batch; - struct page *page = virt_to_page(table); - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch->table[--batch->crst_index] = table; @@ -197,7 +166,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) BUG_ON(limit > (1UL << 53)); repeat: - table = crst_table_alloc(mm, mm->context.noexec); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; spin_lock_bh(&mm->page_table_lock); @@ -273,7 +242,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -329,7 +298,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -366,7 +335,7 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) page_table_free(mm, table); return; } - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -379,25 +348,6 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) rcu_table_freelist_finish(); } -void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) -{ - struct page *page; - - spin_lock_bh(&mm->context.list_lock); - /* Free shadow region and segment tables. */ - list_for_each_entry(page, &mm->context.crst_list, lru) - if (page->index) { - free_pages((unsigned long) page->index, ALLOC_ORDER); - page->index = 0; - } - /* "Free" second halves of page tables. */ - list_for_each_entry(page, &mm->context.pgtable_list, lru) - page->flags &= ~SECOND_HALVES; - spin_unlock_bh(&mm->context.list_lock); - mm->context.noexec = 0; - update_mm(mm, tsk); -} - /* * switch on pgstes for its userspace process (for kvm) */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 34c43f2..8c1970d 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); @@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); @@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) continue; if (pmd_huge(*pm_dir)) { - pmd_clear_kernel(pm_dir); + pmd_clear(pm_dir); address += HPAGE_SIZE - PAGE_SIZE; continue; } @@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); @@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 33cbd37..053caa0 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -5,6 +5,7 @@ * Author: Heinz Graalfs <graalfs@de.ibm.com> */ +#include <linux/kernel_stat.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/smp.h> @@ -674,17 +675,11 @@ int hwsampler_activate(unsigned int cpu) static void hws_ext_handler(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { - int cpu; struct hws_cpu_buffer *cb; - cpu = smp_processor_id(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - atomic_xchg( - &cb->ext_params, - atomic_read(&cb->ext_params) - | S390_lowcore.ext_params); - + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + cb = &__get_cpu_var(sampler_cpu_buffer); + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); if (hws_wq) queue_work(hws_wq, &cb->worker); } @@ -764,7 +759,7 @@ static int worker_check_error(unsigned int cpu, int ext_params) if (!sdbt || !*sdbt) return -EINVAL; - if (ext_params & EI_IEA) + if (ext_params & EI_PRA) cb->req_alert++; if (ext_params & EI_LSDA) @@ -1009,7 +1004,7 @@ int hwsampler_deallocate() if (hws_state != HWS_STOPPED) goto deallocate_exit; - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1123,7 +1118,7 @@ int hwsampler_shutdown() mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); } if (hws_wq) { @@ -1198,7 +1193,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + ctl_set_bit(0, 5); /* set CR0 bit 58 */ return 0; } diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c64c380..e0b25de 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -74,6 +74,8 @@ config ZCRYPT + PCI-X Cryptographic Coprocessor (PCIXCC) + Crypto Express2 Coprocessor (CEX2C) + Crypto Express2 Accelerator (CEX2A) + + Crypto Express3 Coprocessor (CEX3C) + + Crypto Express3 Accelerator (CEX3A) config ZCRYPT_MONOLITHIC bool "Monolithic zcrypt module" diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 2b771f1..c388eda 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -253,13 +253,11 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) */ void dasd_alias_lcu_setup_complete(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); @@ -279,13 +277,11 @@ void dasd_alias_lcu_setup_complete(struct dasd_device *device) void dasd_alias_wait_for_lcu_setup(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3ebdf5f..30fb979 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1611,10 +1611,8 @@ static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, static int dasd_eckd_start_analysis(struct dasd_block *block) { - struct dasd_eckd_private *private; struct dasd_ccw_req *init_cqr; - private = (struct dasd_eckd_private *) block->base->private; init_cqr = dasd_eckd_analysis_ccw(block->base); if (IS_ERR(init_cqr)) return PTR_ERR(init_cqr); @@ -2264,7 +2262,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_ccw_req *cqr; struct ccw1 *ccw; @@ -2283,7 +2280,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int recoffs; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK_DATA; else if (rq_data_dir(req) == WRITE) @@ -2556,8 +2552,7 @@ static int prepare_itcw(struct itcw *itcw, dcw = itcw_add_dcw(itcw, pfx_cmd, 0, &pfxdata, sizeof(pfxdata), total_data_size); - - return rc; + return IS_ERR(dcw) ? PTR_ERR(dcw) : 0; } static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( @@ -2573,7 +2568,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; struct dasd_ccw_req *cqr; struct req_iterator iter; struct bio_vec *bv; @@ -2594,7 +2588,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int count, count_to_trk_end; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) { cmd = DASD_ECKD_CCW_READ_TRACK_DATA; itcw_op = ITCW_OP_READ; @@ -2801,7 +2794,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, struct dasd_block *block, struct request *req) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_device *basedev; struct dasd_ccw_req *cqr; @@ -2836,7 +2828,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, trkcount = last_trk - first_trk + 1; first_offs = 0; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index dcee3c5..a4f117d 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -119,18 +119,6 @@ config S390_TAPE comment "S/390 tape interface support" depends on S390_TAPE -config S390_TAPE_BLOCK - def_bool y - prompt "Support for tape block devices" - depends on S390_TAPE && BLOCK - help - Select this option if you want to access your channel-attached tape - devices using the block device interface. This interface is similar - to CD-ROM devices on other platforms. The tapes can only be - accessed read-only when using this interface. Have a look at - <file:Documentation/s390/TAPE> for further information about creating - volumes for and using this interface. It is safe to say "Y" here. - comment "S/390 tape hardware support" depends on S390_TAPE diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index efb500a..f3c3252 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -3,7 +3,7 @@ # obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ - sclp_cmd.o sclp_config.o sclp_cpi_sys.o + sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o obj-$(CONFIG_TN3270) += raw3270.o obj-$(CONFIG_TN3270_CONSOLE) += con3270.o @@ -22,7 +22,6 @@ obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o obj-$(CONFIG_VMCP) += vmcp.o -tape-$(CONFIG_S390_TAPE_BLOCK) += tape_block.o tape-$(CONFIG_PROC_FS) += tape_proc.o tape-objs := tape_core.o tape_std.o tape_char.o $(tape-y) obj-$(CONFIG_S390_TAPE) += tape.o tape_class.o diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index e0702d3..4600aa1 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -97,7 +97,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) { struct monwrite_hdr *monhdr = &monpriv->hdr; struct mon_buf *monbuf; - int rc; + int rc = 0; if (monhdr->datalen > MONWRITE_MAX_DATALEN || monhdr->mon_function > MONWRITE_START_CONFIG || @@ -135,7 +135,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) mon_buf_count++; } monpriv->current_buf = monbuf; - return 0; + return rc; } static int monwrite_new_data(struct mon_private *monpriv) diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index e21a5c3..810ac38 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -598,7 +598,6 @@ __raw3270_size_device(struct raw3270 *rp) static const unsigned char wbuf[] = { 0x00, 0x07, 0x01, 0xff, 0x03, 0x00, 0x81 }; struct raw3270_ua *uap; - unsigned short count; int rc; /* @@ -653,7 +652,6 @@ __raw3270_size_device(struct raw3270 *rp) if (rc) return rc; /* Got a Query Reply */ - count = sizeof(rp->init_data) - rp->init_request.rescnt; uap = (struct raw3270_ua *) (rp->init_data + 1); /* Paranoia check. */ if (rp->init_data[0] != 0x88 || uap->uab.qcode != 0x81) diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6bb5a6b..49a1bb5 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -28,6 +28,7 @@ #define EVTYP_CONFMGMDATA 0x04 #define EVTYP_SDIAS 0x1C #define EVTYP_ASYNC 0x0A +#define EVTYP_OCF 0x1E #define EVTYP_OPCMD_MASK 0x80000000 #define EVTYP_MSG_MASK 0x40000000 @@ -40,6 +41,7 @@ #define EVTYP_CONFMGMDATA_MASK 0x10000000 #define EVTYP_SDIAS_MASK 0x00000010 #define EVTYP_ASYNC_MASK 0x00400000 +#define EVTYP_OCF_MASK 0x00000004 #define GNRLMSGFLGS_DOM 0x8000 #define GNRLMSGFLGS_SNDALRM 0x4000 @@ -186,4 +188,26 @@ sclp_ascebc_str(unsigned char *str, int nr) (MACHINE_IS_VM) ? ASCEBC(str, nr) : ASCEBC_500(str, nr); } +static inline struct gds_vector * +sclp_find_gds_vector(void *start, void *end, u16 id) +{ + struct gds_vector *v; + + for (v = start; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == id) + return v; + return NULL; +} + +static inline struct gds_subvector * +sclp_find_gds_subvector(void *start, void *end, u8 key) +{ + struct gds_subvector *sv; + + for (sv = start; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == key) + return sv; + return NULL; +} + #endif /* __SCLP_H__ */ diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 16e232a..95b909a 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -71,21 +71,9 @@ static struct sclp_register sclp_conf_register = static int __init sclp_conf_init(void) { - int rc; - INIT_WORK(&sclp_cpu_capability_work, sclp_cpu_capability_notify); INIT_WORK(&sclp_cpu_change_work, sclp_cpu_change_notify); - - rc = sclp_register(&sclp_conf_register); - if (rc) - return rc; - - if (!(sclp_conf_register.sclp_send_mask & EVTYP_CONFMGMDATA_MASK)) { - pr_warning("no configuration management.\n"); - sclp_unregister(&sclp_conf_register); - rc = -ENOSYS; - } - return rc; + return sclp_register(&sclp_conf_register); } __initcall(sclp_conf_init); diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c new file mode 100644 index 0000000..ab294d5 --- /dev/null +++ b/drivers/s390/char/sclp_ocf.c @@ -0,0 +1,145 @@ +/* + * drivers/s390/char/sclp_ocf.c + * SCLP OCF communication parameters sysfs interface + * + * Copyright IBM Corp. 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#define KMSG_COMPONENT "sclp_ocf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/stat.h> +#include <linux/device.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/kmod.h> +#include <linux/timer.h> +#include <linux/err.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> + +#include "sclp.h" + +#define OCF_LENGTH_HMC_NETWORK 8UL +#define OCF_LENGTH_CPC_NAME 8UL + +static char hmc_network[OCF_LENGTH_HMC_NETWORK + 1]; +static char cpc_name[OCF_LENGTH_CPC_NAME + 1]; + +static DEFINE_SPINLOCK(sclp_ocf_lock); +static struct work_struct sclp_ocf_change_work; + +static struct kset *ocf_kset; + +static void sclp_ocf_change_notify(struct work_struct *work) +{ + kobject_uevent(&ocf_kset->kobj, KOBJ_CHANGE); +} + +/* Handler for OCF event. Look for the CPC image name. */ +static void sclp_ocf_handler(struct evbuf_header *evbuf) +{ + struct gds_vector *v; + struct gds_subvector *sv, *netid, *cpc; + size_t size; + + /* Find the 0x9f00 block. */ + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + 0x9f00); + if (!v) + return; + /* Find the 0x9f22 block inside the 0x9f00 block. */ + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, 0x9f22); + if (!v) + return; + /* Find the 0x81 block inside the 0x9f22 block. */ + sv = sclp_find_gds_subvector(v + 1, (void *) v + v->length, 0x81); + if (!sv) + return; + /* Find the 0x01 block inside the 0x81 block. */ + netid = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 1); + /* Find the 0x02 block inside the 0x81 block. */ + cpc = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 2); + /* Copy network name and cpc name. */ + spin_lock(&sclp_ocf_lock); + if (netid) { + size = min(OCF_LENGTH_HMC_NETWORK, (size_t) netid->length); + memcpy(hmc_network, netid + 1, size); + EBCASC(hmc_network, size); + hmc_network[size] = 0; + } + if (cpc) { + size = min(OCF_LENGTH_CPC_NAME, (size_t) cpc->length); + memcpy(cpc_name, cpc + 1, size); + EBCASC(cpc_name, size); + cpc_name[size] = 0; + } + spin_unlock(&sclp_ocf_lock); + schedule_work(&sclp_ocf_change_work); +} + +static struct sclp_register sclp_ocf_event = { + .receive_mask = EVTYP_OCF_MASK, + .receiver_fn = sclp_ocf_handler, +}; + +static ssize_t cpc_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", cpc_name); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute cpc_name_attr = + __ATTR(cpc_name, 0444, cpc_name_show, NULL); + +static ssize_t hmc_network_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", hmc_network); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute hmc_network_attr = + __ATTR(hmc_network, 0444, hmc_network_show, NULL); + +static struct attribute *ocf_attrs[] = { + &cpc_name_attr.attr, + &hmc_network_attr.attr, + NULL, +}; + +static struct attribute_group ocf_attr_group = { + .attrs = ocf_attrs, +}; + +static int __init ocf_init(void) +{ + int rc; + + INIT_WORK(&sclp_ocf_change_work, sclp_ocf_change_notify); + ocf_kset = kset_create_and_add("ocf", NULL, firmware_kobj); + if (!ocf_kset) + return -ENOMEM; + + rc = sysfs_create_group(&ocf_kset->kobj, &ocf_attr_group); + if (rc) { + kset_unregister(ocf_kset); + return rc; + } + + return sclp_register(&sclp_ocf_event); +} + +device_initcall(ocf_init); diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 6a1c58d..fa733ec 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -69,9 +69,6 @@ static DEFINE_MUTEX(sdias_mutex); static void sdias_callback(struct sclp_req *request, void *data) { - struct sdias_sccb *cbsccb; - - cbsccb = (struct sdias_sccb *) request->sccb; sclp_req_done = 1; wake_up(&sdias_wq); /* Inform caller, that request is complete */ TRACE("callback done\n"); diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 8258d59..a879c13 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -408,118 +408,72 @@ static int sclp_switch_cases(unsigned char *buf, int count) return op - buf; } -static void -sclp_get_input(unsigned char *start, unsigned char *end) +static void sclp_get_input(struct gds_subvector *sv) { + unsigned char *str; int count; - count = end - start; + str = (unsigned char *) (sv + 1); + count = sv->length - sizeof(*sv); if (sclp_tty_tolower) - EBC_TOLOWER(start, count); - count = sclp_switch_cases(start, count); + EBC_TOLOWER(str, count); + count = sclp_switch_cases(str, count); /* convert EBCDIC to ASCII (modify original input in SCCB) */ - sclp_ebcasc_str(start, count); + sclp_ebcasc_str(str, count); /* transfer input to high level driver */ - sclp_tty_input(start, count); -} - -static inline struct gds_vector * -find_gds_vector(struct gds_vector *start, struct gds_vector *end, u16 id) -{ - struct gds_vector *vec; - - for (vec = start; vec < end; vec = (void *) vec + vec->length) - if (vec->gds_id == id) - return vec; - return NULL; + sclp_tty_input(str, count); } -static inline struct gds_subvector * -find_gds_subvector(struct gds_subvector *start, - struct gds_subvector *end, u8 key) +static inline void sclp_eval_selfdeftextmsg(struct gds_subvector *sv) { - struct gds_subvector *subvec; + void *end; - for (subvec = start; subvec < end; - subvec = (void *) subvec + subvec->length) - if (subvec->key == key) - return subvec; - return NULL; + end = (void *) sv + sv->length; + for (sv = sv + 1; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == 0x30) + sclp_get_input(sv); } -static inline void -sclp_eval_selfdeftextmsg(struct gds_subvector *start, - struct gds_subvector *end) +static inline void sclp_eval_textcmd(struct gds_vector *v) { - struct gds_subvector *subvec; - - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, 0x30); - if (!subvec) - break; - sclp_get_input((unsigned char *)(subvec + 1), - (unsigned char *) subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } -} + struct gds_subvector *sv; + void *end; -static inline void -sclp_eval_textcmd(struct gds_subvector *start, - struct gds_subvector *end) -{ - struct gds_subvector *subvec; + end = (void *) v + v->length; + for (sv = (struct gds_subvector *) (v + 1); + (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == GDS_KEY_SELFDEFTEXTMSG) + sclp_eval_selfdeftextmsg(sv); - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, - GDS_KEY_SELFDEFTEXTMSG); - if (!subvec) - break; - sclp_eval_selfdeftextmsg((struct gds_subvector *)(subvec + 1), - (void *)subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } } -static inline void -sclp_eval_cpmsu(struct gds_vector *start, struct gds_vector *end) +static inline void sclp_eval_cpmsu(struct gds_vector *v) { - struct gds_vector *vec; + void *end; - vec = start; - while (vec < end) { - vec = find_gds_vector(vec, end, GDS_ID_TEXTCMD); - if (!vec) - break; - sclp_eval_textcmd((struct gds_subvector *)(vec + 1), - (void *) vec + vec->length); - vec = (void *) vec + vec->length; - } + end = (void *) v + v->length; + for (v = v + 1; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == GDS_ID_TEXTCMD) + sclp_eval_textcmd(v); } -static inline void -sclp_eval_mdsmu(struct gds_vector *start, void *end) +static inline void sclp_eval_mdsmu(struct gds_vector *v) { - struct gds_vector *vec; - - vec = find_gds_vector(start, end, GDS_ID_CPMSU); - if (vec) - sclp_eval_cpmsu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, GDS_ID_CPMSU); + if (v) + sclp_eval_cpmsu(v); } -static void -sclp_tty_receiver(struct evbuf_header *evbuf) +static void sclp_tty_receiver(struct evbuf_header *evbuf) { - struct gds_vector *start, *end, *vec; + struct gds_vector *v; - start = (struct gds_vector *)(evbuf + 1); - end = (void *) evbuf + evbuf->length; - vec = find_gds_vector(start, end, GDS_ID_MDSMU); - if (vec) - sclp_eval_mdsmu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + GDS_ID_MDSMU); + if (v) + sclp_eval_mdsmu(v); } static void diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index b98dcbd..a7d5707 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -796,10 +796,8 @@ static void tape_3590_med_state_set(struct tape_device *device, static int tape_3590_done(struct tape_device *device, struct tape_request *request) { - struct tape_3590_disc_data *disc_data; DBF_EVENT(6, "%s done\n", tape_op_verbose[request->op]); - disc_data = device->discdata; switch (request->op) { case TO_BSB: @@ -1394,17 +1392,12 @@ tape_3590_print_era_msg(struct tape_device *device, struct irb *irb) static int tape_3590_crypt_error(struct tape_device *device, struct tape_request *request, struct irb *irb) { - u8 cu_rc, ekm_rc1; + u8 cu_rc; u16 ekm_rc2; - u32 drv_rc; - const char *bus_id; char *sense; sense = ((struct tape_3590_sense *) irb->ecw)->fmt.data; - bus_id = dev_name(&device->cdev->dev); cu_rc = sense[0]; - drv_rc = *((u32*) &sense[5]) & 0xffffff; - ekm_rc1 = sense[9]; ekm_rc2 = *((u16*) &sense[10]); if ((cu_rc == 0) && (ekm_rc2 == 0xee31)) /* key not defined on EKM */ @@ -1429,7 +1422,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, struct irb *irb) { struct tape_3590_sense *sense; - int rc; #ifdef CONFIG_S390_TAPE_BLOCK if (request->op == TO_BLOCK) { @@ -1454,7 +1446,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, * - "break": basic error recovery is done * - "goto out:": just print error message if available */ - rc = -EIO; switch (sense->rc_rqc) { case 0x1110: diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c deleted file mode 100644 index 1b3924c..0000000 --- a/drivers/s390/char/tape_block.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * drivers/s390/char/tape_block.c - * block device frontend for tape device driver - * - * S390 and zSeries version - * Copyright (C) 2001,2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Carsten Otte <cotte@de.ibm.com> - * Tuan Ngo-Anh <ngoanh@de.ibm.com> - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * Stefan Bader <shbader@de.ibm.com> - */ - -#define KMSG_COMPONENT "tape" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/interrupt.h> -#include <linux/buffer_head.h> -#include <linux/kernel.h> - -#include <asm/debug.h> - -#define TAPE_DBF_AREA tape_core_dbf - -#include "tape.h" - -#define TAPEBLOCK_MAX_SEC 100 -#define TAPEBLOCK_MIN_REQUEUE 3 - -/* - * 2003/11/25 Stefan Bader <shbader@de.ibm.com> - * - * In 2.5/2.6 the block device request function is very likely to be called - * with disabled interrupts (e.g. generic_unplug_device). So the driver can't - * just call any function that tries to allocate CCW requests from that con- - * text since it might sleep. There are two choices to work around this: - * a) do not allocate with kmalloc but use its own memory pool - * b) take requests from the queue outside that context, knowing that - * allocation might sleep - */ - -/* - * file operation structure for tape block frontend - */ -static DEFINE_MUTEX(tape_block_mutex); -static int tapeblock_open(struct block_device *, fmode_t); -static int tapeblock_release(struct gendisk *, fmode_t); -static unsigned int tapeblock_check_events(struct gendisk *, unsigned int); -static int tapeblock_revalidate_disk(struct gendisk *); - -static const struct block_device_operations tapeblock_fops = { - .owner = THIS_MODULE, - .open = tapeblock_open, - .release = tapeblock_release, - .check_events = tapeblock_check_events, - .revalidate_disk = tapeblock_revalidate_disk, -}; - -static int tapeblock_major = 0; - -static void -tapeblock_trigger_requeue(struct tape_device *device) -{ - /* Protect against rescheduling. */ - if (atomic_cmpxchg(&device->blk_data.requeue_scheduled, 0, 1) != 0) - return; - schedule_work(&device->blk_data.requeue_task); -} - -/* - * Post finished request. - */ -static void -__tapeblock_end_request(struct tape_request *ccw_req, void *data) -{ - struct tape_device *device; - struct request *req; - - DBF_LH(6, "__tapeblock_end_request()\n"); - - device = ccw_req->device; - req = (struct request *) data; - blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO); - if (ccw_req->rc == 0) - /* Update position. */ - device->blk_data.block_position = - (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B; - else - /* We lost the position information due to an error. */ - device->blk_data.block_position = -1; - device->discipline->free_bread(ccw_req); - if (!list_empty(&device->req_queue) || - blk_peek_request(device->blk_data.request_queue)) - tapeblock_trigger_requeue(device); -} - -/* - * Feed the tape device CCW queue with requests supplied in a list. - */ -static int -tapeblock_start_request(struct tape_device *device, struct request *req) -{ - struct tape_request * ccw_req; - int rc; - - DBF_LH(6, "tapeblock_start_request(%p, %p)\n", device, req); - - ccw_req = device->discipline->bread(device, req); - if (IS_ERR(ccw_req)) { - DBF_EVENT(1, "TBLOCK: bread failed\n"); - blk_end_request_all(req, -EIO); - return PTR_ERR(ccw_req); - } - ccw_req->callback = __tapeblock_end_request; - ccw_req->callback_data = (void *) req; - ccw_req->retries = TAPEBLOCK_RETRIES; - - rc = tape_do_io_async(device, ccw_req); - if (rc) { - /* - * Start/enqueueing failed. No retries in - * this case. - */ - blk_end_request_all(req, -EIO); - device->discipline->free_bread(ccw_req); - } - - return rc; -} - -/* - * Move requests from the block device request queue to the tape device ccw - * queue. - */ -static void -tapeblock_requeue(struct work_struct *work) { - struct tape_blk_data * blkdat; - struct tape_device * device; - struct request_queue * queue; - int nr_queued; - struct request * req; - struct list_head * l; - int rc; - - blkdat = container_of(work, struct tape_blk_data, requeue_task); - device = blkdat->device; - if (!device) - return; - - spin_lock_irq(get_ccwdev_lock(device->cdev)); - queue = device->blk_data.request_queue; - - /* Count number of requests on ccw queue. */ - nr_queued = 0; - list_for_each(l, &device->req_queue) - nr_queued++; - spin_unlock(get_ccwdev_lock(device->cdev)); - - spin_lock_irq(&device->blk_data.request_queue_lock); - while ( - blk_peek_request(queue) && - nr_queued < TAPEBLOCK_MIN_REQUEUE - ) { - req = blk_fetch_request(queue); - if (rq_data_dir(req) == WRITE) { - DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); - spin_unlock_irq(&device->blk_data.request_queue_lock); - blk_end_request_all(req, -EIO); - spin_lock_irq(&device->blk_data.request_queue_lock); - continue; - } - nr_queued++; - spin_unlock_irq(&device->blk_data.request_queue_lock); - rc = tapeblock_start_request(device, req); - spin_lock_irq(&device->blk_data.request_queue_lock); - } - spin_unlock_irq(&device->blk_data.request_queue_lock); - atomic_set(&device->blk_data.requeue_scheduled, 0); -} - -/* - * Tape request queue function. Called from ll_rw_blk.c - */ -static void -tapeblock_request_fn(struct request_queue *queue) -{ - struct tape_device *device; - - device = (struct tape_device *) queue->queuedata; - DBF_LH(6, "tapeblock_request_fn(device=%p)\n", device); - BUG_ON(device == NULL); - tapeblock_trigger_requeue(device); -} - -/* - * This function is called for every new tapedevice - */ -int -tapeblock_setup_device(struct tape_device * device) -{ - struct tape_blk_data * blkdat; - struct gendisk * disk; - int rc; - - blkdat = &device->blk_data; - blkdat->device = device; - spin_lock_init(&blkdat->request_queue_lock); - atomic_set(&blkdat->requeue_scheduled, 0); - - blkdat->request_queue = blk_init_queue( - tapeblock_request_fn, - &blkdat->request_queue_lock - ); - if (!blkdat->request_queue) - return -ENOMEM; - - rc = elevator_change(blkdat->request_queue, "noop"); - if (rc) - goto cleanup_queue; - - blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); - blk_queue_max_hw_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC); - blk_queue_max_segments(blkdat->request_queue, -1L); - blk_queue_max_segment_size(blkdat->request_queue, -1L); - blk_queue_segment_boundary(blkdat->request_queue, -1L); - - disk = alloc_disk(1); - if (!disk) { - rc = -ENOMEM; - goto cleanup_queue; - } - - disk->major = tapeblock_major; - disk->first_minor = device->first_minor; - disk->fops = &tapeblock_fops; - disk->private_data = tape_get_device(device); - disk->queue = blkdat->request_queue; - set_capacity(disk, 0); - sprintf(disk->disk_name, "btibm%d", - device->first_minor / TAPE_MINORS_PER_DEV); - - blkdat->disk = disk; - blkdat->medium_changed = 1; - blkdat->request_queue->queuedata = tape_get_device(device); - - add_disk(disk); - - tape_get_device(device); - INIT_WORK(&blkdat->requeue_task, tapeblock_requeue); - - return 0; - -cleanup_queue: - blk_cleanup_queue(blkdat->request_queue); - blkdat->request_queue = NULL; - - return rc; -} - -void -tapeblock_cleanup_device(struct tape_device *device) -{ - flush_work_sync(&device->blk_data.requeue_task); - tape_put_device(device); - - if (!device->blk_data.disk) { - goto cleanup_queue; - } - - del_gendisk(device->blk_data.disk); - device->blk_data.disk->private_data = NULL; - tape_put_device(device); - put_disk(device->blk_data.disk); - - device->blk_data.disk = NULL; -cleanup_queue: - device->blk_data.request_queue->queuedata = NULL; - tape_put_device(device); - - blk_cleanup_queue(device->blk_data.request_queue); - device->blk_data.request_queue = NULL; -} - -/* - * Detect number of blocks of the tape. - * FIXME: can we extent this to detect the blocks size as well ? - */ -static int -tapeblock_revalidate_disk(struct gendisk *disk) -{ - struct tape_device * device; - unsigned int nr_of_blks; - int rc; - - device = (struct tape_device *) disk->private_data; - BUG_ON(!device); - - if (!device->blk_data.medium_changed) - return 0; - - rc = tape_mtop(device, MTFSFM, 1); - if (rc) - return rc; - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - pr_info("%s: Determining the size of the recorded area...\n", - dev_name(&device->cdev->dev)); - DBF_LH(3, "Image file ends at %d\n", rc); - nr_of_blks = rc; - - /* This will fail for the first file. Catch the error by checking the - * position. */ - tape_mtop(device, MTBSF, 1); - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - if (rc > nr_of_blks) - return -EINVAL; - - DBF_LH(3, "Image file starts at %d\n", rc); - device->bof = rc; - nr_of_blks -= rc; - - pr_info("%s: The size of the recorded area is %i blocks\n", - dev_name(&device->cdev->dev), nr_of_blks); - set_capacity(device->blk_data.disk, - nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512)); - - device->blk_data.block_position = 0; - device->blk_data.medium_changed = 0; - return 0; -} - -static unsigned int -tapeblock_check_events(struct gendisk *disk, unsigned int clearing) -{ - struct tape_device *device; - - device = (struct tape_device *) disk->private_data; - DBF_LH(6, "tapeblock_medium_changed(%p) = %d\n", - device, device->blk_data.medium_changed); - - return device->blk_data.medium_changed ? DISK_EVENT_MEDIA_CHANGE : 0; -} - -/* - * Block frontend tape device open function. - */ -static int -tapeblock_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk * disk = bdev->bd_disk; - struct tape_device * device; - int rc; - - mutex_lock(&tape_block_mutex); - device = tape_get_device(disk->private_data); - - if (device->required_tapemarks) { - DBF_EVENT(2, "TBLOCK: missing tapemarks\n"); - pr_warning("%s: Opening the tape failed because of missing " - "end-of-file marks\n", dev_name(&device->cdev->dev)); - rc = -EPERM; - goto put_device; - } - - rc = tape_open(device); - if (rc) - goto put_device; - - rc = tapeblock_revalidate_disk(disk); - if (rc) - goto release; - - /* - * Note: The reference to <device> is hold until the release function - * is called. - */ - tape_state_set(device, TS_BLKUSE); - mutex_unlock(&tape_block_mutex); - return 0; - -release: - tape_release(device); - put_device: - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - return rc; -} - -/* - * Block frontend tape device release function. - * - * Note: One reference to the tape device was made by the open function. So - * we just get the pointer here and release the reference. - */ -static int -tapeblock_release(struct gendisk *disk, fmode_t mode) -{ - struct tape_device *device = disk->private_data; - - mutex_lock(&tape_block_mutex); - tape_state_set(device, TS_IN_USE); - tape_release(device); - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - - return 0; -} - -/* - * Initialize block device frontend. - */ -int -tapeblock_init(void) -{ - int rc; - - /* Register the tape major number to the kernel */ - rc = register_blkdev(tapeblock_major, "tBLK"); - if (rc < 0) - return rc; - - if (tapeblock_major == 0) - tapeblock_major = rc; - return 0; -} - -/* - * Deregister major for block device frontend - */ -void -tapeblock_exit(void) -{ - unregister_blkdev(tapeblock_major, "tBLK"); -} diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 3c3f342..e765017 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -564,7 +564,6 @@ int tape_std_mtreten(struct tape_device *device, int mt_count) { struct tape_request *request; - int rc; request = tape_alloc_request(4, 0); if (IS_ERR(request)) @@ -576,7 +575,7 @@ tape_std_mtreten(struct tape_device *device, int mt_count) tape_ccw_cc(request->cpaddr + 2, NOP, 0, NULL); tape_ccw_end(request->cpaddr + 3, CCW_CMD_TIC, 0, request->cpaddr); /* execute it, MTRETEN rc gets ignored */ - rc = tape_do_io_interruptible(device, request); + tape_do_io_interruptible(device, request); tape_free_request(request); return tape_mtop(device, MTREW, 1); } diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 0689fcf..75c3f1f 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -326,6 +326,36 @@ static void chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) s390_process_res_acc(&link); } +static void chsc_process_sei_chp_avail(struct chsc_sei_area *sei_area) +{ + struct channel_path *chp; + struct chp_id chpid; + u8 *data; + int num; + + CIO_CRW_EVENT(4, "chsc: channel path availability information\n"); + if (sei_area->rs != 0) + return; + data = sei_area->ccdf; + chp_id_init(&chpid); + for (num = 0; num <= __MAX_CHPID; num++) { + if (!chp_test_bit(data, num)) + continue; + chpid.id = num; + + CIO_CRW_EVENT(4, "Update information for channel path " + "%x.%02x\n", chpid.cssid, chpid.id); + chp = chpid_to_chp(chpid); + if (!chp) { + chp_new(chpid); + continue; + } + mutex_lock(&chp->lock); + chsc_determine_base_channel_path_desc(chpid, &chp->desc); + mutex_unlock(&chp->lock); + } +} + struct chp_config_data { u8 map[32]; u8 op; @@ -376,9 +406,12 @@ static void chsc_process_sei(struct chsc_sei_area *sei_area) case 1: /* link incident*/ chsc_process_sei_link_incident(sei_area); break; - case 2: /* i/o resource accessibiliy */ + case 2: /* i/o resource accessibility */ chsc_process_sei_res_acc(sei_area); break; + case 7: /* channel-path-availability information */ + chsc_process_sei_chp_avail(sei_area); + break; case 8: /* channel-path-configuration notification */ chsc_process_sei_chp_config(sei_area); break; diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 6084103..52c233f 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -408,9 +408,10 @@ ccw_device_done(struct ccw_device *cdev, int state) CIO_MSG_EVENT(0, "Disconnected device %04x on subchannel " "%04x\n", cdev->private->dev_id.devno, sch->schid.sch_no); - if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) + if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) { + cdev->private->state = DEV_STATE_NOT_OPER; ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - else + } else ccw_device_set_disconnected(cdev); cdev->private->flags.donotify = 0; break; @@ -840,9 +841,6 @@ call_handler: static void ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); ccw_device_set_timeout(cdev, 0); /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 651976b..f98698d 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -418,12 +418,9 @@ int ccw_device_resume(struct ccw_device *cdev) int ccw_device_call_handler(struct ccw_device *cdev) { - struct subchannel *sch; unsigned int stctl; int ending_status; - sch = to_subchannel(cdev->dev.parent); - /* * we allow for the device action handler if . * - we received ending status diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index e8f267e..55e8f72 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1446,7 +1446,7 @@ set: static int handle_outbound(struct qdio_q *q, unsigned int callflags, int bufnr, int count) { - unsigned char state; + unsigned char state = 0; int used, rc = 0; qperf_inc(q, outbound_call); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 67302b9..16e4a25 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1183,8 +1183,12 @@ static void ap_scan_bus(struct work_struct *unused) INIT_LIST_HEAD(&ap_dev->list); setup_timer(&ap_dev->timeout, ap_request_timeout, (unsigned long) ap_dev); - if (device_type == 0) - ap_probe_device_type(ap_dev); + if (device_type == 0) { + if (ap_probe_device_type(ap_dev)) { + kfree(ap_dev); + continue; + } + } else ap_dev->device_type = device_type; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b4bfe33..e9b8e59 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -184,22 +184,18 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY -#define page_test_dirty(page) (0) +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define page_test_and_clear_dirty(pfn, mapped) (0) #endif -#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define page_clear_dirty(page, mapped) do { } while (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY #define pte_maybe_dirty(pte) pte_dirty(pte) #else #define pte_maybe_dirty(pte) (1) #endif #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(page) (0) +#define page_test_and_clear_young(pfn) (0) #endif #ifndef __HAVE_ARCH_PGD_OFFSET_GATE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 811183d..79a6700 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_clear_dirty(page, 0); + page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, @@ -719,7 +719,7 @@ int page_referenced(struct page *page, unlock_page(page); } out: - if (page_test_and_clear_young(page)) + if (page_test_and_clear_young(page_to_pfn(page))) referenced++; return referenced; @@ -785,10 +785,8 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) { ret = page_mkclean_file(mapping, page); - if (page_test_dirty(page)) { - page_clear_dirty(page, 1); + if (page_test_and_clear_dirty(page_to_pfn(page), 1)) ret = 1; - } } } @@ -981,10 +979,9 @@ void page_remove_rmap(struct page *page) * not if it's in swapcache - there might be another pte slot * containing the swap entry, but page not yet written to swap. */ - if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) { - page_clear_dirty(page, 1); + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) set_page_dirty(page); - } /* * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED * and not charged by memcg for now. |