Diffstat (limited to 'arch/powerpc')
27 files changed, 540 insertions(+), 462 deletions(-)
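The largest single piece below is the new file arch/powerpc/kvm/book3s_mmu_hpte.c, which replaces the fixed hpte_cache[HPTEG_CACHE_NUM] array and its linear scans with shadow-PTE entries allocated from a kmem_cache and chained onto three hlist hash tables (keyed by effective address, by virtual page, and by a coarser vpage mask), all indexed with hash_64(). Because the hunks are dense, here is a minimal userspace sketch of that bucket scheme. It uses a single index and invented names (hpte_model, hash_ea, and so on) standing in for the kernel's hlist, hash_64() and kmem_cache machinery; it is not the patch's code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HPTE_HASH_BITS  13                 /* cf. HPTEG_HASH_BITS_PTE */
#define HPTE_HASH_SIZE  (1 << HPTE_HASH_BITS)

/* Rough stand-in for the kernel's hash_64(): multiplicative hash, keep top bits. */
static unsigned long hash_ea(uint64_t eaddr)
{
	return (unsigned long)(((eaddr >> 12) * 0x9e37fffffffc0001ULL)
			       >> (64 - HPTE_HASH_BITS));
}

struct hpte_model {
	uint64_t eaddr;           /* guest effective address of the mapping */
	struct hpte_model *next;  /* bucket chain (an hlist in the real code) */
};

static struct hpte_model *hpte_hash_pte[HPTE_HASH_SIZE];

/* Like kvmppc_mmu_hpte_cache_map(): hash the ea, push onto that bucket. */
static void hpte_cache_map(uint64_t eaddr)
{
	struct hpte_model *pte = calloc(1, sizeof(*pte));
	unsigned long idx = hash_ea(eaddr);

	if (!pte)
		return;
	pte->eaddr = eaddr;
	pte->next = hpte_hash_pte[idx];
	hpte_hash_pte[idx] = pte;
}

/* Like kvmppc_mmu_pte_flush_page(): only the matching bucket is walked. */
static void hpte_flush_page(uint64_t guest_ea)
{
	struct hpte_model **pp = &hpte_hash_pte[hash_ea(guest_ea)];

	while (*pp) {
		struct hpte_model *pte = *pp;

		if ((pte->eaddr & ~0xfffULL) == (guest_ea & ~0xfffULL)) {
			*pp = pte->next;   /* unlink and invalidate */
			printf("invalidate ea=0x%llx\n",
			       (unsigned long long)pte->eaddr);
			free(pte);
		} else {
			pp = &pte->next;
		}
	}
}

int main(void)
{
	hpte_cache_map(0x100000);
	hpte_cache_map(0x200000);
	hpte_flush_page(0x100000);   /* flushes one entry, touches one bucket */
	hpte_flush_page(0x300000);   /* nothing cached for this page */
	return 0;
}

The real kvmppc_mmu_hpte_cache_map() additionally links each entry into the two vpage-keyed tables, so vpage- and physical-range flushes can also go straight to the relevant buckets instead of scanning every cached entry.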
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 2a9cd74..076327f 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -8,9 +8,9 @@ * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory * and therefore we can only deal with memory within this range */ -#define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) -#define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) -#define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) +#define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) +#define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) +#define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) #else diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6f74d93..8274a2d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -115,7 +115,15 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); -extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); + +extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); +extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); +extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); +extern int kvmppc_mmu_hpte_sysinit(void); +extern void kvmppc_mmu_hpte_sysexit(void); + extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index 94f05de..c3d4f05 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -22,24 +22,24 @@ #include <linux/types.h> -extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); -extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); -extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); +extern void fps_fres(u64 *fpscr, u32 *dst, u32 *src1); +extern void fps_frsqrte(u64 *fpscr, u32 *dst, u32 *src1); +extern void fps_fsqrts(u64 *fpscr, u32 *dst, u32 *src1); -extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fdivs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fmuls(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, +extern void fps_fmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3); -extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, +extern void 
fps_fmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3); -extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, +extern void fps_fnmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3); -extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, +extern void fps_fnmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3); -extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, +extern void fps_fsel(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3); #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ @@ -82,4 +82,7 @@ FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) +extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); +extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); + #endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0c9ad86..b0b23c0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -35,10 +35,17 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) -#define HPTEG_CACHE_NUM 1024 +#define HPTEG_CACHE_NUM (1 << 15) +#define HPTEG_HASH_BITS_PTE 13 +#define HPTEG_HASH_BITS_VPTE 13 +#define HPTEG_HASH_BITS_VPTE_LONG 5 +#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) +#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) +#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) struct kvm; struct kvm_run; @@ -151,6 +158,9 @@ struct kvmppc_mmu { }; struct hpte_cache { + struct hlist_node list_pte; + struct hlist_node list_vpte; + struct hlist_node list_vpte_long; u64 host_va; u64 pfn; ulong slot; @@ -282,8 +292,10 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions; #ifdef CONFIG_PPC_BOOK3S - struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; - int hpte_cache_offset; + struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; + struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; + int hpte_cache_count; #endif }; diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2102b21..0e398cf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -250,7 +250,9 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize); - +extern void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte); extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index a6297c6..6c294ac 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -515,11 +515,8 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. - * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining - * cacheline alignment of buffers. 
*/ #define NET_IP_ALIGN 0 -#define NET_SKB_PAD L1_CACHE_BYTES #define cmpxchg64(ptr, o, n) \ ({ \ diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 369872f..babccee 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -566,9 +566,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, 0); if (perf_event_overflow(event, nmi, &data, regs)) { /* diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3b4dcc8..ab3e392 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); -#ifdef CONFIG_PPC_FPU -EXPORT_SYMBOL_GPL(cvt_df); -EXPORT_SYMBOL_GPL(cvt_fd); -#endif EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9d39539..fed9bf6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -414,7 +414,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; - ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l); + ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 0; memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 6fc6328..0167d53 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -3,7 +3,7 @@ * * Distribute under GPLv2 * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> */ diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 8123125..9b9b5cd 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -316,7 +316,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", + (unsigned long long)gfn); kvm_release_page_clean(new_page); return; } diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ff43606..d45c818 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -45,6 +45,7 @@ kvm-book3s_64-objs := \ book3s.o \ book3s_emulate.o \ book3s_interrupts.o \ + book3s_mmu_hpte.o \ book3s_64_mmu_host.o \ book3s_64_mmu.o \ book3s_32_mmu.o @@ -57,6 +58,7 @@ kvm-book3s_32-objs := \ book3s.o \ book3s_emulate.o \ book3s_interrupts.o \ + book3s_mmu_hpte.o \ book3s_32_mmu_host.o \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b998abf..a3cef30 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1047,8 +1047,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); regs->ctr = 
kvmppc_get_ctr(vcpu); @@ -1069,8 +1067,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); - vcpu_put(vcpu); - return 0; } @@ -1078,8 +1074,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); kvmppc_set_ctr(vcpu, regs->ctr); @@ -1099,8 +1093,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); - vcpu_put(vcpu); - return 0; } @@ -1110,8 +1102,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int i; - vcpu_load(vcpu); - sregs->pvr = vcpu->arch.pvr; sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; @@ -1131,8 +1121,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } } - vcpu_put(vcpu); - return 0; } @@ -1142,8 +1130,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int i; - vcpu_load(vcpu); - kvmppc_set_pvr(vcpu, sregs->pvr); vcpu3s->sdr1 = sregs->u.s.sdr1; @@ -1171,8 +1157,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, /* Flush the MMU after messing with the segments */ kvmppc_mmu_pte_flush(vcpu, 0, 0); - vcpu_put(vcpu); - return 0; } @@ -1309,12 +1293,17 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; - struct thread_struct ext_bkp; + double fpr[32][TS_FPRWIDTH]; + unsigned int fpscr; + int fpexc_mode; #ifdef CONFIG_ALTIVEC - bool save_vec = current->thread.used_vr; + vector128 vr[32]; + vector128 vscr; + unsigned long uninitialized_var(vrsave); + int used_vr; #endif #ifdef CONFIG_VSX - bool save_vsx = current->thread.used_vsr; + int used_vsr; #endif ulong ext_msr; @@ -1327,27 +1316,27 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* Save FPU state in stack */ if (current->thread.regs->msr & MSR_FP) giveup_fpu(current); - memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); - ext_bkp.fpscr = current->thread.fpscr; - ext_bkp.fpexc_mode = current->thread.fpexc_mode; + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); + fpscr = current->thread.fpscr.val; + fpexc_mode = current->thread.fpexc_mode; #ifdef CONFIG_ALTIVEC /* Save Altivec state in stack */ - if (save_vec) { + used_vr = current->thread.used_vr; + if (used_vr) { if (current->thread.regs->msr & MSR_VEC) giveup_altivec(current); - memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); - ext_bkp.vscr = current->thread.vscr; - ext_bkp.vrsave = current->thread.vrsave; + memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); + vscr = current->thread.vscr; + vrsave = current->thread.vrsave; } - ext_bkp.used_vr = current->thread.used_vr; #endif #ifdef CONFIG_VSX /* Save VSX state in stack */ - if (save_vsx && (current->thread.regs->msr & MSR_VSX)) + used_vsr = current->thread.used_vsr; + if (used_vsr && (current->thread.regs->msr & MSR_VSX)) __giveup_vsx(current); - ext_bkp.used_vsr = current->thread.used_vsr; #endif /* Remember the MSR with disabled extensions */ @@ -1372,22 +1361,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); /* Restore FPU state from stack */ - memcpy(current->thread.fpr, ext_bkp.fpr, 
sizeof(ext_bkp.fpr)); - current->thread.fpscr = ext_bkp.fpscr; - current->thread.fpexc_mode = ext_bkp.fpexc_mode; + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); + current->thread.fpscr.val = fpscr; + current->thread.fpexc_mode = fpexc_mode; #ifdef CONFIG_ALTIVEC /* Restore Altivec state from stack */ - if (save_vec && current->thread.used_vr) { - memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); - current->thread.vscr = ext_bkp.vscr; - current->thread.vrsave= ext_bkp.vrsave; + if (used_vr && current->thread.used_vr) { + memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); + current->thread.vscr = vscr; + current->thread.vrsave = vrsave; } - current->thread.used_vr = ext_bkp.used_vr; + current->thread.used_vr = used_vr; #endif #ifdef CONFIG_VSX - current->thread.used_vsr = ext_bkp.used_vsr; + current->thread.used_vsr = used_vsr; #endif return ret; @@ -1395,12 +1384,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) static int kvmppc_book3s_init(void) { - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, - THIS_MODULE); + int r; + + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, + THIS_MODULE); + + if (r) + return r; + + r = kvmppc_mmu_hpte_sysinit(); + + return r; } static void kvmppc_book3s_exit(void) { + kvmppc_mmu_hpte_sysexit(); kvm_exit(); } diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 0b10503..3292d76 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -354,10 +354,10 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid = VSID_REAL_DR | gvsid; break; case MSR_DR|MSR_IR: - if (!sr->valid) - return -1; - - *vsid = sr->vsid; + if (sr->valid) + *vsid = sr->vsid; + else + *vsid = VSID_BAT | gvsid; break; default: BUG(); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0bb6600..0b51ef8 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -19,6 +19,7 @@ */ #include <linux/kvm_host.h> +#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -57,139 +58,26 @@ static ulong htab; static u32 htabmask; -static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { volatile u32 *pteg; - dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", - pte->pte.eaddr, pte->pte.vpage, pte->host_va); - + /* Remove from host HTAB */ pteg = (u32*)pte->slot; - pteg[0] = 0; + + /* And make sure it's gone from the TLB too */ asm volatile ("sync"); asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); asm volatile ("sync"); asm volatile ("tlbsync"); - - pte->host_va = 0; - - if (pte->pte.may_write) - kvm_release_pfn_dirty(pte->pfn); - else - kvm_release_pfn_clean(pte->pfn); -} - -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", - vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - guest_ea &= ea_mask; - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.eaddr & ea_mask) == guest_ea) { - invalidate_pte(vcpu, pte); - } - } - - /* Doing a complete flush -> start from scratch */ - if (!ea_mask) - vcpu->arch.hpte_cache_offset = 0; 
-} - -void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - guest_vp &= vp_mask; - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.vpage & vp_mask) == guest_vp) { - invalidate_pte(vcpu, pte); - } - } -} - -void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_offset, pa_start, pa_end); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.raddr >= pa_start) && - (pte->pte.raddr < pa_end)) { - invalidate_pte(vcpu, pte); - } - } -} - -struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) -{ - int i; - u64 guest_vp; - - guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); - for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if (pte->pte.vpage == guest_vp) - return &pte->pte; - } - - return NULL; -} - -static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) - kvmppc_mmu_pte_flush(vcpu, 0, 0); - - return vcpu->arch.hpte_cache_offset++; } /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); + return hash_64(gvsid, SID_MAP_BITS); } @@ -256,7 +144,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) register int rr = 0; bool primary = false; bool evict = false; - int hpte_id; struct hpte_cache *pte; /* Get host physical address for gpa */ @@ -341,8 +228,7 @@ next_pteg: /* Now tell our Shadow PTE code about the new page */ - hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); - pte = &vcpu->arch.hpte_cache[hpte_id]; + pte = kvmppc_mmu_hpte_cache_next(vcpu); dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", orig_pte->may_write ? 
'w' : '-', @@ -355,6 +241,8 @@ next_pteg: pte->pte = *orig_pte; pte->pfn = hpaddr >> PAGE_SHIFT; + kvmppc_mmu_hpte_cache_map(vcpu, pte); + return 0; } @@ -439,7 +327,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_hpte_destroy(vcpu); preempt_disable(); __destroy_context(to_book3s(vcpu)->context_id); preempt_enable(); @@ -479,5 +367,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; htab = (ulong)__va(sdr1 & 0xffff0000); + kvmppc_mmu_hpte_init(vcpu); + return 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e4b5744..384179a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -20,6 +20,7 @@ */ #include <linux/kvm_host.h> +#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -46,135 +47,20 @@ #define dprintk_slb(a, ...) do { } while(0) #endif -static void invalidate_pte(struct hpte_cache *pte) +void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", - pte->pte.eaddr, pte->pte.vpage, pte->host_va); - ppc_md.hpte_invalidate(pte->slot, pte->host_va, MMU_PAGE_4K, MMU_SEGSIZE_256M, false); - pte->host_va = 0; - - if (pte->pte.may_write) - kvm_release_pfn_dirty(pte->pfn); - else - kvm_release_pfn_clean(pte->pfn); -} - -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", - vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - guest_ea &= ea_mask; - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.eaddr & ea_mask) == guest_ea) { - invalidate_pte(pte); - } - } - - /* Doing a complete flush -> start from scratch */ - if (!ea_mask) - vcpu->arch.hpte_cache_offset = 0; -} - -void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - guest_vp &= vp_mask; - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.vpage & vp_mask) == guest_vp) { - invalidate_pte(pte); - } - } -} - -void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) -{ - int i; - - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", - vcpu->arch.hpte_cache_offset, pa_start, pa_end); - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); - - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if ((pte->pte.raddr >= pa_start) && - (pte->pte.raddr < pa_end)) { - invalidate_pte(pte); - } - } -} - -struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) -{ - int i; - u64 guest_vp; - - guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); - for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = &vcpu->arch.hpte_cache[i]; - if (!pte->host_va) - continue; - - if (pte->pte.vpage == guest_vp) - return &pte->pte; 
- } - - return NULL; -} - -static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) - kvmppc_mmu_pte_flush(vcpu, 0, 0); - - return vcpu->arch.hpte_cache_offset++; } /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ - ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); + return hash_64(gvsid, SID_MAP_BITS); } - static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; @@ -273,8 +159,7 @@ map_again: attempt++; goto map_again; } else { - int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); - struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; + struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', @@ -292,6 +177,8 @@ map_again: pte->host_va = va; pte->pte = *orig_pte; pte->pfn = hpaddr >> PAGE_SHIFT; + + kvmppc_mmu_hpte_cache_map(vcpu, pte); } return 0; @@ -418,7 +305,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_hpte_destroy(vcpu); __destroy_context(to_book3s(vcpu)->context_id); } @@ -436,5 +323,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; vcpu3s->vsid_next = vcpu3s->vsid_first; + kvmppc_mmu_hpte_init(vcpu); + return 0; } diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c new file mode 100644 index 0000000..4868d4a --- /dev/null +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/hash.h> +#include <linux/slab.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <asm/hw_irq.h> + +#define PTE_SIZE 12 + +/* #define DEBUG_MMU */ + +#ifdef DEBUG_MMU +#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) +#else +#define dprintk_mmu(a, ...) 
do { } while(0) +#endif + +static struct kmem_cache *hpte_cache; + +static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) +{ + return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); +} + +static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) +{ + return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); +} + +static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) +{ + return hash_64((vpage & 0xffffff000ULL) >> 12, + HPTEG_HASH_BITS_VPTE_LONG); +} + +void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + u64 index; + + /* Add to ePTE list */ + index = kvmppc_mmu_hash_pte(pte->pte.eaddr); + hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + + /* Add to vPTE list */ + index = kvmppc_mmu_hash_vpte(pte->pte.vpage); + hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); + + /* Add to vPTE_long list */ + index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); + hlist_add_head(&pte->list_vpte_long, + &vcpu->arch.hpte_hash_vpte_long[index]); +} + +static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", + pte->pte.eaddr, pte->pte.vpage, pte->host_va); + + /* Different for 32 and 64 bit */ + kvmppc_mmu_invalidate_pte(vcpu, pte); + + if (pte->pte.may_write) + kvm_release_pfn_dirty(pte->pfn); + else + kvm_release_pfn_clean(pte->pfn); + + hlist_del(&pte->list_pte); + hlist_del(&pte->list_vpte); + hlist_del(&pte->list_vpte_long); + + vcpu->arch.hpte_cache_count--; + kmem_cache_free(hpte_cache, pte); +} + +static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) +{ + struct hpte_cache *pte; + struct hlist_node *node, *tmp; + int i; + + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { + struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; + + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + invalidate_pte(vcpu, pte); + } +} + +static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) +{ + struct hlist_head *list; + struct hlist_node *node, *tmp; + struct hpte_cache *pte; + + /* Find the list of entries in the map */ + list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) + if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) + invalidate_pte(vcpu, pte); +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ + u64 i; + + dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", + vcpu->arch.hpte_cache_count, guest_ea, ea_mask); + + guest_ea &= ea_mask; + + switch (ea_mask) { + case ~0xfffUL: + kvmppc_mmu_pte_flush_page(vcpu, guest_ea); + break; + case 0x0ffff000: + /* 32-bit flush w/o segment, go through all possible segments */ + for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) + kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); + break; + case 0: + /* Doing a complete flush -> start from scratch */ + kvmppc_mmu_pte_flush_all(vcpu); + break; + default: + WARN_ON(1); + break; + } +} + +/* Flush with mask 0xfffffffff */ +static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) +{ + struct hlist_head *list; + struct hlist_node *node, *tmp; + struct hpte_cache *pte; + u64 vp_mask = 0xfffffffffULL; + + list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) + if ((pte->pte.vpage & vp_mask) == guest_vp) + 
invalidate_pte(vcpu, pte); +} + +/* Flush with mask 0xffffff000 */ +static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) +{ + struct hlist_head *list; + struct hlist_node *node, *tmp; + struct hpte_cache *pte; + u64 vp_mask = 0xffffff000ULL; + + list = &vcpu->arch.hpte_hash_vpte_long[ + kvmppc_mmu_hash_vpte_long(guest_vp)]; + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + if ((pte->pte.vpage & vp_mask) == guest_vp) + invalidate_pte(vcpu, pte); +} + +void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) +{ + dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", + vcpu->arch.hpte_cache_count, guest_vp, vp_mask); + guest_vp &= vp_mask; + + switch(vp_mask) { + case 0xfffffffffULL: + kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); + break; + case 0xffffff000ULL: + kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); + break; + default: + WARN_ON(1); + return; + } +} + +void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +{ + struct hlist_node *node, *tmp; + struct hpte_cache *pte; + int i; + + dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", + vcpu->arch.hpte_cache_count, pa_start, pa_end); + + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { + struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; + + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + if ((pte->pte.raddr >= pa_start) && + (pte->pte.raddr < pa_end)) + invalidate_pte(vcpu, pte); + } +} + +struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) +{ + struct hpte_cache *pte; + + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); + vcpu->arch.hpte_cache_count++; + + if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM) + kvmppc_mmu_pte_flush_all(vcpu); + + return pte; +} + +void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) +{ + kvmppc_mmu_pte_flush(vcpu, 0, 0); +} + +static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len) +{ + int i; + + for (i = 0; i < len; i++) + INIT_HLIST_HEAD(&hash_list[i]); +} + +int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) +{ + /* init hpte lookup hashes */ + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, + ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, + ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, + ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); + + return 0; +} + +int kvmppc_mmu_hpte_sysinit(void) +{ + /* init hpte slab cache */ + hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache), + sizeof(struct hpte_cache), 0, NULL); + + return 0; +} + +void kvmppc_mmu_hpte_sysexit(void) +{ + kmem_cache_destroy(hpte_cache); +} diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a9f66ab..474f2e2 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -159,10 +159,7 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - struct thread_struct t; - - t.fpscr.val = vcpu->arch.fpscr; - cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) @@ -183,7 +180,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; - 
struct thread_struct t; int r; char tmp[8]; int len = sizeof(u32); @@ -191,8 +187,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ls_type == FPU_LS_DOUBLE) len = sizeof(u64); - t.fpscr.val = vcpu->arch.fpscr; - /* read from memory */ r = kvmppc_ld(vcpu, &addr, len, tmp, true); vcpu->arch.paddr_accessed = addr; @@ -210,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: @@ -229,17 +223,14 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; - struct thread_struct t; int r; char tmp[8]; u64 val; int len; - t.fpscr.val = vcpu->arch.fpscr; - switch (ls_type) { case FPU_LS_SINGLE: - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); val = *((u32*)tmp); len = sizeof(u32); break; @@ -278,13 +269,10 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; - struct thread_struct t; int r; float one = 1.0; u32 tmp[2]; - t.fpscr.val = vcpu->arch.fpscr; - /* read from memory */ if (w) { r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); @@ -308,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -322,14 +310,11 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; - struct thread_struct t; int r; u32 tmp[2]; int len = w ? 
sizeof(u32) : sizeof(u64); - t.fpscr.val = vcpu->arch.fpscr; - - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -517,7 +502,7 @@ static int get_d_signext(u32 inst) static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, int reg_out, int reg_in1, int reg_in2, int reg_in3, int scalar, - void (*func)(struct thread_struct *t, + void (*func)(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, u32 *src3)) { @@ -526,27 +511,25 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, u32 ps0_out; u32 ps0_in1, ps0_in2, ps0_in3; u32 ps1_in1, ps1_in2, ps1_in3; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; /* RC */ WARN_ON(rc); /* PS0 */ - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); - cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; - func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -557,7 +540,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps1_in2 = ps0_in2; if (!(scalar & SCALAR_NO_PS1)) - func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); @@ -568,7 +551,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, int reg_out, int reg_in1, int reg_in2, int scalar, - void (*func)(struct thread_struct *t, + void (*func)(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2)) { @@ -578,27 +561,25 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, u32 ps0_in1, ps0_in2; u32 ps1_out; u32 ps1_in1, ps1_in2; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; /* RC */ WARN_ON(rc); /* PS0 */ - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); - func(&t, &ps0_out, &ps0_in1, &ps0_in2); + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); if (!(scalar & SCALAR_NO_PS0)) { dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); } /* PS1 */ @@ -608,7 +589,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, if (scalar & SCALAR_HIGH) ps1_in2 = ps0_in2; - func(&t, &ps1_out, &ps1_in1, &ps1_in2); + func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); if (!(scalar & SCALAR_NO_PS1)) { qpr[reg_out] = ps1_out; @@ -622,31 +603,29 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int reg_out, int reg_in, - 
void (*func)(struct thread_struct *t, + void (*func)(u64 *t, u32 *dst, u32 *src1)) { u32 *qpr = vcpu->arch.qpr; u64 *fpr = vcpu->arch.fpr; u32 ps0_out, ps0_in; u32 ps1_in; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; /* RC */ WARN_ON(rc); /* PS0 */ - cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); - func(&t, &ps0_out, &ps0_in); + kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); /* PS1 */ ps1_in = qpr[reg_in]; - func(&t, &qpr[reg_out], &ps1_in); + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", ps1_in, qpr[reg_out]); @@ -672,13 +651,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) bool rcomp = (inst & 1) ? true : false; u32 cr = kvmppc_get_cr(vcpu); - struct thread_struct t; #ifdef DEBUG int i; #endif - t.fpscr.val = vcpu->arch.fpscr; - if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; @@ -695,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); + kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); } @@ -819,8 +795,9 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) WARN_ON(rcomp); vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - cvt_df((double*)&vcpu->arch.fpr[ax_rb], - (float*)&vcpu->arch.qpr[ax_rd], &t); + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &vcpu->arch.qpr[ax_rd], + &vcpu->arch.fpscr); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); @@ -830,17 +807,20 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_4X_PS_MERGE10: WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], - (double*)&vcpu->arch.fpr[ax_rd], &t); + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], + &vcpu->arch.fpr[ax_rd], + &vcpu->arch.fpscr); /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - cvt_df((double*)&vcpu->arch.fpr[ax_rb], - (float*)&vcpu->arch.qpr[ax_rd], &t); + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &vcpu->arch.qpr[ax_rd], + &vcpu->arch.fpscr); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], - (double*)&vcpu->arch.fpr[ax_rd], &t); + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], + &vcpu->arch.fpr[ax_rd], + &vcpu->arch.fpscr); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -1275,7 +1255,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); + kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a33ab8c..8d4e35f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -144,7 +144,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int allowed = 0; - ulong msr_mask; + ulong uninitialized_var(msr_mask); bool update_esr = 
false, update_dear = false; switch (priority) { @@ -485,8 +485,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - regs->pc = vcpu->arch.pc; regs->cr = kvmppc_get_cr(vcpu); regs->ctr = vcpu->arch.ctr; @@ -507,8 +505,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); - vcpu_put(vcpu); - return 0; } @@ -516,8 +512,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - vcpu->arch.pc = regs->pc; kvmppc_set_cr(vcpu, regs->cr); vcpu->arch.ctr = regs->ctr; @@ -537,8 +531,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); - vcpu_put(vcpu); - return 0; } @@ -569,9 +561,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { int r; - vcpu_load(vcpu); r = kvmppc_core_vcpu_translate(vcpu, tr); - vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 2b340a3..cb34bbe 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -271,3 +271,21 @@ FPD_THREE_IN(fmsub) FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) + +_GLOBAL(kvm_cvt_fd) + lfd 0,0(r5) /* load up fpscr value */ + MTFSF_L(0) + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 + stfd 0,0(r5) /* save new fpscr value */ + blr + +_GLOBAL(kvm_cvt_df) + lfd 0,0(r5) /* load up fpscr value */ + MTFSF_L(0) + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 + stfd 0,0(r5) /* save new fpscr value */ + blr diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9b8683f..72a4ad8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,11 +36,6 @@ #define CREATE_TRACE_POINTS #include "trace.h" -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); @@ -287,7 +282,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u64 gpr; + u64 uninitialized_var(gpr); if (run->mmio.len > sizeof(gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -423,8 +418,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int r; sigset_t sigsaved; - vcpu_load(vcpu); - if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -456,8 +449,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - vcpu_put(vcpu); - return r; } @@ -523,8 +514,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&irq, argp, sizeof(irq))) goto out; r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); - break; + goto out; } + case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index a719f53..3079f6b 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. 
* r31 will hold "old PTE" * r30 is "new PTE" @@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 98f262d..09dffe6 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) } #endif +void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte) +{ + if (!printk_ratelimit()) + return; + pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", + ea, access, current->comm); + pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", + trap, vsid, ssize, psize, pte); +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } + /* Add _PAGE_PRESENT to the required access perm */ + access |= _PAGE_PRESENT; + + /* Pre-check access permissions (will be re-checked atomically + * in __hash_page_XX but this pre-check is a fast path + */ + if (access & ~pte_val(*ptep)) { + DBG_LOW(" no access !\n"); + return 1; + } + #ifdef CONFIG_HUGETLB_PAGE if (hugeshift) return __hash_page_huge(ea, access, vsid, ptep, trap, local, @@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), pte_val(*(ptep + PTRS_PER_PTE))); #endif - /* Pre-check access permissions (will be re-checked atomically - * in __hash_page_XX but this pre-check is a fast path - */ - if (access & ~pte_val(*ptep)) { - DBG_LOW(" no access !\n"); - return 1; - } - /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ @@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) local, ssize, spp); } + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, psize, + pte_val(*ptep)); #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); #else @@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, void *pgdir; pte_t *ptep; unsigned long flags; - int local = 0; - int ssize; + int rc, ssize, local = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1098,11 +1118,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(pgdir, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, + subpage_protection(pgdir, ea)); + + /* Dump some info in case of hash insertion failure, they should + * never happen so it is 
really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, + mm->context.user_psize, pte_val(*ptep)); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 1995398..cc5c273 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long old_pte, new_pte; unsigned long va, rflags, pa, sz; long slot; - int err = 1; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); /* Search the Linux page table for a match with va */ va = hpt_va(ea, vsid, ssize); - /* - * Check the user's access rights to the page. If access should be - * prevented then send the problem up to do_page_fault. - */ - if (unlikely(access & ~pte_val(*ptep))) - goto out; - /* - * At this point, we have a pte (old_pte) which can be used to build + /* At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, do { old_pte = pte_val(*ptep); - if (old_pte & _PAGE_BUSY) - goto out; + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); @@ -121,8 +121,16 @@ repeat: } } - if (unlikely(slot == -2)) - panic("hash_huge_page: pte_insert failed\n"); + /* + * Hypervisor failure. Restore old pte and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + mmu_psize, old_pte); + return -1; + } new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); } @@ -131,9 +139,5 @@ repeat: * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); - - err = 0; - - out: - return err; + return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 338c6f3..002878c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -454,15 +454,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) } /* - * Retreive and validate the ibm,memblock-size property for drconf memory + * Retreive and validate the ibm,lmb-size property for drconf memory * from the device tree. */ -static u64 of_get_memblock_size(struct device_node *memory) +static u64 of_get_lmb_size(struct device_node *memory) { const u32 *prop; u32 len; - prop = of_get_property(memory, "ibm,memblock-size", &len); + prop = of_get_property(memory, "ibm,lmb-size", &len); if (!prop || len < sizeof(unsigned int)) return 0; @@ -618,7 +618,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, static inline int __init read_usm_ranges(const u32 **usm) { /* - * For each memblock in ibm,dynamic-memory a corresponding + * For each lmb in ibm,dynamic-memory a corresponding * entry in linux,drconf-usable-memory property contains * a counter followed by that many (base, size) duple. 
* read the counter from linux,drconf-usable-memory @@ -634,7 +634,7 @@ static void __init parse_drconf_memory(struct device_node *memory) { const u32 *dm, *usm; unsigned int n, rc, ranges, is_kexec_kdump = 0; - unsigned long memblock_size, base, size, sz; + unsigned long lmb_size, base, size, sz; int nid; struct assoc_arrays aa; @@ -642,8 +642,8 @@ static void __init parse_drconf_memory(struct device_node *memory) if (!n) return; - memblock_size = of_get_memblock_size(memory); - if (!memblock_size) + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) return; rc = of_get_assoc_arrays(memory, &aa); @@ -667,7 +667,7 @@ static void __init parse_drconf_memory(struct device_node *memory) continue; base = drmem.base_addr; - size = memblock_size; + size = lmb_size; ranges = 1; if (is_kexec_kdump) { @@ -1128,7 +1128,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, { const u32 *dm; unsigned int drconf_cell_cnt, rc; - unsigned long memblock_size; + unsigned long lmb_size; struct assoc_arrays aa; int nid = -1; @@ -1136,8 +1136,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, if (!drconf_cell_cnt) return -1; - memblock_size = of_get_memblock_size(memory); - if (!memblock_size) + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) return -1; rc = of_get_assoc_arrays(memory, &aa); @@ -1156,7 +1156,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, continue; if ((scn_addr < drmem.base_addr) - || (scn_addr >= (drmem.base_addr + memblock_size))) + || (scn_addr >= (drmem.base_addr + lmb_size))) continue; nid = of_drconf_to_nid_single(&drmem, &aa); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 2c0ed87..3124cf7 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -136,7 +136,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, * As lv1_read_htab_entries() does not give us the RPN, we can * not synthesize the new hpte_r value here, and therefore can * not update the hpte with lv1_insert_htab_entry(), so we - * insted invalidate it and ask the caller to update it via + * instead invalidate it and ask the caller to update it via * ps3_hpte_insert() by returning a -1 value. 
*/ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index deab5f9..bc88036 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int memblock_size; + unsigned int lmb_size; int ret = -EINVAL; /* @@ -87,9 +87,9 @@ static int pseries_remove_memory(struct device_node *np) return ret; base = *(unsigned long *)regs; - memblock_size = regs[3]; + lmb_size = regs[3]; - ret = pseries_remove_memblock(base, memblock_size); + ret = pseries_remove_memblock(base, lmb_size); return ret; } @@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int memblock_size; + unsigned int lmb_size; int ret = -EINVAL; /* @@ -116,36 +116,36 @@ static int pseries_add_memory(struct device_node *np) return ret; base = *(unsigned long *)regs; - memblock_size = regs[3]; + lmb_size = regs[3]; /* * Update memory region to represent the memory add */ - ret = memblock_add(base, memblock_size); + ret = memblock_add(base, lmb_size); return (ret < 0) ? -EINVAL : 0; } static int pseries_drconf_memory(unsigned long *base, unsigned int action) { struct device_node *np; - const unsigned long *memblock_size; + const unsigned long *lmb_size; int rc; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (!np) return -EINVAL; - memblock_size = of_get_property(np, "ibm,memblock-size", NULL); - if (!memblock_size) { + lmb_size = of_get_property(np, "ibm,lmb-size", NULL); + if (!lmb_size) { of_node_put(np); return -EINVAL; } if (action == PSERIES_DRCONF_MEM_ADD) { - rc = memblock_add(*base, *memblock_size); + rc = memblock_add(*base, *lmb_size); rc = (rc < 0) ? -EINVAL : 0; } else if (action == PSERIES_DRCONF_MEM_REMOVE) { - rc = pseries_remove_memblock(*base, *memblock_size); + rc = pseries_remove_memblock(*base, *lmb_size); } else { rc = -EINVAL; } |
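A recurring change in the paired-singles emulation above is the replacement of cvt_fd()/cvt_df(), which needed a throwaway struct thread_struct just to carry the FPSCR, with the new kvm_cvt_fd()/kvm_cvt_df() helpers in fpu.S that take a u64 *fpscr directly. Judging from the assembly (lfs/stfd respectively lfd/stfs, bracketed by an FPSCR load and store), their data movement is a single/double conversion done through the FPU under the guest's FPSCR. The following is a plain C model of just that conversion; the FPSCR handling of the real helpers is deliberately omitted since it has no portable C equivalent, and the function names here are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * C model of the data movement in kvm_cvt_fd()/kvm_cvt_df() (fpu.S).
 * The real helpers also load the guest FPSCR before the conversion and
 * store the updated FPSCR afterwards; that part is omitted here.
 */
static void cvt_fd_model(const uint32_t *from, uint64_t *to)
{
	float f;
	double d;

	memcpy(&f, from, sizeof(f));   /* reinterpret the 32-bit FPR image */
	d = (double)f;                 /* lfs + stfd: single -> double */
	memcpy(to, &d, sizeof(d));
}

static void cvt_df_model(const uint64_t *from, uint32_t *to)
{
	double d;
	float f;

	memcpy(&d, from, sizeof(d));
	f = (float)d;                  /* lfd + stfs: double -> single */
	memcpy(to, &f, sizeof(f));
}

int main(void)
{
	float one_and_half = 1.5f;
	uint32_t s;
	uint64_t d;

	memcpy(&s, &one_and_half, sizeof(s));
	cvt_fd_model(&s, &d);   /* the 64-bit image now holds 1.5 as a double */
	cvt_df_model(&d, &s);   /* and back to a single-precision image */
	printf("single=0x%08x double=0x%016llx\n",
	       (unsigned)s, (unsigned long long)d);
	return 0;
}

In the emulation paths the real helper is called as, for example, kvm_cvt_df(&vcpu->arch.fpr[reg], &ps0_in, &vcpu->arch.fpscr), so the guest's rounding mode and status bits come from vcpu->arch.fpscr instead of a temporary thread_struct.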