From 42bdf991f4cad9678ee2b98c5c2e9299a3f986ef Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 15 Apr 2013 23:30:13 -0300
Subject: KVM: x86: fix maintenance of guest/host xcr0 state

Emulation of xcr0 writes zero guest_xcr0_loaded variable so that
subsequent VM-entry reloads CPU's xcr0 with guests xcr0 value.

However, this is incorrect because guest_xcr0_loaded variable is
read to decide whether to reload hosts xcr0.

In case the vcpu thread is scheduled out after the guest_xcr0_loaded = 0
assignment, and scheduler decides to preload FPU:

switch_to
{
  __switch_to
    __math_state_restore
      restore_fpu_checking
        fpu_restore_checking
          if (use_xsave())
              fpu_xrstor_checking
		xrstor64 with CPU's xcr0 == guests xcr0

Fix by properly restoring hosts xcr0 during emulation of xcr0 writes.

Analyzed-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05a8b1a..094b5d9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -555,6 +555,25 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
+static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
+			!vcpu->guest_xcr0_loaded) {
+		/* kvm_set_xcr() also depends on this */
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+		vcpu->guest_xcr0_loaded = 1;
+	}
+}
+
+static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->guest_xcr0_loaded) {
+		if (vcpu->arch.xcr0 != host_xcr0)
+			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+		vcpu->guest_xcr0_loaded = 0;
+	}
+}
+
 int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
 	u64 xcr0;
@@ -571,8 +590,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 		return 1;
 	if (xcr0 & ~host_xcr0)
 		return 1;
+	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
-	vcpu->guest_xcr0_loaded = 0;
 	return 0;
 }
 
@@ -5614,25 +5633,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
-{
-	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
-			!vcpu->guest_xcr0_loaded) {
-		/* kvm_set_xcr() also depends on this */
-		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
-		vcpu->guest_xcr0_loaded = 1;
-	}
-}
-
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->guest_xcr0_loaded) {
-		if (vcpu->arch.xcr0 != host_xcr0)
-			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
-		vcpu->guest_xcr0_loaded = 0;
-	}
-}
-
 static void process_nmi(struct kvm_vcpu *vcpu)
 {
 	unsigned limit = 2;
-- 
cgit v1.1


From 7dac16c379a876e256bc7349cd80007e7f9f2b59 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Wed, 8 May 2013 10:57:29 +0800
Subject: KVM: Fix kvm_irqfd_init initialization

In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
kvm_arch_init() will fail with -EEXIST, then kvm_irqfd_exit() will be
called on the error handling path. This way, the kvm_irqfd system will
not be ready.

This patch fix the following:

BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
PGD 0
Oops: 0002 [#1] SMP
Modules linked in: vhost_net
CPU 6
Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
Stack:
 ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
 0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
 ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
Call Trace:
 [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
 [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
 [<ffffffff810ac949>] queue_work+0x19/0x20
 [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
 [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
 [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
 [<ffffffff810c9545>] ? update_curr+0xf5/0x180
 [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
 [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
 [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
 [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
 [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
 [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
RSP <ffff880221721cc8>
CR2: 0000000000000000
---[ end trace 13fb1e4b6e5ab21f ]---

Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 virt/kvm/kvm_main.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 45f0936..8a1889c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3105,13 +3105,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	int r;
 	int cpu;
 
-	r = kvm_irqfd_init();
-	if (r)
-		goto out_irqfd;
 	r = kvm_arch_init(opaque);
 	if (r)
 		goto out_fail;
 
+	/*
+	 * kvm_arch_init makes sure there's at most one caller
+	 * for architectures that support multiple implementations,
+	 * like intel and amd on x86.
+	 * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
+	 * conflicts in case kvm is already setup for another implementation.
+	 */
+	r = kvm_irqfd_init();
+	if (r)
+		goto out_irqfd;
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -3186,10 +3194,10 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
-	kvm_arch_exit();
-out_fail:
 	kvm_irqfd_exit();
 out_irqfd:
+	kvm_arch_exit();
+out_fail:
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
-- 
cgit v1.1


From 8d76c49e9ffeee839bc0b7a3278a23f99101263e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 8 May 2013 18:38:44 +0300
Subject: KVM: VMX: fix halt emulation while emulating invalid guest sate

The invalid guest state emulation loop does not check halt_request
which causes 100% cpu loop while guest is in halt and in invalid
state, but more serious issue is that this leaves halt_request set, so
random instruction emulated by vm86 #GP exit can be interpreted
as halt which causes guest hang. Fix both problems by handling
halt_request in emulation loop.

Reported-by: Tomas Papan <tomas.papan@gmail.com>
Tested-by: Tomas Papan <tomas.papan@gmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
CC: stable@vger.kernel.org
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 25a791e..260a919 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5434,6 +5434,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 			return 0;
 		}
 
+		if (vcpu->arch.halt_request) {
+			vcpu->arch.halt_request = 0;
+			ret = kvm_emulate_halt(vcpu);
+			goto out;
+		}
+
 		if (signal_pending(current))
 			goto out;
 		if (need_resched())
-- 
cgit v1.1


From a035d5c64d08a8ac12d81b596e7fa6d95a73c347 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 May 2013 11:32:49 +0200
Subject: KVM: emulator: emulate AAM

This is used by SGABIOS, KVM breaks with emulate_invalid_guest_state=1.

AAM needs the source operand to be unsigned; do the same in AAD as well
for consistency, even though it does not affect the result.

Reported-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: stable@vger.kernel.org # 3.9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8e517bb..55322a7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2996,6 +2996,28 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_aam(struct x86_emulate_ctxt *ctxt)
+{
+	u8 al, ah;
+
+	if (ctxt->src.val == 0)
+		return emulate_de(ctxt);
+
+	al = ctxt->dst.val & 0xff;
+	ah = al / ctxt->src.val;
+	al %= ctxt->src.val;
+
+	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
+
+	/* Set PF, ZF, SF */
+	ctxt->src.type = OP_IMM;
+	ctxt->src.val = 0;
+	ctxt->src.bytes = 1;
+	fastop(ctxt, em_or);
+
+	return X86EMUL_CONTINUE;
+}
+
 static int em_aad(struct x86_emulate_ctxt *ctxt)
 {
 	u8 al = ctxt->dst.val & 0xff;
@@ -3936,7 +3958,8 @@ static const struct opcode opcode_table[256] = {
 	/* 0xD0 - 0xD7 */
 	G(Src2One | ByteOp, group2), G(Src2One, group2),
 	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
-	N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
+	I(DstAcc | SrcImmUByte | No64, em_aam),
+	I(DstAcc | SrcImmUByte | No64, em_aad), N, N,
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
-- 
cgit v1.1


From 7fa57952d70f5737513d8319395e471d107e4e0d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 May 2013 11:32:50 +0200
Subject: KVM: emulator: emulate XLAT

This is used by SGABIOS, KVM breaks with emulate_invalid_guest_state=1.
It is just a MOV in disguise, with a funny source address.

Reported-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: stable@vger.kernel.org # 3.9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 55322a7..a06a550 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -60,6 +60,7 @@
 #define OpGS              25ull  /* GS */
 #define OpMem8            26ull  /* 8-bit zero extended memory operand */
 #define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
+#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
 
 #define OpBits             5  /* Width of operand field */
 #define OpMask             ((1ull << OpBits) - 1)
@@ -99,6 +100,7 @@
 #define SrcImmUByte (OpImmUByte << SrcShift)
 #define SrcImmU     (OpImmU << SrcShift)
 #define SrcSI       (OpSI << SrcShift)
+#define SrcXLat     (OpXLat << SrcShift)
 #define SrcImmFAddr (OpImmFAddr << SrcShift)
 #define SrcMemFAddr (OpMemFAddr << SrcShift)
 #define SrcAcc      (OpAcc << SrcShift)
@@ -3959,7 +3961,8 @@ static const struct opcode opcode_table[256] = {
 	G(Src2One | ByteOp, group2), G(Src2One, group2),
 	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
 	I(DstAcc | SrcImmUByte | No64, em_aam),
-	I(DstAcc | SrcImmUByte | No64, em_aad), N, N,
+	I(DstAcc | SrcImmUByte | No64, em_aad), N,
+	I(DstAcc | SrcXLat | ByteOp, em_mov),
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
@@ -4221,6 +4224,16 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->val = 0;
 		op->count = 1;
 		break;
+	case OpXLat:
+		op->type = OP_MEM;
+		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+		op->addr.mem.ea =
+			register_address(ctxt,
+				reg_read(ctxt, VCPU_REGS_RBX) +
+				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
+		op->addr.mem.seg = seg_override(ctxt);
+		op->val = 0;
+		break;
 	case OpImmFAddr:
 		op->type = OP_IMM;
 		op->addr.mem.ea = ctxt->_eip;
-- 
cgit v1.1


From 326f578f7e1443bac2333712dd130a261ec15288 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 May 2013 11:32:51 +0200
Subject: KVM: emulator: emulate SALC

This is an almost-undocumented instruction available in 32-bit mode.
I say "almost" undocumented because AMD documents it in their opcode
maps just to say that it is unavailable in 64-bit mode (sections
"A.2.1 One-Byte Opcodes" and "B.3 Invalid and Reassigned Instructions
in 64-Bit Mode").

It is roughly equivalent to "sbb %al, %al" except it does not
set the flags.  Use fastop to emulate it, but do not use the opcode
directly because it would fail if the host is 64-bit!

Reported-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: stable@vger.kernel.org # 3.9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a06a550..8db0010 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -535,6 +535,9 @@ FOP_SETCC(setle)
 FOP_SETCC(setnle)
 FOP_END;
 
+FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_END;
+
 #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
 	do {								\
 		unsigned long _tmp;					\
@@ -3961,7 +3964,8 @@ static const struct opcode opcode_table[256] = {
 	G(Src2One | ByteOp, group2), G(Src2One, group2),
 	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
 	I(DstAcc | SrcImmUByte | No64, em_aam),
-	I(DstAcc | SrcImmUByte | No64, em_aad), N,
+	I(DstAcc | SrcImmUByte | No64, em_aad),
+	F(DstAcc | ByteOp | No64, em_salc),
 	I(DstAcc | SrcXLat | ByteOp, em_mov),
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
-- 
cgit v1.1