From f6e78475894d6534d7d62714a95e2265f53d2a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 2 Aug 2010 15:30:20 +0300 Subject: KVM: Use kvm_get_rflags() and kvm_set_rflags() instead of the raw versions Some rflags bits are owned by the host, not guest, so we need to use kvm_get_rflags() to strip those bits away or kvm_set_rflags() to add them back. Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 14 +++++++------- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d5..2a19322 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -975,7 +975,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -2127,7 +2127,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2184,7 +2184,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2312,7 +2312,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2323,7 +2323,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2341,7 +2341,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -3384,7 +3384,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcb..d09833e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2113,7 +2113,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!is_protmode(vcpu)) return 0; - if (vmx_get_rflags(vcpu) & 
X86_EFLAGS_VM) /* if virtual 8086 */ + if (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ return 3; return vmcs_read16(GUEST_CS_SELECTOR) & 3; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6..3a557ee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4310,7 +4310,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : @@ -4340,7 +4340,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.eip = c->eip; memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (irq == NMI_VECTOR) vcpu->arch.nmi_pending = false; @@ -4473,7 +4473,7 @@ restart: r = EMULATE_DONE; toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); @@ -5592,7 +5592,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } -- cgit v1.1 From 6de12732c42c7070af42e3d6e42ecee2838fc920 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 12:51:22 +0200 Subject: KVM: VMX: Optimize vmx_get_rflags() If called several times within the same exit, return cached results. 
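The optimization is a read-through cache keyed on a per-register "available" bit: the first read within an exit pays for the VMREAD and latches the value, later reads hit the cache, and the bit is cleared again on the next vmexit. A standalone C sketch of the pattern (all names below are hypothetical stand-ins, not the kernel code; vmread_rflags() models the real VMREAD):

    #include <stdio.h>

    #define REG_RFLAGS 0

    static unsigned long regs_avail;     /* which cached registers are valid */
    static unsigned long cached_rflags;  /* models vmx->rflags */
    static int vmreads;                  /* counts the expensive accesses */

    static unsigned long vmread_rflags(void)
    {
        vmreads++;
        return 0x2;                      /* pretend hardware value */
    }

    static unsigned long get_rflags(void)
    {
        if (!(regs_avail & (1UL << REG_RFLAGS))) {
            cached_rflags = vmread_rflags();
            regs_avail |= 1UL << REG_RFLAGS;
        }
        return cached_rflags;
    }

    int main(void)
    {
        regs_avail = 0;                  /* every vmexit invalidates */
        get_rflags();
        get_rflags();
        get_rflags();
        printf("3 reads, %d VMREAD(s)\n", vmreads);  /* 1 VMREAD */
        return 0;
    }

vmx_set_rflags() keeps the cache coherent on the write side by storing the new value and setting the bit, so a read following a write does not touch the VMCS either.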
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af099..5af4264 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -118,6 +118,7 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d09833e..4d11707 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -130,6 +130,7 @@ struct vcpu_vmx { u8 fail; u32 exit_intr_info; u32 idt_vectoring_info; + ulong rflags; struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; @@ -970,17 +971,23 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + rflags = vmcs_readl(GUEST_RFLAGS); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + to_vmx(vcpu)->rflags = rflags; } - return rflags; + return to_vmx(vcpu)->rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -4124,6 +4131,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) | (1 << VCPU_EXREG_PDPTR) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; -- cgit v1.1 From f4c63e5d5a356b652a3d984edbefca289176c40f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 14:54:28 +0200 Subject: KVM: VMX: Optimize vmx_get_cpl() In long mode, vm86 mode is disallowed, so we need not check for it. Reading rflags.vm may require a VMREAD, so it is expensive. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4d11707..e8a35ee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2120,7 +2120,8 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!is_protmode(vcpu)) return 0; - if (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (!is_long_mode(vcpu) + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; return vmcs_read16(GUEST_CS_SELECTOR) & 3; -- cgit v1.1 From 69c730289011df706a1c9890d6e6c5ee822623c7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 15:26:44 +0200 Subject: KVM: VMX: Cache cpl We may read the cpl quite often in the same vmexit (instruction privilege check, memory access checks for instruction and operands), so we gain a bit if we cache the value. 
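The cpl cache below reuses the same "available bit" scheme as the rflags cache above, with one extra obligation: every operation that can change the CPL (rflags writes, cr0 writes, segment loads) must drop the cached value, which is why the patch adds __clear_bit(VCPU_EXREG_CPL, ...) to those writers. A standalone C sketch of the invalidation discipline (hypothetical names):

    #include <stdio.h>

    static int cpl_valid;    /* models VCPU_EXREG_CPL in regs_avail */
    static int cached_cpl;
    static int vmreads;

    /* Stand-in for __vmx_get_cpl(), i.e. the VMREAD path. */
    static int read_cpl_slow(void) { vmreads++; return 3; }

    static int get_cpl(void)
    {
        if (!cpl_valid) {
            cached_cpl = read_cpl_slow();
            cpl_valid = 1;
        }
        return cached_cpl;
    }

    /* Writers that may affect the CPL invalidate rather than update;
     * segment loads and rflags writes would do the same. */
    static void set_cr0(unsigned long cr0) { cpl_valid = 0; }

    int main(void)
    {
        get_cpl(); get_cpl();    /* one VMREAD covers both */
        set_cr0(0x80050033);     /* CPL may have changed */
        get_cpl();               /* pays for a second VMREAD */
        printf("VMREADs: %d\n", vmreads);   /* prints 2 */
        return 0;
    }

Invalidating instead of recomputing keeps the writers cheap; the VMREAD is paid only if somebody actually asks for the CPL again.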
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5af4264..35f81b1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -119,6 +119,7 @@ enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e8a35ee..8f9e77e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -128,6 +128,7 @@ struct vcpu_vmx { unsigned long host_rsp; int launched; u8 fail; + u8 cpl; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -987,6 +988,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; @@ -2005,6 +2007,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -2115,7 +2118,7 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) return vmcs_readl(sf->base); } -static int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu) { if (!is_protmode(vcpu)) return 0; @@ -2127,6 +2130,16 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) return vmcs_read16(GUEST_CS_SELECTOR) & 3; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); + } + return to_vmx(vcpu)->cpl; +} + + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2192,6 +2205,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -4133,6 +4147,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; -- cgit v1.1 From 9d58b93192065f4b2ba6b880e9b0dab0bc11d0ba Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 16:52:07 +0200 Subject: KVM: VMX: Avoid vmx_recover_nmi_blocking() when unneeded When we haven't injected an interrupt, we don't need to recover the nmi blocking state (since the guest can't set it by itself). This allows us to avoid a VMREAD later on. 
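Unlike the rflags and cpl caches, nmi_known_unmasked is a one-sided hint: it is trusted only when it says NMIs are unmasked, and any event that might establish NMI blocking simply clears it, falling back to the VMREAD. A standalone C sketch (hypothetical names):

    #include <stdbool.h>
    #include <stdio.h>

    static bool nmi_known_unmasked;
    static int vmreads;

    /* Stand-in for the VMREAD of GUEST_INTERRUPTIBILITY_INFO. */
    static bool read_nmi_mask_slow(void)
    {
        vmreads++;
        return false;
    }

    static bool get_nmi_mask(void)
    {
        if (nmi_known_unmasked)
            return false;             /* hint is authoritative: no VMREAD */
        return read_nmi_mask_slow();  /* unsure: ask the hardware */
    }

    /* Delivering an NMI may set hardware blocking, so drop the hint. */
    static void inject_nmi(void) { nmi_known_unmasked = false; }

    int main(void)
    {
        nmi_known_unmasked = true;    /* e.g. NMIs were just unmasked */
        get_nmi_mask();               /* free */
        inject_nmi();
        get_nmi_mask();               /* one VMREAD */
        printf("VMREADs: %d\n", vmreads);   /* prints 1 */
        return 0;
    }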
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8f9e77e..53bf6ae 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -129,6 +129,7 @@ struct vcpu_vmx { int launched; u8 fail; u8 cpl; + bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -2959,6 +2960,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; + vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); @@ -2983,6 +2985,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) return to_vmx(vcpu)->soft_vnmi_blocked; + if (to_vmx(vcpu)->nmi_known_unmasked) + return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; } @@ -2996,6 +3000,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmx->vnmi_blocked_time = 0; } } else { + vmx->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -3527,9 +3532,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) + if (cpu_has_virtual_nmis()) { vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + vmx->nmi_known_unmasked = false; + } break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3916,6 +3923,8 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; if (cpu_has_virtual_nmis()) { + if (vmx->nmi_known_unmasked) + return; unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -3932,6 +3941,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); @@ -3970,6 +3983,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, */ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + vmx->nmi_known_unmasked = true; break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = -- cgit v1.1 From f9902069c41254ad116e089e64ea21d3a000cc41 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:20:29 +0200 Subject: KVM: VMX: Qualify check for host NMI Check for the exit reason first; this allows us, later, to avoid a VMREAD for VM_EXIT_INTR_INFO_FIELD. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 53bf6ae..89130ba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3905,7 +3905,8 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI && + (exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (exit_intr_info & INTR_INFO_VALID_MASK)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); -- cgit v1.1 From 00eba012d53e63f620455f7013917e4bf59424f2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:24:54 +0200 Subject: KVM: VMX: Refactor vmx_complete_atomic_exit() Move the exit reason checks to the front of the function, for early exit in the common case. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 89130ba..51aa827 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3896,17 +3896,20 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; + + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + return; + + exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI - && is_machine_check(exit_intr_info))) + if (is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI && - (exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (exit_intr_info & INTR_INFO_VALID_MASK)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); -- cgit v1.1 From c5ca8e572c4d8cb8dec1cf5b3fc9c7066f6b2c29 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:37:37 +0200 Subject: KVM: VMX: Don't VMREAD VM_EXIT_INTR_INFO unconditionally Only read it if we're going to use it later. 
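This and the two preceding patches share one shape: test a cheap, already-read field (the exit reason) first, and issue the expensive VMREAD only on the rare paths that actually consume its result. A standalone C sketch of the resulting early exit (hypothetical names):

    #include <stdio.h>

    enum exit_reason { EXIT_IO, EXIT_EXCEPTION_NMI, EXIT_MCE };

    static int vmreads;
    static unsigned int read_intr_info_slow(void) { vmreads++; return 0; }

    static void complete_atomic_exit(enum exit_reason reason)
    {
        unsigned int intr_info;

        if (reason != EXIT_EXCEPTION_NMI && reason != EXIT_MCE)
            return;                   /* common case: no VMREAD at all */

        intr_info = read_intr_info_slow();
        (void)intr_info;              /* ...handle NMIs and #MCs here... */
    }

    int main(void)
    {
        for (int i = 0; i < 1000; i++)
            complete_atomic_exit(EXIT_IO);   /* e.g. an I/O-bound guest */
        complete_atomic_exit(EXIT_EXCEPTION_NMI);
        printf("VMREADs: %d\n", vmreads);    /* prints 1 */
        return 0;
    }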
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 51aa827..f95f48b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3902,6 +3902,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) return; + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ @@ -3919,7 +3920,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; bool unblock_nmi; u8 vector; bool idtv_info_valid; @@ -3929,6 +3930,11 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) if (cpu_has_virtual_nmis()) { if (vmx->nmi_known_unmasked) return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -4176,7 +4182,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); -- cgit v1.1 From 887864758580c80710947c38a4692032163777df Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:39:45 +0200 Subject: KVM: VMX: Use cached VM_EXIT_INTR_INFO in handle_exception vmx_complete_atomic_exit() cached it for us, so we can use it here. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f95f48b..1bdb49d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3118,7 +3118,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) enum emulation_result er; vect_info = vmx->idt_vectoring_info; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info)) return handle_machine_check(vcpu); -- cgit v1.1 From 89a9fb78b5bd8bece353449079726556ecab41df Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 24 Mar 2011 09:45:10 +0100 Subject: KVM: SVM: Remove unused svm_features We use boot_cpu_has now. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2a19322..cb43e98 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -376,7 +376,6 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; @@ -802,8 +801,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; -- cgit v1.1 From 654f06fc651b01782015185e5b049197255463a3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Mar 2011 15:02:47 +0200 Subject: KVM: VMX: simplify NMI mask management Use vmx_set_nmi_mask() instead of open-coding management of the hardware bit and the software hint (nmi_known_unmasked). 
There's a slight change of behaviour when running without hardware virtual NMI support - we now clear the NMI mask if NMI delivery faulted in that case as well. This improves emulation accuracy. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1bdb49d..2b99ae7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3532,11 +3532,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) { - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - vmx->nmi_known_unmasked = false; - } + vmx_set_nmi_mask(vcpu, true); break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3991,9 +3987,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. */ - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - vmx->nmi_known_unmasked = true; + vmx_set_nmi_mask(&vmx->vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = -- cgit v1.1 From 3291892450e670c4f170e271cd0c4b63d5a8e41a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 23 Mar 2011 13:40:42 -0300 Subject: KVM: expose async pf through our standard mechanism As Avi recently mentioned, the new standard mechanism for exposing features is KVM_GET_SUPPORTED_CPUID, not spamming CAPs. For some reason async pf missed that. So expose async_pf here. Signed-off-by: Glauber Costa CC: Gleb Natapov CC: Avi Kivity Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a557ee..a38fb9b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2418,6 +2418,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | + (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); entry->ebx = 0; entry->ecx = 0; -- cgit v1.1 From c761e5868e6737abe0464636ebd7fcbb6814c626 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 1 Apr 2011 11:25:03 -0300 Subject: Revert "KVM: Fix race between nmi injection and enabling nmi window" This reverts commit f86368493ec038218e8663cc1b6e5393cd8e008a. Simpler fix to follow. 
Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a38fb9b..b9402d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -361,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -5208,8 +5208,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); -- cgit v1.1 From 1499e54af03ae51a937c59035bc86002deae0572 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Fri, 1 Apr 2011 11:26:29 -0300 Subject: KVM: x86: better fix for race between nmi injection and enabling nmi window Fix race between nmi injection and enabling nmi window in a simpler way. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9402d5..692c70d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5171,6 +5171,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; + bool nmi_pending; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; @@ -5214,11 +5215,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(r)) goto out; + /* + * An NMI can be injected between local nmi_pending read and + * vcpu->arch.nmi_pending read inside inject_pending_event(). + * But in that case, KVM_REQ_EVENT will be set, which makes + * the race described above benign. + */ + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) + if (nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); -- cgit v1.1 From 70252a1053636c35776d6bc843dd3b260d9d6de1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 19 Jan 2010 12:51:22 +0200 Subject: KVM: extend in-kernel mmio to handle >8 byte transactions Needed for coalesced mmio using sse. 
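The heart of the change is the chunking loop below: rather than offering the in-kernel bus a transaction of arbitrary size, the access is cut into at-most-8-byte pieces, and the function now reports how many leading bytes it handled so the caller can forward the remainder to userspace. A standalone C sketch of the loop (hypothetical names; bus_write() models kvm_io_bus_write()):

    #include <stdio.h>

    #define min(a, b) ((a) < (b) ? (a) : (b))

    /* Pretend only MMIO below 0x1000 has an in-kernel handler. */
    static int bus_write(unsigned long addr, int len, const void *v)
    {
        return addr + len <= 0x1000 ? 0 : -1;   /* 0 = handled */
    }

    /* Returns the number of leading bytes handled in the kernel. */
    static int mmio_write(unsigned long addr, int len, const void *v)
    {
        int handled = 0, n;

        do {
            n = min(len, 8);                    /* the bus is 64 bits wide */
            if (bus_write(addr, n, v))
                break;
            handled += n;
            addr += n;
            len -= n;
            v = (const char *)v + n;
        } while (len);

        return handled;
    }

    int main(void)
    {
        char buf[16] = "0123456789abcde";
        /* First 8-byte chunk lands in-kernel, second one does not. */
        printf("handled %d of 16\n", mmio_write(0xff8, 16, buf));  /* 8 */
        return 0;
    }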
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 692c70d..3b234c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void) static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static void kvm_set_segment(struct kvm_vcpu *vcpu, @@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -3795,10 +3819,14 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); + + if (handled == bytes) return X86EMUL_CONTINUE; - } + + gpa += handled; + bytes -= handled; + val += handled; trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); @@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3848,9 +3877,14 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); + if (handled == bytes) return X86EMUL_CONTINUE; + gpa += handled; + bytes -= handled; + val += handled; + vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; -- cgit v1.1 From 5287f194bf0d7062d6d99b725366202556f03e28 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 19 Jan 2010 14:20:10 +0200 Subject: KVM: Split mmio completion into a function Make room for sse mmio completions. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b234c1..bb6b9d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5441,6 +5441,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int complete_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) + return 1; + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5467,20 +5488,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - if (vcpu->arch.pio.count || vcpu->mmio_needed) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) { - r = 0; - goto out; - } - } + r = complete_mmio(vcpu); + if (r <= 0) + goto out; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); -- cgit v1.1 From cef4dea07f6720b36cc93e18a2e68be4bdb71a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 12:01:20 +0200 Subject: KVM: 16-byte mmio support Since sse instructions can issue 16-byte mmios, we need to support them. We can't increase the kvm_run mmio buffer size to 16 bytes without breaking compatibility, so instead we break the large mmios into two smaller 8-byte ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees. 
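Because kvm_run's mmio buffer must stay 8 bytes for ABI compatibility, a 16-byte access is completed over two userspace round trips, with mmio_index recording how far the transaction has progressed. A standalone C model of the resume logic (hypothetical names; each complete() call stands for one return from userspace):

    #include <stdio.h>
    #include <string.h>

    static char mmio_data[16];   /* whole transaction, staged in-kernel */
    static int mmio_size, mmio_index;
    static char run_buf[8];      /* models the 8-byte kvm_run mmio buffer */

    static void start_write(const void *val, int bytes)
    {
        memcpy(mmio_data, val, bytes);
        mmio_size = bytes;
        mmio_index = 0;
        memcpy(run_buf, mmio_data, 8);      /* first chunk out to userspace */
    }

    /* Returns 1 when the transaction is done, 0 to exit to userspace again. */
    static int complete(void)
    {
        mmio_index += 8;
        if (mmio_index < mmio_size) {
            memcpy(run_buf, mmio_data + mmio_index, 8);   /* next chunk */
            return 0;
        }
        return 1;
    }

    int main(void)
    {
        char val[16] = "0123456789abcde";   /* e.g. an xmm register image */
        start_write(val, 16);
        printf("done after exit 1? %d\n", complete());   /* 0 */
        printf("done after exit 2? %d\n", complete());   /* 1 */
        return 0;
    }

Each chunk is still a single aligned 8-byte bus transaction, which is why the split preserves the atomicity guarantees the changelog mentions.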
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35f81b1..e820c63 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb6b9d3..11d692c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3833,8 +3833,10 @@ mmio: vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3886,11 +3888,14 @@ mmio: val += handled; vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } @@ -4498,11 +4503,9 @@ restart: if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + } else if (vcpu->mmio_needed) r = EMULATE_DO_MMIO; - } else if (r == EMULATION_RESTART) + else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; @@ -5450,9 +5453,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu) return 1; if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, run->mmio.data, 8); - vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); -- cgit v1.1 From 1d6b114f20d06ac0749686e4d7b7c7913d9116db Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 16:00:35 +0200 Subject: KVM: x86 emulator: do not munge rep prefix Currently we store a rep prefix as 1 or 2 depending on whether it is a REPE or REPNE. Since sse instructions depend on the prefix value, store it as the original opcode to simplify things further on. 
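Keeping the raw byte means rep_prefix can later serve directly as the 0xf2/0xf3 selector that SIMD decoding needs, with no enum translation in between. A minimal standalone sketch of the decode step (hypothetical names):

    #include <stdio.h>

    #define REPNE_PREFIX 0xf2   /* was stored as 2 */
    #define REPE_PREFIX  0xf3   /* was stored as 1 */

    static unsigned char rep_prefix;

    static void decode_prefix(unsigned char b)
    {
        switch (b) {
        case REPNE_PREFIX:   /* REPNE/REPNZ */
        case REPE_PREFIX:    /* REP/REPE/REPZ */
            rep_prefix = b;  /* keep the opcode byte itself */
            break;
        }
    }

    int main(void)
    {
        decode_prefix(0xf3);
        /* The stored value doubles as the SIMD prefix selector. */
        printf("simd prefix selector: 0x%02x\n", rep_prefix);  /* 0xf3 */
        return 0;
    }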
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++-- arch/x86/kvm/emulate.c | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0f52135..c00aed1 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -249,8 +249,8 @@ struct x86_emulate_ctxt { }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b8..075bb6f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2692,10 +2692,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->lock_prefix = 1; break; case 0xf2: /* REPNE/REPNZ */ - c->rep_prefix = REPNE_PREFIX; - break; case 0xf3: /* REP/REPE/REPZ */ - c->rep_prefix = REPE_PREFIX; + c->rep_prefix = c->b; break; default: goto done_prefixes; -- cgit v1.1 From 5037f6f324cdcc6c9071dc774aba992f96c7e5ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 28 Mar 2011 16:53:59 +0200 Subject: KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Needed for emulating fpu instructions. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/x86.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c00aed1..4c0e682 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -158,6 +158,8 @@ struct x86_emulate_ops { int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ }; /* Type, address-of, and value of an instruction's operand. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 11d692c..5af6651 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4281,6 +4281,22 @@ static void emulator_set_segment_selector(u16 sel, int seg, kvm_set_segment(vcpu, &kvm_seg, seg); } +static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_disable(); + kvm_load_guest_fpu(ctxt->vcpu); + /* + * CR0.TS may reference the host fpu state, not the guest fpu state, + * so it may be clear at this point. + */ + clts(); +} + +static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_enable(); +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, @@ -4304,6 +4320,8 @@ static struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = kvm_set_msr, .get_msr = kvm_get_msr, + .get_fpu = emulator_get_fpu, + .put_fpu = emulator_put_fpu, }; static void cache_all_regs(struct kvm_vcpu *vcpu) -- cgit v1.1 From 0d7cdee83ad1582eecbf3b4a220e82dcb5ad561c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Mar 2011 11:34:38 +0200 Subject: KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Most SIMD instructions use the 66/f2/f3 prefixes to distinguish between different variants of the same instruction. 
Usually the encoding is quite regular, but in some cases (including non-SIMD instructions) the prefixes generate very different instructions. Examples include XCHG/PAUSE, MOVQ/MOVDQA/MOVDQU, and MOVBE/CRC32. Allow the emulator to handle these special cases by splitting such opcodes into groups, with different decode flags and execution functions for different prefixes. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 075bb6f..fcce7ae 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -75,6 +75,7 @@ #define Stack (1<<13) /* Stack instruction (push/pop) */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ /* Misc flags */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -106,6 +107,7 @@ struct opcode { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; struct group_dual *gdual; + struct gprefix *gprefix; } u; }; @@ -114,6 +116,13 @@ struct group_dual { struct opcode mod3[8]; }; +struct gprefix { + struct opcode pfx_no; + struct opcode pfx_66; + struct opcode pfx_f2; + struct opcode pfx_f3; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -2625,7 +2634,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset; + int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix; + bool op_prefix = false; struct opcode opcode, *g_mod012, *g_mod3; struct operand memop = { .type = OP_NONE }; @@ -2662,6 +2672,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (c->b = insn_fetch(u8, 1, c->eip)) { case 0x66: /* operand-size override */ + op_prefix = true; /* switch between 2/4 bytes */ c->op_bytes = def_op_bytes ^ 6; break; @@ -2742,6 +2753,19 @@ done_prefixes: c->d |= opcode.flags; } + if (c->d & Prefix) { + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + c->d |= opcode.flags; + } + c->execute = opcode.u.execute; /* Unrecognised? */ -- cgit v1.1 From 1253791df91b064c039282feea094e5943294924 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Mar 2011 11:41:27 +0200 Subject: KVM: x86 emulator: SSE support Add support for marking an instruction as SSE, switching registers used to the SSE register file. 
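The key pieces are the 16-byte sse128_t vector type and a new OP_XMM operand kind whose value is staged in vec_val, letting the generic operand-fetch and writeback paths move 16 bytes at a time. A standalone C sketch of that plumbing (hypothetical names; the real register access uses the movdqu asm shown in the patch):

    #include <stdio.h>
    #include <string.h>

    typedef unsigned int __attribute__((vector_size(16))) sse128_t;

    struct operand {
        enum { OP_REG, OP_XMM } type;
        unsigned int bytes;
        unsigned int xmm;    /* register number when type == OP_XMM */
        sse128_t vec_val;    /* staged 16-byte value */
    };

    static sse128_t xmm_file[16];   /* models the CPU's xmm registers */

    static void decode_xmm(struct operand *op, unsigned int reg)
    {
        op->type = OP_XMM;
        op->bytes = 16;
        op->xmm = reg;
        op->vec_val = xmm_file[reg];         /* read_sse_reg() equivalent */
    }

    static void writeback(const struct operand *op)
    {
        if (op->type == OP_XMM)
            xmm_file[op->xmm] = op->vec_val; /* write_sse_reg() equivalent */
    }

    int main(void)
    {
        struct operand src, dst;
        sse128_t init = { 1, 2, 3, 4 };

        xmm_file[1] = init;
        decode_xmm(&src, 1);
        decode_xmm(&dst, 2);
        memcpy(&dst.vec_val, &src.vec_val, 16);  /* the em_movdqu() core */
        writeback(&dst);
        printf("xmm2[0] = %u\n", xmm_file[2][0]);   /* prints 1 */
        return 0;
    }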
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 6 ++- arch/x86/kvm/emulate.c | 102 +++++++++++++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 4c0e682..48693f0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -162,9 +162,11 @@ struct x86_emulate_ops { void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ }; +typedef u32 __attribute__((vector_size(16))) sse128_t; + /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -176,11 +178,13 @@ struct operand { ulong ea; unsigned seg; } mem; + unsigned xmm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; }; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fcce7ae..4e11102 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -76,6 +76,7 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ +#define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, DE_VECTOR, 0, false); } +static int emulate_nm(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, NM_VECTOR, 0, false); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op) } } -static void decode_register_operand(struct operand *op, +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; + case 3: 
asm("movdqu %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, struct decode_cache *c, int inhibit_bytereg) { @@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op, if (!(c->d & ModRM)) reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); + + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = reg; + read_sse_reg(ctxt, &op->vec_val, reg); + return; + } + op->type = OP_REG; if ((c->d & ByteOp) && !inhibit_bytereg) { op->addr.reg = decode_register(reg, c->regs, highbyte_regs); @@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; op->addr.reg = decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = c->modrm_rm; + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); + return rc; + } fetch_register_operand(op); return rc; } @@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_XMM: + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); + break; case OP_NONE: /* no writeback */ break; @@ -2785,6 +2866,9 @@ done_prefixes: c->op_bytes = 4; } + if (c->d & Sse) + c->op_bytes = 16; + /* ModRM and SIB bytes. */ if (c->d & ModRM) { rc = decode_modrm(ctxt, ops, &memop); @@ -2814,7 +2898,7 @@ done_prefixes: case SrcNone: break; case SrcReg: - decode_register_operand(&c->src, c, 0); + decode_register_operand(ctxt, &c->src, c, 0); break; case SrcMem16: memop.bytes = 2; @@ -2905,7 +2989,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. 
*/ switch (c->d & DstMask) { case DstReg: - decode_register_operand(&c->dst, c, + decode_register_operand(ctxt, &c->dst, c, c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstImmUByte: @@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if ((c->d & Sse) + && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM) + || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); + goto done; + } + + if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { rc = emulate_gp(ctxt, 0); -- cgit v1.1 From aa97bb4891b1f1b35e7abef8d1e2bbd3dda07159 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 18:09:23 +0200 Subject: KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e11102..2b6c24e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2415,11 +2415,19 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movdqu(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) @@ -2484,6 +2492,10 @@ static struct opcode group11[] = { I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), }; +static struct gprefix pfx_0f_6f_0f_7f = { + N, N, N, I(Sse, em_movdqu), +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ D6ALU(Lock), @@ -2608,9 +2620,15 @@ static struct opcode twobyte_table[256] = { /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x70 - 0x7F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ X16(D(SrcImm)), /* 0x90 - 0x9F */ @@ -2654,6 +2672,7 @@ static struct opcode twobyte_table[256] = { #undef G #undef GD #undef I +#undef GP #undef D2bv #undef I2bv -- cgit v1.1 From c4f035c60dad45ff8813550dc82540dbbc263df2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 12:39:22 +0200 Subject: KVM: x86 emulator: add framework for instruction intercepts When running in guest mode, certain instructions can be intercepted by hardware. This also holds for nested guests running on emulated virtualization hardware, in particular instructions emulated by kvm itself. This patch adds a framework for intercepting instructions. If an instruction is marked for interception, and if we're running in guest mode, a callback is called to check whether an intercept is needed or not. 
The callback is called at three points in time: immediately after beginning execution, after checking privilege exceptions, and after checking memory exceptions. This suits the different interception points defined for different instructions and for the various virtualization instruction sets. In addition, a new X86EMUL_INTERCEPTED return value is defined, which any callback or memory access may return, allowing the more complicated intercepts to be implemented in existing callbacks. Signed-off-by: Avi Kivity Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 20 ++++++++++++++++++++ arch/x86/kvm/emulate.c | 26 ++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 9 +++++++++ 3 files changed, 55 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 48693f0..2cfea49 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -14,6 +14,8 @@ #include struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; struct x86_exception { u8 vector; @@ -62,6 +64,7 @@ struct x86_exception { #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { /* @@ -160,6 +163,9 @@ struct x86_emulate_ops { int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -203,6 +209,7 @@ struct read_cache { struct decode_cache { u8 twobyte; u8 b; + u8 intercept; u8 lock_prefix; u8 rep_prefix; u8 op_bytes; @@ -244,6 +251,7 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ bool only_vendor_specific_insn; @@ -265,6 +273,18 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ +enum x86_intercept_stage { + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + + nr_x86_intercepts +}; + /* Host execution mode.
*/ #if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b6c24e..a814867 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -104,6 +104,7 @@ struct opcode { u32 flags; + u8 intercept; union { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; @@ -2423,10 +2424,13 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) } #define D(_y) { .flags = (_y) } +#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define II(_f, _e, _i) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -2867,6 +2871,7 @@ done_prefixes: } c->execute = opcode.u.execute; + c->intercept = opcode.intercept; /* Unrecognised? */ if (c->d == 0 || (c->d & Undefined)) @@ -3116,12 +3121,26 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { rc = emulate_gp(ctxt, 0); goto done; } + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { @@ -3160,6 +3179,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->execute) { rc = c->execute(ctxt); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5af6651..36786bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4297,6 +4297,13 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, @@ -4322,6 +4329,7 @@ static struct x86_emulate_ops emulate_ops = { .get_msr = kvm_get_msr, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, + .intercept = emulator_intercept, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4376,6 +4384,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ? X86EMUL_MODE_VM86 : cs_l ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); } -- cgit v1.1 From 3c6e276f22cf29188035535127c4c35aeeafcabc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 12:39:23 +0200 Subject: KVM: x86 emulator: add SVM intercepts Add intercept codes for instructions defined by SVM as interceptable. 
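These codes are consulted through the ->intercept() callback introduced by the previous patch, at whichever of the three stages suits the instruction; the nested hypervisor's implementation then decides whether the instruction is swallowed. A standalone C sketch of the control flow (hypothetical names and intercept numbers):

    #include <stdio.h>

    enum stage { PRE_EXCEPT, POST_EXCEPT, POST_MEMACCESS };
    enum { CONTINUE, INTERCEPTED };

    static int guest_mode = 1;   /* emulating on behalf of a nested guest */

    /* Models x86_emulate_ops->intercept(): the nested hypervisor's policy.
     * Here, intercept code 42 triggers after the privilege checks. */
    static int intercept(int icpt, enum stage s)
    {
        return (icpt == 42 && s == POST_EXCEPT) ? INTERCEPTED : CONTINUE;
    }

    static int check(int icpt, enum stage s)
    {
        if (guest_mode && icpt && intercept(icpt, s) != CONTINUE)
            return INTERCEPTED;
        return CONTINUE;
    }

    static int emulate(int icpt)
    {
        if (check(icpt, PRE_EXCEPT))     return INTERCEPTED;
        /* ...privilege checks... */
        if (check(icpt, POST_EXCEPT))    return INTERCEPTED;
        /* ...operand fetch, memory access... */
        if (check(icpt, POST_MEMACCESS)) return INTERCEPTED;
        /* ...execute... */
        return CONTINUE;
    }

    int main(void)
    {
        printf("%s\n", emulate(42) == INTERCEPTED ? "intercepted" : "ran");
        return 0;
    }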
Signed-off-by: Avi Kivity Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 35 +++++++++++++++++++++++++++++++++++ arch/x86/kvm/emulate.c | 24 +++++++++++++----------- 2 files changed, 48 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2cfea49..470ac54 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -281,6 +281,41 @@ enum x86_intercept_stage { enum x86_intercept { x86_intercept_none, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a814867..c2260e5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2469,15 +2469,15 @@ static struct opcode group5[] = { }; static struct group_dual group7 = { { - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), + N, N, DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), + DI(SrcNone | ModRM | DstMem | Mov, smsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), + DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { D(SrcNone | ModRM | Priv | VendorSpecific), N, N, D(SrcNone | ModRM | Priv | VendorSpecific), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), N, + DI(SrcNone | ModRM | DstMem | Mov, smsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, } }; static struct opcode group8[] = { @@ -2556,7 +2556,7 @@ static struct opcode opcode_table[256] = { /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, + DI(ImplicitOps | Stack, pushf), DI(ImplicitOps | Stack, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), @@ -2579,7 +2579,8 @@ static struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ N, N, N, D(ImplicitOps | Stack), - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), + D(ImplicitOps), DI(SrcImmByte, intn), + D(ImplicitOps | No64), DI(ImplicitOps, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, N, N, N, @@ -2594,7 +2595,8 @@ static struct opcode opcode_table[256] = { D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), /* 0xF0 - 0xF7 */ N, N, N, N, - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), + G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), 
D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), @@ -2604,7 +2606,7 @@ static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ N, GD(0, &group7), N, N, N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, @@ -2614,7 +2616,7 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), + D(ImplicitOps | Priv), II(ImplicitOps, em_rdtsc, rdtsc), D(ImplicitOps | Priv), N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, -- cgit v1.1 From 775fde8648ebc588d07de39457aadc7c2131df2e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:24 +0200 Subject: KVM: x86 emulator: Don't write-back cpu-state on X86EMUL_INTERCEPTED This patch prevents the changed CPU state from being written back when the emulator detects that the instruction was intercepted by the guest. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 3 +++ arch/x86/kvm/x86.c | 3 +++ 3 files changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 470ac54..1dbd0c7 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -331,6 +331,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c2260e5..a2c31e5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3592,6 +3592,9 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) ctxt->have_exception = true; + if (rc == X86EMUL_INTERCEPTED) + return EMULATION_INTERCEPTED; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 36786bb..99bed74 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4516,6 +4516,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); + if (r == EMULATION_INTERCEPTED) + return EMULATE_DONE; + if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2)) return EMULATE_DONE; -- cgit v1.1 From d09beabd7cd4cf70d982ff54656dc6431df80fa4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:25 +0200 Subject: KVM: x86 emulator: Add check_perm callback This patch adds a check_perm callback for each opcode into the instruction emulator. This will be used to do all necessary permission checks on instructions before checking whether they are intercepted or not.
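As a rough sketch of where such a callback sits in a table-driven emulator (the types here are invented, not the real decode_cache), the permission check runs before the instruction executes and before the post-exception intercept check, so architectural faults take priority over nested intercepts:

typedef int (*emul_cb)(void *ctxt);	/* returns 0 to continue */

struct opc {
	emul_cb execute;
	emul_cb check_perm;	/* NULL when no extra checks apply */
};

static int run_opcode(const struct opc *op, void *ctxt)
{
	if (op->check_perm) {
		int rc = op->check_perm(ctxt);	/* may queue #GP/#UD/#DB */
		if (rc)
			return rc;	/* fault wins over execution */
	}
	return op->execute(ctxt);
}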
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1dbd0c7..460c2d8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -222,6 +222,7 @@ struct decode_cache { u8 seg_override; unsigned int d; int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); unsigned long regs[NR_VCPU_REGS]; unsigned long eip; /* modrm */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a2c31e5..4822824 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -111,6 +111,7 @@ struct opcode { struct group_dual *gdual; struct gprefix *gprefix; } u; + int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; struct group_dual { @@ -2425,12 +2426,17 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } +#define IIP(_f, _e, _i, _p) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -2873,6 +2879,7 @@ done_prefixes: } c->execute = opcode.u.execute; + c->check_perm = opcode.check_perm; c->intercept = opcode.intercept; /* Unrecognised? */ @@ -3136,6 +3143,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Do instruction specific permission checks */ + if (c->check_perm) { + rc = c->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (unlikely(ctxt->guest_mode) && c->intercept) { rc = ops->intercept(ctxt, c->intercept, X86_ICPT_POST_EXCEPT); -- cgit v1.1 From 8ea7d6aef84e278fcb121acff1bd4c3edaa95b8b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:26 +0200 Subject: KVM: x86 emulator: Add flag to check for protected mode instructions This patch adds a flag for the opcodes to tag instructions which are only recognized in protected mode. The necessary check is added too. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++++ arch/x86/kvm/emulate.c | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 460c2d8..cab841a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -274,6 +274,10 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode.
*/ +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + enum x86_intercept_stage { X86_ICPT_PRE_EXCEPT, X86_ICPT_POST_EXCEPT, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4822824..3f32a66 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -78,6 +78,7 @@ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ #define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ +#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ @@ -3143,6 +3144,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Instruction can only be executed in protected mode */ + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + rc = emulate_ud(ctxt); + goto done; + } + /* Do instruction specific permission checks */ if (c->check_perm) { rc = c->check_perm(ctxt); -- cgit v1.1 From 8a76d7f25f8f24fc5a328c8e15e4a7313cf141b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:27 +0200 Subject: KVM: x86: Add x86 callback for intercept check This patch adds a callback into kvm_x86_ops so that svm and vmx code can do intercept checks on emulated instructions. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++-- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ arch/x86/kvm/svm.c | 9 +++++++++ arch/x86/kvm/vmx.c | 9 +++++++++ arch/x86/kvm/x86.c | 6 +++--- 6 files changed, 74 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index cab841a..eb7033c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -26,6 +26,24 @@ struct x86_exception { }; /* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? */ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + +/* * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. 
@@ -163,8 +181,8 @@ struct x86_emulate_ops { int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ - int (*intercept)(struct x86_emulate_ctxt *ctxt, - enum x86_intercept intercept, + int (*intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, enum x86_intercept_stage stage); }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e820c63..038562c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -505,6 +505,8 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct x86_instruction_info; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -592,6 +594,11 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3f32a66..e3e96ea 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -408,6 +408,26 @@ struct gprefix { (_eip) += (_size); \ }) +static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + struct x86_instruction_info info = { + .intercept = intercept, + .rep_prefix = ctxt->decode.rep_prefix, + .modrm_mod = ctxt->decode.modrm_mod, + .modrm_reg = ctxt->decode.modrm_reg, + .modrm_rm = ctxt->decode.modrm_rm, + .src_val = ctxt->decode.src.val64, + .src_bytes = ctxt->decode.src.bytes, + .dst_bytes = ctxt->decode.dst.bytes, + .ad_bytes = ctxt->decode.ad_bytes, + .next_rip = ctxt->eip, + }; + + return ctxt->ops->intercept(ctxt->vcpu, &info, stage); +} + static inline unsigned long ad_mask(struct decode_cache *c) { return (1UL << (c->ad_bytes << 3)) - 1; @@ -3132,8 +3152,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_PRE_EXCEPT); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3158,8 +3178,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_POST_EXCEPT); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3203,8 +3223,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_POST_MEMACCESS); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) goto done; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cb43e98..798ebe6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,6 +3868,13 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = 
is_disabled, @@ -3953,6 +3960,8 @@ static struct kvm_x86_ops svm_x86_ops = { .adjust_tsc_offset = svm_adjust_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2b99ae7..3dfefe3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4409,6 +4409,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { } +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4494,6 +4501,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .adjust_tsc_offset = vmx_adjust_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99bed74..eebe546 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4297,11 +4297,11 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } -static int emulator_intercept(struct x86_emulate_ctxt *ctxt, - enum x86_intercept intercept, +static int emulator_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + return kvm_x86_ops->check_intercept(vcpu, info, stage); } static struct x86_emulate_ops emulate_ops = { -- cgit v1.1 From cfec82cb7d313ae5b2c2dbb974401d7c214c7b09 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:28 +0200 Subject: KVM: SVM: Add intercept check for emulated cr accesses This patch adds all necessary intercept checks for instructions that access the crX registers. 
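Two of the CR0 consistency rules that the new check_cr_write() in the diff below enforces can be restated compactly. The snippet is a standalone sketch: the bit definitions are duplicated here so it compiles on its own, and the EFER.LME/CR4.PAE long-mode rule is omitted for brevity.

#include <stdbool.h>
#include <stdint.h>

#define X86_CR0_PE (1ul << 0)
#define X86_CR0_NW (1ul << 29)
#define X86_CR0_CD (1ul << 30)
#define X86_CR0_PG (1ul << 31)

/* Returns true if a mov-to-CR0 of new_val must raise #GP(0). */
static bool cr0_write_faults(uint64_t new_val)
{
	if ((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE))
		return true;	/* paging requires protected mode */
	if ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))
		return true;	/* not-write-through requires cache disable */
	return false;
}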
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 3 ++ arch/x86/include/asm/kvm_host.h | 15 ++++++ arch/x86/kvm/emulate.c | 105 +++++++++++++++++++++++++++++++++---- arch/x86/kvm/svm.c | 81 +++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 13 ----- 5 files changed, 192 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index eb7033c..2c0b5b4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -304,6 +304,9 @@ enum x86_intercept_stage { enum x86_intercept { x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, x86_intercept_lmsw, x86_intercept_smsw, x86_intercept_lidt, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 038562c..f7dfd64 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,10 +35,25 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e3e96ea..d2e7775 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2445,6 +2445,95 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static bool valid_cr(int nr) +{ + switch (nr) { + case 0: + case 2 ... 
4: + case 8: + return true; + default: + return false; + } +} + +static int check_cr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + if (!valid_cr(c->modrm_reg)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_cr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int cr = c->modrm_reg; + + static u64 cr_reserved_bits[] = { + 0xffffffff00000000ULL, + 0, 0, 0, /* CR3 checked later */ + CR4_RESERVED_BITS, + 0, 0, 0, + CR8_RESERVED_BITS, + }; + + if (!valid_cr(cr)) + return emulate_ud(ctxt); + + if (new_val & cr_reserved_bits[cr]) + return emulate_gp(ctxt, 0); + + switch (cr) { + case 0: { + u64 cr4, efer; + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) + return emulate_gp(ctxt, 0); + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && + !(cr4 & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + case 3: { + u64 rsvd = 0; + + if (is_long_mode(ctxt->vcpu)) + rsvd = CR3_L_MODE_RESERVED_BITS; + else if (is_pae(ctxt->vcpu)) + rsvd = CR3_PAE_RESERVED_BITS; + else if (is_paging(ctxt->vcpu)) + rsvd = CR3_NONPAE_RESERVED_BITS; + + if (new_val & rsvd) + return emulate_gp(ctxt, 0); + + break; + } + case 4: { + u64 cr4, efer; + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + } + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2632,14 +2721,16 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ N, GD(0, &group7), N, N, - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), + D(ModRM | DstMem | Priv | Op3264), + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), + D(ModRM | SrcMem | Priv | Op3264), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ @@ -3724,14 +3815,6 @@ twobyte_insn: case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 
15: - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); break; case 0x21: /* mov from dr to reg */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 798ebe6..ff4ed36 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,11 +3868,90 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, \ + .valid = true } + +static struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; + bool valid; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), +}; + +#undef POST_EX + static int svm_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (!icpt_info.valid || stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? 
X86EMUL_INTERCEPTED : X86EMUL_CONTINUE; + +out: + return ret; } static struct kvm_x86_ops svm_x86_ops = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eebe546..0d6524f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,19 +60,6 @@ #include #define MAX_IO_MSRS 256 -#define CR0_RESERVED_BITS \ - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR4_RESERVED_BITS \ - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXSAVE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) - -#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) - #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) -- cgit v1.1 From 3b88e41a41344846ee28007ebfe1bb0defa7f86a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:29 +0200 Subject: KVM: SVM: Add intercept check for accessing dr registers This patch adds the intercept checks for instructions accessing the debug registers. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/emulate.c | 63 +++++++++++++++++++++++++++++--------- arch/x86/kvm/svm.c | 6 ++++ 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2c0b5b4..fdaf59a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -309,6 +309,8 @@ enum x86_intercept { x86_intercept_clts, x86_intercept_lmsw, x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, x86_intercept_lidt, x86_intercept_sidt, x86_intercept_lgdt, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d2e7775..cd9ed9f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -509,6 +509,11 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } +static int emulate_db(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, DB_VECTOR, 0, false); +} + static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) { return emulate_exception(ctxt, GP_VECTOR, err, true); @@ -2534,6 +2539,47 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) +{ + unsigned long dr7; + + ctxt->ops->get_dr(7, &dr7, ctxt->vcpu); + + /* Check if DR7.Global_Enable is set */ + return dr7 & (1 << 13); +} + +static int check_dr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int dr = c->modrm_reg; + u64 cr4; + + if (dr > 7) + return emulate_ud(ctxt); + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) + return emulate_ud(ctxt); + + if (check_dr7_gd(ctxt)) + return emulate_db(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_dr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int dr = c->modrm_reg; + + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) + return emulate_gp(ctxt, 0); + + return check_dr_read(ctxt); +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2728,9
+2774,9 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), - D(ModRM | DstMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), - D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ @@ -3818,12 +3864,6 @@ twobyte_insn: c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); break; case 0x22: /* mov reg, cr */ @@ -3835,13 +3875,6 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - if (ops->set_dr(c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U), ctxt->vcpu) < 0) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ff4ed36..381b038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3882,6 +3882,8 @@ static struct __x86_intercept { [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), }; #undef POST_EX @@ -3939,6 +3941,10 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; default: break; } -- cgit v1.1 From dee6bb70e4ac0588c98cc4e661664f0653117f89 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:30 +0200 Subject: KVM: SVM: Add intercept checks for descriptor table accesses This patch adds intercept checks into the KVM instruction emulator to check for the 8 instructions that access the descriptor table addresses.
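For orientation while reading the decode-table hunks below: the eight descriptor-table instructions live in two ModRM-reg-selected opcode groups (0F 00 for the LDTR/TR instructions, 0F 01 for the GDTR/IDTR ones), and each maps one-to-one onto an SVM exit code of the form SVM_EXIT_{LDTR,TR,GDTR,IDTR}_{READ,WRITE}. A minimal sketch of the group selection, simplified from the emulator's real Group handling:

/* For 0F 00 the reg field picks sldt/str/lldt/ltr;
 * for 0F 01 it picks sgdt/sidt/lgdt/lidt (and more). */
static unsigned int group_index(unsigned char modrm)
{
	return (modrm >> 3) & 7;	/* ModRM bits 5:3 */
}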
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 ++++++++++++-- arch/x86/kvm/svm.c | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cd9ed9f..3ac830a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2630,8 +2630,18 @@ static struct opcode group5[] = { D(SrcMem | ModRM | Stack), N, }; +static struct opcode group6[] = { + DI(ModRM | Prot, sldt), + DI(ModRM | Prot, str), + DI(ModRM | Prot | Priv, lldt), + DI(ModRM | Prot | Priv, ltr), + N, N, N, N, +}; + static struct group_dual group7 = { { - N, N, DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), + DI(ModRM | Mov | DstMem | Priv, sgdt), + DI(ModRM | Mov | DstMem | Priv, sidt), + DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, DI(SrcMem16 | ModRM | Mov | Priv, lmsw), DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), @@ -2766,7 +2776,7 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ - N, GD(0, &group7), N, N, + G(0, group6), GD(0, &group7), N, N, N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 381b038..ce251c9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3884,6 +3884,14 @@ static struct __x86_intercept { [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), }; #undef POST_EX -- cgit v1.1 From 01de8b09e6068936f7f5e386cb85637cf926468c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:31 +0200 Subject: KVM: SVM: Add intercept checks for SVM instructions This patch adds the necessary code changes in the instruction emulator and the extensions to svm.c to implement intercept checks for the svm instructions. 
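Two mechanisms in the diff below are worth seeing in isolation: with mod == 3, the r/m field of ModRM becomes a further opcode extension (the new RMExt flag), and the VMRUN/VMLOAD/VMSAVE permission check rejects unimplemented physical addresses in RAX before any intercept is considered. A hedged, standalone restatement of both; the 48-bit address mask mirrors the constant used in the patch:

#include <stdint.h>

/* RMExt decode: for 0F 01 with mod == 3, ModRM bits 2:0 select one of
 * eight sub-opcodes (vmrun, vmmcall, vmload, vmsave, stgi, ...). */
static unsigned int rm_ext_index(unsigned char modrm)
{
	return modrm & 7;
}

/* check_svme_pa()-style test: RAX must fit in 48 physical address
 * bits, otherwise the instruction takes #GP(0) before any intercept. */
static int svm_pa_ok(uint64_t rax)
{
	return (rax & 0xffff000000000000ull) == 0;
}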
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 44 +++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 8 ++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3ac830a..a3aba95 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -77,6 +77,7 @@ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ #define Sse (1<<17) /* SSE Vector instruction */ +#define RMExt (1<<18) /* Opcode extension in ModRM r/m if mod == 3 */ /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ @@ -2580,11 +2581,35 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt) return check_dr_read(ctxt); } +static int check_svme(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if (!(efer & EFER_SVME)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_svme_pa(struct x86_emulate_ctxt *ctxt) +{ + u64 rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX); + + /* Valid physical address? */ + if (rax & 0xffff000000000000) + return emulate_gp(ctxt, 0); + + return check_svme(ctxt); +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ .check_perm = (_p) } #define N D(0) +#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } @@ -2602,6 +2627,16 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt) D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +static struct opcode group7_rm3[] = { + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), + DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), +}; static struct opcode group1[] = { X7(D(Lock)), N @@ -2647,7 +2682,7 @@ static struct group_dual group7 = { { DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { D(SrcNone | ModRM | Priv | VendorSpecific), N, - N, D(SrcNone | ModRM | Priv | VendorSpecific), + N, EXT(0, group7_rm3), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, } }; @@ -2853,6 +2888,7 @@ static struct opcode twobyte_table[256] = { #undef GD #undef I #undef GP +#undef EXT #undef D2bv #undef I2bv @@ -3030,6 +3066,12 @@ done_prefixes: opcode = g_mod3[goffset]; else opcode = g_mod012[goffset]; + + if (opcode.flags & RMExt) { + goffset = c->modrm & 7; + opcode = opcode.u.group[goffset]; + } + c->d |= opcode.flags; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce251c9..b98d00b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3892,6 +3892,14 @@ static struct __x86_intercept { [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), [x86_intercept_lgdt] = 
POST_EX(SVM_EXIT_GDTR_WRITE), [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), }; #undef POST_EX -- cgit v1.1 From d7eb82030699e6151f1356e90d495bf292564fb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:32 +0200 Subject: KVM: SVM: Add intercept checks for remaining group7 instructions This patch implements the emulator intercept checks for the RDTSCP, MONITOR, and MWAIT instructions. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 25 +++++++++++++++++++++++-- arch/x86/kvm/svm.c | 7 +++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3aba95..b4adb4c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2604,6 +2604,16 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) return check_svme(ctxt); } +static int check_rdtsc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt->vcpu)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2627,6 +2637,12 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +static struct opcode group7_rm1[] = { + DI(SrcNone | ModRM | Priv, monitor), + DI(SrcNone | ModRM | Priv, mwait), + N, N, N, N, N, N, +}; + static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), @@ -2638,6 +2654,11 @@ static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), }; +static struct opcode group7_rm7[] = { + N, + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + N, N, N, N, N, N, +}; static struct opcode group1[] = { X7(D(Lock)), N }; @@ -2681,10 +2702,10 @@ static struct group_dual group7 = { { DI(SrcMem16 | ModRM | Mov | Priv, lmsw), DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), N, + D(SrcNone | ModRM | Priv | VendorSpecific), EXT(0, group7_rm1), N, EXT(0, group7_rm3), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b98d00b..1eb5504 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3871,6 +3871,9 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) #define POST_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_EXCEPT, \ .valid = true } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, \ + .valid = true } static struct __x86_intercept { u32 exit_code; @@ -3900,9 +3903,13 @@ static struct __x86_intercept { [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), 
[x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), }; #undef POST_EX +#undef POST_MEM static int svm_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, -- cgit v1.1 From 8061252ee0d21e1289235a4b7fe61f53010c46ff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:33 +0200 Subject: KVM: SVM: Add intercept checks for remaining twobyte instructions This patch adds intercept checks for the remaining twobyte instructions to the KVM instruction emulator. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/emulate.c | 25 ++++++++++++++++++------- arch/x86/kvm/svm.c | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fdaf59a..f30650f 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -344,6 +344,8 @@ enum x86_intercept { x86_intercept_wbinvd, x86_intercept_monitor, x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4adb4c..0bf1f68 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2425,12 +2425,9 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) - return emulate_gp(ctxt, 0); ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; @@ -2614,6 +2611,18 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_rdpmc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); + + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt->vcpu)) || + (rcx > 3)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2846,8 +2855,10 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), II(ImplicitOps, em_rdtsc, rdtsc), - D(ImplicitOps | Priv), N, + DI(ImplicitOps | Priv, wrmsr), + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), + DI(ImplicitOps | Priv, rdmsr), + DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, N, N, N, N, N, N, N, N, @@ -2871,12 +2882,12 @@ static struct opcode twobyte_table[256] = { X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp), + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), N, N, /* 0xA8 - 0xAF */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem
| SrcReg | Src2CL | ModRM), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1eb5504..9036289 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,6 +3868,9 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, \ + .valid = true } #define POST_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_EXCEPT, \ .valid = true } @@ -3906,8 +3909,18 @@ static struct __x86_intercept { [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), }; +#undef PRE_EX #undef POST_EX #undef POST_MEM @@ -3968,6 +3981,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, case SVM_EXIT_WRITE_DR0: icpt_info.exit_code += info->modrm_reg; break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; default: break; } -- cgit v1.1 From bf608f88faef1245ff87e731512517fc676ffe02 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:34 +0200 Subject: KVM: SVM: Add intercept checks for one-byte instructions This patch adds intercept checks for emulated one-byte instructions to the KVM instruction emulation path.
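A subtlety behind the 0x90 hunk below: opcode 0x90 is plain NOP, but with an F3 prefix it is PAUSE, so the pause intercept is attached to 0x90 and the rep prefix decides which instruction was really executed. A one-line sketch of the disambiguation (standalone, simplified):

/* PAUSE is encoded as F3 90 (REP NOP); bare 90 is plain NOP. */
static int is_pause(unsigned char opcode, int has_rep_prefix)
{
	return opcode == 0x90 && has_rep_prefix;
}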
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- arch/x86/kvm/svm.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0bf1f68..cc32e72 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2789,7 +2789,7 @@ static struct opcode opcode_table[256] = { D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), /* 0x90 - 0x97 */ - X8(D(SrcAcc | DstReg)), + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, @@ -2831,7 +2831,7 @@ static struct opcode opcode_table[256] = { D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), /* 0xF0 - 0xF7 */ - N, N, N, N, + N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9036289..9eb2710 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3918,6 +3918,13 @@ static struct __x86_intercept { [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), }; #undef PRE_EX @@ -3987,6 +3994,13 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, else vmcb->control.exit_info_1 = 0; break; + case SVM_EXIT_PAUSE: + /* + * We get this intercept for NOP as well, since PAUSE + * is encoded as REP NOP; check the rep prefix here. + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; default: break; } -- cgit v1.1 From f6511935f424b9a25059ae18e91ad11dd24980e6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:35 +0200 Subject: KVM: SVM: Add checks for IO instructions This patch adds code to check for IOIO intercepts on instructions decoded by the KVM instruction emulator.
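The core of this change is rebuilding SVM's IOIO exit-information word in software, something the hardware normally does on a real #VMEXIT. Below is a standalone sketch of the encoding; the bit positions follow the SVM_IOIO_* definitions as used in the patch, but the helper itself and its argument names are invented for illustration.

#include <stdint.h>

/* exit_info_1 layout for SVM_EXIT_IOIO (schematically):
 *   bit 0      direction (1 = IN, 0 = OUT)
 *   bit 2      string instruction (INS/OUTS)
 *   bit 3      rep prefixed
 *   bits 6:4   operand size, one-hot (1/2/4 bytes)
 *   bits 9:7   address size, one-hot (2/4/8 bytes)
 *   bits 31:16 port number
 */
static uint64_t ioio_exit_info(uint16_t port, int in, int string,
			       int rep, uint32_t bytes, uint32_t ad_bytes)
{
	uint64_t info = (uint64_t)port << 16;

	if (in)
		info |= 1;		/* type: IN */
	if (string)
		info |= 1u << 2;	/* INS/OUTS */
	if (rep)
		info |= 1u << 3;	/* rep prefix */

	info |= bytes << 4;		/* 1/2/4 -> bits 4/5/6 */
	info |= ad_bytes << 6;		/* 2/4/8 -> bits 7/8/9 */

	return info;
}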
[avi: fix build error due to missing #define D2bvIP] Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++++ arch/x86/kvm/emulate.c | 45 +++++++++++++++++++++++++------------- arch/x86/kvm/svm.c | 36 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index f30650f..0818448 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -346,6 +346,10 @@ enum x86_intercept { x86_intercept_mwait, x86_intercept_rdmsr, x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cc32e72..d88dcfd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2623,6 +2623,28 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_perm_in(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_out(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.bytes = min(c->src.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2640,6 +2662,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) +#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ @@ -2773,8 +2796,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bv(DstDI | Mov | String), /* insb, insw/insd */ - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -2825,11 +2848,13 @@ static struct opcode opcode_table[256] = { N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ X4(D(SrcImmByte)), - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), + D2bvIP(SrcNone | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), /* 0xF0 - 0xF7 */ N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), @@ -2923,6 +2948,7 @@ static struct opcode twobyte_table[256] = { #undef EXT #undef D2bv +#undef D2bvIP #undef I2bv #undef D6ALU @@ -3731,11 +3757,6 @@ special_insn: case 0xed: /* in (e/r)ax,dx */ c->src.val = c->regs[VCPU_REGS_RDX]; 
do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ @@ -3744,12 +3765,6 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->dst.val, - c->src.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } ops->pio_out_emulated(c->src.bytes, c->dst.val, &c->src.val, 1, ctxt->vcpu); c->dst.type = OP_NONE; /* Disable writeback. */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9eb2710..5c6512d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3925,6 +3925,10 @@ static struct __x86_intercept { [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), }; #undef PRE_EX @@ -4001,6 +4005,38 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, */ if (info->rep_prefix != REPE_PREFIX) goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } default: break; } -- cgit v1.1 From 628afd2aeb286415f6e7d0ee7c87aae249e7f999 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:36 +0200 Subject: KVM: SVM: Remove nested sel_cr0_write handling code This patch removes all the old code which handled the nested selective cr0 write intercepts. This code was only in place as a work-around until the instruction emulator is capable of doing the same. This is the case with this patch-set and so the code can be removed. 
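The rule that both the removed work-around and the emulator path implement is small enough to state on its own: a selective CR0 write intercept fires only when the write changes a bit other than TS or MP. A standalone restatement, with the mask duplicated here under the assumption that it matches SVM_CR0_SELECTIVE_MASK:

#include <stdbool.h>
#include <stdint.h>

#define X86_CR0_MP (1ul << 1)
#define X86_CR0_TS (1ul << 3)
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)

/* True if a CR0 write should take SVM_EXIT_CR0_SEL_WRITE. */
static bool selective_cr0_hit(uint64_t old_cr0, uint64_t new_cr0)
{
	old_cr0 &= ~SVM_CR0_SELECTIVE_MASK;
	new_cr0 &= ~SVM_CR0_SELECTIVE_MASK;

	return old_cr0 != new_cr0;	/* some other bit changed */
}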
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 78 ++++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 52 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5c6512d..779b091 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -93,14 +93,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -1362,31 +1354,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -2673,6 +2640,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2696,7 +2686,8 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2741,23 +2732,6 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -3045,7 +3019,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = cr_interception, 
[SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, -- cgit v1.1 From 0be839bfb4b12f17fba2d982353b0f674e327f9e Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Wed, 30 Mar 2011 09:54:47 -0700 Subject: KVM: Remove base_addresss in kvm_pit since it is unused The patch below removes unsigned long base_addresss; in i8254.h since it is unused. Signed-off-by: Justin P. Mattock Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca..b681a9f 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -33,7 +33,6 @@ struct kvm_kpit_state { }; struct kvm_pit { - unsigned long base_addresss; struct kvm_io_device dev; struct kvm_io_device speaker_dev; struct kvm *kvm; -- cgit v1.1 From a0c0ab2feb9d696978a7475dce4253ec62e98a16 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 28 Mar 2011 16:57:49 +0200 Subject: KVM: x86 emulator: do not open code return values from the emulator Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d88dcfd..ae5f491 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2287,7 +2287,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, ctxt->eip = c->eip; } - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, @@ -3333,7 +3333,7 @@ done_prefixes: } done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) @@ -4163,5 +4163,5 @@ twobyte_insn: goto writeback; cannot_emulate: - return -1; + return EMULATION_FAILED; } -- cgit v1.1 From 7c5625227ff8c81953e953d8e25c3eba2ab0aeb3 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 28 Mar 2011 10:29:27 +0800 Subject: KVM: MMU: remove mmu_seq verification on pte update path The mmu_seq verification can be removed since we get the pfn in the protection of mmu_lock. 
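For context, the mmu_seq pattern this commit drops exists to catch an mmu-notifier invalidation racing with a pfn lookup done outside mmu_lock. A schematic of the sample-and-recheck idiom, with invented names standing in for the real KVM fields and with the locking simplified away:

#include <stdatomic.h>

static atomic_ulong notifier_seq;	/* bumped by invalidations */

static void map_gfn(unsigned long gfn)
{
	unsigned long seq;

	do {
		seq = atomic_load(&notifier_seq);	/* sample */
		/* pfn lookup happens here, without the lock, and
		 * can therefore race with an invalidation */
	} while (seq != atomic_load(&notifier_seq));	/* raced: retry */

	/* install the mapping */
	(void)gfn;
}

On the pte-update path the pfn is obtained while mmu_lock is already held, so no invalidation can slip in between lookup and install, and the recheck is dead weight.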
Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 16 +++++----------- arch/x86/kvm/paging_tmpl.h | 4 +--- 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f7dfd64..ecdc562 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -274,7 +274,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq); + u64 *spte, const void *pte); hpa_t root_hpa; int root_level; int shadow_root_level; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae75..2841805 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *pte, unsigned long mmu_seq) + const void *pte) { WARN_ON(1); } @@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *spte, - const void *new, unsigned long mmu_seq) + struct kvm_mmu_page *sp, u64 *spte, + const void *new) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) { ++vcpu->kvm->stat.mmu_pde_zapped; @@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); - unsigned long mmu_seq; u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, invlpg_counter, r, flooded = 0; @@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - spin_lock(&vcpu->kvm->mmu_lock); if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; @@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, - mmu_seq); + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (!remote_flush && need_remote_flush(entry, *spte)) remote_flush = true; ++spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c639779..74f8567 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -325,7 +325,7 @@ no_present: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; @@ -342,8 +342,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, kvm_release_pfn_clean(pfn); return; } - if (mmu_notifier_retry(vcpu, mmu_seq)) - return; /* * we call mmu_set_spte() with host_writable = true because that -- cgit v1.1 From 8b18bc378224b4f195145b407b95768a289497e3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Apr 2011 16:21:58 +0300 Subject: KVM: x86 emulator: Re-add VendorSpecific tag to 
VMMCALL insn VMMCALL needs the VendorSpecific tag so that #UD emulation (called if a guest running on AMD was migrated to an Intel host) is allowed to process the instruction. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ae5f491..0e31b0c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2677,7 +2677,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), + DIP(SrcNone | ModRM | Prot | VendorSpecific, vmmcall, check_svme), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), -- cgit v1.1 From bfeed29d6d3ebd5f31253d2c067e4e6c4aeb376b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Apr 2011 16:25:20 +0300 Subject: KVM: x86 emulator: Drop EFER.SVME requirement from VMMCALL VMMCALL requires EFER.SVME to be enabled in the host, not in the guest, which is what check_svme() checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0e31b0c..50bffb9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2677,7 +2677,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DIP(SrcNone | ModRM | Prot | VendorSpecific, vmmcall, check_svme), + DI(SrcNone | ModRM | Prot | VendorSpecific, vmmcall), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), -- cgit v1.1 From fbc0db76b77125e0a5131fb886cbaafa1ec5c525 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:46 +0100 Subject: KVM: SVM: Implement infrastructure for TSC_RATE_MSR This patch enhances the kvm_amd module with functions to support the TSC_RATE_MSR which can be used to set a given tsc frequency for the guest vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kvm/svm.c | 54 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce714..485b4f1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -118,6 +118,7 @@ complete list. 
*/ #define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 779b091..8301500 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -63,6 +63,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -136,8 +138,13 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU static struct svm_direct_access_msrs { @@ -560,6 +567,10 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); } @@ -601,6 +612,11 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + svm_init_erratum_383(); return 0; @@ -843,6 +859,32 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1037,6 +1079,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1130,6 +1174,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -2784,7 +2834,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_TSC: { struct vmcb *vmcb = get_host_vmcb(svm); - *data = vmcb->control.tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); + break; } case MSR_STAR: -- cgit v1.1 From 1e993611d0dc879fde25515dc9867d1cfd4c5137 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:47 +0100 Subject: KVM: X86: Let kvm-clock report the right tsc frequency This patch changes the kvm_guest_time_update function to use TSC frequency the guest actually has for updating its clock. 
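Both of these patches revolve around the value programmed into MSR_AMD64_TSC_RATIO: a fixed-point ratio with an 8-bit integer part in bits 39:32 and a 32-bit fraction in bits 31:0, so TSC_RATIO_DEFAULT (0x0100000000) encodes 1.0. __scale_tsc() above multiplies a 64-bit TSC by that ratio without needing a 128-bit intermediate by splitting the TSC into high and low halves. A standalone sketch of the same arithmetic, compilable outside the kernel:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the arithmetic of __scale_tsc(): tsc * (mult + frac/2^32),
     * with the tsc split into halves so the fraction product can be
     * formed without a 128-bit type. */
    static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
    {
        uint64_t mult = ratio >> 32;            /* integer part */
        uint64_t frac = ratio & 0xffffffffULL;  /* fractional part */
        uint64_t out;

        out  = tsc * mult;
        out += (tsc >> 32) * frac;                   /* high half * frac */
        out += ((tsc & 0xffffffffULL) * frac) >> 32; /* low half * frac */
        return out;
    }

    int main(void)
    {
        uint64_t ratio = 0x180000000ULL; /* 1.5 in this fixed-point format */

        /* 1000 host cycles scale to 1500 guest cycles. */
        printf("%llu\n", (unsigned long long)scale_tsc(ratio, 1000));
        return 0;
    }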
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 6 +++--- arch/x86/kvm/x86.c | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ecdc562..e3aaa02 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -396,7 +396,10 @@ struct kvm_vcpu_arch { u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u32 virtual_tsc_khz; bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; bool nmi_pending; bool nmi_injected; @@ -466,9 +469,6 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; - u32 virtual_tsc_khz; - u32 virtual_tsc_mult; - s8 virtual_tsc_shift; struct kvm_xen_hvm_config xen_hvm_config; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0d6524f..78d7291 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -969,6 +969,14 @@ static inline int kvm_tsc_changes_freq(void) return ret; } +static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.virtual_tsc_khz) + return vcpu->arch.virtual_tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + static inline u64 nsec_to_cycles(u64 nsec) { u64 ret; @@ -982,20 +990,19 @@ static inline u64 nsec_to_cycles(u64 nsec) return ret; } -static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) +static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, - &kvm->arch.virtual_tsc_shift, - &kvm->arch.virtual_tsc_mult); - kvm->arch.virtual_tsc_khz = this_tsc_khz; + &vcpu->arch.tsc_catchup_shift, + &vcpu->arch.tsc_catchup_mult); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) { u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, - vcpu->kvm->arch.virtual_tsc_mult, - vcpu->kvm->arch.virtual_tsc_shift); + vcpu->arch.tsc_catchup_mult, + vcpu->arch.tsc_catchup_shift); tsc += vcpu->arch.last_tsc_write; return tsc; } @@ -1062,8 +1069,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); kernel_ns = get_kernel_ns(); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - + this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -6060,8 +6066,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - if (!kvm->arch.virtual_tsc_khz) - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); + kvm_init_tsc_catchup(vcpu, max_tsc_khz); r = kvm_mmu_create(vcpu); if (r < 0) -- cgit v1.1 From 8f6055cbaf68cbd9ff2692a2cfa691b43629ccd4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:48 +0100 Subject: KVM: X86: Make tsc_delta calculation a function of guest tsc The calculation of the tsc_delta value to ensure a forward-going tsc for the guest is a function of the host-tsc. This works as long as the guest's tsc_khz is equal to the host's tsc_khz. With tsc-scaling hardware support this is no longer true and the tsc_delta needs to be calculated using guest_tsc values.
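To see why the delta must be taken in the guest's clock domain, note that with TSC scaling equal host-cycle gaps correspond to different guest-cycle gaps, so a compensation value computed from raw host TSC samples is off by the scaling factor. A toy illustration (plain C; the rational scale() here merely stands in for the hardware's fixed-point ratio):

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in for TSC scaling: guest cycles = host cycles * num / den. */
    static uint64_t scale(uint64_t host_tsc, uint64_t num, uint64_t den)
    {
        return host_tsc * num / den;
    }

    int main(void)
    {
        uint64_t last = 10000, now = 16000; /* two host TSC samples */
        uint64_t num = 1, den = 2;          /* guest runs at half the host rate */

        uint64_t host_delta  = now - last;
        uint64_t guest_delta = scale(now, num, den) - scale(last, num, den);

        /* Compensating the guest by host_delta (6000) instead of
         * guest_delta (3000) would make its TSC jump forward. */
        printf("host delta %llu, guest delta %llu\n",
               (unsigned long long)host_delta,
               (unsigned long long)guest_delta);
        return 0;
    }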
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78d7291..fcce29b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2113,8 +2113,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { /* Make sure TSC doesn't go backwards */ - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + s64 tsc_delta; + u64 tsc; + + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : + tsc - vcpu->arch.last_guest_tsc; + if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { -- cgit v1.1 From 4051b18801f5b47bb0369feefdc80e57819d0ddf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:49 +0100 Subject: KVM: X86: Implement call-back to propagate virtual_tsc_khz This patch implements a call-back into the architecture code to allow the propagation of changes to the virtual tsc_khz of the vcpu. On SVM it updates the tsc_ratio variable, on VMX it does nothing. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx.c | 11 +++++++++++ 3 files changed, 45 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e3aaa02..f3a7116 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,6 +606,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8301500..a39fde4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -885,6 +885,38 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) return _tsc; } +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + return; + + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ + if (user_tsc_khz == 0) { + vcpu->arch.virtual_tsc_khz = 0; + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + vcpu->arch.virtual_tsc_khz = user_tsc_khz; + svm->tsc_ratio = ratio; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4159,6 +4191,7 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dfefe3..e19c7a5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1161,6 +1161,16 @@ static u64 guest_read_tsc(void) } /* + * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ + * ioctl. 
In this case the call-back should update internal vmx state to make + * the changes effective. + */ +static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + /* Nothing to do here */ +} + +/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -4497,6 +4507,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, -- cgit v1.1 From 857e40999e35906baa367a79137019912cfb5434 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:50 +0100 Subject: KVM: X86: Delegate tsc-offset calculation to architecture code With TSC scaling in SVM the tsc-offset needs to be calculated differently. This patch propagates this calculation into the architecture specific modules so that this complexity can be handled there. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 10 +++++----- 4 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3a7116..da0a8ce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -609,6 +609,8 @@ struct kvm_x86_ops { void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a39fde4..8c4549b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -943,6 +943,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4194,6 +4203,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e19c7a5..aabe333 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1184,6 +1184,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) vmcs_write64(TSC_OFFSET, offset + adjustment); } +static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + return target_tsc - native_read_tsc(); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. 
@@ -4510,6 +4515,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + .compute_tsc_offset = vmx_compute_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcce29b..579ce34 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -977,7 +977,7 @@ static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) return __this_cpu_read(cpu_tsc_khz); } -static inline u64 nsec_to_cycles(u64 nsec) +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { u64 ret; @@ -985,7 +985,7 @@ static inline u64 nsec_to_cycles(u64 nsec) if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - ret = nsec * __this_cpu_read(cpu_tsc_khz); + ret = nsec * vcpu_tsc_khz(vcpu); do_div(ret, USEC_PER_SEC); return ret; } @@ -1015,7 +1015,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) s64 sdiff; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = data - native_read_tsc(); + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; sdiff = data - kvm->arch.last_tsc_write; @@ -1031,13 +1031,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * In that case, for a reliable TSC, we can match TSC offsets, * or make a best guest using elapsed value. */ - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && elapsed < 5ULL * NSEC_PER_SEC) { if (!check_tsc_unstable()) { offset = kvm->arch.last_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { - u64 delta = nsec_to_cycles(elapsed); + u64 delta = nsec_to_cycles(vcpu, elapsed); offset += delta; pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } -- cgit v1.1 From 92a1f12d2598f429bd8639e21d89305e787115c5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:51 +0100 Subject: KVM: X86: Implement userspace interface to set virtual_tsc_khz This patch implements two new vm-ioctls to get and set the virtual_tsc_khz if the machine supports tsc-scaling. Setting the tsc-frequency is only possible before userspace creates any vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/svm.c | 20 ++++++++++++++++++++ arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index da0a8ce..bd57639 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -655,6 +655,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? 
*/ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8c4549b..a9887376 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -64,6 +64,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) #define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL static bool erratum_383_found __read_mostly; @@ -189,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -798,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. + */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 579ce34..1d5a7f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1986,6 +1991,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2012,6 +2018,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -3045,6 +3054,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } -- cgit v1.1 From e3e9ed3d2c443fd90a04fb7ff231ad53ef087417 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 12:30:03 +0200 Subject: KVM: SVM: Fix fault-rip on vmsave/vmload emulation When the emulation of vmload or vmsave fails because the guest passed an unsupported physical address it gets an #GP with rip pointing to the instruction 
after vmsave/vmload. This is a bug, and it is fixed by this patch. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9887376..a6bf2ad 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2518,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2539,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); -- cgit v1.1 From 3ca3ac4dae5da5af375a9e80d2316ccfa7f0c6ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 16:52:26 +0200 Subject: KVM: x86 emulator: Add helpers for memory access using segmented addresses Will help later when adding proper segment checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 75 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 50bffb9..8c38f6c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,6 +540,15 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + return ctxt->ops->read_std(linear(ctxt, addr), data, size, ctxt->vcpu, + &ctxt->exception); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -604,13 +613,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, size, 2); if (rc != X86EMUL_CONTINUE) return rc; addr.ea += 2; - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, address, op_bytes); return rc; } @@ -950,6 +957,32 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +static int segmented_read(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + return read_emulated(ctxt, ctxt->ops, linear(ctxt, addr), data, size); +} + +static int segmented_write(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *data, + unsigned size) +{ + return ctxt->ops->write_emulated(linear(ctxt, addr), data, size, + &ctxt->exception, ctxt->vcpu); +} + +static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *orig_data, const void *data, +
unsigned size) +{ + return ctxt->ops->cmpxchg_emulated(linear(ctxt, addr), orig_data, data, + size, &ctxt->exception, ctxt->vcpu); +} + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned int size, unsigned short port, @@ -1197,20 +1230,16 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, break; case OP_MEM: if (c->lock_prefix) - rc = ops->cmpxchg_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.orig_val, - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_cmpxchg(ctxt, + c->dst.addr.mem, + &c->dst.orig_val, + &c->dst.val, + c->dst.bytes); else - rc = ops->write_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_write(ctxt, + c->dst.addr.mem, + &c->dst.val, + c->dst.bytes); if (rc != X86EMUL_CONTINUE) return rc; break; @@ -1249,7 +1278,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); addr.seg = VCPU_SREG_SS; - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); + rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -3440,16 +3469,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), - c->src.valptr, c->src.bytes); + rc = segmented_read(ctxt, c->src.addr.mem, + c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), - &c->src2.val, c->src2.bytes); + rc = segmented_read(ctxt, c->src2.addr.mem, + &c->src2.val, c->src2.bytes); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3460,7 +3489,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { /* optimisation - avoid slow emulated read if Mov */ - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), + rc = segmented_read(ctxt, c->dst.addr.mem, &c->dst.val, c->dst.bytes); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.1 From 38503911b32186240301bbe81601cfabb37e752e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 18:48:09 +0200 Subject: KVM: x86 emulator: move invlpg emulation into a function It's going to get more complicated soon. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8c38f6c..c522b4e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2477,6 +2477,15 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_invlpg(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + emulate_invlpg(ctxt->vcpu, linear(ctxt, c->src.addr.mem)); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -3966,10 +3975,7 @@ twobyte_insn: rc = X86EMUL_PROPAGATE_FAULT; goto done; case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, - linear(ctxt, c->src.addr.mem)); - /* Disable writeback. 
*/ - c->dst.type = OP_NONE; + rc = em_invlpg(ctxt); break; default: goto cannot_emulate; -- cgit v1.1 From 9fa088f4d24f045d91c37a5e55f0d2be2ef387ad Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 18:54:30 +0200 Subject: KVM: x86 emulator: change address linearization to return an error code Preparing to add segment checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c522b4e..b46fa37 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -489,8 +489,9 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static ulong linear(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr) +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + ulong *linear) { struct decode_cache *c = &ctxt->decode; ulong la; @@ -498,7 +499,8 @@ static ulong linear(struct x86_emulate_ctxt *ctxt, la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; if (c->ad_bytes != 8) la &= (u32)-1; - return la; + *linear = la; + return X86EMUL_CONTINUE; } static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, @@ -545,7 +547,13 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, void *data, unsigned size) { - return ctxt->ops->read_std(linear(ctxt, addr), data, size, ctxt->vcpu, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, &ctxt->exception); } @@ -962,7 +970,13 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, void *data, unsigned size) { - return read_emulated(ctxt, ctxt->ops, linear(ctxt, addr), data, size); + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return read_emulated(ctxt, ctxt->ops, linear, data, size); } static int segmented_write(struct x86_emulate_ctxt *ctxt, @@ -970,7 +984,13 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, const void *data, unsigned size) { - return ctxt->ops->write_emulated(linear(ctxt, addr), data, size, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_emulated(linear, data, size, &ctxt->exception, ctxt->vcpu); } @@ -979,7 +999,13 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, const void *orig_data, const void *data, unsigned size) { - return ctxt->ops->cmpxchg_emulated(linear(ctxt, addr), orig_data, data, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, size, &ctxt->exception, ctxt->vcpu); } @@ -2480,7 +2506,12 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) static int em_invlpg(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - emulate_invlpg(ctxt->vcpu, linear(ctxt, c->src.addr.mem)); + int rc; + ulong linear; + + rc = linearize(ctxt, c->src.addr.mem, &linear); + if (rc == X86EMUL_CONTINUE) + emulate_invlpg(ctxt->vcpu, linear); /* Disable writeback. 
*/ c->dst.type = OP_NONE; return X86EMUL_CONTINUE; -- cgit v1.1 From 83b8795a29c53a5f9f202933818128aa54c3e8d2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 11:31:19 +0300 Subject: KVM: x86 emulator: pass access size and read/write intent to linearize() Needed for segment read/write checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b46fa37..a2d343c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -491,6 +491,7 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, static int linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, + unsigned size, bool write, ulong *linear) { struct decode_cache *c = &ctxt->decode; @@ -550,7 +551,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, @@ -973,7 +974,7 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; return read_emulated(ctxt, ctxt->ops, linear, data, size); @@ -987,7 +988,7 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->write_emulated(linear, data, size, @@ -1002,7 +1003,7 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, @@ -2509,7 +2510,7 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) int rc; ulong linear; - rc = linearize(ctxt, c->src.addr.mem, &linear); + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); if (rc == X86EMUL_CONTINUE) emulate_invlpg(ctxt->vcpu, linear); /* Disable writeback. */ -- cgit v1.1 From 52fd8b445f5e8572526e3f84c753079470152414 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 12:33:12 +0300 Subject: KVM: x86 emulator: move linearize() downwards So it can call emulate_gp() without forward declarations. 
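At this point linearize() still performs only the arithmetic part of the job: add the segment base to the effective address and, for anything but 64-bit address size, wrap the result at 4 GiB; the commits that follow layer descriptor-based limit and permission checks on top. A minimal model of the core computation (standalone C with invented names, not the emulator's real types):

    #include <stdio.h>
    #include <stdint.h>

    /* linear = segment base + effective address, truncated to 32 bits
     * unless the address size is 64-bit (ad_bytes == 8). */
    static uint64_t toy_linearize(uint64_t seg_base, uint64_t ea, int ad_bytes)
    {
        uint64_t la = seg_base + ea;

        if (ad_bytes != 8)
            la &= 0xffffffffULL; /* wrap like a 16/32-bit guest would */
        return la;
    }

    int main(void)
    {
        /* 16-bit real mode: base = selector << 4, so 1234:0010 -> 0x12350 */
        printf("%#llx\n",
               (unsigned long long)toy_linearize(0x1234ULL << 4, 0x10, 2));
        return 0;
    }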
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a2d343c..601a9bc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -489,21 +489,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static int linearize(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - unsigned size, bool write, - ulong *linear) -{ - struct decode_cache *c = &ctxt->decode; - ulong la; - - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; - if (c->ad_bytes != 8) - la &= (u32)-1; - *linear = la; - return X86EMUL_CONTINUE; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -543,6 +528,21 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + struct decode_cache *c = &ctxt->decode; + ulong la; + + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + if (c->ad_bytes != 8) + la &= (u32)-1; + *linear = la; + return X86EMUL_CONTINUE; +} + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, -- cgit v1.1 From 56697687da592d0429c0c3ab80ee7e9d20a3b6e5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 14:08:51 +0300 Subject: KVM: x86 emulator: move desc_limit_scaled() For reuse later. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 601a9bc..793aff5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -464,6 +464,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + static void set_seg_override(struct decode_cache *c, int seg) { c->has_seg_override = true; @@ -1040,13 +1047,6 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, return 1; } -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? 
(limit << 12) | 0xfff : limit; -} - static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) -- cgit v1.1 From 618ff15de19109af126b33d90d7eaec27e61c691 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 12:32:09 +0300 Subject: KVM: x86 emulator: implement segment permission checks Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 793aff5..2ec69bc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -515,6 +515,11 @@ static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) return emulate_exception(ctxt, GP_VECTOR, err, true); } +static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) +{ + return emulate_exception(ctxt, SS_VECTOR, err, true); +} + static int emulate_ud(struct x86_emulate_ctxt *ctxt) { return emulate_exception(ctxt, UD_VECTOR, 0, false); @@ -541,13 +546,71 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { struct decode_cache *c = &ctxt->decode; + struct desc_struct desc; + bool usable; ulong la; + u32 lim; + unsigned cpl, rpl; la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + switch (ctxt->mode) { + case X86EMUL_MODE_REAL: + break; + case X86EMUL_MODE_PROT64: + if (((signed long)la << 16) >> 16 != la) + return emulate_gp(ctxt, 0); + break; + default: + usable = ctxt->ops->get_cached_descriptor(&desc, NULL, addr.seg, + ctxt->vcpu); + if (!usable) + goto bad; + /* code segment or read-only data segment */ + if (((desc.type & 8) || !(desc.type & 2)) && write) + goto bad; + /* unreadable code segment */ + if ((desc.type & 8) && !(desc.type & 2)) + goto bad; + lim = desc_limit_scaled(&desc); + if ((desc.type & 8) || !(desc.type & 4)) { + /* expand-up segment */ + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } else { + /* exapand-down segment */ + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) + goto bad; + lim = desc.d ? 0xffffffff : 0xffff; + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } + cpl = ctxt->ops->cpl(ctxt->vcpu); + rpl = ctxt->ops->get_segment_selector(addr.seg, ctxt->vcpu) & 3; + cpl = max(cpl, rpl); + if (!(desc.type & 8)) { + /* data segment */ + if (cpl > desc.dpl) + goto bad; + } else if ((desc.type & 8) && !(desc.type & 4)) { + /* nonconforming code segment */ + if (cpl != desc.dpl) + goto bad; + } else if ((desc.type & 8) && (desc.type & 4)) { + /* conforming code segment */ + if (cpl < desc.dpl) + goto bad; + } + break; + } if (c->ad_bytes != 8) la &= (u32)-1; *linear = la; return X86EMUL_CONTINUE; +bad: + if (addr.seg == VCPU_SREG_SS) + return emulate_ss(ctxt, addr.seg); + else + return emulate_gp(ctxt, addr.seg); } static int segmented_read_std(struct x86_emulate_ctxt *ctxt, -- cgit v1.1 From 7ae441eac521b2006c9f03c4f2a23582c07fd76d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 31 Mar 2011 12:06:41 +0200 Subject: KVM: emulator: do not needlessly sync registers from emulator ctxt to vcpu Currently we sync registers back and forth before/after exiting to userspace for IO, but during IO the device model shouldn't need to read/write the registers, so we can just as well skip those sync points. The only exception is the broken vmware backdoor interface.
The new code syncs register content during IO only if the registers are read from/written to by userspace in the middle of the IO operation, and this almost never happens in practice. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 47 +++++++++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd57639..3e03f37 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -385,6 +385,8 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; gpa_t time; struct pvclock_vcpu_time_info hv_clock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d5a7f4..3416a34 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4420,6 +4420,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) @@ -4502,6 +4503,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + bool writeback = true; kvm_clear_exception_queue(vcpu); vcpu->arch.mmio_fault_cr2 = cr2; @@ -4542,9 +4544,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - /* this is needed for vmware backdor interface to work since it + /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + } restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); @@ -4565,19 +4570,28 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; + else + writeback = false; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) + } else if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + writeback = false; r = EMULATE_DO_MMIO; - else if (r == EMULATION_RESTART) + } else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + if (writeback) { + toggle_interruptibility(vcpu, + vcpu->arch.emulate_ctxt.interruptibility); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + } else + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; return r; } @@ -5587,6 +5601,18 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { + /* + * We are here if userspace calls get_regs() in the middle of + * instruction emulation. Registers state needs to be copied + * back from emulation context to vcpu.
Usrapace shouldn't do + * that usually, but some bad designed PV devices (vmware + * backdoor interface) need this to work + */ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -5614,6 +5640,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); -- cgit v1.1 From be6d05cfdf1f2ddbdc367a6433d8eac49d6bfe6f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Apr 2011 01:27:55 +0200 Subject: KVM: VMX: Ensure that vmx_create_vcpu always returns proper error In case certain allocations fail, vmx_create_vcpu may return 0 as error instead of a negative value encoded via ERR_PTR. This causes a NULL pointer dereference later on in kvm_vm_ioctl_vcpu_create. Reported-by: Sasha Levin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aabe333..af52069 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4251,8 +4251,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; if (!vmx->guest_msrs) { - err = -ENOMEM; goto uninit_vcpu; } @@ -4271,7 +4271,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vmcs; if (vm_need_virtualize_apic_accesses(kvm)) - if (alloc_apic_access_page(kvm) != 0) + err = alloc_apic_access_page(kvm); + if (err) goto free_vmcs; if (enable_ept) { -- cgit v1.1 From 575e7c1417d41dd72ddf2a49965f833ce9352e92 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:24:55 +0900 Subject: KVM: x86 emulator: Disable writeback for CMP emulation This stops "CMP r/m, reg" from writing back the data into memory. Pointed out by Avi. The writeback suppression now covers CMP, CMPS, SCAS. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2ec69bc..1e0e3f8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3671,6 +3671,7 @@ special_insn: break; case 0x38 ... 0x3d: cmp: /* cmp */ + c->dst.type = OP_NONE; /* Disable writeback. */ emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); break; case 0x40 ... 0x47: /* inc r16/r32 */ @@ -3797,7 +3798,6 @@ special_insn: rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); break; case 0xa6 ... 0xa7: /* cmps */ - c->dst.type = OP_NONE; /* Disable writeback. */ goto cmp; case 0xa8 ...
0xa9: /* test ax, imm */ goto test; -- cgit v1.1 From 4179bb02fd3e87183e5f698495dfcb80df187889 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:29:09 +0900 Subject: KVM: x86 emulator: Make emulate_push() store the value directly PUSH emulation stores the value by calling writeback() after setting the dst operand appropriately in emulate_push(). This writeback() using dst is not needed at all because we know the target is the stack. So this patch makes emulate_push() call the newly introduced segmented_write() directly. This removes many inlined writeback() calls. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 87 ++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1e0e3f8..4f4d9bc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1345,17 +1345,19 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_push(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + struct segmented_address addr; - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); - c->dst.addr.mem.seg = VCPU_SREG_SS; + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); + addr.seg = VCPU_SREG_SS; + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, @@ -1417,14 +1419,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) { struct decode_cache *c = &ctxt->decode; c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); - emulate_push(ctxt, ops); + return emulate_push(ctxt, ops); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1454,18 +1456,13 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - emulate_push(ctxt, ops); - - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; ++reg; } - /* Disable writeback.
*/ - c->dst.type = OP_NONE; - return rc; } @@ -1503,27 +1500,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; - c->dst.type = OP_NONE; - ops->get_idt(&dt, ctxt->vcpu); eip_addr = dt.address + (irq << 2); @@ -1713,6 +1705,7 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + int rc = X86EMUL_CONTINUE; switch (c->modrm_reg) { case 0: /* inc */ @@ -1726,17 +1719,17 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; case 6: /* push */ - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } - return X86EMUL_CONTINUE; + return rc; } static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, @@ -2380,7 +2373,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - emulate_push(ctxt, ops); + ret = emulate_push(ctxt, ops); } return ret; @@ -2400,11 +2393,8 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) { - rc = writeback(ctxt, ops); - if (rc == X86EMUL_CONTINUE) - ctxt->eip = c->eip; - } + if (rc == X86EMUL_CONTINUE) + ctxt->eip = c->eip; return (rc == X86EMUL_UNHANDLEABLE) ? 
EMULATION_FAILED : EMULATION_OK; } @@ -2422,8 +2412,7 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, static int em_push(struct x86_emulate_ctxt *ctxt) { - emulate_push(ctxt, ctxt->ops); - return X86EMUL_CONTINUE; + return emulate_push(ctxt, ctxt->ops); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -2483,20 +2472,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); + rc = emulate_push(ctxt, ctxt->ops); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); - if (rc != X86EMUL_CONTINUE) - return rc; - - c->dst.type = OP_NONE; - - return X86EMUL_CONTINUE; + return emulate_push(ctxt, ctxt->ops); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -3625,7 +3606,7 @@ special_insn: emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); break; case 0x06: /* push es */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); @@ -3635,14 +3616,14 @@ special_insn: emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); break; case 0x0e: /* push cs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; case 0x10 ... 0x15: adc: /* adc */ emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); break; case 0x16: /* push ss */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); @@ -3652,7 +3633,7 @@ special_insn: emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); break; case 0x1e: /* push ds */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); @@ -3789,7 +3770,7 @@ special_insn: break; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; case 0x9d: /* popf */ c->dst.type = OP_REG; @@ -3864,7 +3845,7 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } case 0xe9: /* jmp rel */ @@ -4157,7 +4138,7 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -4174,7 +4155,7 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); -- cgit v1.1 From 4487b3b48d8fa3f6a5dd4155c9e34d5e998ad7fe Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:31:23 +0900 Subject: KVM: x86 emulator: Use em_push() instead of emulate_push() em_push() is a simple wrapper of emulate_push(). So this patch replaces emulate_push() with em_push() and removes the unnecessary former. In addition, the unused ops arguments are removed from emulate_pusha() and emulate_grp45(). 
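All of the push variants now funnel through a single primitive: decrement RSP by the operand size, then store the value at SS:RSP through segmented_write() so the usual segment checks apply. A toy model of that primitive (standalone C; not the emulator's real structures, and little-endian like x86):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    struct toy_cpu {
        uint64_t rsp;
        uint8_t  stack[64]; /* pretend SS.base maps to this buffer */
    };

    /* Decrement the stack pointer, then store op_bytes of val there. */
    static void toy_push(struct toy_cpu *c, uint64_t val, int op_bytes)
    {
        c->rsp -= op_bytes;
        memcpy(&c->stack[c->rsp], &val, op_bytes);
    }

    int main(void)
    {
        struct toy_cpu c = { .rsp = sizeof(c.stack) };
        uint32_t top;

        toy_push(&c, 0xdeadbeef, 4);
        memcpy(&top, &c.stack[c.rsp], sizeof(top));
        printf("rsp=%llu top=%#x\n", (unsigned long long)c.rsp, top);
        /* prints rsp=60 top=0xdeadbeef */
        return 0;
    }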
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4f4d9bc..cb2efa4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1345,8 +1345,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; struct segmented_address addr; @@ -1426,7 +1425,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); - return emulate_push(ctxt, ops); + return em_push(ctxt); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1444,8 +1443,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1456,7 +1454,7 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1500,19 +1498,19 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1701,8 +1699,7 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1719,14 +1716,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; case 6: /* push */ - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } return rc; @@ -2373,7 +2370,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 
4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - ret = emulate_push(ctxt, ops); + ret = em_push(ctxt); } return ret; @@ -2410,11 +2407,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, op->addr.mem.seg = seg; } -static int em_push(struct x86_emulate_ctxt *ctxt) -{ - return emulate_push(ctxt, ctxt->ops); -} - static int em_das(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2472,12 +2464,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - rc = emulate_push(ctxt, ctxt->ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - return emulate_push(ctxt, ctxt->ops); + return em_push(ctxt); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -3666,7 +3658,7 @@ special_insn: rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); break; case 0x60: /* pusha */ - rc = emulate_pusha(ctxt, ops); + rc = emulate_pusha(ctxt); break; case 0x61: /* popa */ rc = emulate_popa(ctxt, ops); @@ -3770,7 +3762,7 @@ special_insn: break; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; case 0x9d: /* popf */ c->dst.type = OP_REG; @@ -3845,7 +3837,7 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 0xe9: /* jmp rel */ @@ -3923,7 +3915,7 @@ special_insn: break; case 0xfe: /* Grp4 */ grp45: - rc = emulate_grp45(ctxt, ops); + rc = emulate_grp45(ctxt); break; case 0xff: /* Grp5 */ if (c->modrm_reg == 5) -- cgit v1.1 From 71f9833bb1cba9939245f3e57388d87d69f8f399 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 13 Apr 2011 09:12:54 -0500 Subject: KVM: fix push of wrong eip when doing softint When doing a soft int, we need to bump eip before pushing it to the stack. Otherwise we'll do the int a second time. [apw@canonical.com: merged eip update as per Jan's recommendation.] Signed-off-by: Serge E. 
Hallyn Signed-off-by: Andy Whitcroft Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 12 +++++++++--- arch/x86/kvm/x86.c | 5 +++-- arch/x86/kvm/x86.h | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af52069..3f6e9bf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1063,7 +1063,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, } if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2942,7 +2945,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2977,7 +2983,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ++vcpu->stat.nmi_injections; vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3416a34..b05e3fc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4423,7 +4423,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int ret; @@ -4432,7 +4432,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.decode.op_bytes = 2; vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + + inc_eip; ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); if (ret != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da8..e407ed3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -77,7 +77,7 @@ static inline u32 bit(int bitno) void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); -- cgit v1.1 From 1e015968dfb9289934ad66a67fa38a93f0adf4f8 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Mon, 11 Apr 2011 12:56:01 +0800 Subject: KVM: remove useless function declarations from file arch/x86/kvm/irq.h Just remove the useless function declarations kvm_pic_clear_isr_ack() and pit_has_pending_timer() Signed-off-by: Duan Jiong Signed-off-by: Avi Kivity --- arch/x86/kvm/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d1..53e2d08 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -75,7 +75,6 @@ struct
kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_destroy_pic(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -void kvm_pic_clear_isr_ack(struct kvm *kvm); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) { @@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); -int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); #endif -- cgit v1.1 From 49704f26586ca87fcab4fe9323fff8db41e78910 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Mon, 11 Apr 2011 12:44:06 +0800 Subject: KVM: remove useless function declaration kvm_inject_pit_timer_irqs() Just remove the useless function declaration kvm_inject_pit_timer_irqs() from file arch/x86/kvm/i8254.h Signed-off-by: Duan Jiong Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index b681a9f..51a9742 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -50,7 +50,6 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); -- cgit v1.1 From 0521e4c0bc73aa86152ee4e4bd03724c8a9e1d6b Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Wed, 13 Apr 2011 11:44:13 -0400 Subject: KVM: x86 emulator: Handle wraparound in (cs_base + offset) when fetching insns Currently, setting a large (i.e. negative) base address for %cs does not work on a 64-bit host. The "JOS" teaching operating system, used by MIT and other universities, relies on such segments while bootstrapping its way to full virtual memory management. Signed-off-by: Nelson Elhage Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cb2efa4..4c5ff22 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -637,9 +637,12 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { + unsigned long linear = eip + ctxt->cs_base; + if (ctxt->mode != X86EMUL_MODE_PROT64) + linear &= (u32)-1; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, + rc = ops->fetch(linear, fc->data + cur_size, size, ctxt->vcpu, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; -- cgit v1.1 From 977b2d03e42e9ea9355d4baddb464810579719bd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 Apr 2011 11:42:52 +0200 Subject: KVM: SVM: Fix nested sel_cr0 intercept path with decode-assists This patch fixes a bug in the nested-svm path when decode-assists is available on the machine. After a selective-cr0 intercept is detected the rip is advanced unconditionally. This causes the l1-guest to continue running with an l2-rip. This bug was found with the sel_cr0 unit-test on decode-assists capable hardware.
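The shape of this fix is easier to see outside the diff. With decode assists, the hardware hands over the pre-decoded CR0 value; if the selective-CR0 check decides the write belongs to L1, the handler has to return before the common completion path runs, because that path advances RIP and the RIP loaded at that point is L2's. A simplified sketch of the intended control flow (handle_cr0_write() is an illustrative distillation of cr_interception(), not the literal kernel code):

/*
 * Sketch, assuming decode assists: val is the pre-decoded CR0 value.
 * If L1 intercepts this selective-CR0 write, the nested #VMEXIT has
 * already been queued for L1 and RIP must be left alone; it still
 * points into L2.
 */
static int handle_cr0_write(struct vcpu_svm *svm, unsigned long val)
{
	int err;

	if (check_selective_cr0_intercepted(svm, val))
		return 1;	/* exit reflected to L1; do not touch RIP */

	/* Not intercepted by L1: emulate the write, then advance RIP. */
	err = kvm_set_cr0(&svm->vcpu, val);
	kvm_complete_insn_gp(&svm->vcpu, err);
	return 1;
}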
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a6bf2ad..de4bba9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2799,6 +2799,9 @@ static int cr_interception(struct vcpu_svm *svm) case 0: if (!check_selective_cr0_intercepted(svm, val)) err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); -- cgit v1.1 From 7c4c0f4fd5c3e82234c0ab61c7e7ffdb8f3af07b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 Apr 2011 11:42:53 +0200 Subject: KVM: X86: Update last_guest_tsc in vcpu_put The last_guest_tsc is used in vcpu_load to adjust the tsc_offset since tsc-scaling was merged. So the last_guest_tsc needs to be updated in vcpu_put instead of the last_host_tsc. This is fixed with this patch. Reported-by: Jan Kiszka Tested-by: Jan Kiszka Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3e03f37..e50bffc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,7 +393,6 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b05e3fc..6aa1377 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,7 +2146,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) -- cgit v1.1 From 3d9b938eefb7d91a1ae13e425931bd5ac103b762 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Mon, 18 Apr 2011 12:05:53 -0400 Subject: KVM: emulator: Use linearize() when fetching instructions Since segments need to be handled slightly differently when fetching instructions, we add a __linearize helper that accepts a new 'fetch' boolean. [avi: fix oops caused by wrong segmented_address initialization order] Signed-off-by: Nelson Elhage Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 - arch/x86/kvm/emulate.c | 26 ++++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0818448..9b760c8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -265,7 +265,6 @@ struct x86_emulate_ctxt { unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value.
*/ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4c5ff22..e1f77de 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,9 +540,9 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static int linearize(struct x86_emulate_ctxt *ctxt, +static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, - unsigned size, bool write, + unsigned size, bool write, bool fetch, ulong *linear) { struct decode_cache *c = &ctxt->decode; @@ -569,7 +569,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, if (((desc.type & 8) || !(desc.type & 2)) && write) goto bad; /* unreadable code segment */ - if ((desc.type & 8) && !(desc.type & 2)) + if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); if ((desc.type & 8) || !(desc.type & 4)) { @@ -602,7 +602,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, } break; } - if (c->ad_bytes != 8) + if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) la &= (u32)-1; *linear = la; return X86EMUL_CONTINUE; @@ -613,6 +613,15 @@ bad: return emulate_gp(ctxt, addr.seg); } +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, @@ -637,11 +646,13 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { - unsigned long linear = eip + ctxt->cs_base; - if (ctxt->mode != X86EMUL_MODE_PROT64) - linear &= (u32)-1; + unsigned long linear; + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; rc = ops->fetch(linear, fc->data + cur_size, size, ctxt->vcpu, &ctxt->exception); if (rc != X86EMUL_CONTINUE) @@ -3127,7 +3138,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->fetch.end = c->fetch.start + insn_len; if (insn_len > 0) memcpy(c->fetch.data, insn, insn_len); - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: -- cgit v1.1 From 7295261cdd42e6d41666df38d1b613cdd9e95f46 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:12:27 +0300 Subject: KVM: x86 emulator: whitespace cleanups Clean up lines longer than 80 columns. No code changes. 
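One idiom recurs in every macro being re-wrapped and is easy to lose in the churn: dispatch on operand size, with the 8-byte case wrapped in ON64() so it compiles away on 32-bit hosts. Distilled to its skeleton (emulate_1op_sized is an illustrative name; __emulate_1op and ON64 are the helpers visible in the diff below):

/*
 * Sketch of the operand-size dispatch idiom used throughout emulate.c.
 * ON64(x) expands to x on x86-64 and to nothing on 32-bit builds, so
 * the "q"-suffixed 64-bit case simply disappears where it cannot run.
 */
#define emulate_1op_sized(_op, _dst, _eflags)				\
	do {								\
		switch ((_dst).bytes) {					\
		case 1: __emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2: __emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4: __emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q"));	\
			break;						\
		}							\
	} while (0)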
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 96 ++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 42 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e1f77de..4a5b61f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -262,42 +262,42 @@ struct gprefix { "w", "r", _LO32, "r", "", "r") /* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) #define __emulate_1op(_op, _dst, _eflags, _suffix) \ @@ -360,13 +360,25 @@ struct gprefix { } while (0) /* instruction has only one source operand, destination is implicit (e.g. 
mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ - do { \ - switch((_src).bytes) { \ - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ +#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ + do { \ + switch((_src).bytes) { \ + case 1: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "b"); \ + break; \ + case 2: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "w"); \ + break; \ + case 4: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "l"); \ + break; \ + case 8: \ + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "q")); \ + break; \ } \ } while (0) @@ -402,7 +414,7 @@ struct gprefix { (_type)_x; \ }) -#define insn_fetch_arr(_arr, _size, _eip) \ +#define insn_fetch_arr(_arr, _size, _eip) \ ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ -- cgit v1.1 From 0f65dd70a442ff498da10cec0a599fbd9d2d6f9e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from memory read/write callbacks Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 34 +++++++++++------------- arch/x86/kvm/emulate.c | 54 ++++++++++++++++++-------------------- arch/x86/kvm/x86.c | 54 ++++++++++++++++++++++++-------------- 3 files changed, 75 insertions(+), 67 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 9b760c8..b4d8467 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -92,8 +92,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, struct x86_exception *fault); /* @@ -103,8 +104,8 @@ struct x86_emulate_ops { * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. @@ -113,8 +114,8 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* @@ -123,11 +124,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. 
*/ - int (*read_emulated)(unsigned long addr, - void *val, - unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); /* * write_emulated: Write bytes to emulated/special memory area. @@ -136,11 +135,10 @@ struct x86_emulate_ops { * required). * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_emulated)(unsigned long addr, - const void *val, + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -150,12 +148,12 @@ struct x86_emulate_ops { * @new: [IN ] Value to write to @addr. * @bytes: [IN ] Number of bytes to access using CMPXCHG. */ - int (*cmpxchg_emulated)(unsigned long addr, + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); int (*pio_in_emulated)(int size, unsigned short port, void *val, unsigned int count, struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4a5b61f..ff64b17 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -645,8 +645,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, - &ctxt->exception); + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, @@ -665,8 +664,8 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, rc = __linearize(ctxt, addr, size, false, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->fetch(linear, fc->data + cur_size, - size, ctxt->vcpu, &ctxt->exception); + rc = ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -1047,8 +1046,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(addr, mc->data + mc->end, n, - &ctxt->exception, ctxt->vcpu); + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -1087,8 +1086,8 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->write_emulated(linear, data, size, - &ctxt->exception, ctxt->vcpu); + return ctxt->ops->write_emulated(ctxt, linear, data, size, + &ctxt->exception); } static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, @@ -1102,8 +1101,8 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, - size, &ctxt->exception, ctxt->vcpu); + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, + size, &ctxt->exception); } static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, @@ -1168,8 +1167,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) return 
emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -1190,8 +1188,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -1545,11 +1542,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; @@ -2036,13 +2033,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, - NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -2150,7 +2146,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2158,13 +2154,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, save_state_to_tss16(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2173,10 +2169,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -2282,7 +2278,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2290,13 +2286,13 @@ static int task_switch_32(struct 
x86_emulate_ctxt *ctxt, save_state_to_tss32(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2305,10 +2301,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6aa1377..274652a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -63,6 +63,9 @@ #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define emul_to_vcpu(ctxt) \ + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -3760,37 +3763,43 @@ out: } /* used for instruction fetching */ -static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access | PFERR_FETCH_MASK, exception); } -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, +static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); void *data = val; int r = X86EMUL_CONTINUE; @@ -3818,12 +3827,13 @@ out: return r; } -static int emulator_read_emulated(unsigned long addr, +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; int handled; @@ -3844,7 +3854,7 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -3933,12 +3943,14 @@ mmio: return X86EMUL_CONTINUE; } -int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; @@ -3966,13 +3978,14 @@ int emulator_write_emulated(unsigned long addr, (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) #endif -static int emulator_cmpxchg_emulated(unsigned long addr, +static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; struct page *page; char *kaddr; @@ -4028,7 +4041,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, emul_write: printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); - return emulator_write_emulated(addr, new, bytes, exception, vcpu); + return emulator_write_emulated(ctxt, addr, new, bytes, exception); } static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -5009,7 +5022,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, + rip, instruction, 3, NULL); } void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -- cgit v1.1 From ca1d4a9e772bde0a0b8cda61ee9fdca29f80f361 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from pio callbacks Making the emulator caller agnostic. 
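Each of these conversions leans on the emul_to_vcpu() helper introduced in the previous patch: the emulation context is embedded in struct kvm_vcpu_arch, so a KVM-side callback can recover its vcpu from the ctxt pointer with container_of(), and the emulator itself never needs to know that a kvm_vcpu exists. A stripped-down sketch of a converted callback (emulator_pio_out() abbreviates the real emulator_pio_out_emulated(); the body is reduced to the conversion itself):

/*
 * The ctxt handed to the emulator lives inside struct kvm_vcpu, so
 * container_of() walks back to the enclosing vcpu; no extra argument
 * is needed.
 */
#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

static int emulator_pio_out(struct x86_emulate_ctxt *ctxt, int size,
			    unsigned short port, const void *val,
			    unsigned int count)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);	/* ctxt to vcpu */

	vcpu->arch.pio.port = port;	/* from here the body is exactly */
	vcpu->arch.pio.count = count;	/* what the old vcpu-taking      */
	return 0;			/* callback did (sketch only)    */
}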
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 10 ++++++---- arch/x86/kvm/emulate.c | 6 +++--- arch/x86/kvm/x86.c | 18 ++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b4d8467..1348bdf 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -155,11 +155,13 @@ struct x86_emulate_ops { unsigned int bytes, struct x86_exception *fault); - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, int seg, struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ff64b17..8af08a1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1125,7 +1125,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -3892,8 +3892,8 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - ops->pio_out_emulated(c->src.bytes, c->dst.val, - &c->src.val, 1, ctxt->vcpu); + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, + &c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. 
*/ break; case 0xf4: /* hlt */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 274652a..e9040a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4060,9 +4060,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) +static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + if (vcpu->arch.pio.count) goto data_avail; @@ -4090,10 +4093,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, return 0; } -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) +static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, + const void *val, unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + trace_kvm_pio(1, port, size, count); vcpu->arch.pio.port = port; @@ -4614,7 +4619,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, + size, port, &val, 1); /* do not return to emulator after return from userspace */ vcpu->arch.pio.count = 0; return ret; -- cgit v1.1 From 4bff1e86ad286d4b3a54902540abeeaf95e64db3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from segment/gdt/idt callbacks Making the emulator caller agnostic. 
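The payoff of making every callback caller agnostic is that x86_emulate_ops stops being KVM-specific: anything that can answer these queries can drive the emulator. As a purely hypothetical illustration (nothing like this exists in the tree), a unit-test harness could back the segment callbacks with a flat table instead of a vcpu:

/*
 * Hypothetical test backend, for illustration only.  Because callbacks
 * now receive just the ctxt, a backend embeds the ctxt in its own state
 * and recovers it with container_of(), exactly as KVM does with
 * struct kvm_vcpu.
 */
struct fake_cpu {
	struct x86_emulate_ctxt ctxt;	/* embedded, as in kvm_vcpu_arch */
	u16 selectors[8];		/* canned per-segment selectors */
};

static u16 fake_get_segment_selector(struct x86_emulate_ctxt *ctxt, int seg)
{
	struct fake_cpu *fake = container_of(ctxt, struct fake_cpu, ctxt);

	return fake->selectors[seg];
}

/* Wired up with: ops.get_segment_selector = fake_get_segment_selector; */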
[Takuya Yoshikawa: fix typo leading to LDT failures] Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 22 +++++--- arch/x86/kvm/emulate.c | 112 ++++++++++++++++++------------------- arch/x86/kvm/x86.c | 39 +++++++------ 3 files changed, 90 insertions(+), 83 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1348bdf..656046a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -163,15 +163,19 @@ struct x86_emulate_ops { int size, unsigned short port, const void *val, unsigned int count); - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); + bool (*get_cached_descriptor)(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 *base3, + int seg); + void (*set_cached_descriptor)(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 base3, + int seg); + u16 (*get_segment_selector)(struct x86_emulate_ctxt *ctxt, int seg); + void (*set_segment_selector)(struct x86_emulate_ctxt *ctxt, + u16 sel, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); int (*cpl)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8af08a1..9602889 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -495,7 +495,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(seg, ctxt->vcpu); + return ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, @@ -573,8 +573,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, 0); break; default: - usable = ctxt->ops->get_cached_descriptor(&desc, NULL, addr.seg, - ctxt->vcpu); + usable = ctxt->ops->get_cached_descriptor(ctxt, &desc, NULL, + addr.seg); if (!usable) goto bad; /* code segment or read-only data segment */ @@ -597,7 +597,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt->vcpu); - rpl = ctxt->ops->get_segment_selector(addr.seg, ctxt->vcpu) & 3; + rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ @@ -1142,14 +1142,14 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, if (selector & 1 << 2) { struct desc_struct desc; memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, - ctxt->vcpu)) + if (!ops->get_cached_descriptor(ctxt, &desc, NULL, + VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? 
*/ dt->address = get_desc_base(&desc); } else - ops->get_gdt(dt, ctxt->vcpu); + ops->get_gdt(ctxt, dt); } /* allowed just for 8 bytes segments */ @@ -1304,8 +1304,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); + ops->set_segment_selector(ctxt, selector, seg); + ops->set_cached_descriptor(ctxt, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1446,7 +1446,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); + c->src.val = ops->get_segment_selector(ctxt, seg); return em_push(ctxt); } @@ -1527,7 +1527,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + c->src.val = ops->get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1537,7 +1537,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - ops->get_idt(&dt, ctxt->vcpu); + ops->get_idt(ctxt, &dt); eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; @@ -1814,7 +1814,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct desc_struct *ss) { memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); + ops->get_cached_descriptor(ctxt, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1861,10 +1861,10 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; if (is_long_mode(ctxt->vcpu)) { @@ -1933,10 +1933,10 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; @@ -1990,10 +1990,10 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + 
ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -2024,7 +2024,7 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_descriptor(ctxt, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -2079,11 +2079,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -2108,11 +2108,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + ops->set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. 
If fault happenes at this stage @@ -2199,13 +2199,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = ops->get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = ops->get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -2232,13 +2232,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + ops->set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + ops->set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + ops->set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2320,9 +2320,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + u16 old_tss_sel = ops->get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; /* FIXME: old_tss_base == ~0 ? 
*/ @@ -2383,8 +2383,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, } ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); + ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2475,7 +2475,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + old_cs = ctxt->ops->get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -3743,7 +3743,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e9040a9..6a7fbf6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4237,28 +4237,29 @@ static int emulator_get_cpl(struct kvm_vcpu *vcpu) return kvm_x86_ops->get_cpl(vcpu); } -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_gdt(vcpu, dt); + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_idt(vcpu, dt); + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } -static unsigned long emulator_get_cached_segment_base(int seg, - struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cached_segment_base( + struct x86_emulate_ctxt *ctxt, int seg) { - return get_segment_base(vcpu, seg); + return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu) +static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; - kvm_get_segment(vcpu, &var, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); if (var.unusable) return false; @@ -4283,9 +4284,11 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, return true; } -static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu) +static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 base3, + int seg) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; /* needed to preserve selector */ @@ -4314,22 +4317,22 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, return; } -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +static u16 emulator_get_segment_selector(struct x86_emulate_ctxt *ctxt, int seg) { struct kvm_segment kvm_seg; - kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); return kvm_seg.selector; } -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) +static void emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, + u16 sel, int seg) { struct kvm_segment 
kvm_seg; - kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); + kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); } static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -- cgit v1.1 From 717746e382e58f075642403eaac26bce0640b2c5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from cr/dr/cpl/msr callbacks Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 14 +++---- arch/x86/kvm/emulate.c | 84 +++++++++++++++++++------------------- arch/x86/kvm/x86.c | 34 ++++++++++----- 3 files changed, 73 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 656046a..2c02e75 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,13 +176,13 @@ struct x86_emulate_ops { int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9602889..33ad16b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -596,7 +596,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; } - cpl = ctxt->ops->cpl(ctxt->vcpu); + cpl = ctxt->ops->cpl(ctxt); rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { @@ -1248,7 +1248,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); + cpl = ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1407,7 +1407,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt); rc = emulate_pop(ctxt, ops, &val, len); if (rc != X86EMUL_CONTINUE) @@ -1852,7 +1852,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); @@ 
-1871,17 +1871,17 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; - ops->get_msr(ctxt->vcpu, + ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? MSR_LSTAR : MSR_CSTAR, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~(msr_data | EFLG_RF); #endif } else { /* legacy mode */ - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); c->eip = (u32)msr_data; ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1910,7 +1910,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { case X86EMUL_MODE_PROT32: if ((msr_data & 0xfffc) == 0x0) @@ -1938,10 +1938,10 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); c->regs[VCPU_REGS_RSP] = msr_data; return X86EMUL_CONTINUE; @@ -1970,7 +1970,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.dpl = 3; ss.dpl = 3; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (usermode) { case X86EMUL_MODE_PROT32: cs_sel = (u16)(msr_data + 16); @@ -2010,7 +2010,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -2187,7 +2187,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->cr3 = ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -2215,7 +2215,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) + if (ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -2338,7 +2338,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) + ops->cpl(ctxt) > next_tss_desc.dpl) return emulate_gp(ctxt, 0); } @@ -2382,7 +2382,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, &next_tss_desc); } - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); @@ -2542,7 +2542,7 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - ctxt->ops->get_msr(ctxt->vcpu, 
MSR_IA32_TSC, &tsc); + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; return X86EMUL_CONTINUE; @@ -2625,8 +2625,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) return emulate_gp(ctxt, 0); - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && !(cr4 & X86_CR4_PAE)) @@ -2652,8 +2652,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) case 4: { u64 cr4, efer; - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) return emulate_gp(ctxt, 0); @@ -2669,7 +2669,7 @@ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) { unsigned long dr7; - ctxt->ops->get_dr(7, &dr7, ctxt->vcpu); + ctxt->ops->get_dr(ctxt, 7, &dr7); /* Check if DR7.Global_Enable is set */ return dr7 & (1 << 13); @@ -2684,7 +2684,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if (dr > 7) return emulate_ud(ctxt); - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + cr4 = ctxt->ops->get_cr(ctxt, 4); if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); @@ -2710,7 +2710,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) { u64 efer; - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (!(efer & EFER_SVME)) return emulate_ud(ctxt); @@ -2731,9 +2731,9 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) static int check_rdtsc(struct x86_emulate_ctxt *ctxt) { - u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt->vcpu)) + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) return emulate_ud(ctxt); return X86EMUL_CONTINUE; @@ -2741,10 +2741,10 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { - u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); - if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt->vcpu)) || + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) return emulate_gp(ctxt, 0); @@ -3514,13 +3514,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->d & Sse) - && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM) - || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) { + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { rc = emulate_ud(ctxt); goto done; } - if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) { + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { rc = emulate_nm(ctxt); goto done; } @@ -3533,7 +3533,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } @@ -4052,11 +4052,11 @@ twobyte_insn: break; case 4: /* smsw */ c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, 0); break; case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | - (c->src.val & 0x0f), ctxt->vcpu); + ops->set_cr(ctxt, 0, (ops->get_cr(ctxt, 0) & 
~0x0eul) | + (c->src.val & 0x0f)); c->dst.type = OP_NONE; break; case 5: /* not defined */ @@ -4084,13 +4084,13 @@ twobyte_insn: case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); break; case 0x21: /* mov from dr to reg */ - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); break; case 0x22: /* mov reg, cr */ - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -4098,9 +4098,9 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if (ops->set_dr(c->modrm_reg, c->src.val & + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? - ~0ULL : ~0U), ctxt->vcpu) < 0) { + ~0ULL : ~0U)) < 0) { /* #UD condition is already handled by the code above */ emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; @@ -4113,7 +4113,7 @@ twobyte_insn: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -4122,7 +4122,7 @@ twobyte_insn: break; case 0x32: /* rdmsr */ - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6a7fbf6..16373a5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4160,15 +4160,15 @@ int emulate_clts(struct kvm_vcpu *vcpu) return X86EMUL_CONTINUE; } -int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(vcpu, dr, dest); + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { - return __kvm_set_dr(vcpu, dr, value); + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); } static u64 mk_cr_64(u64 curr_cr, u32 new_val) @@ -4176,8 +4176,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); unsigned long value; switch (cr) { @@ -4204,8 +4205,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) return value; } -static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; switch (cr) { @@ -4232,9 +4234,9 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) return res; } -static int emulator_get_cpl(struct kvm_vcpu *vcpu) +static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) { - return kvm_x86_ops->get_cpl(vcpu); + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); } static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) @@ -4335,6 +4337,18 @@ static void 
emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); } +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) +{ + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} + +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); +} + static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); @@ -4379,8 +4393,8 @@ static struct x86_emulate_ops emulate_ops = { .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .set_msr = kvm_set_msr, - .get_msr = kvm_get_msr, + .set_msr = emulator_set_msr, + .get_msr = emulator_get_msr, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, -- cgit v1.1 From 2953538ebbd95b145bd3629126fe5af61b88be11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from intercept callback Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2c02e75..e2b082a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -185,7 +185,7 @@ struct x86_emulate_ops { int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ - int (*intercept)(struct kvm_vcpu *vcpu, + int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 33ad16b..acb9fcc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -438,7 +438,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .next_rip = ctxt->eip, }; - return ctxt->ops->intercept(ctxt->vcpu, &info, stage); + return ctxt->ops->intercept(ctxt, &info, stage); } static inline unsigned long ad_mask(struct decode_cache *c) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 16373a5..4f7248e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4365,11 +4365,11 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } -static int emulator_intercept(struct kvm_vcpu *vcpu, +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return kvm_x86_ops->check_intercept(vcpu, info, stage); + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } static struct x86_emulate_ops emulate_ops = { -- cgit v1.1 From fe870ab9ce1c3e64c6d6b6ee3fe53d0d029f1044 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:01:23 +0300 Subject: KVM: x86 emulator: avoid using ctxt->vcpu in check_perm() callbacks Unneeded for register access. 
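A note on why register access needs no vcpu: on the KVM side, the guest register file is copied into the emulator's decode cache before emulation starts and copied back when it completes, and the context itself is embedded in the vcpu. A rough sketch of that bridge, recalled from the surrounding kernel code rather than shown in these patches:

	/* emul_to_vcpu() is presumably just a container_of() wrapper: */
	#define emul_to_vcpu(ctxt) \
		container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

	/* before emulation: snapshot the guest registers into the cache */
	memcpy(c->regs, vcpu->arch.regs, sizeof(c->regs));
	/* ... x86_emulate_insn() runs, reading and writing c->regs ... */
	/* after emulation: write the (possibly modified) snapshot back */
	memcpy(vcpu->arch.regs, c->regs, sizeof(c->regs));

So ctxt->decode.regs[VCPU_REGS_RAX] and kvm_register_read(vcpu, VCPU_REGS_RAX) see the same value during emulation; only the former is vcpu-free.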
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index acb9fcc..0ff7d4b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2720,7 +2720,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) static int check_svme_pa(struct x86_emulate_ctxt *ctxt) { - u64 rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX); + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; /* Valid physical address? */ if (rax & 0xffff000000000000) @@ -2742,7 +2742,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) -- cgit v1.1 From 1ac9d0cfb07e8ac3b5007d8279c5bd56e124250c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:12:00 +0300 Subject: KVM: x86 emulator: add and use new callbacks set_idt(), set_gdt() Replacing direct calls to realmode_lgdt(), realmode_lidt(). Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 --- arch/x86/kvm/emulate.c | 14 +++++++------- arch/x86/kvm/x86.c | 26 ++++++++++++-------------- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e2b082a..4d1546a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,6 +176,8 @@ struct x86_emulate_ops { int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); int (*cpl)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e50bffc..a8616ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -681,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); - void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ff7d4b..57e0e29 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3494,6 +3494,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) int rc = X86EMUL_CONTINUE; int saved_dst_type = c->dst.type; int irq; /* Used for int 3, int, and into */ + struct desc_ptr desc_ptr; ctxt->decode.mem_read.pos = 0; @@ -4005,9 +4006,6 @@ twobyte_insn: switch (c->b) { case 0x01: /* lgdt, lidt, lmsw */ switch (c->modrm_reg) { - u16 size; - unsigned long address; - case 0: /* vmcall */ if (c->modrm_mod != 3 || c->modrm_rm != 1) goto cannot_emulate; @@ -4023,10 +4021,11 @@ twobyte_insn: break; case 2: /* lgdt */ rc = read_descriptor(ctxt, ops, c->src.addr.mem, 
- &size, &address, c->op_bytes); + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); if (rc != X86EMUL_CONTINUE) goto done; - realmode_lgdt(ctxt->vcpu, size, address); + ctxt->ops->set_gdt(ctxt, &desc_ptr); /* Disable writeback. */ c->dst.type = OP_NONE; break; @@ -4041,11 +4040,12 @@ twobyte_insn: } } else { rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, + &desc_ptr.size, + &desc_ptr.address, c->op_bytes); if (rc != X86EMUL_CONTINUE) goto done; - realmode_lidt(ctxt->vcpu, size, address); + ctxt->ops->set_idt(ctxt, &desc_ptr); } /* Disable writeback. */ c->dst.type = OP_NONE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f7248e..7cd3a3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4249,6 +4249,16 @@ static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } +static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); +} + +static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); +} + static unsigned long emulator_get_cached_segment_base( struct x86_emulate_ctxt *ctxt, int seg) { @@ -4388,6 +4398,8 @@ static struct x86_emulate_ops emulate_ops = { .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, + .set_gdt = emulator_set_gdt, + .set_idt = emulator_set_idt, .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, @@ -5049,20 +5061,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) rip, instruction, 3, NULL); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_gdt(vcpu, &dt); -} - -void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_idt(vcpu, &dt); -} - static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) { struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; -- cgit v1.1 From c2ad2bb3ef870067ecfc9ccdcf465feb51f2b6a5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:21:35 +0300 Subject: KVM: x86 emulator: drop use of is_long_mode() Requires ctxt->vcpu, which is to be abolished. Replace with open calls to get_msr(). 
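The open-coded ops->get_msr(ctxt, MSR_EFER, &efer) pattern recurs throughout the hunks below; the patch deliberately prefers open calls over reintroducing a predicate tied to the vcpu. If the repetition bothered you, a small wrapper built on nothing but the ->get_msr() callback could hide it (hypothetical helper, not part of this patch):

	static u64 emul_read_efer(struct x86_emulate_ctxt *ctxt)
	{
		u64 efer = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		return efer;
	}

	/* usage: long mode is active iff EFER.LMA is set */
	if (emul_read_efer(ctxt) & EFER_LMA)
		...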
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 57e0e29..be1532f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1844,12 +1844,14 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; /* syscall is not available in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); @@ -1857,7 +1859,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } @@ -1867,7 +1869,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; @@ -1897,7 +1899,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1927,8 +1931,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 - || is_long_mode(ctxt->vcpu)) { + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { cs.d = 0; cs.l = 1; } @@ -2603,6 +2606,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; u64 new_val = c->src.val64; int cr = c->modrm_reg; + u64 efer = 0; static u64 cr_reserved_bits[] = { 0xffffffff00000000ULL, @@ -2620,7 +2624,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) switch (cr) { case 0: { - u64 cr4, efer; + u64 cr4; if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) return emulate_gp(ctxt, 0); @@ -2637,7 +2641,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) case 3: { u64 rsvd = 0; - if (is_long_mode(ctxt->vcpu)) + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) rsvd = CR3_L_MODE_RESERVED_BITS; else if (is_pae(ctxt->vcpu)) rsvd = CR3_PAE_RESERVED_BITS; @@ -2650,7 +2655,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) break; } case 4: { - u64 cr4, efer; + u64 cr4; cr4 = ctxt->ops->get_cr(ctxt, 4); ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); -- cgit v1.1 From fd72c4192220d0086fb24356ac6ff9c3b1e067d9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:24:32 +0300 Subject: KVM: x86 emulator: Replace calls to is_pae() and is_paging with ->get_cr() Avoid use of ctxt->vcpu. 
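The same idea carries over to the paging predicates: everything the emulator needs is visible through ->get_cr(). Named equivalents of is_pae()/is_paging() would look something like this (hypothetical sketch; the patch below simply open-codes the two tests):

	static bool emul_is_pae(struct x86_emulate_ctxt *ctxt)
	{
		return ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE;
	}

	static bool emul_is_paging(struct x86_emulate_ctxt *ctxt)
	{
		return ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG;
	}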
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index be1532f..6a51253 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2644,9 +2644,9 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) rsvd = CR3_L_MODE_RESERVED_BITS; - else if (is_pae(ctxt->vcpu)) + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) rsvd = CR3_PAE_RESERVED_BITS; - else if (is_paging(ctxt->vcpu)) + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) rsvd = CR3_NONPAE_RESERVED_BITS; if (new_val & rsvd) -- cgit v1.1 From 2d04a05bd7e93c13f13a82ac40de4065a99d069b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:32:49 +0300 Subject: KVM: x86 emulator: emulate CLTS internally Avoid using ctxt->vcpu; we can do everything with ->get_cr() and ->set_cr(). A side effect is that we no longer activate the fpu on emulated CLTS; but that should be very rare. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/emulate.c | 12 +++++++++++- arch/x86/kvm/x86.c | 7 ------- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a8616ca..9c3567e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -691,7 +691,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); -int emulate_clts(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6a51253..2b903a3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2579,6 +2579,16 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clts(struct x86_emulate_ctxt *ctxt) +{ + ulong cr0; + + cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 &= ~X86_CR0_TS; + ctxt->ops->set_cr(ctxt, 0, cr0); + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -4079,7 +4089,7 @@ twobyte_insn: rc = emulate_syscall(ctxt, ops); break; case 0x06: - emulate_clts(ctxt->vcpu); + rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ kvm_emulate_wbinvd(ctxt->vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7cd3a3b..a9e8386 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4153,13 +4153,6 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulate_clts(struct kvm_vcpu *vcpu) -{ - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - kvm_x86_ops->fpu_activate(vcpu); - return X86EMUL_CONTINUE; -} - int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); -- cgit v1.1 From 3cb16fe78ce91991a876c74fc5dc99419b737b7a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:38:44 +0300 Subject: KVM: x86 emulator: make emulate_invlpg() an emulator callback Removing direct calls to KVM. 
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 +++--- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 4d1546a..f890769 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -154,6 +154,7 @@ struct x86_emulate_ops { const void *new, unsigned int bytes, struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, int size, unsigned short port, void *val, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9c3567e..d957d0d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -690,7 +690,6 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b903a3..5d774e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2573,7 +2573,7 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); if (rc == X86EMUL_CONTINUE) - emulate_invlpg(ctxt->vcpu, linear); + ctxt->ops->invlpg(ctxt, linear); /* Disable writeback. */ c->dst.type = OP_NONE; return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9e8386..8af49b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4128,10 +4128,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) return kvm_x86_ops->get_segment_base(vcpu, seg); } -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) { - kvm_mmu_invlpg(vcpu, address); - return X86EMUL_CONTINUE; + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) @@ -4382,6 +4381,7 @@ static struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, .get_cached_descriptor = emulator_get_cached_descriptor, -- cgit v1.1 From 6c3287f7c5050076b554145f11bdba058de287d1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:43:05 +0300 Subject: KVM: x86 emulator: add new ->halt() callback Instead of reaching into vcpu internals. 
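With ->halt() another instruction side effect becomes a callback, which is the payoff this series is driving at: once the last direct vcpu uses are gone (the patches below), any backend that embeds a ctxt can drive the emulator. A minimal sketch of a non-KVM backend (purely hypothetical test harness, not code from this series):

	/* a vcpu-free backend: the ctxt is embedded, just as kvm_vcpu does */
	struct test_backend {
		struct x86_emulate_ctxt ctxt;
		bool halted;
		unsigned long last_invlpg;
	};

	static void test_halt(struct x86_emulate_ctxt *ctxt)
	{
		struct test_backend *tb =
			container_of(ctxt, struct test_backend, ctxt);

		tb->halted = true;		/* record HLT, nothing to block */
	}

	static void test_invlpg(struct x86_emulate_ctxt *ctxt, ulong addr)
	{
		struct test_backend *tb =
			container_of(ctxt, struct test_backend, ctxt);

		tb->last_invlpg = addr;		/* no real TLB to flush */
	}

	static struct x86_emulate_ops test_ops = {
		.halt	= test_halt,
		.invlpg	= test_invlpg,
		/* ... the remaining callbacks ... */
	};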
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index f890769..d30f1e9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -186,6 +186,7 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5d774e9..210df51 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3913,7 +3913,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ - ctxt->vcpu->arch.halt_request = 1; + ctxt->ops->halt(ctxt); break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8af49b3..2246cf1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4351,6 +4351,11 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } +static void emulator_halt(struct x86_emulate_ctxt *ctxt) +{ + emul_to_vcpu(ctxt)->arch.halt_request = 1; +} + static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); @@ -4400,6 +4405,7 @@ static struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, + .halt = emulator_halt, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, -- cgit v1.1 From d6aa10003b0cded5a538af0d198460e89dc2d6d2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:47:13 +0300 Subject: KVM: x86 emulator: add ->fix_hypercall() callback Artificial, but needed to remove direct calls to KVM. 
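The callback is "artificial" because fixing up a hypercall is inherently a KVM policy decision: the guest may have executed the other vendor's instruction, and KVM rewrites it in guest memory to the encoding the host CPU actually supports. A sketch of what the KVM-side implementation presumably does (the opcode bytes are recalled, not shown in the hunk below -- VMCALL is 0f 01 c1 and VMMCALL is 0f 01 d9, hence the 3-byte buffer):

	char instruction[3];
	unsigned long rip = kvm_rip_read(vcpu);

	/* ask the vendor module (VMX or SVM) for its native encoding */
	kvm_x86_ops->patch_hypercall(vcpu, instruction);
	/* ... then write the 3 bytes over the guest instruction at rip
	 * through the emulator's memory-write path ... */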
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/emulate.c | 4 ++-- arch/x86/kvm/x86.c | 6 +++++- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index d30f1e9..d30840d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -187,6 +187,7 @@ struct x86_emulate_ops { int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d957d0d..6cfc1ab 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -752,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 210df51..64e7373 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4025,7 +4025,7 @@ twobyte_insn: if (c->modrm_mod != 3 || c->modrm_rm != 1) goto cannot_emulate; - rc = kvm_fix_hypercall(ctxt->vcpu); + rc = ctxt->ops->fix_hypercall(ctxt); if (rc != X86EMUL_CONTINUE) goto done; @@ -4048,7 +4048,7 @@ twobyte_insn: if (c->modrm_mod == 3) { switch (c->modrm_rm) { case 1: - rc = kvm_fix_hypercall(ctxt->vcpu); + rc = ctxt->ops->fix_hypercall(ctxt); break; default: goto cannot_emulate; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2246cf1..4a2b40e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -152,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -4406,6 +4408,7 @@ static struct x86_emulate_ops emulate_ops = { .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .halt = emulator_halt, + .fix_hypercall = emulator_fix_hypercall, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, @@ -5042,8 +5045,9 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); -- cgit v1.1 From bcaf5cc543bdb8f61fc3ce09944e0ecde2966595 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:53:23 +0300 Subject: KVM: x86 emulator: add new ->wbinvd() callback Instead of calling kvm_emulate_wbinvd() directly. 
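One wrinkle with naming the callback wbinvd: the kernel already defines a function-like macro of that name, so the obvious call site does not even preprocess (the build fix from Clemens Noss further below runs into exactly this). In miniature:

	/* the host kernel provides something along these lines
	 * (assumed shape -- the exact definition varies): */
	#define wbinvd() native_wbinvd()

	ctxt->ops->wbinvd(ctxt);	/* identifier followed by '(' -- the
					 * macro expands: "passed 1 arguments,
					 * but takes just 0" */
	(ctxt->ops->wbinvd)(ctxt);	/* parenthesized callee: the identifier
					 * is not followed by '(', so no
					 * expansion -- a plain indirect call */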
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index d30840d..51341d6 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -187,6 +187,7 @@ struct x86_emulate_ops { int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 64e7373..522bc35 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4092,7 +4092,7 @@ twobyte_insn: rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - kvm_emulate_wbinvd(ctxt->vcpu); + ctxt->ops->wbinvd(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a2b40e..5d853d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4154,6 +4154,11 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); +static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) +{ + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); +} + int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); @@ -4408,6 +4413,7 @@ static struct x86_emulate_ops emulate_ops = { .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .halt = emulator_halt, + .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, -- cgit v1.1 From 5197b808a7f459f9c7436573c7785ff3c1324c08 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:55:40 +0300 Subject: KVM: Avoid using x86_emulate_ctxt.vcpu We can use container_of() instead. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d853d5..65a5b0c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4366,7 +4366,7 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); - kvm_load_guest_fpu(ctxt->vcpu); + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); /* * CR0.TS may reference the host fpu state, not the guest fpu state, * so it may be clear at this point. -- cgit v1.1 From 13db70eca62c5bbb2cbbf6b23dadb94065d363d1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:56:20 +0300 Subject: KVM: x86 emulator: drop x86_emulate_ctxt::vcpu No longer used. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 -- arch/x86/kvm/x86.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 51341d6..127ea3e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -269,8 +269,6 @@ struct x86_emulate_ctxt { struct x86_emulate_ops *ops; /* Register state before/after emulation. 
*/ - struct kvm_vcpu *vcpu; - unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65a5b0c..a831d5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4463,7 +4463,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - vcpu->arch.emulate_ctxt.vcpu = vcpu; vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = -- cgit v1.1 From a78484c60e35555d6e0e5b1eb83d4913621c59fb Mon Sep 17 00:00:00 2001 From: "Roedel, Joerg" Date: Wed, 20 Apr 2011 15:33:16 +0200 Subject: KVM: MMU: Make cmpxchg_gpte aware of nesting too This patch makes the cmpxchg_gpte() function aware of the difference between l1-gfns and l2-gfns when nested virtualization is in use. This fixes a potential data-corruption problem in the l1-guest and makes the code work correctly (at least as correctly as the hardware which is emulated in this code) again. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 74f8567..1b68990 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -78,15 +78,21 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t table_gfn, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { pt_element_t ret; pt_element_t *table; struct page *page; + gpa_t gpa; - page = gfn_to_page(kvm, table_gfn); + gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT, + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (gpa == UNMAPPED_GVA) + return -EFAULT; + + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -192,11 +198,17 @@ walk: #endif if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, - index, pte, pte|PT_ACCESSED_MASK)) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, + index, pte, pte|PT_ACCESSED_MASK); + if (ret < 0) { + present = false; + break; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -245,13 +257,17 @@ walk: goto error; if (write_fault && !is_dirty_gpte(pte)) { - bool ret; + int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte, pte|PT_DIRTY_MASK); - if (ret) + if (ret < 0) { + present = false; + goto error; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; -- cgit v1.1 From cfb223753c3750b28103136ad4d51d1a3ae4d64b Mon Sep 17 00:00:00 2001 From: Clemens Noss Date: Thu, 21 Apr 2011 21:16:05 +0200 Subject: KVM: x86 emulator: avoid calling wbinvd() macro Commit 0b56652e33c72092956c651ab6ceb9f0ad081153 fails to build: CC [M] arch/x86/kvm/emulate.o arch/x86/kvm/emulate.c: In function
'x86_emulate_insn': arch/x86/kvm/emulate.c:4095:25: error: macro "wbinvd" passed 1 arguments, but takes just 0 arch/x86/kvm/emulate.c:4095:3: warning: statement with no effect make[2]: *** [arch/x86/kvm/emulate.o] Error 1 make[1]: *** [arch/x86/kvm] Error 2 make: *** [arch/x86] Error 2 Work around this for now. Signed-off-by: Clemens Noss Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 522bc35..ccb8b38 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4092,7 +4092,7 @@ twobyte_insn: rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - ctxt->ops->wbinvd(ctxt); + (ctxt->ops->wbinvd)(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ -- cgit v1.1 From d42244499f171a499aa6748bc52304bb40e68ecc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 21 Apr 2011 09:09:22 -0700 Subject: KVM: x86 emulator: fix const value warning on i386 in svm insn RAX check arch/x86/kvm/emulate.c:2598: warning: integer constant is too large for 'long' type Signed-off-by: Randy Dunlap Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ccb8b38..77a5f54 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2738,7 +2738,7 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; /* Valid physical address? */ - if (rax & 0xffff000000000000) + if (rax & 0xffff000000000000ULL) return emulate_gp(ctxt, 0); return check_svme(ctxt); -- cgit v1.1 From 26d05cc7403dfffbc5afd66a1292adfb7566e3ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:07:59 +0300 Subject: KVM: x86 emulator: move 0F 01 sub-opcodes into their own functions Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 146 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 77a5f54..8e1d0c8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2589,6 +2589,95 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_vmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return X86EMUL_UNHANDLEABLE; + + rc = ctxt->ops->fix_hypercall(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->eip; + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_gdt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_svm(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + switch (c->modrm_rm) { + case 1: + rc = ctxt->ops->fix_hypercall(ctxt); + break; + default: + return X86EMUL_UNHANDLEABLE; + } + /* Disable writeback. 
*/ + c->dst.type = OP_NONE; + return rc; +} + +static int em_lidt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, + &desc_ptr.size, + &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_idt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_smsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = 2; + c->dst.val = ctxt->ops->get_cr(ctxt, 0); + return X86EMUL_CONTINUE; +} + +static int em_lmsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) + | (c->src.val & 0x0f)); + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -3509,7 +3598,6 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) int rc = X86EMUL_CONTINUE; int saved_dst_type = c->dst.type; int irq; /* Used for int 3, int, and into */ - struct desc_ptr desc_ptr; ctxt->decode.mem_read.pos = 0; @@ -4022,62 +4110,26 @@ twobyte_insn: case 0x01: /* lgdt, lidt, lmsw */ switch (c->modrm_reg) { case 0: /* vmcall */ - if (c->modrm_mod != 3 || c->modrm_rm != 1) - goto cannot_emulate; - - rc = ctxt->ops->fix_hypercall(ctxt); - if (rc != X86EMUL_CONTINUE) - goto done; - - /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; - /* Disable writeback. */ - c->dst.type = OP_NONE; + rc = em_vmcall(ctxt); break; case 2: /* lgdt */ - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &desc_ptr.size, &desc_ptr.address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - ctxt->ops->set_gdt(ctxt, &desc_ptr); - /* Disable writeback. */ - c->dst.type = OP_NONE; + rc = em_lgdt(ctxt); break; case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) { - switch (c->modrm_rm) { - case 1: - rc = ctxt->ops->fix_hypercall(ctxt); - break; - default: - goto cannot_emulate; - } - } else { - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &desc_ptr.size, - &desc_ptr.address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - ctxt->ops->set_idt(ctxt, &desc_ptr); - } - /* Disable writeback. */ - c->dst.type = OP_NONE; + if (c->modrm_mod == 3) + return em_svm(ctxt); + else + return em_lidt(ctxt); break; case 4: /* smsw */ - c->dst.bytes = 2; - c->dst.val = ops->get_cr(ctxt, 0); + rc = em_smsw(ctxt); break; case 6: /* lmsw */ - ops->set_cr(ctxt, 0, (ops->get_cr(ctxt, 0) & ~0x0eul) | - (c->src.val & 0x0f)); - c->dst.type = OP_NONE; + rc = em_lmsw(ctxt); break; case 5: /* not defined */ - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; + rc = emulate_ud(ctxt); + break; case 7: /* invlpg*/ rc = em_invlpg(ctxt); break; -- cgit v1.1 From 68152d88122b24fad0f5910f74efcd19120a19a8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:17:13 +0300 Subject: KVM: x86 emulator: Don't force #UD for 0F 01 /5 While it isn't defined, no need to force a #UD. If it becomes defined in the future, this can cause weird problems for the guest.
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8e1d0c8..2132fab 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4127,9 +4127,6 @@ twobyte_insn: case 6: /* lmsw */ rc = em_lmsw(ctxt); break; - case 5: /* not defined */ - rc = emulate_ud(ctxt); - break; case 7: /* invlpg*/ rc = em_invlpg(ctxt); break; -- cgit v1.1 From 5ef39c71d8398115245a5974b488f8703ba3a6b0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:21:50 +0300 Subject: KVM: x86 emulator: Use opcode::execute for 0F 01 opcode Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 56 ++++++++++++-------------------------------------- 1 file changed, 13 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2132fab..252f283 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2625,18 +2625,13 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_svm(struct x86_emulate_ctxt *ctxt) +static int em_vmmcall(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc; - switch (c->modrm_rm) { - case 1: - rc = ctxt->ops->fix_hypercall(ctxt); - break; - default: - return X86EMUL_UNHANDLEABLE; - } + rc = ctxt->ops->fix_hypercall(ctxt); + /* Disable writeback. */ c->dst.type = OP_NONE; return rc; @@ -2909,7 +2904,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DI(SrcNone | ModRM | Prot | VendorSpecific, vmmcall), + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), @@ -2961,15 +2956,17 @@ static struct opcode group6[] = { static struct group_dual group7 = { { DI(ModRM | Mov | DstMem | Priv, sgdt), DI(ModRM | Mov | DstMem | Priv, sidt), - DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), - DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), - DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), + II(ModRM | SrcMem | Priv, em_lidt, lidt), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), EXT(0, group7_rm1), + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + EXT(0, group7_rm1), N, EXT(0, group7_rm3), - DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), EXT(0, group7_rm7), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { @@ -4107,33 +4104,6 @@ done: twobyte_insn: switch (c->b) { - case 0x01: /* lgdt, lidt, lmsw */ - switch (c->modrm_reg) { - case 0: /* vmcall */ - rc = em_vmcall(ctxt); - break; - case 2: /* lgdt */ - rc = em_lgdt(ctxt); - break; - case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) - return em_svm(ctxt); - else - return em_lidt(ctxt); - break; - case 4: /* smsw */ - rc = em_smsw(ctxt); - break; - case 6: /* lmsw */ - rc = em_lmsw(ctxt); - break; - case 7: /* invlpg*/ - rc = em_invlpg(ctxt); - break; - default: - 
goto cannot_emulate; - } - break; case 0x05: /* syscall */ rc = emulate_syscall(ctxt, ops); break; -- cgit v1.1 From 40e19b519caeb93def89c45082d776fccfb96dbb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:35:41 +0300 Subject: KVM: SVM: Get rid of x86_intercept_map::valid By reserving 0 as an invalid x86_intercept_stage, we no longer need to store a valid flag in x86_intercept_map. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/svm.c | 12 ++++-------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 127ea3e..28114f5 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -304,6 +304,7 @@ struct x86_emulate_ctxt { X86EMUL_MODE_PROT64) enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ X86_ICPT_PRE_EXCEPT, X86_ICPT_POST_EXCEPT, X86_ICPT_POST_MEMACCESS, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index de4bba9..9cff036 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3959,19 +3959,15 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) } #define PRE_EX(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_PRE_EXCEPT, \ - .valid = true } + .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_POST_EXCEPT, \ - .valid = true } + .stage = X86_ICPT_POST_EXCEPT, } #define POST_MEM(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_POST_MEMACCESS, \ - .valid = true } + .stage = X86_ICPT_POST_MEMACCESS, } static struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; - bool valid; } x86_intercept_map[] = { [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), @@ -4039,7 +4035,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, icpt_info = x86_intercept_map[info->intercept]; - if (!icpt_info.valid || stage != icpt_info.stage) + if (stage != icpt_info.stage) goto out; switch (icpt_info.exit_code) { -- cgit v1.1 From 6e2ca7d1802bf8ed9908435e34daa116662e7790 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 22 Apr 2011 00:34:44 +0900 Subject: KVM: MMU: Optimize guest page table walk This patch optimizes the guest page table walk by using get_user() instead of copy_from_user(). With this patch applied, paging64_walk_addr_generic() has become about 0.5us to 1.0us faster on my Phenom II machine with NPT on. 
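The speedup comes from the access primitive, not from a smarter walk: get_user() on a fixed-size type compiles down to a single inlined, size-matched user-space load whose fault handling goes through the exception table, whereas the old kvm_read_guest_page_mmu() path bottoms out in copy_from_user(), a generic length-checked copy. Schematically (simplified, not the kernel's literal expansion):

	/* old: generic copy of sizeof(pte) bytes */
	if (copy_from_user(&pte, ptep_user, sizeof(pte)))
		...				/* walk fails */

	/* new: one size-matched load; a faulting access is patched up
	 * via the exception table, with no copy loop at all */
	if (get_user(pte, ptep_user))
		...				/* walk fails: present = false */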
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1b68990..a32a1c8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -123,6 +123,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; + pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; @@ -158,6 +159,9 @@ walk: pt_access = ACC_ALL; for (;;) { + gfn_t real_gfn; + unsigned long host_addr; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -166,9 +170,22 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (real_gfn == UNMAPPED_GVA) { + present = false; + break; + } + real_gfn = gpa_to_gfn(real_gfn); + + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + if (kvm_is_error_hva(host_addr)) { + present = false; + break; + } + + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + if (get_user(pte, ptep_user)) { present = false; break; } -- cgit v1.1 From d67fc27ae2bdc2d2fb6c8ec4238a12b502b95cc7 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:48:02 +0900 Subject: KVM: x86 emulator: Use opcode::execute for Group 1, CMPS and SCAS The following instructions are changed to use opcode::execute. Group 1 (80-83) ADD (00-05), OR (08-0D), ADC (10-15), SBB (18-1D), AND (20-25), SUB (28-2D), XOR (30-35), CMP (38-3D) CMPS (A6-A7), SCAS (AE-AF) The last two do the same as CMP in the emulator, so em_cmp() is used. 
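The table changes lean heavily on the I2bv()/I6ALU() helper macros in the diff below; expanding one case by hand shows what they buy. I6ALU(Lock, em_add) becomes the six entries covering opcodes 00-05, with Lock stripped from the forms that cannot take a LOCK prefix, i.e. those without a memory destination (comments added here for orientation):

	I(Lock | DstMem | SrcReg | ModRM | ByteOp, em_add), /* 00: add r/m8, r8 */
	I(Lock | DstMem | SrcReg | ModRM, em_add),          /* 01: add r/m, r */
	I(DstReg | SrcMem | ModRM | ByteOp, em_add),        /* 02: add r8, r/m8 */
	I(DstReg | SrcMem | ModRM, em_add),                 /* 03: add r, r/m */
	I(DstAcc | SrcImm | ByteOp, em_add),                /* 04: add al, imm8 */
	I(DstAcc | SrcImm, em_add),                         /* 05: add (e)ax, imm */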
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 161 +++++++++++++++++++++++++++---------------------- 1 file changed, 89 insertions(+), 72 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 252f283..8784916 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2512,6 +2512,72 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_add(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_or(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_adc(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sbb(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_and(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sub(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_xor(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_cmp(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + /* Disable writeback. 
*/ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static int em_imul(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2892,9 +2958,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) -#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static struct opcode group7_rm1[] = { DI(SrcNone | ModRM | Priv, monitor), @@ -2918,8 +2984,16 @@ static struct opcode group7_rm7[] = { DIP(SrcNone | ModRM, rdtscp, check_rdtsc), N, N, N, N, N, N, }; + static struct opcode group1[] = { - X7(D(Lock)), N + I(Lock, em_add), + I(Lock, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(0, em_cmp), }; static struct opcode group1A[] = { @@ -2991,25 +3065,25 @@ static struct gprefix pfx_0f_6f_0f_7f = { static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - D6ALU(Lock), + I6ALU(Lock, em_add), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x08 - 0x0F */ - D6ALU(Lock), + I6ALU(Lock, em_or), D(ImplicitOps | Stack | No64), N, /* 0x10 - 0x17 */ - D6ALU(Lock), + I6ALU(Lock, em_adc), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x18 - 0x1F */ - D6ALU(Lock), + I6ALU(Lock, em_sbb), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x20 - 0x27 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_and), N, N, /* 0x28 - 0x2F */ - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - D6ALU(0), N, N, + I6ALU(0, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ @@ -3050,12 +3124,12 @@ static struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - D2bv(SrcSI | DstDI | String), + I2bv(SrcSI | DstDI | String, em_cmp), /* 0xA8 - 0xAF */ D2bv(DstAcc | SrcImm), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - D2bv(SrcAcc | DstDI | String), + I2bv(SrcAcc | DstDI | String, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ @@ -3179,7 +3253,7 @@ static struct opcode twobyte_table[256] = { #undef D2bv #undef D2bvIP #undef I2bv -#undef D6ALU +#undef I6ALU static unsigned imm_size(struct decode_cache *c) { @@ -3715,60 +3789,27 @@ special_insn: goto twobyte_insn; switch (c->b) { - case 0x00 ... 0x05: - add: /* add */ - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); - break; case 0x06: /* push es */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); break; - case 0x08 ... 0x0d: - or: /* or */ - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); - break; case 0x0e: /* push cs */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; - case 0x10 ... 
0x15: - adc: /* adc */ - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); - break; case 0x16: /* push ss */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); break; - case 0x18 ... 0x1d: - sbb: /* sbb */ - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); - break; case 0x1e: /* push ds */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); break; - case 0x20 ... 0x25: - and: /* and */ - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); - break; - case 0x28 ... 0x2d: - sub: /* sub */ - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); - break; - case 0x30 ... 0x35: - xor: /* xor */ - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); - break; - case 0x38 ... 0x3d: - cmp: /* cmp */ - c->dst.type = OP_NONE; /* Disable writeback. */ - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); break; @@ -3803,26 +3844,6 @@ special_insn: if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); break; - case 0x80 ... 0x83: /* Grp1 */ - switch (c->modrm_reg) { - case 0: - goto add; - case 1: - goto or; - case 2: - goto adc; - case 3: - goto sbb; - case 4: - goto and; - case 5: - goto sub; - case 6: - goto xor; - case 7: - goto cmp; - } - break; case 0x84 ... 0x85: test: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); @@ -3892,12 +3913,8 @@ special_insn: c->dst.bytes = c->op_bytes; rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); break; - case 0xa6 ... 0xa7: /* cmps */ - goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; - case 0xae ... 0xaf: /* scas */ - goto cmp; case 0xc0 ... 0xc1: emulate_grp2(ctxt); break; -- cgit v1.1 From c54fe504693204fa672b10a57c3d82a8c41e0b4e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:49:40 +0900 Subject: KVM: x86 emulator: Use opcode::execute for POP reg (58-5F) In addition, the RET emulation is changed to call em_pop() to remove the pop_instruction label. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8784916..9f491bf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1400,6 +1400,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_pop(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + return emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} + static int emulate_popf(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, void *dest, int len) @@ -3089,7 +3096,7 @@ static struct opcode opcode_table[256] = { /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ - X8(D(DstReg | Stack)), + X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , @@ -3816,10 +3823,6 @@ special_insn: case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x58 ... 
0x5f: /* pop reg */ - pop_instruction: - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - break; case 0x60: /* pusha */ rc = emulate_pusha(ctxt); break; @@ -3922,7 +3925,8 @@ special_insn: c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - goto pop_instruction; + rc = em_pop(ctxt); + break; case 0xc4: /* les */ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); break; -- cgit v1.1 From b96a7fad020b42eb4a564f8a2fb41827a83c4375 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:51:07 +0900 Subject: KVM: x86 emulator: Use opcode::execute for PUSHA/POPA (60/61) For this, emulate_pusha/popa() are converted to em_pusha/popa(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9f491bf..b7c6e43 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1473,7 +1473,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt) +static int em_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1494,8 +1494,7 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt) return rc; } -static int emulate_popa(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1508,7 +1507,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, --reg; } - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, ctxt->ops, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -3098,7 +3097,8 @@ static struct opcode opcode_table[256] = { /* 0x58 - 0x5F */ X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), + I(ImplicitOps | Stack | No64, em_pusha), + I(ImplicitOps | Stack | No64, em_popa), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , N, N, N, N, /* 0x68 - 0x6F */ @@ -3823,12 +3823,6 @@ special_insn: case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x60: /* pusha */ - rc = emulate_pusha(ctxt); - break; - case 0x61: /* popa */ - rc = emulate_popa(ctxt, ops); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; -- cgit v1.1 From 62aaa2f05abd59598f132e6ebad86318291b5be0 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:52:56 +0900 Subject: KVM: x86 emulator: Use opcode::execute for PUSHF/POPF (9C/9D) For this, em_pushf/popf() are introduced. 
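The shape of the two new helpers is simple enough to model outside the emulator: PUSHF loads EFLAGS into the source operand and reuses the generic push path, while POPF points the destination operand at EFLAGS so the ordinary writeback stores the popped value. A rough userspace analogue, with a toy stack standing in for the guest stack:

#include <stdio.h>

static unsigned long stack[64];
static int sp = 64;
static unsigned long eflags = 0x202;

static void push(unsigned long v) { stack[--sp] = v; }
static unsigned long pop(void) { return stack[sp++]; }

static void em_pushf(void) { push(eflags); }	/* src.val = eflags */
static void em_popf(void) { eflags = pop(); }	/* dst points at eflags */

int main(void)
{
	em_pushf();
	stack[sp] |= 0x1;	/* set CF on the stacked flags image */
	em_popf();
	printf("eflags = %#lx\n", eflags);	/* prints 0x203 */
	return 0;
}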
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b7c6e43..c1d9116 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1448,6 +1448,16 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_popf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.type = OP_REG; + c->dst.addr.reg = &ctxt->eflags; + c->dst.bytes = c->op_bytes; + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} + static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, int seg) { @@ -1494,6 +1504,14 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_pushf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = (unsigned long)ctxt->eflags; + return em_push(ctxt); +} + static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -3126,7 +3144,8 @@ static struct opcode opcode_table[256] = { /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - DI(ImplicitOps | Stack, pushf), DI(ImplicitOps | Stack, popf), N, N, + II(ImplicitOps | Stack, em_pushf, pushf), + II(ImplicitOps | Stack, em_popf, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), @@ -3900,16 +3919,6 @@ special_insn: case 8: c->dst.val = (s32)c->dst.val; break; } break; - case 0x9c: /* pushf */ - c->src.val = (unsigned long) ctxt->eflags; - rc = em_push(ctxt); - break; - case 0x9d: /* popf */ - c->dst.type = OP_REG; - c->dst.addr.reg = &ctxt->eflags; - c->dst.bytes = c->op_bytes; - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); - break; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; case 0xc0 ... 0xc1: -- cgit v1.1 From 781e0743af3c5ba356d55bc60df59f2dded1e938 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 Apr 2011 12:25:50 +0300 Subject: KVM: MMU: Add unlikely() annotations to walk_addr_generic() walk_addr_generic() is a hot path and is also hard for the cpu to predict - some of the parameters (fetch_fault in particular) vary wildly from invocation to invocation. Add unlikely() annotations where appropriate; all walk failures are considered unlikely, as are cases where we have to mark the accessed or dirty bit, as they are slow paths both in kvm and on real processors. 
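For reference, unlikely() is the kernel's branch-prediction hint from include/linux/compiler.h, built on GCC's __builtin_expect; it lets the compiler lay the annotated failure paths out of the straight-line code. A compilable sketch of the mechanism (walk_step is an illustrative name, not a kernel function):

#define unlikely(x) __builtin_expect(!!(x), 0)

int walk_step(int present)
{
	if (unlikely(!present))
		return -1;	/* cold: walker failure path, placed out of line */
	return 0;		/* hot: fall through and continue the walk */
}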
Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a32a1c8..652d56c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -172,49 +172,51 @@ walk: real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), PFERR_USER_MASK|PFERR_WRITE_MASK); - if (real_gfn == UNMAPPED_GVA) { + if (unlikely(real_gfn == UNMAPPED_GVA)) { present = false; break; } real_gfn = gpa_to_gfn(real_gfn); host_addr = gfn_to_hva(vcpu->kvm, real_gfn); - if (kvm_is_error_hva(host_addr)) { + if (unlikely(kvm_is_error_hva(host_addr))) { present = false; break; } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (get_user(pte, ptep_user)) { + if (unlikely(get_user(pte, ptep_user))) { present = false; break; } trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) { + if (unlikely(!is_present_gpte(pte))) { present = false; break; } - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, + walker->level))) { rsvd_fault = true; break; } - if (write_fault && !is_writable_pte(pte)) - if (user_fault || is_write_protection(vcpu)) - eperm = true; + if (unlikely(write_fault && !is_writable_pte(pte) + && (user_fault || is_write_protection(vcpu)))) + eperm = true; - if (user_fault && !(pte & PT_USER_MASK)) + if (unlikely(user_fault && !(pte & PT_USER_MASK))) eperm = true; #if PTTYPE == 64 - if (fetch_fault && (pte & PT64_NX_MASK)) + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) eperm = true; #endif - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + if (!eperm && !rsvd_fault + && unlikely(!(pte & PT_ACCESSED_MASK))) { int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); @@ -270,10 +272,10 @@ walk: --walker->level; } - if (!present || eperm || rsvd_fault) + if (unlikely(!present || eperm || rsvd_fault)) goto error; - if (write_fault && !is_dirty_gpte(pte)) { + if (write_fault && unlikely(!is_dirty_gpte(pte))) { int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); -- cgit v1.1 From 46561646ce409ad96c22645362c113de04f60bfb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 Apr 2011 14:09:59 +0300 Subject: KVM: x86 emulator: consolidate group handling Move all groups into a single field and handle them in a single place. This saves bits when we add more group types (3 bits -> 7 group types). 
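Spelling the encoding out: the old scheme spent one flag bit per group mechanism, while the new one packs them into a single 3-bit field at bits 14-16, which can enumerate up to seven mechanisms plus "none". A small sketch reusing the patch's values (the helper itself is illustrative, not kernel code):

#define GroupMask	(7 << 14)	/* 3-bit field, bits 14-16 */
#define Group		(1 << 14)
#define GroupDual	(2 << 14)
#define Prefix		(3 << 14)
#define RMExt		(4 << 14)

static const char *group_kind(unsigned flags)
{
	switch (flags & GroupMask) {
	case Group:	return "modrm reg bits extend the opcode";
	case GroupDual:	return "alternate decode when mod == 3";
	case Prefix:	return "varies with 66/f2/f3 prefix";
	case RMExt:	return "extension in modrm r/m when mod == 3";
	default:	return "no group mechanism";
	}
}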
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 80 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c1d9116..7466aba 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -73,11 +73,12 @@ #define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ +#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ -#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ +#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ #define Sse (1<<17) /* SSE Vector instruction */ -#define RMExt (1<<18) /* Opcode extension in ModRM r/m if mod == 3 */ /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ @@ -2969,7 +2970,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define N D(0) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } @@ -3337,9 +3338,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix; + int def_op_bytes, def_ad_bytes, goffset, simd_prefix; bool op_prefix = false; - struct opcode opcode, *g_mod012, *g_mod3; + struct opcode opcode; struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; @@ -3433,44 +3434,43 @@ done_prefixes: } c->d = opcode.flags; - if (c->d & Group) { - dual = c->d & GroupDual; - c->modrm = insn_fetch(u8, 1, c->eip); - --c->eip; - - if (c->d & GroupDual) { - g_mod012 = opcode.u.gdual->mod012; - g_mod3 = opcode.u.gdual->mod3; - } else - g_mod012 = g_mod3 = opcode.u.group; - - c->d &= ~(Group | GroupDual); - - goffset = (c->modrm >> 3) & 7; - - if ((c->modrm >> 6) == 3) - opcode = g_mod3[goffset]; - else - opcode = g_mod012[goffset]; - - if (opcode.flags & RMExt) { + while (c->d & GroupMask) { + switch (c->d & GroupMask) { + case Group: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + opcode = opcode.u.group[goffset]; + break; + case GroupDual: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + if ((c->modrm >> 6) == 3) + opcode = opcode.u.gdual->mod3[goffset]; + else + opcode = opcode.u.gdual->mod012[goffset]; + break; + case RMExt: goffset = c->modrm & 7; opcode = opcode.u.group[goffset]; - } - - c->d |= opcode.flags; - } - - if (c->d & Prefix) { - if (c->rep_prefix && op_prefix) + break; + case Prefix: + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 
0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + break; + default: return X86EMUL_UNHANDLEABLE; - simd_prefix = op_prefix ? 0x66 : c->rep_prefix; - switch (simd_prefix) { - case 0x00: opcode = opcode.u.gprefix->pfx_no; break; - case 0x66: opcode = opcode.u.gprefix->pfx_66; break; - case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; - case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; } + + c->d &= ~GroupMask; c->d |= opcode.flags; } -- cgit v1.1 From 8d7d810255982bfcc355cdb8972d72843acb0cf8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:21 +0300 Subject: KVM: mmio_fault_cr2 is not used Remove unused variable mmio_fault_cr2. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6cfc1ab..afb0e69 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -358,7 +358,6 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; - gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a831d5d..e3ac212 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4561,7 +4561,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, bool writeback = true; kvm_clear_exception_queue(vcpu); - vcpu->arch.mmio_fault_cr2 = cr2; + /* * TODO: fix emulate.c to use guest_read/write_register * instead of direct ->regs accesses, can save hundred cycles -- cgit v1.1 From 4947e7cd0ee36e1aa37dfec4f7fa71cc64a2f0fd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:23 +0300 Subject: KVM: emulator: Propagate fault in far jump emulation Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7466aba..3624f20 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3993,7 +3993,8 @@ special_insn: jump_far: memcpy(&sel, c->src.valptr + c->op_bytes, 2); - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) + rc = load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS); + if (rc != X86EMUL_CONTINUE) goto done; c->eip = 0; -- cgit v1.1 From 0004c7c25757f103ddb3a9e4bcfd533aad41f9a0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:24 +0300 Subject: KVM: Fix compound mmio mmio_index should be taken into account when copying data from userspace. 
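A toy illustration of why the offset matters for compound (wider than 8 bytes) mmio: the completion data from each userspace round trip must land at the current position in the buffer, otherwise the second chunk overwrites the first. Plain buffers stand in for the vcpu fields here.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char mmio_data[16];	/* stands in for vcpu->mmio_data */
	unsigned mmio_index = 0;
	const char chunk1[8] = "AAAAAAAA", chunk2[8] = "BBBBBBBB";

	memcpy(mmio_data + mmio_index, chunk1, 8);	/* first completion */
	mmio_index += 8;
	memcpy(mmio_data + mmio_index, chunk2, 8);	/* fixed: offset applied */
	printf("%.16s\n", mmio_data);	/* AAAAAAAABBBBBBBB */
	return 0;
}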
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3ac212..a9a307a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5576,7 +5576,8 @@ static int complete_mmio(struct kvm_vcpu *vcpu) if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) - memcpy(vcpu->mmio_data, run->mmio.data, 8); + memcpy(vcpu->mmio_data + vcpu->mmio_index, + run->mmio.data, 8); vcpu->mmio_index += 8; if (vcpu->mmio_index < vcpu->mmio_size) { run->exit_reason = KVM_EXIT_MMIO; -- cgit v1.1 From 2aab2c5b2bac6510b3bd143ca83babee382f4302 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:25 +0300 Subject: KVM: call cache_all_regs() only once during instruction emulation Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9a307a..f5f2d3d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4459,6 +4459,12 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int cs_db, cs_l; + /* + * TODO: fix emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. + */ cache_all_regs(vcpu); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); @@ -4562,14 +4568,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_clear_exception_queue(vcpu); - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); - if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); vcpu->arch.emulate_ctxt.interruptibility = 0; -- cgit v1.1 From 4429d5dc1197aaf8188e4febcde54d26a51baf6c Mon Sep 17 00:00:00 2001 From: "BrillyWu@viatech.com.cn" Date: Mon, 25 Apr 2011 13:55:15 +0800 Subject: KVM: Add CPUID support for VIA CPU The CPUIDs for Centaur are added, and then the features of PadLock hardware engine on VIA CPU, such as "ace", "ace_en" and so on, can be passed into the kvm guest. 
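On real hardware the new leaves can be probed directly with the cpuid instruction: 0xC0000000.EAX reports the highest supported Centaur leaf and 0xC0000001.EDX carries the PadLock feature bits. A hedged userspace probe using GCC's <cpuid.h> __cpuid macro; the range check is a heuristic, since non-Centaur CPUs return unrelated data for these leaves.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned eax, ebx, ecx, edx;

	__cpuid(0xC0000000, eax, ebx, ecx, edx);
	if ((eax >> 16) != 0xC000) {	/* heuristic: no Centaur leaf range */
		puts("no Centaur CPUID leaves");
		return 0;
	}
	__cpuid(0xC0000001, eax, ebx, ecx, edx);
	printf("PadLock feature dword (edx): %#x\n", edx);
	return 0;
}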
Signed-off-by: Brilly Wu Signed-off-by: Kary Jin Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5f2d3d..22bc69c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2336,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0xC0000001.edx */ + const u32 kvm_supported_word5_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | + F(PMM) | F(PMM_EN); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); @@ -2445,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_supported_word6_x86_features; cpuid_mask(&entry->ecx, 6); break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); + break; + case 0xC0000001: + entry->edx &= kvm_supported_word5_x86_features; + cpuid_mask(&entry->edx, 5); + break; + case 0xC0000002: + case 0xC0000003: + case 0xC0000004: + /*Now nothing to do, reserved for the future*/ + break; } kvm_x86_ops->set_supported_cpuid(function, entry); @@ -2491,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (nent >= cpuid->nent) goto out_free; + /* Add support for Centaur's CPUID instruction. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + limit = cpuid_entries[nent - 1].eax; + for (func = 0xC0000001; + func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + } + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, cpuid->nent); -- cgit v1.1 From 8f74d8e16812d63639871b4e56409b08bdcb66fc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 28 Apr 2011 07:08:36 +0900 Subject: KVM: MMU: Fix 64-bit paging breakage on x86_32 Fix regression introduced by commit e30d2a170506830d5eef5e9d7990c5aedf1b0a51 KVM: MMU: Optimize guest page table walk On x86_32, get_user() does not support 64-bit values and we fail to build KVM at the point of 64-bit paging. This patch fixes this by using get_user() twice for that condition. 
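A userspace analogue of the workaround, for clarity: when the copy primitive can move at most 32 bits, the 64-bit pte is assembled from two 32-bit reads; on little-endian x86 the low half comes first, so both halves land in place. read_u32() below is a stand-in for get_user(), not a kernel interface.

#include <stdint.h>
#include <stdio.h>

static int read_u32(uint32_t *dst, const uint32_t *src)
{
	*dst = *src;	/* stands in for a 32-bit get_user() */
	return 0;
}

static int read_pte(uint64_t *pte, const uint64_t *ptep)
{
	uint32_t *p = (uint32_t *)pte;
	const uint32_t *p_user = (const uint32_t *)ptep;

	if (read_u32(p, p_user))
		return -1;
	return read_u32(p + 1, p_user + 1);	/* high half */
}

int main(void)
{
	uint64_t src = 0x1122334455667788ULL, dst = 0;

	read_pte(&dst, &src);
	printf("%#llx\n", (unsigned long long)dst);
	return 0;
}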
Signed-off-by: Takuya Yoshikawa Reported-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 652d56c..52450a6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -115,6 +115,20 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) return access; } +static int FNAME(read_gpte)(pt_element_t *pte, pt_element_t __user *ptep_user) +{ +#if defined(CONFIG_X86_32) && (PTTYPE == 64) + u32 *p = (u32 *)pte; + u32 __user *p_user = (u32 __user *)ptep_user; + + if (unlikely(get_user(*p, p_user))) + return -EFAULT; + return get_user(*(p + 1), p_user + 1); +#else + return get_user(*pte, ptep_user); +#endif +} + /* * Fetch a guest pte for a guest virtual address */ @@ -185,7 +199,7 @@ walk: } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (unlikely(get_user(pte, ptep_user))) { + if (unlikely(FNAME(read_gpte)(&pte, ptep_user))) { present = false; break; } -- cgit v1.1 From ae8cc059550b2c2ec7a5e9650bb1be7b988a1208 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 24 Apr 2011 22:00:50 -0700 Subject: KVM: SVM: Make dump_vmcb static, reduce text dump_vmcb isn't used outside this module, make it static. Shrink text and object by ~1% by standardizing formats. $ size arch/x86/kvm/svm.o* text data bss dec hex filename 52910 580 10072 63562 f84a arch/x86/kvm/svm.o.new 53563 580 10072 64215 fad7 arch/x86/kvm/svm.o.old Signed-off-by: Joe Perches Acked-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 176 ++++++++++++++++++++++++++++------------------------- 1 file changed, 94 insertions(+), 82 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9cff036..506e4fe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3191,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", 
control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, 
save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) -- cgit v1.1 From 0a434bb2bf094f463ca3ca71ac42cea9e423048f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Apr 2011 15:59:33 +0300 Subject: KVM: VMX: Avoid reading %rip unnecessarily when handling exceptions Avoids a VMREAD. 
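The pattern in miniature: defer the expensive read into the one branch that consumes it, instead of paying for it on every exception exit. The names and the cost model below are illustrative assumptions, not the kernel's code.

static unsigned long read_rip(void)	/* stand-in for a costly VMREAD */
{
	return 0xfff0;
}

void handle_exception(int is_debug_exit, unsigned long *pc)
{
	/* the common exits never touch RIP, so it is not read up front */
	if (is_debug_exit)
		*pc = read_rip();	/* only this path pays for the read */
}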
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3f6e9bf..139a5cb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3170,7 +3170,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) } error_code = 0; - rip = kvm_rip_read(vcpu); if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { @@ -3217,6 +3216,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; -- cgit v1.1 From 1aa366163b8b69f660cf94fd5062fa44859e4318 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 27 Apr 2011 13:20:30 +0300 Subject: KVM: x86 emulator: consolidate segment accessors Instead of separate accessors for the segment selector and cached descriptor, use one accessor for both. This simplifies the code somewhat. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 13 ++-- arch/x86/kvm/emulate.c | 122 +++++++++++++++++++++---------------- arch/x86/kvm/x86.c | 41 +++---------- 3 files changed, 83 insertions(+), 93 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 28114f5..0049211 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -164,15 +164,10 @@ struct x86_emulate_ops { int size, unsigned short port, const void *val, unsigned int count); - bool (*get_cached_descriptor)(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 *base3, - int seg); - void (*set_cached_descriptor)(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 base3, - int seg); - u16 (*get_segment_selector)(struct x86_emulate_ctxt *ctxt, int seg); - void (*set_segment_selector)(struct x86_emulate_ctxt *ctxt, - u16 sel, int seg); + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3624f20..59992484 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -553,6 +553,26 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +{ + u16 selector; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); + return selector; +} + +static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, + unsigned seg) +{ + u16 dummy; + u32 base3; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); +} + static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, unsigned size, bool write, bool fetch, @@ -563,6 +583,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, bool usable; ulong la; u32 lim; + u16 sel; unsigned cpl, rpl; la = seg_base(ctxt, 
ctxt->ops, addr.seg) + addr.ea; @@ -574,8 +595,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, 0); break; default: - usable = ctxt->ops->get_cached_descriptor(ctxt, &desc, NULL, - addr.seg); + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, + addr.seg); if (!usable) goto bad; /* code segment or read-only data segment */ @@ -598,7 +619,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; + rpl = sel & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ @@ -1142,9 +1163,10 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, { if (selector & 1 << 2) { struct desc_struct desc; + u16 sel; + memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(ctxt, &desc, NULL, - VCPU_SREG_LDTR)) + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ @@ -1305,8 +1327,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(ctxt, selector, seg); - ops->set_cached_descriptor(ctxt, &seg_desc, 0, seg); + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1464,7 +1485,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(ctxt, seg); + c->src.val = get_segment_selector(ctxt, seg); return em_push(ctxt); } @@ -1552,7 +1573,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1838,8 +1859,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, struct desc_struct *cs, struct desc_struct *ss) { + u16 selector; + memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(ctxt, cs, NULL, VCPU_SREG_CS); + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1888,10 +1911,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; if (efer & EFER_LMA) { @@ -1961,10 +1982,8 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.l = 1; } - ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; @@ -2018,10 +2037,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - 
ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -2048,11 +2065,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, struct desc_struct tr_seg; u32 base3; int r; - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(ctxt, &tr_seg, &base3, VCPU_SREG_TR); + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -2107,11 +2124,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); - tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); - tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); - tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); - tss->ldt = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -2136,11 +2153,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); - ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); - ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. 
If fault happenes at this stage @@ -2227,13 +2244,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); - tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); - tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); - tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); - tss->fs = ops->get_segment_selector(ctxt, VCPU_SREG_FS); - tss->gs = ops->get_segment_selector(ctxt, VCPU_SREG_GS); - tss->ldt_selector = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -2260,13 +2277,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); - ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); - ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); - ops->set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); - ops->set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. 
If fault happenes at this stage @@ -2348,7 +2365,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(ctxt, VCPU_SREG_TR); + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; @@ -2411,8 +2428,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, } ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); - ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); - ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2503,7 +2519,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(ctxt, VCPU_SREG_CS); + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -3881,7 +3897,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(ctxt, c->modrm_reg); + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22bc69c..77c9d86 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4304,13 +4304,14 @@ static unsigned long emulator_get_cached_segment_base( return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 *base3, - int seg) +static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); + *selector = var.selector; if (var.unusable) return false; @@ -4335,16 +4336,14 @@ static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, return true; } -static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 base3, - int seg) +static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, + int seg) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - + var.selector = selector; var.base = get_desc_base(desc); #ifdef CONFIG_X86_64 var.base |= ((u64)base3) << 32; @@ -4368,24 +4367,6 @@ static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, return; } -static u16 emulator_get_segment_selector(struct x86_emulate_ctxt *ctxt, int seg) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); - return kvm_seg.selector; -} - -static void emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, - u16 sel, int seg) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); -} - static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { @@ -4436,10 +4417,8 @@ static struct x86_emulate_ops emulate_ops = { .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - 
.set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, + .get_segment = emulator_get_segment, + .set_segment = emulator_set_segment, .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, -- cgit v1.1 From 2fb92db1ec08f3235c500e7f460eeb78092d844e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 27 Apr 2011 19:42:18 +0300 Subject: KVM: VMX: Cache vmcs segment fields Since the emulator now checks segment limits and access rights, it generates a lot more accesses to the vmcs segment fields. Undo some of the performance hit by cacheing those fields in a read-only cache (the entire cache is invalidated on any write, or on guest exit). Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 102 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 93 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afb0e69..d2ac8e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -136,6 +136,7 @@ enum kvm_reg_ex { VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 139a5cb..4c3fa0f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -162,6 +162,10 @@ struct vcpu_vmx { u32 ar; } tr, es, ds, fs, gs; } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ + struct kvm_save_segment seg[8]; + } segment_cache; int vpid; bool emulation_required; @@ -174,6 +178,15 @@ struct vcpu_vmx { bool rdtscp_enabled; }; +enum segment_cache_field { + SEG_FIELD_SEL = 0, + SEG_FIELD_BASE = 1, + SEG_FIELD_LIMIT = 2, + SEG_FIELD_AR = 3, + + SEG_FIELD_NR = 4 +}; + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -646,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = 
&vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; @@ -1271,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; #ifdef CONFIG_X86_64 case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_FS_BASE, data); break; case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: @@ -1717,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); @@ -1740,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -1803,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } + vmx_segment_cache_clear(vmx); + vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1879,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) { u32 guest_tr_ar; + vmx_segment_cache_clear(to_vmx(vcpu)); + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", @@ -2082,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_save_segment *save; u32 ar; @@ -2104,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = save->limit; ar = save->ar; if (seg == VCPU_SREG_TR - || var->selector == vmcs_read16(sf->selector)) + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) goto use_saved_rmode_seg; } - var->base = vmcs_readl(sf->base); - var->limit = vmcs_read32(sf->limit); - var->selector = vmcs_read16(sf->selector); - ar = vmcs_read32(sf->ar_bytes); + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; @@ -2127,14 +2205,13 @@ use_saved_rmode_seg: static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment s; if (to_vmx(vcpu)->rmode.vm86_active) { vmx_get_segment(vcpu, &s, seg); return s.base; } - return vmcs_readl(sf->base); + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } static int __vmx_get_cpl(struct kvm_vcpu *vcpu) @@ -2146,7 +2223,7 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmcs_read16(GUEST_CS_SELECTOR) & 3; + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; } static int vmx_get_cpl(struct kvm_vcpu *vcpu) @@ -2188,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct 
From 12cb814f3bb35736420cc6bfc9fed7b6a9d3a828 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Sat, 7 May 2011 16:31:36 +0900
Subject: KVM: MMU: Clean up gpte reading with copy_from_user()

When we optimized walk_addr_generic() by not using the generic guest
memory reader, we replaced copy_from_user() with get_user():

  commit e30d2a170506830d5eef5e9d7990c5aedf1b0a51
  KVM: MMU: Optimize guest page table walk

  commit 15e2ac9a43d4d7d08088e404fddf2533a8e7d52e
  KVM: MMU: Fix 64-bit paging breakage on x86_32

But as Andi pointed out later, copy_from_user() does the same as
get_user() as long as we give a constant size to it.  So we use
copy_from_user() to clean up the code.

The only noticeable regression introduced by this is 64-bit gpte
reading on x86_32 hosts, which is needed for PAE guests.  But this can
be mitigated by implementing 8-byte get_user() for x86_32, if needed.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/paging_tmpl.h | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 52450a6..88ca456 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -115,20 +115,6 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 	return access;
 }
 
-static int FNAME(read_gpte)(pt_element_t *pte, pt_element_t __user *ptep_user)
-{
-#if defined(CONFIG_X86_32) && (PTTYPE == 64)
-	u32 *p = (u32 *)pte;
-	u32 __user *p_user = (u32 __user *)ptep_user;
-
-	if (unlikely(get_user(*p, p_user)))
-		return -EFAULT;
-	return get_user(*(p + 1), p_user + 1);
-#else
-	return get_user(*pte, ptep_user);
-#endif
-}
-
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -199,7 +185,7 @@ walk:
 		}
 
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
-		if (unlikely(FNAME(read_gpte)(&pte, ptep_user))) {
+		if (unlikely(copy_from_user(&pte, ptep_user, sizeof(pte)))) {
 			present = false;
 			break;
 		}
-- cgit v1.1

From fa3d315a4ce2c0891cdde262562e710d95fba19e Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Sat, 7 May 2011 16:35:38 +0900
Subject: KVM: Validate userspace_addr of memslot when registered

This way, we can avoid checking the user space address many times when
we read the guest memory.  Although we could do the same for writes by
checking which slots are writable, we do not care about writes for now:
reading the guest memory happens more often than writing.

[avi: change VERIFY_READ to VERIFY_WRITE]

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 88ca456..e3f8141 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -185,7 +185,7 @@ walk:
 		}
 
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
-		if (unlikely(copy_from_user(&pte, ptep_user, sizeof(pte)))) {
+		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
 			present = false;
 			break;
 		}
-- cgit v1.1
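[note: the hunk above only switches the walker to __copy_from_user(); the registration-side check that makes this safe lives in common KVM code and is not shown here. It amounts to something like the sketch below, where the helper name and error path are assumptions rather than the patch's literal code:]

/* Sketch only: shape of the registration-time check.  Validating the
 * whole user range once with access_ok() is what lets the page-table
 * walker use __copy_from_user(), which skips that check, later on. */
static int check_memslot_addr(struct kvm_userspace_memory_region *mem)
{
	if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
	    !access_ok(VERIFY_WRITE,
		       (void __user *)(unsigned long)mem->userspace_addr,
		       mem->memory_size))
		return -EINVAL;         /* reject the slot up front */
	return 0;
}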
From c1ed6dea8113597cfa00911faa814d9dbb586932 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:23:13 +0900
Subject: KVM: x86 emulator: Remove unused arg from seg_override()

In addition, one comma at the end of a statement is replaced with a
semicolon.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 59992484..3d0e5ac 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -500,7 +500,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
 }
 
 static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
-			     struct x86_emulate_ops *ops,
 			     struct decode_cache *c)
 {
 	if (!c->has_seg_override)
@@ -3527,7 +3526,7 @@ done_prefixes:
 	if (!c->has_seg_override)
 		set_seg_override(c, VCPU_SREG_DS);
 
-	memop.addr.mem.seg = seg_override(ctxt, ops, c);
+	memop.addr.mem.seg = seg_override(ctxt, c);
 
 	if (memop.type == OP_MEM && c->ad_bytes != 8)
 		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
@@ -3587,7 +3586,7 @@ done_prefixes:
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->src.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSI]);
-		c->src.addr.mem.seg = seg_override(ctxt, ops, c),
+		c->src.addr.mem.seg = seg_override(ctxt, c);
 		c->src.val = 0;
 		break;
 	case SrcImmFAddr:
@@ -4103,7 +4102,7 @@ writeback:
 	c->dst.type = saved_dst_type;
 
 	if ((c->d & SrcMask) == SrcSI)
-		string_addr_inc(ctxt, seg_override(ctxt, ops, c),
+		string_addr_inc(ctxt, seg_override(ctxt, c),
 				VCPU_REGS_RSI, &c->src);
 
 	if ((c->d & DstMask) == DstDI)
-- cgit v1.1
From 509cf9fe11dad85f96944095ed63b2caa85cdfc9 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:25:07 +0900
Subject: KVM: x86 emulator: Remove unused arg from read_descriptor()

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3d0e5ac..c26243f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -729,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs,
 }
 
 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
-			   struct x86_emulate_ops *ops,
 			   struct segmented_address addr,
 			   u16 *size, unsigned long *address, int op_bytes)
 {
@@ -2720,7 +2719,7 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
 	struct desc_ptr desc_ptr;
 	int rc;
 
-	rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem,
+	rc = read_descriptor(ctxt, c->src.addr.mem,
 			     &desc_ptr.size, &desc_ptr.address,
 			     c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
@@ -2749,9 +2748,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
 	struct desc_ptr desc_ptr;
 	int rc;
 
-	rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem,
-			     &desc_ptr.size,
-			     &desc_ptr.address,
+	rc = read_descriptor(ctxt, c->src.addr.mem,
+			     &desc_ptr.size, &desc_ptr.address,
 			     c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-- cgit v1.1

From adddcecf9222aa32938480cc1d03de629fab2a86 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:26:23 +0900
Subject: KVM: x86 emulator: Remove unused arg from writeback()

Remove the inline keyword while at it.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c26243f..d4f4375 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1351,8 +1351,7 @@ static void write_register_operand(struct operand *op)
 	}
 }
 
-static inline int writeback(struct x86_emulate_ctxt *ctxt,
-			    struct x86_emulate_ops *ops)
+static int writeback(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	struct decode_cache *c = &ctxt->decode;
@@ -4089,7 +4088,7 @@ special_insn:
 		goto done;
 
 writeback:
-	rc = writeback(ctxt, ops);
+	rc = writeback(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
-- cgit v1.1
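[note: the preceding cleanups all follow one pattern: struct x86_emulate_ctxt already carries its ops table, so threading a struct x86_emulate_ops * through every helper is redundant. A toy illustration of the pattern, using simplified types rather than the kernel's:]

struct ops {
	int (*read)(void *dst, unsigned long addr, unsigned int len);
};

struct ctxt {
	struct ops *ops;        /* always reachable from the context */
	/* ... decode state ... */
};

/* Before: static int helper(struct ctxt *c, struct ops *ops, ...)
 * After:  the ops pointer is recovered from the context instead. */
static int helper(struct ctxt *c, void *dst, unsigned long addr,
		  unsigned int len)
{
	return c->ops->read(dst, addr, len);
}

Dropping the parameter shrinks every call site and removes any chance of passing a mismatched ctxt/ops pair.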
From 3b9be3bf2e4d45828f84ba615283a53d11ebf470 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:27:55 +0900
Subject: KVM: x86 emulator: Remove unused arg from emulate_pop()

The unused ops argument of emulate_grp1a() is also removed.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d4f4375..569e57d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1402,7 +1402,6 @@ static int em_push(struct x86_emulate_ctxt *ctxt)
 }
 
 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
-		       struct x86_emulate_ops *ops,
 		       void *dest, int len)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -1423,7 +1422,7 @@ static int em_pop(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 
-	return emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+	return emulate_pop(ctxt, &c->dst.val, c->op_bytes);
 }
 
 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
@@ -1435,7 +1434,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
 	int cpl = ops->cpl(ctxt);
 
-	rc = emulate_pop(ctxt, ops, &val, len);
+	rc = emulate_pop(ctxt, &val, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1494,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
 	unsigned long selector;
 	int rc;
 
-	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+	rc = emulate_pop(ctxt, &selector, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1544,7 +1543,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, ctxt->ops, &c->regs[reg], c->op_bytes);
+		rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
 		--reg;
@@ -1633,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
 
 	/* TODO: Add stack limit check */
 
-	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
+	rc = emulate_pop(ctxt, &temp_eip, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1641,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
 	if (temp_eip & ~0xffff)
 		return emulate_gp(ctxt, 0);
 
-	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+	rc = emulate_pop(ctxt, &cs, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
+	rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1688,12 +1687,11 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
 	}
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
-				struct x86_emulate_ops *ops)
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 
-	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
+	return emulate_pop(ctxt, &c->dst.val, c->dst.bytes);
 }
 
 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
@@ -1822,12 +1820,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	unsigned long cs;
 
-	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+	rc = emulate_pop(ctxt, &c->eip, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	if (c->op_bytes == 4)
 		c->eip = (u32)c->eip;
-	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+	rc = emulate_pop(ctxt, &cs, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
@@ -2543,7 +2541,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 	c->dst.type = OP_REG;
 	c->dst.addr.reg = &c->eip;
 	c->dst.bytes = c->op_bytes;
-	rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+	rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
@@ -3918,7 +3916,7 @@ special_insn:
 		break;
 	}
 	case 0x8f:		/* pop (sole member of Grp1a) */
-		rc = emulate_grp1a(ctxt, ops);
+		rc = emulate_grp1a(ctxt);
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
-- cgit v1.1
From 51187683cb11b959535d32eb91b673c6a9a03e88 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:29:17 +0900
Subject: KVM: x86 emulator: Rename emulate_grpX() to em_grpX()

The prototypes are changed appropriately.  We also replace
"goto grp45;" with a simple em_grp45() call.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 569e57d..d9ebf69 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1687,14 +1687,14 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
 	}
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt)
+static int em_grp1a(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 
 	return emulate_pop(ctxt, &c->dst.val, c->dst.bytes);
 }
 
-static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
+static int em_grp2(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	switch (c->modrm_reg) {
@@ -1721,10 +1721,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
 		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
 		break;
 	}
+	return X86EMUL_CONTINUE;
 }
 
-static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
-			       struct x86_emulate_ops *ops)
+static int em_grp3(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	unsigned long *rax = &c->regs[VCPU_REGS_RAX];
@@ -1763,7 +1763,7 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
-static int emulate_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_grp45(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc = X86EMUL_CONTINUE;
@@ -1793,8 +1793,7 @@ static int emulate_grp45(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
-			       struct x86_emulate_ops *ops)
+static int em_grp9(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	u64 old = c->dst.orig_val64;
@@ -3916,7 +3915,7 @@ special_insn:
 		break;
 	}
 	case 0x8f:		/* pop (sole member of Grp1a) */
-		rc = emulate_grp1a(ctxt);
+		rc = em_grp1a(ctxt);
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
@@ -3932,7 +3931,7 @@ special_insn:
 	case 0xa8 ... 0xa9:	/* test ax, imm */
 		goto test;
 	case 0xc0 ... 0xc1:
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xc3: /* ret */
 		c->dst.type = OP_REG;
@@ -3967,11 +3966,11 @@ special_insn:
 		rc = emulate_iret(ctxt, ops);
 		break;
 	case 0xd0 ... 0xd1:	/* Grp2 */
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xd2 ... 0xd3:	/* Grp2 */
 		c->src.val = c->regs[VCPU_REGS_RCX];
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xe0 ... 0xe2:	/* loop/loopz/loopnz */
 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
@@ -4040,7 +4039,7 @@ special_insn:
 		ctxt->eflags ^= EFLG_CF;
 		break;
 	case 0xf6 ... 0xf7:	/* Grp3 */
-		rc = emulate_grp3(ctxt, ops);
+		rc = em_grp3(ctxt);
 		break;
 	case 0xf8: /* clc */
 		ctxt->eflags &= ~EFLG_CF;
 		break;
@@ -4071,13 +4070,13 @@ special_insn:
 		ctxt->eflags |= EFLG_DF;
 		break;
 	case 0xfe: /* Grp4 */
-	grp45:
-		rc = emulate_grp45(ctxt);
+		rc = em_grp45(ctxt);
 		break;
 	case 0xff: /* Grp5 */
 		if (c->modrm_reg == 5)
 			goto jump_far;
-		goto grp45;
+		rc = em_grp45(ctxt);
+		break;
 	default:
 		goto cannot_emulate;
 	}
@@ -4344,7 +4343,7 @@ twobyte_insn:
 			(u64) c->src.val;
 		break;
 	case 0xc7:		/* Grp9 (cmpxchg8b) */
-		rc = emulate_grp9(ctxt, ops);
+		rc = em_grp9(ctxt);
 		break;
 	default:
 		goto cannot_emulate;
-- cgit v1.1

From d2f62766d5778bbaf80d4feb90a23c7edc371a54 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:30:48 +0900
Subject: KVM: x86 emulator: Make jmp far emulation into a separate function

We introduce em_jmp_far().

We also call this from em_grp45() to stop treating the modrm_reg == 5
case separately in the group 5 emulation.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d9ebf69..d6e2477 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1687,6 +1687,23 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
 	}
 }
 
+static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+	unsigned short sel;
+
+	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+
+	rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	c->eip = 0;
+	memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	return X86EMUL_CONTINUE;
+}
+
 static int em_grp1a(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -1786,6 +1803,9 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 4: /* jmp abs */
 		c->eip = c->src.val;
 		break;
+	case 5: /* jmp far */
+		rc = em_jmp_far(ctxt);
+		break;
 	case 6:	/* push */
 		rc = em_push(ctxt);
 		break;
@@ -3997,19 +4017,9 @@ special_insn:
 	}
 	case 0xe9: /* jmp rel */
 		goto jmp;
-	case 0xea: { /* jmp far */
-		unsigned short sel;
-	jump_far:
-		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
-
-		rc = load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-
-		c->eip = 0;
-		memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	case 0xea: /* jmp far */
+		rc = em_jmp_far(ctxt);
 		break;
-	}
 	case 0xeb:
 	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
@@ -4073,8 +4083,6 @@ special_insn:
 		rc = em_grp45(ctxt);
 		break;
 	case 0xff: /* Grp5 */
-		if (c->modrm_reg == 5)
-			goto jump_far;
 		rc = em_grp45(ctxt);
 		break;
 	default:
 		goto cannot_emulate;
-- cgit v1.1

From c8cfbb555eb3632bf3dcbe1a591c1f4d0c28681c Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Sun, 1 May 2011 14:33:07 +0900
Subject: KVM: MMU: Use ptep_user for cmpxchg_gpte()

The address of the gpte was already calculated and stored in ptep_user
before entering cmpxchg_gpte().  This patch makes cmpxchg_gpte() use
that address, making it clear that we are using the same address during
walk_addr_generic().

Note that the unlikely annotations are used to show that the conditions
are unusual, rather than as a performance hint.
Signed-off-by: Takuya Yoshikawa
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e3f8141..6c4dc01 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 }
 
 static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-			       gfn_t table_gfn, unsigned index,
-			       pt_element_t orig_pte, pt_element_t new_pte)
+			       pt_element_t __user *ptep_user, unsigned index,
+			       pt_element_t orig_pte, pt_element_t new_pte)
 {
+	int npages;
 	pt_element_t ret;
 	pt_element_t *table;
 	struct page *page;
-	gpa_t gpa;
 
-	gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT,
-				 PFERR_USER_MASK|PFERR_WRITE_MASK);
-	if (gpa == UNMAPPED_GVA)
+	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+	/* Check if the user is doing something meaningless. */
+	if (unlikely(npages != 1))
 		return -EFAULT;
 
-	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
-
 	table = kmap_atomic(page, KM_USER0);
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
 	kunmap_atomic(table, KM_USER0);
@@ -220,9 +218,9 @@ walk:
 				int ret;
 				trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 							       sizeof(pte));
-				ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn,
-							  index, pte, pte|PT_ACCESSED_MASK);
-				if (ret < 0) {
+				ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+							  pte, pte|PT_ACCESSED_MASK);
+				if (unlikely(ret < 0)) {
 					present = false;
 					break;
 				} else if (ret)
@@ -279,9 +277,9 @@ walk:
 		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte,
-			    pte|PT_DIRTY_MASK);
-		if (ret < 0) {
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+					  pte, pte|PT_DIRTY_MASK);
+		if (unlikely(ret < 0)) {
 			present = false;
 			goto error;
 		} else if (ret)
-- cgit v1.1
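[note: the "Clean up gpte reading with copy_from_user()" commit earlier in this series suggests that the x86_32 regression could be mitigated by an 8-byte get_user(). Such a helper is hypothetical, nothing in this series adds it, but it could look roughly like this:]

/* Hypothetical 8-byte get_user() fallback for 32-bit hosts: read the
 * two halves separately, failing if either half faults.  Note that,
 * like the removed read_gpte() helper, this is not atomic with
 * respect to a concurrent writer of the gpte. */
static int get_user_u64(u64 *val, const u64 __user *ptr)
{
	u32 lo, hi;
	const u32 __user *p = (const u32 __user *)ptr;

	if (get_user(lo, p) || get_user(hi, p + 1))
		return -EFAULT;
	*val = ((u64)hi << 32) | lo;
	return 0;
}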