Commit 1cb3f3a

Xiao Guangrong authored and avikivity committed

KVM: x86: retry non-page-table writing instructions

If the emulation is caused by a #PF and the faulting instruction is not a page-table-writing instruction, the VM-exit was caused by shadow-page write protection; we can zap the shadow page and retry the instruction directly.

The idea is from Avi.

Signed-off-by: Xiao Guangrong <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>

1 parent d5ae7ce commit 1cb3f3a
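Before the per-file diffs, the new control flow can be condensed as follows. This is an orientation sketch assembled from the hunks below, with bodies abbreviated; it is not compilable on its own:

/* kvm_mmu_page_fault() in arch/x86/kvm/mmu.c now requests the retry
 * fast path by default, except when the fault matches a cached MMIO
 * access, where re-executing the instruction cannot help: */
int emulation_type = EMULTYPE_RETRY;
if (is_mmio_page_fault(vcpu, cr2))
	emulation_type = 0;
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);

/* x86_emulate_instruction() in arch/x86/kvm/x86.c then short-circuits
 * before full emulation: if the decoded instruction does not itself
 * write a page table, the shadow page covering cr2 is zapped via
 * kvm_mmu_unprotect_page() and the guest simply re-executes: */
if (retry_instruction(ctxt, cr2, emulation_type))
	return EMULATE_DONE;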

File tree

5 files changed: +77, -6 lines changed

arch/x86/include/asm/kvm_emulate.h
Lines changed: 1 addition & 0 deletions

@@ -364,6 +364,7 @@ enum x86_intercept {
 #endif
 
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1

arch/x86/include/asm/kvm_host.h
Lines changed: 5 additions & 0 deletions

@@ -444,6 +444,9 @@ struct kvm_vcpu_arch {
 
 	cpumask_var_t wbinvd_dirty_mask;
 
+	unsigned long last_retry_eip;
+	unsigned long last_retry_addr;
+
 	struct {
 		bool halted;
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];

@@ -692,6 +695,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	(1 << 0)
 #define EMULTYPE_TRAP_UD	(1 << 1)
 #define EMULTYPE_SKIP		(1 << 2)
+#define EMULTYPE_RETRY		(1 << 3)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -756,6 +760,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
 		       bool guest_initiated);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);

arch/x86/kvm/emulate.c
Lines changed: 5 additions & 0 deletions

@@ -3702,6 +3702,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+	return ctxt->d & PageTable;
+}
+
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 {
 	/* The second termination condition only applies for REPE

arch/x86/kvm/mmu.c
Lines changed: 19 additions & 6 deletions

@@ -1998,7 +1998,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;

@@ -2007,16 +2007,19 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
-
+	spin_lock(&kvm->mmu_lock);
 	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
 		r = 1;
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {

@@ -3698,9 +3701,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
+
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);

@@ -3721,10 +3723,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		       void *insn, int insn_len)
 {
-	int r;
+	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);

@@ -3736,7 +3746,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}
 
-	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
+	if (is_mmio_page_fault(vcpu, cr2))
+		emulation_type = 0;
+
+	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
 	switch (er) {
 	case EMULATE_DONE:
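One locking change in the hunks above is worth calling out: kvm_mmu_unprotect_page() now takes mmu_lock itself, so kvm_mmu_unprotect_page_virt() drops its external lock/unlock pair, and the newly exported symbol can be called from x86.c without any locking at the call site. A minimal before/after sketch, paraphrased from the diff:

/* Before: the helper was static and the caller held the lock. */
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
spin_unlock(&vcpu->kvm->mmu_lock);

/* After: the exported helper is self-locking, so any caller,
 * including retry_instruction() in x86.c below, just calls it. */
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);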

arch/x86/kvm/x86.c
Lines changed: 47 additions & 0 deletions

@@ -4836,6 +4836,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
 }
 
+static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+			      unsigned long cr2, int emulation_type)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+
+	last_retry_eip = vcpu->arch.last_retry_eip;
+	last_retry_addr = vcpu->arch.last_retry_addr;
+
+	/*
+	 * If the emulation is caused by #PF and it is non-page_table
+	 * writing instruction, it means the VM-EXIT is caused by shadow
+	 * page protected, we can zap the shadow page and retry this
+	 * instruction directly.
+	 *
+	 * Note: if the guest uses a non-page-table modifying instruction
+	 * on the PDE that points to the instruction, then we will unmap
+	 * the instruction and go to an infinite loop. So, we cache the
+	 * last retried eip and the last fault address, if we meet the eip
+	 * and the address again, we can break out of the potential infinite
+	 * loop.
+	 */
+	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
+
+	if (!(emulation_type & EMULTYPE_RETRY))
+		return false;
+
+	if (x86_page_table_writing_insn(ctxt))
+		return false;
+
+	if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+		return false;
+
+	vcpu->arch.last_retry_eip = ctxt->eip;
+	vcpu->arch.last_retry_addr = cr2;
+
+	if (!vcpu->arch.mmu.direct_map)
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	return true;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,

@@ -4877,6 +4921,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
+	if (retry_instruction(ctxt, cr2, emulation_type))
+		return EMULATE_DONE;
+
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
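The eip/address cache that breaks the potential infinite loop can be modeled in isolation. The following is a hypothetical, self-contained userspace C program (not kernel code; the EMULTYPE_RETRY check and the actual shadow-page zapping are omitted) that mirrors the guard logic of retry_instruction():

#include <stdbool.h>
#include <stdio.h>

struct vcpu_model {
	unsigned long last_retry_eip;
	unsigned long last_retry_addr;
};

static bool should_retry(struct vcpu_model *vcpu, unsigned long eip,
			 unsigned long addr, bool page_table_writing_insn)
{
	unsigned long last_eip = vcpu->last_retry_eip;
	unsigned long last_addr = vcpu->last_retry_addr;

	/* Clear the cache up front, as retry_instruction() does. */
	vcpu->last_retry_eip = vcpu->last_retry_addr = 0;

	if (page_table_writing_insn)
		return false;	/* page-table write: must emulate */

	if (eip == last_eip && addr == last_addr)
		return false;	/* same fault twice: break the loop */

	vcpu->last_retry_eip = eip;
	vcpu->last_retry_addr = addr;
	return true;		/* unprotect the page and re-execute */
}

int main(void)
{
	struct vcpu_model vcpu = { 0, 0 };

	/* First fault at eip 0x1000 on address 0x2000: retried. */
	printf("%d\n", should_retry(&vcpu, 0x1000, 0x2000, false)); /* 1 */
	/* Identical fault again (e.g. the insn unmapped its own PDE):
	 * fall back to emulation instead of looping forever. */
	printf("%d\n", should_retry(&vcpu, 0x1000, 0x2000, false)); /* 0 */
	return 0;
}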
