
Commit a30f47c

Xiao Guangrong authored and Avi Kivity committed
KVM: MMU: improve write flooding detection
Detecting write flooding did not work well when handling a write to a guest page table: if the last speculative spte had not been accessed, we treated the page as write-flooded. However, speculative sptes are installed on many paths (pte prefetch, page sync, and so on), so the last speculative spte may not point to the written page at all, and the written page may still be accessed through other sptes. Relying on the Accessed bit of the last speculative spte is therefore not enough. Instead of detecting whether the page was accessed, detect whether the shadow page is used again after it is written: if it keeps being written but is not used, treat it as no longer a page table, or as one that has not been used for a long time.

Signed-off-by: Xiao Guangrong <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
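In short, the new heuristic hangs a counter off each shadow page: every emulated write to the page bumps it, and any use of the page as a page table (a shadow-walk step that reaches it) clears it; three writes with no intervening use mark the page as flooded. The stand-alone sketch below is only a model of that lifecycle under simplified, made-up types (shadow_page, page_used_as_page_table and emulated_write_floods are illustrative names, not the kernel's), loosely mirroring detect_write_flooding() and clear_sp_write_flooding_count() from the patch:

/*
 * Illustrative, self-contained model of the per-shadow-page
 * write-flooding heuristic; the types and function names are
 * simplified stand-ins, not the real KVM structures.
 */
#include <stdbool.h>
#include <stdio.h>

struct shadow_page {
	int level;                 /* shadow page-table level */
	int write_flooding_count;  /* emulated writes since last use */
};

/*
 * Models clear_sp_write_flooding_count(): the page was just reached
 * by a shadow-walk, so it is still in use as a page table.
 */
static void page_used_as_page_table(struct shadow_page *sp)
{
	sp->write_flooding_count = 0;
}

/*
 * Models detect_write_flooding(): level-1 pages may become unsync and
 * take guest writes legitimately, so they are never treated as flooded;
 * otherwise three writes with no intervening use trip the detector.
 */
static bool emulated_write_floods(struct shadow_page *sp)
{
	if (sp->level == 1)
		return false;

	return ++sp->write_flooding_count >= 3;
}

int main(void)
{
	struct shadow_page sp = { .level = 2, .write_flooding_count = 0 };

	/* Three emulated writes with no intervening use: zap candidate. */
	for (int i = 0; i < 3; i++)
		printf("write %d -> flooded: %d\n", i + 1, emulated_write_floods(&sp));

	/* A page-fault walk through the page resets the counter. */
	page_used_as_page_table(&sp);
	printf("after reuse -> flooded: %d\n", emulated_write_floods(&sp));
	return 0;
}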
1 parent 5d9ca30 commit a30f47c

File tree

3 files changed: 32 additions & 48 deletions


arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 4 deletions
@@ -239,6 +239,8 @@ struct kvm_mmu_page {
 	int clear_spte_count;
 #endif
 
+	int write_flooding_count;
+
 	struct rcu_head rcu;
 };
 
@@ -353,10 +355,6 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
-	gfn_t last_pt_write_gfn;
-	int last_pt_write_count;
-	u64 *last_pte_updated;
-
 	struct fpu guest_fpu;
 	u64 xcr0;

arch/x86/kvm/mmu.c

Lines changed: 25 additions & 37 deletions
@@ -1653,6 +1653,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
 		sp->spt[i] = 0ull;
 }
 
+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+	sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+	__clear_sp_write_flooding_count(sp);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1696,6 +1708,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);
 
+		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
@@ -1848,15 +1861,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 	mmu_page_remove_parent_pte(sp, parent_pte);
 }
 
-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.last_pte_updated = NULL;
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
@@ -1916,7 +1920,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	}
 
 	sp->role.invalid = 1;
-	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
 
@@ -2361,8 +2364,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 	}
 	kvm_release_pfn_clean(pfn);
-	if (speculative)
-		vcpu->arch.last_pte_updated = sptep;
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -3523,13 +3524,6 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
 		kvm_mmu_flush_tlb(vcpu);
 }
 
-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
-{
-	u64 *spte = vcpu->arch.last_pte_updated;
-
-	return !!(spte && (*spte & shadow_accessed_mask));
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    const u8 *new, int *bytes)
 {
@@ -3570,22 +3564,16 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
  * If we're seeing too many writes to a page, it may no longer be a page table,
  * or we may be forking, in which case it is better to unmap the page.
  */
-static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
 {
-	bool flooded = false;
-
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = true;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
-	}
+	/*
+	 * Skip write-flooding detected for the sp whose level is 1, because
+	 * it can become unsync, then the guest page is not write-protected.
+	 */
+	if (sp->role.level == 1)
+		return false;
 
-	return flooded;
+	return ++sp->write_flooding_count >= 3;
 }
 
 /*
@@ -3657,7 +3645,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
-	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+	bool remote_flush, local_flush, zap_page;
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -3683,12 +3671,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	++vcpu->kvm->stat.mmu_pte_write;
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		misaligned = detect_write_misaligned(sp, gpa, bytes);
+		spte = get_written_sptes(sp, gpa, &npte);
 
-		if (misaligned || flooded) {
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp, spte)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;

arch/x86/kvm/paging_tmpl.h

Lines changed: 5 additions & 7 deletions
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t table_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);
 
 		sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t direct_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		validate_direct_spte(vcpu, it.sptep, direct_access);
 
 		drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(it.sptep, sp);
 	}
 
+	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
 		     user_fault, write_fault, emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		if (!prefault) {
+		if (!prefault)
 			inject_page_fault(vcpu, &walker.fault);
-			/* reset fork detector */
-			vcpu->arch.last_pt_write_count = 0;
-		}
+
 		return 0;
 	}
 
@@ -641,9 +642,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
 		 sptep, *sptep, emulate);
 
-	if (!emulate)
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
 	++vcpu->stat.pf_fixed;
 	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
