
Commit 889e5cb

Xiao Guangrong authored and avikivity committed
KVM: MMU: split kvm_mmu_pte_write function
kvm_mmu_pte_write is too long; split it into smaller helper functions for better readability.

Signed-off-by: Xiao Guangrong <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
1 parent f873435 commit 889e5cb

1 file changed: +119 -75 lines changed


arch/x86/kvm/mmu.c

Lines changed: 119 additions & 75 deletions
@@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
         return !!(spte && (*spte & shadow_accessed_mask));
 }

-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+                                    const u8 *new, int *bytes)
 {
-        gfn_t gfn = gpa >> PAGE_SHIFT;
-        union kvm_mmu_page_role mask = { .word = 0 };
-        struct kvm_mmu_page *sp;
-        struct hlist_node *node;
-        LIST_HEAD(invalid_list);
-        u64 entry, gentry, *spte;
-        unsigned pte_size, page_offset, misaligned, quadrant, offset;
-        int level, npte, r, flooded = 0;
-        bool remote_flush, local_flush, zap_page;
-
-        /*
-         * If we don't have indirect shadow pages, it means no page is
-         * write-protected, so we can exit simply.
-         */
-        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-                return;
-
-        zap_page = remote_flush = local_flush = false;
-        offset = offset_in_page(gpa);
-
-        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+        u64 gentry;
+        int r;

         /*
          * Assume that the pte write on a page table of the same type
          * as the current vcpu paging mode since we update the sptes only
          * when they have the same mode.
          */
-        if (is_pae(vcpu) && bytes == 4) {
+        if (is_pae(vcpu) && *bytes == 4) {
                 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-                gpa &= ~(gpa_t)7;
-                bytes = 8;
-
-                r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+                *gpa &= ~(gpa_t)7;
+                *bytes = 8;
+                r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
                 if (r)
                         gentry = 0;
                 new = (const u8 *)&gentry;
         }

-        switch (bytes) {
+        switch (*bytes) {
         case 4:
                 gentry = *(const u32 *)new;
                 break;
@@ -3583,71 +3563,135 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                 break;
         }

-        /*
-         * No need to care whether allocation memory is successful
-         * or not since pte prefetch is skiped if it does not have
-         * enough objects in the cache.
-         */
-        mmu_topup_memory_caches(vcpu);
-        spin_lock(&vcpu->kvm->mmu_lock);
-        ++vcpu->kvm->stat.mmu_pte_write;
-        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+        return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        bool flooded = false;
+
         if (gfn == vcpu->arch.last_pt_write_gfn
             && !last_updated_pte_accessed(vcpu)) {
                 ++vcpu->arch.last_pt_write_count;
                 if (vcpu->arch.last_pt_write_count >= 3)
-                        flooded = 1;
+                        flooded = true;
         } else {
                 vcpu->arch.last_pt_write_gfn = gfn;
                 vcpu->arch.last_pt_write_count = 1;
                 vcpu->arch.last_pte_updated = NULL;
         }

+        return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+                                    int bytes)
+{
+        unsigned offset, pte_size, misaligned;
+
+        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+                 gpa, bytes, sp->role.word);
+
+        offset = offset_in_page(gpa);
+        pte_size = sp->role.cr4_pae ? 8 : 4;
+        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+        misaligned |= bytes < 4;
+
+        return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+        unsigned page_offset, quadrant;
+        u64 *spte;
+        int level;
+
+        page_offset = offset_in_page(gpa);
+        level = sp->role.level;
+        *nspte = 1;
+        if (!sp->role.cr4_pae) {
+                page_offset <<= 1;      /* 32->64 */
+                /*
+                 * A 32-bit pde maps 4MB while the shadow pdes map
+                 * only 2MB. So we need to double the offset again
+                 * and zap two pdes instead of one.
+                 */
+                if (level == PT32_ROOT_LEVEL) {
+                        page_offset &= ~7; /* kill rounding error */
+                        page_offset <<= 1;
+                        *nspte = 2;
+                }
+                quadrant = page_offset >> PAGE_SHIFT;
+                page_offset &= ~PAGE_MASK;
+                if (quadrant != sp->role.quadrant)
+                        return NULL;
+        }
+
+        spte = &sp->spt[page_offset / sizeof(*spte)];
+        return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                       const u8 *new, int bytes)
+{
+        gfn_t gfn = gpa >> PAGE_SHIFT;
+        union kvm_mmu_page_role mask = { .word = 0 };
+        struct kvm_mmu_page *sp;
+        struct hlist_node *node;
+        LIST_HEAD(invalid_list);
+        u64 entry, gentry, *spte;
+        int npte;
+        bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+        /*
+         * If we don't have indirect shadow pages, it means no page is
+         * write-protected, so we can exit simply.
+         */
+        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+                return;
+
+        zap_page = remote_flush = local_flush = false;
+
+        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+
+        /*
+         * No need to care whether allocation memory is successful
+         * or not since pte prefetch is skiped if it does not have
+         * enough objects in the cache.
+         */
+        mmu_topup_memory_caches(vcpu);
+
+        spin_lock(&vcpu->kvm->mmu_lock);
+        ++vcpu->kvm->stat.mmu_pte_write;
+        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+        flooded = detect_write_flooding(vcpu, gfn);
         mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
         for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-                pte_size = sp->role.cr4_pae ? 8 : 4;
-                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-                misaligned |= bytes < 4;
+                misaligned = detect_write_misaligned(sp, gpa, bytes);
+
                 if (misaligned || flooded) {
-                        /*
-                         * Misaligned accesses are too much trouble to fix
-                         * up; also, they usually indicate a page is not used
-                         * as a page table.
-                         *
-                         * If we're seeing too many writes to a page,
-                         * it may no longer be a page table, or we may be
-                         * forking, in which case it is better to unmap the
-                         * page.
-                         */
-                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-                                 gpa, bytes, sp->role.word);
                         zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
                                                                &invalid_list);
                         ++vcpu->kvm->stat.mmu_flooded;
                         continue;
                 }
-                page_offset = offset;
-                level = sp->role.level;
-                npte = 1;
-                if (!sp->role.cr4_pae) {
-                        page_offset <<= 1;      /* 32->64 */
-                        /*
-                         * A 32-bit pde maps 4MB while the shadow pdes map
-                         * only 2MB. So we need to double the offset again
-                         * and zap two pdes instead of one.
-                         */
-                        if (level == PT32_ROOT_LEVEL) {
-                                page_offset &= ~7; /* kill rounding error */
-                                page_offset <<= 1;
-                                npte = 2;
-                        }
-                        quadrant = page_offset >> PAGE_SHIFT;
-                        page_offset &= ~PAGE_MASK;
-                        if (quadrant != sp->role.quadrant)
-                                continue;
-                }
+
+                spte = get_written_sptes(sp, gpa, &npte);
+                if (!spte)
+                        continue;
+
                 local_flush = true;
-                spte = &sp->spt[page_offset / sizeof(*spte)];
                 while (npte--) {
                         entry = *spte;
                         mmu_page_zap_pte(vcpu->kvm, sp, spte);
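
A note on the write-misalignment test that detect_write_misaligned() factors out: a guest write is only patched into the shadow page table if it falls entirely within one guest pte and is at least 4 bytes wide; otherwise the shadow page is zapped. Below is a minimal standalone sketch of that arithmetic, assuming x86 4KiB pages (PAGE_SHIFT == 12); the function name write_is_misaligned(), the sample gpa/bytes values, and the main() harness are hypothetical, not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12                      /* assumed: x86 4KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define offset_in_page(gpa) ((unsigned)((gpa) & (PAGE_SIZE - 1)))

/* Mirrors the arithmetic of detect_write_misaligned(); cr4_pae selects the
 * guest pte size (8 bytes for PAE, 4 bytes for legacy 32-bit paging). */
static bool write_is_misaligned(uint64_t gpa, int bytes, bool cr4_pae)
{
        unsigned offset = offset_in_page(gpa);
        unsigned pte_size = cr4_pae ? 8 : 4;
        unsigned misaligned;

        /* XOR of the first and last byte offsets keeps only the bits in
         * which they differ; masking off the low pte_size bits leaves a
         * non-zero value exactly when the write crosses a pte boundary. */
        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        misaligned |= bytes < 4;        /* short writes are never patched */

        return misaligned;
}

int main(void)
{
        /* 8-byte write aligned to a PAE pte: can be patched in place. */
        printf("%d\n", write_is_misaligned(0x1000, 8, true));  /* 0 */
        /* 4-byte write of the high half of a PAE pte: still one pte. */
        printf("%d\n", write_is_misaligned(0x1004, 4, true));  /* 0 */
        /* 4-byte write straddling two PAE ptes: misaligned. */
        printf("%d\n", write_is_misaligned(0x1006, 4, true));  /* 1 */
        /* 2-byte write: too narrow to contain a full gpte. */
        printf("%d\n", write_is_misaligned(0x1000, 2, true));  /* 1 */
        return 0;
}

The bytes < 4 term matches the check the patch moves out of the loop in kvm_mmu_pte_write(): a write narrower than a 32-bit gpte can never contain a complete entry, so the shadow page is zapped instead of updated.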
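
The quadrant arithmetic in get_written_sptes() is the densest part of the split. The sketch below models only the !sp->role.cr4_pae path under stated assumptions (x86 4KiB pages, PT32_ROOT_LEVEL == 2); written_spte_index(), its parameters, and the main() harness are hypothetical illustrations, not code from the patch. The idea: 4-byte guest ptes are shadowed by 8-byte sptes, so one guest page table page spans two shadow pages (quadrants) and the byte offset must be doubled; at the 32-bit root level a guest pde covers 4MB while a shadow pde covers only 2MB, so the offset is doubled again and two sptes are zapped.

#include <stdio.h>

#define PAGE_SHIFT      12              /* assumed: x86 4KiB pages */
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define PT32_ROOT_LEVEL 2               /* assumed value */

/* Returns the spte index addressed by a guest write at byte offset
 * gpte_offset within a non-PAE page table page, or -1 when the write
 * belongs to a different quadrant than the shadow page sp_quadrant.
 * *nspte is the number of sptes to zap, as in get_written_sptes(). */
static int written_spte_index(unsigned gpte_offset, int level,
                              unsigned sp_quadrant, int *nspte)
{
        unsigned page_offset = gpte_offset;
        unsigned quadrant;

        *nspte = 1;
        page_offset <<= 1;              /* 4-byte gptes -> 8-byte sptes */
        if (level == PT32_ROOT_LEVEL) {
                /* a 32-bit pde maps 4MB, a shadow pde only 2MB */
                page_offset &= ~7;
                page_offset <<= 1;
                *nspte = 2;
        }
        quadrant = page_offset >> PAGE_SHIFT;   /* which shadow page? */
        page_offset &= ~PAGE_MASK;
        if (quadrant != sp_quadrant)
                return -1;

        return page_offset / 8;                 /* sizeof(u64) */
}

int main(void)
{
        int nspte;

        /* write to the 5th gpte (byte offset 16) of a non-root page
         * table shadowed by quadrant 0: update spte 4 of that page */
        printf("%d\n", written_spte_index(16, 1, 0, &nspte));    /* 4 */
        /* the same write checked against the quadrant-1 shadow page */
        printf("%d\n", written_spte_index(16, 1, 1, &nspte));    /* -1 */
        /* write to gpte 600 (byte offset 2400): lands in quadrant 1 */
        printf("%d\n", written_spte_index(2400, 1, 1, &nspte));  /* 88 */
        return 0;
}

When the quadrant does not match, get_written_sptes() returns NULL and kvm_mmu_pte_write() simply skips that shadow page, which is what the new "if (!spte) continue;" path does.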
