@@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
         return !!(spte && (*spte & shadow_accessed_mask));
 }
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+                                    const u8 *new, int *bytes)
 {
-        gfn_t gfn = gpa >> PAGE_SHIFT;
-        union kvm_mmu_page_role mask = { .word = 0 };
-        struct kvm_mmu_page *sp;
-        struct hlist_node *node;
-        LIST_HEAD(invalid_list);
-        u64 entry, gentry, *spte;
-        unsigned pte_size, page_offset, misaligned, quadrant, offset;
-        int level, npte, r, flooded = 0;
-        bool remote_flush, local_flush, zap_page;
-
-        /*
-         * If we don't have indirect shadow pages, it means no page is
-         * write-protected, so we can exit simply.
-         */
-        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-                return;
-
-        zap_page = remote_flush = local_flush = false;
-        offset = offset_in_page(gpa);
-
-        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+        u64 gentry;
+        int r;
 
         /*
          * Assume that the pte write on a page table of the same type
          * as the current vcpu paging mode since we update the sptes only
          * when they have the same mode.
          */
-        if (is_pae(vcpu) && bytes == 4) {
+        if (is_pae(vcpu) && *bytes == 4) {
                 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-                gpa &= ~(gpa_t)7;
-                bytes = 8;
-
-                r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+                *gpa &= ~(gpa_t)7;
+                *bytes = 8;
+                r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
                 if (r)
                         gentry = 0;
                 new = (const u8 *)&gentry;
         }
 
-        switch (bytes) {
+        switch (*bytes) {
         case 4:
                 gentry = *(const u32 *)new;
                 break;
@@ -3583,71 +3563,135 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                 break;
         }
 
-        /*
-         * No need to care whether allocation memory is successful
-         * or not since pte prefetch is skiped if it does not have
-         * enough objects in the cache.
-         */
-        mmu_topup_memory_caches(vcpu);
-        spin_lock(&vcpu->kvm->mmu_lock);
-        ++vcpu->kvm->stat.mmu_pte_write;
-        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+        return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        bool flooded = false;
+
         if (gfn == vcpu->arch.last_pt_write_gfn
             && !last_updated_pte_accessed(vcpu)) {
                 ++vcpu->arch.last_pt_write_count;
                 if (vcpu->arch.last_pt_write_count >= 3)
-                        flooded = 1;
+                        flooded = true;
         } else {
                 vcpu->arch.last_pt_write_gfn = gfn;
                 vcpu->arch.last_pt_write_count = 1;
                 vcpu->arch.last_pte_updated = NULL;
         }
 
+        return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+                                    int bytes)
+{
+        unsigned offset, pte_size, misaligned;
+
+        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+                 gpa, bytes, sp->role.word);
+
+        offset = offset_in_page(gpa);
+        pte_size = sp->role.cr4_pae ? 8 : 4;
+        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+        misaligned |= bytes < 4;
+
+        return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+        unsigned page_offset, quadrant;
+        u64 *spte;
+        int level;
+
+        page_offset = offset_in_page(gpa);
+        level = sp->role.level;
+        *nspte = 1;
+        if (!sp->role.cr4_pae) {
+                page_offset <<= 1;      /* 32->64 */
+                /*
+                 * A 32-bit pde maps 4MB while the shadow pdes map
+                 * only 2MB. So we need to double the offset again
+                 * and zap two pdes instead of one.
+                 */
+                if (level == PT32_ROOT_LEVEL) {
+                        page_offset &= ~7; /* kill rounding error */
+                        page_offset <<= 1;
+                        *nspte = 2;
+                }
+                quadrant = page_offset >> PAGE_SHIFT;
+                page_offset &= ~PAGE_MASK;
+                if (quadrant != sp->role.quadrant)
+                        return NULL;
+        }
+
+        spte = &sp->spt[page_offset / sizeof(*spte)];
+        return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                       const u8 *new, int bytes)
+{
+        gfn_t gfn = gpa >> PAGE_SHIFT;
+        union kvm_mmu_page_role mask = { .word = 0 };
+        struct kvm_mmu_page *sp;
+        struct hlist_node *node;
+        LIST_HEAD(invalid_list);
+        u64 entry, gentry, *spte;
+        int npte;
+        bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+        /*
+         * If we don't have indirect shadow pages, it means no page is
+         * write-protected, so we can exit simply.
+         */
+        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+                return;
+
+        zap_page = remote_flush = local_flush = false;
+
+        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+
+        /*
+         * No need to care whether allocation memory is successful
+         * or not since pte prefetch is skiped if it does not have
+         * enough objects in the cache.
+         */
+        mmu_topup_memory_caches(vcpu);
+
+        spin_lock(&vcpu->kvm->mmu_lock);
+        ++vcpu->kvm->stat.mmu_pte_write;
+        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+        flooded = detect_write_flooding(vcpu, gfn);
         mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
         for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-                pte_size = sp->role.cr4_pae ? 8 : 4;
-                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-                misaligned |= bytes < 4;
+                misaligned = detect_write_misaligned(sp, gpa, bytes);
+
                 if (misaligned || flooded) {
-                        /*
-                         * Misaligned accesses are too much trouble to fix
-                         * up; also, they usually indicate a page is not used
-                         * as a page table.
-                         *
-                         * If we're seeing too many writes to a page,
-                         * it may no longer be a page table, or we may be
-                         * forking, in which case it is better to unmap the
-                         * page.
-                         */
-                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-                                 gpa, bytes, sp->role.word);
                         zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
                                                                &invalid_list);
                         ++vcpu->kvm->stat.mmu_flooded;
                         continue;
                 }
-                page_offset = offset;
-                level = sp->role.level;
-                npte = 1;
-                if (!sp->role.cr4_pae) {
-                        page_offset <<= 1;      /* 32->64 */
-                        /*
-                         * A 32-bit pde maps 4MB while the shadow pdes map
-                         * only 2MB. So we need to double the offset again
-                         * and zap two pdes instead of one.
-                         */
-                        if (level == PT32_ROOT_LEVEL) {
-                                page_offset &= ~7; /* kill rounding error */
-                                page_offset <<= 1;
-                                npte = 2;
-                        }
-                        quadrant = page_offset >> PAGE_SHIFT;
-                        page_offset &= ~PAGE_MASK;
-                        if (quadrant != sp->role.quadrant)
-                                continue;
-                }
+
+                spte = get_written_sptes(sp, gpa, &npte);
+                if (!spte)
+                        continue;
+
                 local_flush = true;
-                spte = &sp->spt[page_offset / sizeof(*spte)];
                 while (npte--) {
                         entry = *spte;
                         mmu_page_zap_pte(vcpu->kvm, sp, spte);
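
For readers puzzling over the bit arithmetic in detect_write_misaligned() above, here is a standalone sketch (not part of the patch; the helper name and test values are invented for illustration) that reproduces the check and prints a few worked examples. The XOR/mask expression is non-zero exactly when the first and last written byte fall into different pte_size-aligned slots, i.e. the write straddles a guest pte.

/* Standalone illustration, not kernel code: mirrors the alignment test
 * used by detect_write_misaligned() in the patch above. */
#include <stdio.h>
#include <stdbool.h>

/* hypothetical helper; 'offset' is the write's offset within the page */
static bool write_is_misaligned(unsigned offset, int bytes, unsigned pte_size)
{
        unsigned misaligned;

        /* non-zero iff the first and last written byte land in different
         * pte_size-aligned slots, i.e. the write straddles a gpte */
        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        misaligned |= bytes < 4;        /* partial writes are rejected too */

        return misaligned;
}

int main(void)
{
        /* 8-byte write at offset 0x18 covers exactly one PAE gpte */
        printf("0x18/8 -> %d\n", write_is_misaligned(0x18, 8, 8));     /* 0 */
        /* 8-byte write at offset 0x14 straddles the gptes at 0x10 and 0x18 */
        printf("0x14/8 -> %d\n", write_is_misaligned(0x14, 8, 8));     /* 1 */
        /* 2-byte write is too small to be a whole gpte update */
        printf("0x18/2 -> %d\n", write_is_misaligned(0x18, 2, 8));     /* 1 */
        return 0;
}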
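
Similarly, the offset doubling and quadrant check in get_written_sptes() is easier to follow with concrete numbers. The sketch below is illustrative only: the constants stand in for the kernel's PAGE_SHIFT/PAGE_MASK/PT32_ROOT_LEVEL and the scenario is made up. A non-PAE guest pte is 4 bytes while a shadow pte is 8, so one guest page table spans two shadow pages, and the quadrant selects which of the two a given write lands in.

/* Standalone illustration, not kernel code: the non-PAE offset/quadrant
 * arithmetic from get_written_sptes() with made-up example values. */
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define PT32_ROOT_LEVEL 2

int main(void)
{
        unsigned page_offset = 0x804;   /* guest writes byte 0x804 of its page table */
        int level = 1;                  /* a leaf page-table page, not a pde page */
        int nspte = 1;
        unsigned quadrant;

        page_offset <<= 1;              /* 4-byte gpte offset -> 8-byte spte offset */
        if (level == PT32_ROOT_LEVEL) {
                page_offset &= ~7;      /* kill rounding error */
                page_offset <<= 1;      /* one 4MB guest pde -> two 2MB shadow pdes */
                nspte = 2;
        }
        quadrant = page_offset >> PAGE_SHIFT;   /* which shadow page (quadrant) */
        page_offset &= ~PAGE_MASK;              /* offset within that shadow page */

        /* prints: quadrant=1 offset=0x8 nspte=1 -- the write belongs to the
         * shadow page covering the second half of the guest page table */
        printf("quadrant=%u offset=0x%x nspte=%d\n", quadrant, page_offset, nspte);
        return 0;
}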