Commit 8cbc706
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
While unspecified, the behaviour of Intel processors is to first perform
the page table walk, then, if the walk was successful, to atomically
update the accessed and dirty bits of walked paging elements.

While we are not required to follow this exactly, doing so will allow us
to perform the access permissions check after the walk is complete,
rather than after each walk step. (The tricky case is SMEP: a zero in
any pte's U bit makes the referenced page a supervisor page, so we can't
fault on a one bit during the walk itself.)

Reviewed-by: Xiao Guangrong <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
Parent: 3d34ade
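
The shape of the change: the walker now records every visited pte (and its
user-space address) and, only after the walk has fully succeeded, replays the
accessed/dirty updates with a compare-and-exchange per level. Below is a
minimal standalone sketch of that two-phase pattern; the names walk_entry,
update_accessed_dirty, ACCESSED and DIRTY are illustrative stand-ins, not the
kernel's identifiers.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACCESSED (1ull << 5)	/* plays the role of PT_ACCESSED_MASK */
#define DIRTY    (1ull << 6)	/* plays the role of PT_DIRTY_MASK */

/* One walked paging element: where it lives and the value read during the walk. */
struct walk_entry {
	_Atomic uint64_t *ptep;
	uint64_t pte;
};

/*
 * Phase 2: after the walk has succeeded, revisit every recorded element
 * from the top level down to the final one, setting A everywhere and D
 * only on the final level of a write.  cmpxchg is used so that a
 * concurrent update to the pte is detected rather than overwritten;
 * returning false tells the caller to retry the whole walk.
 */
static bool update_accessed_dirty(struct walk_entry *w, int max_level,
				  int level, bool write_fault)
{
	for (int l = max_level; l >= level; --l) {
		uint64_t orig = w[l - 1].pte;
		uint64_t pte = orig | ACCESSED;

		if (l == level && write_fault)
			pte |= DIRTY;
		if (pte == orig)
			continue;	/* bits already set, nothing to write */
		if (!atomic_compare_exchange_strong(w[l - 1].ptep, &orig, pte))
			return false;	/* pte changed under us: retry the walk */
	}
	return true;
}

int main(void)
{
	_Atomic uint64_t pde = 0x1000, pte = 0x2000;
	/* A two-level walk: w[1] is the top entry, w[0] the final one. */
	struct walk_entry w[2] = { { &pte, 0x2000 }, { &pde, 0x1000 } };

	if (update_accessed_dirty(w, 2, 1, true))
		printf("pde=%#llx pte=%#llx\n",
		       (unsigned long long)atomic_load(&pde),
		       (unsigned long long)atomic_load(&pte));
	return 0;
}

The actual helper in the diff below, FNAME(update_accessed_dirty_bits), does
the same against guest page tables, except that FNAME(cmpxchg_gpte) can
additionally return a negative error when the guest pte cannot be accessed,
which the caller turns into a walk error rather than a retry.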

1 file changed: +47 −29

arch/x86/kvm/paging_tmpl.h

@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -119,13 +121,51 @@ static bool FNAME(is_last_gpte)(struct guest_walker *walker,
 	return false;
 }
 
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
+{
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
 static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
@@ -153,6 +193,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
@@ -183,6 +224,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
@@ -214,21 +256,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			eperm = true;
 		}
 
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
 		walker->ptes[walker->level - 1] = pte;
 
 		if (last_gpte) {
@@ -268,21 +295,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
-	else if (unlikely(!is_dirty_gpte(pte))) {
-		int ret;
 
-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
-		if (unlikely(ret < 0))
-			goto error;
-		else if (ret)
-			goto retry_walk;
-
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
-	}
+	ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+	if (unlikely(ret < 0))
+		goto error;
+	else if (ret)
+		goto retry_walk;
 
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
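
A note on the SMEP remark in the commit message: whether the referenced page
is a user page depends on the U bit at every level, so seeing U=1 in one
entry mid-walk is not yet grounds for a SMEP fault, because a U=0 at any
other level makes the page a supervisor page. Only with the full walk
recorded, as above, can the check run at the end. Schematically, in
illustrative C rather than kernel code (PT_USER_MASK here stands in for the
x86 U/S bit):

#include <stdbool.h>
#include <stdint.h>

#define PT_USER_MASK (1ull << 2)	/* U/S bit position in x86 paging entries */

/*
 * A page is a user page only if U=1 in *every* walked entry; any single
 * U=0 makes it a supervisor page.  Hence a SMEP violation (supervisor
 * fetch from a user page) can only be decided once all levels are seen.
 */
static bool is_user_page(const uint64_t *ptes, int levels)
{
	bool user = true;

	for (int l = 0; l < levels; ++l)
		user = user && (ptes[l] & PT_USER_MASK);
	return user;
}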
