
Commit 1d86b5c

Merge branch 'queue' into next
* queue:
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly

Signed-off-by: Avi Kivity <[email protected]>
2 parents: ecba9a5 + c542151

File tree

10 files changed: +300 −220 lines


Documentation/virtual/kvm/msr.txt (20 additions, 12 deletions)
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
     time information and check that they are both equal and even.
     An odd version indicates an in-progress update.

-    sec: number of seconds for wallclock.
+    sec: number of seconds for wallclock at time of boot.

-    nsec: number of nanoseconds for wallclock.
+    nsec: number of nanoseconds for wallclock at time of boot.
+
+    In order to get the current wallclock time, the system_time from
+    MSR_KVM_SYSTEM_TIME_NEW needs to be added.

     Note that although MSRs are per-CPU entities, the effect of this
     particular MSR is global.
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
     time at the time this structure was last updated. Unit is
     nanoseconds.

-    tsc_to_system_mul: a function of the tsc frequency. One has
-    to multiply any tsc-related quantity by this value to get
-    a value in nanoseconds, besides dividing by 2^tsc_shift
+    tsc_to_system_mul: multiplier to be used when converting
+    tsc-related quantity to nanoseconds

-    tsc_shift: cycle to nanosecond divider, as a power of two, to
-    allow for shift rights. One has to shift right any tsc-related
-    quantity by this value to get a value in nanoseconds, besides
-    multiplying by tsc_to_system_mul.
+    tsc_shift: shift to be used when converting tsc-related
+    quantity to nanoseconds. This shift will ensure that
+    multiplication with tsc_to_system_mul does not overflow.
+    A positive value denotes a left shift, a negative value
+    a right shift.

-    With this information, guests can derive per-CPU time by
-    doing:
+    The conversion from tsc to nanoseconds involves an additional
+    right shift by 32 bits. With this information, guests can
+    derive per-CPU time by doing:

         time = (current_tsc - tsc_timestamp)
-        time = (time * tsc_to_system_mul) >> tsc_shift
+        if (tsc_shift >= 0)
+            time <<= tsc_shift;
+        else
+            time >>= -tsc_shift;
+        time = (time * tsc_to_system_mul) >> 32
         time = time + system_time

     flags: bits in this field indicate extended capabilities
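
Expressed as stand-alone C, the documented conversion reads as the sketch below. The struct layout and names are illustrative assumptions (the authoritative layout is the pvclock structure behind this MSR), and the 128-bit intermediate is a GCC/Clang extension used so the 64x32-bit multiply keeps its high bits:

    /* Sketch of the documented kvmclock conversion; field names and the
     * struct layout are assumptions for illustration only. */
    #include <stdint.h>

    struct kvmclock_info {
            uint64_t tsc_timestamp;
            uint64_t system_time;
            uint32_t tsc_to_system_mul;
            int8_t   tsc_shift;
    };

    static uint64_t guest_time_ns(const struct kvmclock_info *ci,
                                  uint64_t current_tsc)
    {
            uint64_t time = current_tsc - ci->tsc_timestamp;

            /* tsc_shift pre-scales so the multiplication cannot overflow */
            if (ci->tsc_shift >= 0)
                    time <<= ci->tsc_shift;
            else
                    time >>= -ci->tsc_shift;

            /* tsc_to_system_mul is a 32.32 fixed-point factor, hence >> 32 */
            time = (uint64_t)(((unsigned __int128)time *
                               ci->tsc_to_system_mul) >> 32);

            return time + ci->system_time;
    }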

arch/x86/include/asm/kvm_host.h (14 additions, 0 deletions)
@@ -287,10 +287,24 @@ struct kvm_mmu {
         union kvm_mmu_page_role base_role;
         bool direct_map;

+        /*
+         * Bitmap; bit set = permission fault
+         * Byte index: page fault error code [4:1]
+         * Bit index: pte permissions in ACC_* format
+         */
+        u8 permissions[16];
+
         u64 *pae_root;
         u64 *lm_root;
         u64 rsvd_bits_mask[2][4];

+        /*
+         * Bitmap: bit set = last pte in walk
+         * index[0:1]: level (zero-based)
+         * index[2]: pte.ps
+         */
+        u8 last_pte_bitmap;
+
         bool nx;

         u64 pdptrs[4]; /* pae */
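
A small user-space sketch makes the indexing described in these two comments concrete. The PFERR_* and ACC_* values are assumptions matching the usual x86 definitions (write fault = error-code bit 1, user fault = bit 2; ACC exec = bit 0, write = bit 1, user = bit 2):

    /* Index formation for permissions[] and last_pte_bitmap; all mask
     * values are assumed, not copied from the kernel headers. */
    #include <assert.h>

    int main(void)
    {
            /* permissions[]: a user-mode write fault, pfec = user|write = 6 */
            unsigned pfec = (1u << 2) | (1u << 1);
            assert((pfec >> 1) == 3);       /* byte index = error code [4:1] */

            /* bit index: a read-only user pte, pte_access = user|exec = 5 */
            unsigned pte_access = (1u << 2) | (1u << 0);
            assert(pte_access == 5);        /* consults bit 5 of permissions[3] */

            /* last_pte_bitmap: 2MB pde -> level 2 in index[0:1], ps in index[2] */
            unsigned index = (2 - 1) | (1u << 2);
            assert(index == 5);
            return 0;
    }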

arch/x86/kvm/mmu.c (91 additions, 0 deletions)
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
         return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }

+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+        unsigned mask;
+
+        BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+        mask = (unsigned)~ACC_WRITE_MASK;
+        /* Allow write access to dirty gptes */
+        mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+        *access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
                            int *nr_present)
 {
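
The shift trick in protect_clean_gpte() moves pte.dirty down onto the writable bit, so dirty gptes keep their write permission and clean ones lose it. A stand-alone check of that logic, with mask values assumed from the x86 definitions rather than copied from the headers:

    #include <assert.h>

    #define PT_WRITABLE_SHIFT 1
    #define PT_WRITABLE_MASK  (1u << PT_WRITABLE_SHIFT)
    #define PT_DIRTY_SHIFT    6
    #define ACC_WRITE_MASK    PT_WRITABLE_MASK

    static void protect_clean_gpte(unsigned *access, unsigned gpte)
    {
            unsigned mask = (unsigned)~ACC_WRITE_MASK;

            /* Dirty gptes keep their write permission; clean ones lose it */
            mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
            *access &= mask;
    }

    int main(void)
    {
            unsigned access = 7;                    /* user | write | exec */

            protect_clean_gpte(&access, 0);         /* clean pte */
            assert(!(access & ACC_WRITE_MASK));     /* write stripped */

            access = 7;
            protect_clean_gpte(&access, 1u << PT_DIRTY_SHIFT); /* dirty pte */
            assert(access & ACC_WRITE_MASK);        /* write kept */
            return 0;
    }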
@@ -3425,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
         return false;
 }

+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+        unsigned access;
+
+        access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+        access &= ~(gpte >> PT64_NX_SHIFT);
+
+        return access;
+}
+
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+        unsigned index;
+
+        index = level - 1;
+        index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+        return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
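
gpte_access() folds the NX bit in with a shift: after `gpte >> PT64_NX_SHIFT` the NX bit lands on bit 0, exactly where ACC_EXEC_MASK lives, so ANDing with its complement clears the exec permission. A stand-alone check under assumed mask values (PT_WRITABLE = bit 1, PT_USER = bit 2, ACC_EXEC = bit 0, NX = bit 63):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            /* A writable, user-accessible pte with NX set */
            uint64_t gpte = (1u << 1) | (1u << 2) | (1ull << 63);
            unsigned access;

            /* Start from W|U plus the implicit exec permission (bit 0) */
            access = (unsigned)(gpte & ((1u << 1) | (1u << 2))) | 1u;
            /* pte.nx lands on bit 0 after the shift and clears exec */
            access &= ~(unsigned)(gpte >> 63);

            assert(access == ((1u << 1) | (1u << 2)));  /* write + user, no exec */
            return 0;
    }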
@@ -3494,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
         }
 }

+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+        unsigned bit, byte, pfec;
+        u8 map;
+        bool fault, x, w, u, wf, uf, ff, smep;
+
+        smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+        for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+                pfec = byte << 1;
+                map = 0;
+                wf = pfec & PFERR_WRITE_MASK;
+                uf = pfec & PFERR_USER_MASK;
+                ff = pfec & PFERR_FETCH_MASK;
+                for (bit = 0; bit < 8; ++bit) {
+                        x = bit & ACC_EXEC_MASK;
+                        w = bit & ACC_WRITE_MASK;
+                        u = bit & ACC_USER_MASK;
+
+                        /* Not really needed: !nx will cause pte.nx to fault */
+                        x |= !mmu->nx;
+                        /* Allow supervisor writes if !cr0.wp */
+                        w |= !is_write_protection(vcpu) && !uf;
+                        /* Disallow supervisor fetches of user code if cr4.smep */
+                        x &= !(smep && u && !uf);
+
+                        fault = (ff && !x) || (uf && !u) || (wf && !w);
+                        map |= fault << bit;
+                }
+                mmu->permissions[byte] = map;
+        }
+}
+
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+        u8 map;
+        unsigned level, root_level = mmu->root_level;
+        const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+        if (root_level == PT32E_ROOT_LEVEL)
+                --root_level;
+        /* PT_PAGE_TABLE_LEVEL always terminates */
+        map = 1 | (1 << ps_set_index);
+        for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+                if (level <= PT_PDPE_LEVEL
+                    && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+                        map |= 1 << (ps_set_index | (level - 1));
+        }
+        mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
                                         struct kvm_mmu *context,
                                         int level)
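
Hand-evaluating update_last_pte_bitmap() for a 64-bit guest shows what the table ends up holding; the level constants below are assumptions matching the usual KVM definitions (PT_PAGE_TABLE_LEVEL = 1, PT_DIRECTORY_LEVEL = 2, PT_PDPE_LEVEL = 3), with root_level = 4:

    #include <assert.h>

    int main(void)
    {
            unsigned map, level;
            const unsigned ps_set_index = 1 << 2;   /* bit 2 of index: ps */

            map = 1 | (1 << ps_set_index);          /* level 1 always terminates */
            for (level = 2; level <= 4; ++level)    /* root_level == 4 */
                    if (level <= 3)                 /* PS pages only up to PT_PDPE_LEVEL */
                            map |= 1 << (ps_set_index | (level - 1));

            /* ps=0: only level 1 terminates; ps=1: levels 1-3 terminate */
            assert(map == 0x71);
            return 0;
    }

With the table precomputed, is_last_gpte() answers a walk-termination query with a single AND instead of level and pte.ps comparisons.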
@@ -3502,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
         context->root_level = level;

         reset_rsvds_bits_mask(vcpu, context);
+        update_permission_bitmask(vcpu, context);
+        update_last_pte_bitmap(vcpu, context);

         ASSERT(is_pae(vcpu));
         context->new_cr3 = paging_new_cr3;
@@ -3530,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
         context->root_level = PT32_ROOT_LEVEL;

         reset_rsvds_bits_mask(vcpu, context);
+        update_permission_bitmask(vcpu, context);
+        update_last_pte_bitmap(vcpu, context);

         context->new_cr3 = paging_new_cr3;
         context->page_fault = paging32_page_fault;
@@ -3590,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
                 context->gva_to_gpa = paging32_gva_to_gpa;
         }

+        update_permission_bitmask(vcpu, context);
+        update_last_pte_bitmap(vcpu, context);
+
         return 0;
 }
@@ -3665,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
                 g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
         }

+        update_permission_bitmask(vcpu, g_context);
+        update_last_pte_bitmap(vcpu, g_context);
+
         return 0;
 }
arch/x86/kvm/mmu.h (12 additions, 13 deletions)
@@ -18,8 +18,10 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
         return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }

-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-                                           bool write_fault, bool user_fault,
-                                           unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+                                    unsigned pfec)
 {
-        if (unlikely(write_fault && !is_writable_pte(pte)
-              && (user_fault || is_write_protection(vcpu))))
-                return false;
-
-        if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-                return false;
-
-        return true;
+        return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
+
 #endif
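
A caller sketch, showing how a guest page-table walker might build the error code and consult the table; the function and variable names here are illustrative, not the actual paging_tmpl.h code, while the PFERR_* masks are the existing KVM error-code bits:

    /* Hypothetical caller of permission_fault(); illustrative only. */
    static int walk_check(struct kvm_mmu *mmu, unsigned pte_access,
                          bool write_fault, bool user_fault, bool fetch_fault)
    {
            unsigned pfec = (write_fault ? PFERR_WRITE_MASK : 0)
                          | (user_fault  ? PFERR_USER_MASK  : 0)
                          | (fetch_fault ? PFERR_FETCH_MASK : 0);

            /* One table lookup replaces the old chain of per-pte checks */
            return permission_fault(mmu, pte_access, pfec) ? -EACCES : 0;
    }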
