
Commit 081f323

paulusmack authored and avikivity committed
KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa() with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock. This is not good, since kvmppc_pin_guest_page() calls down_read() and get_user_pages_fast(), both of which can sleep. This bug was introduced in 2e25aa5 ("KVM: PPC: Book3S HV: Make virtual processor area registration more robust").

This arranges to drop those spinlocks before calling kvmppc_pin_guest_page() and re-take them afterwards. Dropping the vcore lock in kvmppc_run_core() means we have to set the vcore_state field to VCORE_RUNNING before we drop the lock, so that other vcpus won't try to run this vcore.

Signed-off-by: Paul Mackerras <[email protected]>
Acked-by: Alexander Graf <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
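The fix follows a standard kernel pattern: snapshot the protected field, drop the lock, do the sleeping work, retake the lock, and retry if the field changed in the meantime. Below is a minimal userspace sketch of that pattern, assuming a pthread mutex in place of the vpa_update_lock spinlock; pin_page() and unpin_page() are hypothetical stand-ins for kvmppc_pin_guest_page() and kvmppc_unpin_guest_page(), not the kernel's API.

/*
 * Userspace sketch (not kernel code) of the retry loop used by the
 * new kvmppc_update_vpa().  Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long next_gpa = 0x1000;	/* protected by update_lock */

/* Stand-in for kvmppc_pin_guest_page(): may sleep, so call it unlocked. */
static void *pin_page(unsigned long gpa)
{
	usleep(1000);			/* simulate get_user_pages_fast() */
	return (void *)gpa;		/* pretend this is the pinned mapping */
}

static void unpin_page(void *va)
{
	(void)va;			/* nothing to release in this sketch */
}

static void *update_vpa(void)
{
	unsigned long gpa;
	void *va;

	pthread_mutex_lock(&update_lock);
	for (;;) {
		gpa = next_gpa;		/* snapshot under the lock */
		pthread_mutex_unlock(&update_lock);
		va = gpa ? pin_page(gpa) : NULL;	/* sleeping work, no lock held */
		pthread_mutex_lock(&update_lock);
		if (gpa == next_gpa)	/* snapshot still current: done */
			break;
		if (va)			/* raced with a new registration: */
			unpin_page(va);	/* drop the stale pin and retry */
	}
	pthread_mutex_unlock(&update_lock);
	return va;
}

int main(void)
{
	printf("pinned %p\n", update_vpa());
	return 0;
}

The loop terminates as soon as one pass completes without a concurrent re-registration, which is exactly the check the patched kvmppc_update_vpa() performs against vpap->next_gpa.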
1 parent f961f72

1 file changed

arch/powerpc/kvm/book3s_hv.c

Lines changed: 66 additions & 30 deletions
@@ -268,24 +268,45 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 	return err;
 }
 
-static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 {
+	struct kvm *kvm = vcpu->kvm;
 	void *va;
 	unsigned long nb;
+	unsigned long gpa;
 
-	vpap->update_pending = 0;
-	va = NULL;
-	if (vpap->next_gpa) {
-		va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
-		if (nb < vpap->len) {
-			/*
-			 * If it's now too short, it must be that userspace
-			 * has changed the mappings underlying guest memory,
-			 * so unregister the region.
-			 */
+	/*
+	 * We need to pin the page pointed to by vpap->next_gpa,
+	 * but we can't call kvmppc_pin_guest_page under the lock
+	 * as it does get_user_pages() and down_read().  So we
+	 * have to drop the lock, pin the page, then get the lock
+	 * again and check that a new area didn't get registered
+	 * in the meantime.
+	 */
+	for (;;) {
+		gpa = vpap->next_gpa;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		va = NULL;
+		nb = 0;
+		if (gpa)
+			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		if (gpa == vpap->next_gpa)
+			break;
+		/* sigh... unpin that one and try again */
+		if (va)
 			kvmppc_unpin_guest_page(kvm, va);
-			va = NULL;
-		}
+	}
+
+	vpap->update_pending = 0;
+	if (va && nb < vpap->len) {
+		/*
+		 * If it's now too short, it must be that userspace
+		 * has changed the mappings underlying guest memory,
+		 * so unregister the region.
+		 */
+		kvmppc_unpin_guest_page(kvm, va);
+		va = NULL;
 	}
 	if (vpap->pinned_addr)
 		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
@@ -296,20 +317,18 @@ static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
 
 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
-
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	if (vcpu->arch.vpa.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
 		init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
 	}
 	if (vcpu->arch.dtl.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
 		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
 		vcpu->arch.dtl_index = 0;
 	}
 	if (vcpu->arch.slb_shadow.update_pending)
-		kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 }
 
@@ -800,12 +819,39 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
-	int ptid, i;
+	int ptid, i, need_vpa_update;
 
 	/* don't start if any threads have a signal pending */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+	need_vpa_update = 0;
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		if (signal_pending(vcpu->arch.run_task))
 			return 0;
+		need_vpa_update |= vcpu->arch.vpa.update_pending |
+			vcpu->arch.slb_shadow.update_pending |
+			vcpu->arch.dtl.update_pending;
+	}
+
+	/*
+	 * Initialize *vc, in particular vc->vcore_state, so we can
+	 * drop the vcore lock if necessary.
+	 */
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_state = VCORE_RUNNING;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+
+	/*
+	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
+	 * which can't be called with any spinlocks held.
+	 */
+	if (need_vpa_update) {
+		spin_unlock(&vc->lock);
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			kvmppc_update_vpas(vcpu);
+		spin_lock(&vc->lock);
+	}
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -838,20 +884,10 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
 
-	vc->n_woken = 0;
-	vc->nap_count = 0;
-	vc->entry_exit_count = 0;
-	vc->vcore_state = VCORE_RUNNING;
 	vc->stolen_tb += mftb() - vc->preempt_tb;
-	vc->in_guest = 0;
 	vc->pcpu = smp_processor_id();
-	vc->napping_threads = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
-		if (vcpu->arch.vpa.update_pending ||
-		    vcpu->arch.slb_shadow.update_pending ||
-		    vcpu->arch.dtl.update_pending)
-			kvmppc_update_vpas(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 	/* Grab any remaining hw threads so they can't go into the kernel */
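One subtlety in the last two hunks is worth spelling out: kvmppc_run_core() now sets vc->vcore_state to VCORE_RUNNING before dropping the vcore lock, so any other vcpu that takes the lock while the vpa updates are in flight sees the core as already claimed. Here is a hedged userspace sketch of why that ordering matters; the two-state enum and try_run_core() are illustrative assumptions for the example, not the kernel's actual types or API.

/* Userspace sketch (not kernel code): claim state before unlocking. */
#include <pthread.h>
#include <stdio.h>

enum core_state { CORE_INACTIVE, CORE_RUNNING };

static pthread_mutex_t core_lock = PTHREAD_MUTEX_INITIALIZER;
static enum core_state core_state = CORE_INACTIVE;

/*
 * Analogue of kvmppc_run_core(): mark the core running, then drop
 * the lock around work that may sleep.  If the state were set only
 * after re-taking the lock, a second runner could claim the core in
 * the window where the lock is dropped.
 */
static void run_core(void)
{
	pthread_mutex_lock(&core_lock);
	core_state = CORE_RUNNING;	/* claim BEFORE unlocking */
	pthread_mutex_unlock(&core_lock);

	/* ... sleeping work (the vpa updates) happens here ... */

	pthread_mutex_lock(&core_lock);
	/* ... run the guest, then mark the core idle again ... */
	core_state = CORE_INACTIVE;
	pthread_mutex_unlock(&core_lock);
}

/* Analogue of another vcpu: only start the core if it is idle. */
static int try_run_core(void)
{
	int claimed;

	pthread_mutex_lock(&core_lock);
	claimed = (core_state == CORE_INACTIVE);
	if (claimed)
		core_state = CORE_RUNNING;
	pthread_mutex_unlock(&core_lock);
	return claimed;
}

int main(void)
{
	run_core();
	printf("core idle again, second runner claims it: %d\n", try_run_core());
	return 0;
}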
