
Commit 3e3062b

kwachows authored and gregkh committed
accel/ivpu: Abort all jobs after command queue unregister
commit 5bbccad upstream.

With hardware scheduler it is not expected to receive JOB_DONE
notifications from NPU FW for the jobs aborted due to command queue
destroy JSM command.

Remove jobs submitted to unregistered command queue from
submitted_jobs_xa to avoid triggering a TDR in such case.

Add explicit submitted_jobs_lock that protects access to list of
submitted jobs which is now used to find jobs to abort.

Move context abort procedure to separate work queue not to slow down
handling of IPCs or DCT requests in case where job abort takes longer,
especially when destruction of the last job of a specific context
results in context release.

Signed-off-by: Karol Wachowski <[email protected]>
Signed-off-by: Maciej Falkowski <[email protected]>
Reviewed-by: Jacek Lawrynowicz <[email protected]>
Signed-off-by: Jacek Lawrynowicz <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
[ This backport removes all the lines from upstream commit related to
  the command queue UAPI, as it is not present in the 6.12 kernel and
  should not be backported. ]
Signed-off-by: Jacek Lawrynowicz <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent a39b5b6 commit 3e3062b
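At a glance, the patch swaps the submitted-jobs xarray's internal spinlock for a dedicated mutex and defers the MMU-fault context abort to a work item. The sketch below is condensed from the hunks that follow (declarations, error handling, and the OS-scheduler path omitted); it is orientation only, not a drop-in excerpt.

/* ivpu_drv.h: new members on struct ivpu_device */
struct work_struct context_abort_work;	/* deferred context abort */
struct mutex submitted_jobs_lock;	/* serializes users of submitted_jobs_xa */

/* ivpu_drv.c: device init wires up the work item and the DRM-managed mutex */
INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler);
ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);

/* ivpu_mmu.c: the MMU event handler no longer aborts inline, it only queues work */
queue_work(system_wq, &vdev->context_abort_work);

/* ivpu_job.c: the work handler aborts faulting contexts, then (hardware scheduler
 * only) reaps their jobs under the new mutex, since the FW sends no JOB_DONE for them */
mutex_lock(&vdev->submitted_jobs_lock);
xa_for_each(&vdev->submitted_jobs_xa, id, job)
	if (job->file_priv->aborted)
		ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
mutex_unlock(&vdev->submitted_jobs_lock);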

6 files changed, +77 -48 lines changed

drivers/accel/ivpu/ivpu_drv.c

+6 -26

@@ -36,8 +36,6 @@
 	__stringify(DRM_IVPU_DRIVER_MINOR) "."
 #endif
 
-static struct lock_class_key submitted_jobs_xa_lock_class_key;
-
 int ivpu_dbg_mask;
 module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
 MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
@@ -455,26 +453,6 @@ static const struct drm_driver driver = {
 	.minor = DRM_IVPU_DRIVER_MINOR,
 };
 
-static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
-{
-	struct ivpu_file_priv *file_priv;
-	unsigned long ctx_id;
-
-	mutex_lock(&vdev->context_list_lock);
-
-	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
-		if (!file_priv->has_mmu_faults || file_priv->aborted)
-			continue;
-
-		mutex_lock(&file_priv->lock);
-		ivpu_context_abort_locked(file_priv);
-		file_priv->aborted = true;
-		mutex_unlock(&file_priv->lock);
-	}
-
-	mutex_unlock(&vdev->context_list_lock);
-}
-
 static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
 {
 	struct ivpu_device *vdev = arg;
@@ -488,9 +466,6 @@ static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
 	case IVPU_HW_IRQ_SRC_IPC:
 		ivpu_ipc_irq_thread_handler(vdev);
 		break;
-	case IVPU_HW_IRQ_SRC_MMU_EVTQ:
-		ivpu_context_abort_invalid(vdev);
-		break;
 	case IVPU_HW_IRQ_SRC_DCT:
 		ivpu_pm_dct_irq_thread_handler(vdev);
 		break;
@@ -607,16 +582,21 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
 	xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
-	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
 	INIT_LIST_HEAD(&vdev->bo_list);
 
 	vdev->db_limit.min = IVPU_MIN_DB;
 	vdev->db_limit.max = IVPU_MAX_DB;
 
+	INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler);
+
 	ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
 	if (ret)
 		goto err_xa_destroy;
 
+	ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
+	if (ret)
+		goto err_xa_destroy;
+
 	ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
 	if (ret)
 		goto err_xa_destroy;
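A side note on the mutex setup (my reading of the DRM managed-resource helpers, not spelled out in the commit): drmm_mutex_init() ties the lock's lifetime to the drm_device and registers its teardown as a managed action, which is why no mutex_destroy() call is added and the existing err_xa_destroy label is reused unchanged:

/* DRM-managed init: cleanup of the new lock happens automatically when the
 * drm_device is released, so the error path needs no extra teardown. */
ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
if (ret)
	goto err_xa_destroy;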

drivers/accel/ivpu/ivpu_drv.h

+2

@@ -137,6 +137,7 @@ struct ivpu_device {
 	struct mutex context_list_lock; /* Protects user context addition/removal */
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
+	struct work_struct context_abort_work;
 
 	struct xarray db_xa;
 	struct xa_limit db_limit;
@@ -145,6 +146,7 @@ struct ivpu_device {
 	struct mutex bo_list_lock; /* Protects bo_list */
 	struct list_head bo_list;
 
+	struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
 	struct xarray submitted_jobs_xa;
 	struct ivpu_ipc_consumer job_done_consumer;
drivers/accel/ivpu/ivpu_job.c

+64 -18

@@ -335,6 +335,8 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
 
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
 		ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+
+	file_priv->aborted = true;
 }
 
 static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -467,23 +469,23 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id)
 {
 	struct ivpu_job *job;
 
-	xa_lock(&vdev->submitted_jobs_xa);
-	job = __xa_erase(&vdev->submitted_jobs_xa, job_id);
+	lockdep_assert_held(&vdev->submitted_jobs_lock);
 
+	job = xa_erase(&vdev->submitted_jobs_xa, job_id);
 	if (xa_empty(&vdev->submitted_jobs_xa) && job) {
 		vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
 					    vdev->busy_time);
 	}
 
-	xa_unlock(&vdev->submitted_jobs_xa);
-
 	return job;
 }
 
 static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 {
 	struct ivpu_job *job;
 
+	lockdep_assert_held(&vdev->submitted_jobs_lock);
+
 	job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
 	if (!job)
 		return -ENOENT;
@@ -501,6 +503,10 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 	ivpu_stop_job_timeout_detection(vdev);
 
 	ivpu_rpm_put(vdev);
+
+	if (!xa_empty(&vdev->submitted_jobs_xa))
+		ivpu_start_job_timeout_detection(vdev);
+
 	return 0;
 }
 
@@ -509,8 +515,12 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
 	struct ivpu_job *job;
 	unsigned long id;
 
+	mutex_lock(&vdev->submitted_jobs_lock);
+
 	xa_for_each(&vdev->submitted_jobs_xa, id, job)
 		ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+	mutex_unlock(&vdev->submitted_jobs_lock);
 }
 
 static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
@@ -535,15 +545,16 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
 		goto err_unlock_file_priv;
 	}
 
-	xa_lock(&vdev->submitted_jobs_xa);
+	mutex_lock(&vdev->submitted_jobs_lock);
+
 	is_first_job = xa_empty(&vdev->submitted_jobs_xa);
-	ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
-				&file_priv->job_id_next, GFP_KERNEL);
+	ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
+			      &file_priv->job_id_next, GFP_KERNEL);
 	if (ret < 0) {
 		ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
 			 file_priv->ctx.id);
 		ret = -EBUSY;
-		goto err_unlock_submitted_jobs_xa;
+		goto err_unlock_submitted_jobs;
 	}
 
 	ret = ivpu_cmdq_push_job(cmdq, job);
@@ -565,19 +576,21 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
 		 job->job_id, file_priv->ctx.id, job->engine_idx, priority,
 		 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
 
-	xa_unlock(&vdev->submitted_jobs_xa);
-
+	mutex_unlock(&vdev->submitted_jobs_lock);
 	mutex_unlock(&file_priv->lock);
 
-	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
+		mutex_lock(&vdev->submitted_jobs_lock);
 		ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+		mutex_unlock(&vdev->submitted_jobs_lock);
+	}
 
 	return 0;
 
 err_erase_xa:
-	__xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_unlock_submitted_jobs_xa:
-	xa_unlock(&vdev->submitted_jobs_xa);
+	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock_submitted_jobs:
+	mutex_unlock(&vdev->submitted_jobs_lock);
err_unlock_file_priv:
 	mutex_unlock(&file_priv->lock);
 	ivpu_rpm_put(vdev);
@@ -748,7 +761,6 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
 		       struct vpu_jsm_msg *jsm_msg)
 {
 	struct vpu_ipc_msg_payload_job_done *payload;
-	int ret;
 
 	if (!jsm_msg) {
 		ivpu_err(vdev, "IPC message has no JSM payload\n");
@@ -761,9 +773,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
 	}
 
 	payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
-	ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
-	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
-		ivpu_start_job_timeout_detection(vdev);
+
+	mutex_lock(&vdev->submitted_jobs_lock);
+	ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+	mutex_unlock(&vdev->submitted_jobs_lock);
 }
 
 void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
@@ -776,3 +789,36 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
 {
 	ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
 }
+
+void ivpu_context_abort_thread_handler(struct work_struct *work)
+{
+	struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+	struct ivpu_job *job;
+	unsigned long id;
+
+	mutex_lock(&vdev->context_list_lock);
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		if (!file_priv->has_mmu_faults || file_priv->aborted)
+			continue;
+
+		mutex_lock(&file_priv->lock);
+		ivpu_context_abort_locked(file_priv);
+		mutex_unlock(&file_priv->lock);
+	}
+	mutex_unlock(&vdev->context_list_lock);
+
+	if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+		return;
+	/*
+	 * In hardware scheduling mode NPU already has stopped processing jobs
+	 * and won't send us any further notifications, thus we have to free job related resources
+	 * and notify userspace
+	 */
+	mutex_lock(&vdev->submitted_jobs_lock);
+	xa_for_each(&vdev->submitted_jobs_xa, id, job)
+		if (job->file_priv->aborted)
+			ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
+	mutex_unlock(&vdev->submitted_jobs_lock);
+}
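The resulting locking rule: ivpu_job_signal_and_destroy() and the removal helper it calls must now run with submitted_jobs_lock held, which the added lockdep_assert_held() checks enforce. Every caller touched by this patch follows the same pattern; the job-done IPC path, for example:

/* Caller-side pattern introduced by this patch: job completion runs under the
 * same mutex the abort path uses to walk submitted_jobs_xa. */
mutex_lock(&vdev->submitted_jobs_lock);
ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
mutex_unlock(&vdev->submitted_jobs_lock);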

drivers/accel/ivpu/ivpu_job.h

+1

@@ -64,6 +64,7 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
 
 void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
 void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+void ivpu_context_abort_thread_handler(struct work_struct *work);
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev);
drivers/accel/ivpu/ivpu_mmu.c

+1 -2

@@ -917,8 +917,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
 		REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
 	}
 
-	if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ))
-		ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
+	queue_work(system_wq, &vdev->context_abort_work);
 }
 
 void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
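Instead of pushing an IVPU_HW_IRQ_SRC_MMU_EVTQ event for the shared IRQ thread, the MMU event-queue handler now queues the abort on the system workqueue. Per the commit message, this keeps a long-running abort (for example one that releases the last job of a context, and with it the context itself) from delaying IPC and DCT handling in that thread:

/* Defer the potentially slow context abort to the system workqueue; the IRQ
 * thread keeps servicing IPC and DCT events without waiting on it. */
queue_work(system_wq, &vdev->context_abort_work);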

drivers/accel/ivpu/ivpu_sysfs.c

+3 -2

@@ -30,11 +30,12 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *buf)
 	struct ivpu_device *vdev = to_ivpu_device(drm);
 	ktime_t total, now = 0;
 
-	xa_lock(&vdev->submitted_jobs_xa);
+	mutex_lock(&vdev->submitted_jobs_lock);
+
 	total = vdev->busy_time;
 	if (!xa_empty(&vdev->submitted_jobs_xa))
 		now = ktime_sub(ktime_get(), vdev->busy_start_ts);
-	xa_unlock(&vdev->submitted_jobs_xa);
+	mutex_unlock(&vdev->submitted_jobs_lock);
 
 	return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
 }
