Skip to content

Commit 82b057c

Browse files
rakeshpanditChristoph Hellwig
authored and
Christoph Hellwig
committed
nvme-pci: fix multiple ctrl removal scheduling
Commit c5f6ce9 tries to address multiple resets but fails as work_busy doesn't involve any synchronization and can fail. This is reproducible easily as can be seen by WARNING below which is triggered with line: WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING) Allowing multiple resets can result in multiple controller removal as well if different conditions inside nvme_reset_work fail and which might deadlock on device_release_driver. [ 480.327007] WARNING: CPU: 3 PID: 150 at drivers/nvme/host/pci.c:1900 nvme_reset_work+0x36c/0xec0 [ 480.327008] Modules linked in: rfcomm fuse nf_conntrack_netbios_ns nf_conntrack_broadcast... [ 480.327044] btusb videobuf2_core ghash_clmulni_intel snd_hwdep cfg80211 acer_wmi hci_uart.. [ 480.327065] CPU: 3 PID: 150 Comm: kworker/u16:2 Not tainted 4.12.0-rc1+ #13 [ 480.327065] Hardware name: Acer Predator G9-591/Mustang_SLS, BIOS V1.10 03/03/2016 [ 480.327066] Workqueue: nvme nvme_reset_work [ 480.327067] task: ffff880498ad8000 task.stack: ffffc90002218000 [ 480.327068] RIP: 0010:nvme_reset_work+0x36c/0xec0 [ 480.327069] RSP: 0018:ffffc9000221bdb8 EFLAGS: 00010246 [ 480.327070] RAX: 0000000000460000 RBX: ffff880498a98128 RCX: dead000000000200 [ 480.327070] RDX: 0000000000000001 RSI: ffff8804b1028020 RDI: ffff880498a98128 [ 480.327071] RBP: ffffc9000221be50 R08: 0000000000000000 R09: 0000000000000000 [ 480.327071] R10: ffffc90001963ce8 R11: 000000000000020d R12: ffff880498a98000 [ 480.327072] R13: ffff880498a53500 R14: ffff880498a98130 R15: ffff880498a98128 [ 480.327072] FS: 0000000000000000(0000) GS:ffff8804c1cc0000(0000) knlGS:0000000000000000 [ 480.327073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 480.327074] CR2: 00007ffcf3c37f78 CR3: 0000000001e09000 CR4: 00000000003406e0 [ 480.327074] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 480.327075] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 480.327075] Call Trace: [ 480.327079] ? 
__switch_to+0x227/0x400 [ 480.327081] process_one_work+0x18c/0x3a0 [ 480.327082] worker_thread+0x4e/0x3b0 [ 480.327084] kthread+0x109/0x140 [ 480.327085] ? process_one_work+0x3a0/0x3a0 [ 480.327087] ? kthread_park+0x60/0x60 [ 480.327102] ret_from_fork+0x2c/0x40 [ 480.327103] Code: e8 5a dc ff ff 85 c0 41 89 c1 0f..... This patch addresses the problem by using the state of the controller to decide whether a reset should be queued or not, as state changes are synchronized using the controller spinlock. Also, cancel_work_sync is used to make sure remove cancels the reset_work and waits for it to finish. This patch also changes the return value from -ENODEV to the more appropriate -EBUSY if nvme_reset fails to change state. Fixes: c5f6ce9 ("nvme: don't schedule multiple resets") Signed-off-by: Rakesh Pandit <[email protected]> Reviewed-by: Sagi Grimberg <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 82654b6 commit 82b057c

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

drivers/nvme/host/pci.c

+6-7
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
13671367
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
13681368

13691369
/* If there is a reset ongoing, we shouldn't reset again. */
1370-
if (work_busy(&dev->reset_work))
1370+
if (dev->ctrl.state == NVME_CTRL_RESETTING)
13711371
return false;
13721372

13731373
/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
19031903
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
19041904
int result = -ENODEV;
19051905

1906-
if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
1906+
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
19071907
goto out;
19081908

19091909
/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
19131913
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
19141914
nvme_dev_disable(dev, false);
19151915

1916-
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
1917-
goto out;
1918-
19191916
result = nvme_pci_enable(dev);
19201917
if (result)
19211918
goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
20092006
{
20102007
if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
20112008
return -ENODEV;
2012-
if (work_busy(&dev->reset_work))
2013-
return -ENODEV;
2009+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
2010+
return -EBUSY;
20142011
if (!queue_work(nvme_workq, &dev->reset_work))
20152012
return -EBUSY;
20162013
return 0;
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
21362133
if (result)
21372134
goto release_pools;
21382135

2136+
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
21392137
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
21402138

21412139
queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
21792177

21802178
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
21812179

2180+
cancel_work_sync(&dev->reset_work);
21822181
pci_set_drvdata(pdev, NULL);
21832182

21842183
if (!pci_device_is_present(pdev)) {

0 commit comments

Comments
 (0)