
Commit d783e0b

mrybczynska authored and axboe committed
nvme: avoid cqe corruption when update at the same time as read
Make sure the CQE phase (validity) is read before the rest of the
structure. The phase bit is at the highest address, and on most platforms
the CQE read happens from lower to upper addresses and is done by multiple
non-atomic loads. If the structure is updated by PCI while the processor is
reading it, the processor may get a corrupted copy.

The addition of the new nvme_cqe_valid function that verifies the validity
bit also allows refactoring of the other CQE read sequences.

Signed-off-by: Marta Rybczynska <[email protected]>
Reviewed-by: Johannes Thumshirn <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Keith Busch <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
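For illustration only, below is a minimal user-space sketch of the phase-bit pattern the commit message describes: test the phase bit of the status word first, and only then copy and consume the rest of the entry. The names and types here (fake_cqe, fake_queue, cqe_valid, QDEPTH) are simplified stand-ins for struct nvme_completion and struct nvme_queue, not the real driver code, and the real driver additionally deals with endianness and locking.

/*
 * Simplified model of the phase-bit validity check added by this patch.
 * Bit 0 of the status word is the phase tag; an entry is only consumed
 * once that bit matches the queue's current expected phase.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define QDEPTH 4

struct fake_cqe {
	uint64_t result;	/* command-specific result (placeholder) */
	uint16_t sq_head;
	uint16_t sq_id;
	uint16_t command_id;
	uint16_t status;	/* bit 0 is the phase tag */
};

struct fake_queue {
	struct fake_cqe cqes[QDEPTH];
	uint16_t cq_head;
	uint16_t cq_phase;
};

/* Mirror of the helper the patch adds: test only the phase bit. */
static inline bool cqe_valid(const struct fake_queue *q, uint16_t head,
			     uint16_t phase)
{
	return (q->cqes[head].status & 1) == phase;
}

static void process_cq(struct fake_queue *q)
{
	uint16_t head = q->cq_head;
	uint16_t phase = q->cq_phase;

	/*
	 * Check the phase bit *before* copying the entry, so a partially
	 * written CQE is never treated as a completed command.
	 */
	while (cqe_valid(q, head, phase)) {
		struct fake_cqe cqe = q->cqes[head];

		printf("completed command %u, status %u\n",
		       (unsigned)cqe.command_id,
		       (unsigned)(cqe.status >> 1));

		if (++head == QDEPTH) {
			head = 0;
			phase = !phase;
		}
	}
	q->cq_head = head;
	q->cq_phase = phase;
}

int main(void)
{
	struct fake_queue q = { .cq_phase = 1 };

	/* Pretend the device posted one entry with the current phase. */
	q.cqes[0] = (struct fake_cqe){ .command_id = 7, .status = 1 };
	process_cq(&q);
	return 0;
}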
1 parent aaf2559 commit d783e0b

1 file changed, 13 insertions(+), 11 deletions(-)

drivers/nvme/host/pci.c

@@ -723,20 +723,24 @@ static void nvme_complete_rq(struct request *req)
 	blk_mq_end_request(req, error);
 }
 
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
+		u16 phase)
+{
+	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+}
+
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
 	u16 head, phase;
 
 	head = nvmeq->cq_head;
 	phase = nvmeq->cq_phase;
 
-	for (;;) {
+	while (nvme_cqe_valid(nvmeq, head, phase)) {
 		struct nvme_completion cqe = nvmeq->cqes[head];
-		u16 status = le16_to_cpu(cqe.status);
 		struct request *req;
 
-		if ((status & 1) != phase)
-			break;
 		if (++head == nvmeq->q_depth) {
 			head = 0;
 			phase = !phase;
@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
 			memcpy(req->special, &cqe, sizeof(cqe));
-		blk_mq_complete_request(req, status >> 1);
+		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
 
@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
-	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
-		return IRQ_NONE;
-	return IRQ_WAKE_THREAD;
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
-	    nvmeq->cq_phase) {
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
 		spin_unlock_irq(&nvmeq->q_lock);
