From: Christoph Hellwig <[email protected]>
To: Jens Axboe <[email protected]>
Cc: Christoph Hellwig <[email protected]>,
	Chaitanya Kulkarni <[email protected]>,
	"Michael S. Tsirkin" <[email protected]>,
	Jason Wang <[email protected]>, Keith Busch <[email protected]>,
	Sagi Grimberg <[email protected]>,
	Pavel Begunkov <[email protected]>,
	"[email protected]" <[email protected]>,
	"[email protected]" <[email protected]>,
	"[email protected]" <[email protected]>,
	"[email protected]" <[email protected]>
Subject: Re: don't reorder requests passed to ->queue_rqs
Date: Mon, 18 Nov 2024 16:23:57 +0100
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

On Thu, Nov 14, 2024 at 08:14:11AM -0700, Jens Axboe wrote:
> > that goes back to the scheme currently used upstream in nvme and virtio
> > that just cuts off the list at a hctx change instead of moving the
> > requests one by one now that the block layer doesn't mess up the order.
> 
> I think that would be useful. I can test.

I played with this a bit, and I really hate keeping prev for the
unlinking.  So this version uses a slightly different approach:

 - a first pass that just rolls through all requests and does the prep
   checks.  If a check fails, that is recorded in a new field in the iod
   (which could also become a request flag).
 - the submission loop inside sq_lock becomes a bit more complex, as it
   has to break out of the inner loop on hctx changes and to move the
   previously failed commands to the requeue list.

But this completely gets rid of the prev tracking and all list
manipulation except for the final list_pop.
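
To make the control flow easier to follow, here is a small standalone
sketch of that two-pass scheme.  It uses simplified stand-in types and
names (struct req, struct req_list, prep(), ...) instead of the real
struct request / rq_list / nvme_queue APIs, so treat it purely as an
illustration of the idea, not as driver code:

/* Illustrative stand-in types; not the real struct request / rq_list API. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct queue { int id; };		/* stand-in for struct nvme_queue */

struct req {
	struct queue *q;		/* stand-in for mq_hctx->driver_data */
	bool batch_failed;		/* recorded by the first pass */
	struct req *next;		/* stand-in for rq_next */
};

struct req_list { struct req *head, *tail; };

static struct req *list_pop(struct req_list *l)
{
	struct req *r = l->head;

	if (r) {
		l->head = r->next;
		if (!l->head)
			l->tail = NULL;
		r->next = NULL;
	}
	return r;
}

static void list_add_tail(struct req_list *l, struct req *r)
{
	r->next = NULL;
	if (l->tail)
		l->tail->next = r;
	else
		l->head = r;
	l->tail = r;
}

/* Stand-in for the NVMEQ_ENABLED / nvme_check_ready / nvme_prep_rq checks. */
static bool prep(struct req *r)
{
	return r->q->id >= 0;		/* arbitrary placeholder condition */
}

/* Submit one run of requests that all target the same queue. */
static void submit_run(struct req_list *list, struct req_list *requeue)
{
	struct queue *q = list->head->q;
	struct req *r;

	/* spin_lock(&nvmeq->sq_lock) in the real driver */
	while ((r = list_pop(list))) {
		if (r->batch_failed)
			list_add_tail(requeue, r);
		else
			printf("submit request to queue %d\n", q->id);

		/* end the run when the next request targets another queue */
		if (!list->head || list->head->q != q)
			break;
	}
	/* write the doorbell once, spin_unlock(&nvmeq->sq_lock) */
}

/* Assumes a non-empty list, as ->queue_rqs is only called with one. */
static void queue_rqs(struct req_list *list)
{
	struct req_list requeue = { 0 };
	struct req *r;

	/* first pass: prep every request, record failures in the request */
	for (r = list->head; r; r = r->next)
		r->batch_failed = !prep(r);

	/* second pass: one locked run per stretch of same-queue requests */
	do {
		submit_run(list, &requeue);
	} while (list->head);

	*list = requeue;		/* caller resubmits whatever is left */
}

int main(void)
{
	struct queue q0 = { 0 }, q1 = { 1 };
	struct req reqs[4] = { { .q = &q0 }, { .q = &q0 },
			       { .q = &q1 }, { .q = &q1 } };
	struct req_list list = { 0 };
	int i;

	for (i = 0; i < 4; i++)
		list_add_tail(&list, &reqs[i]);
	queue_rqs(&list);
	return 0;
}

The sketch checks the new list head after each pop instead of peeking at
rq_next, but the effect is the same as in the patch below: each locked
run covers one contiguous stretch of same-queue requests and ends with a
single doorbell write.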

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5f2e3ad2cc52..3470dae73a8c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -233,6 +233,7 @@ struct nvme_iod {
 	struct nvme_request req;
 	struct nvme_command cmd;
 	bool aborted;
+	bool batch_failed;
 	s8 nr_allocations;	/* PRP list pool allocations. 0 means small
 				   pool in use */
 	unsigned int dma_len;	/* length of single DMA segment mapping */
@@ -843,6 +844,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	blk_status_t ret;
 
 	iod->aborted = false;
+	iod->batch_failed = false;
 	iod->nr_allocations = -1;
 	iod->sgt.nents = 0;
 
@@ -904,54 +906,51 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 }
 
-static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
+static void nvme_submit_cmds(struct rq_list *rqlist,
+		struct rq_list *requeue_list)
 {
-	struct request *req;
+	struct request *req = rq_list_peek(rqlist);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 
 	spin_lock(&nvmeq->sq_lock);
 	while ((req = rq_list_pop(rqlist))) {
 		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
-		nvme_sq_copy_cmd(nvmeq, &iod->cmd);
+		if (iod->batch_failed)
+			rq_list_add_tail(requeue_list, req);
+		else
+			nvme_sq_copy_cmd(nvmeq, &iod->cmd);
+
+		if (!req->rq_next || req->mq_hctx != req->rq_next->mq_hctx)
+			break;
 	}
 	nvme_write_sq_db(nvmeq, true);
 	spin_unlock(&nvmeq->sq_lock);
 }
 
-static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
-{
-	/*
-	 * We should not need to do this, but we're still using this to
-	 * ensure we can drain requests on a dying queue.
-	 */
-	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
-		return false;
-	if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
-		return false;
-
-	return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
-}
-
 static void nvme_queue_rqs(struct rq_list *rqlist)
 {
-	struct rq_list submit_list = { };
 	struct rq_list requeue_list = { };
-	struct nvme_queue *nvmeq = NULL;
 	struct request *req;
 
-	while ((req = rq_list_pop(rqlist))) {
-		if (nvmeq && nvmeq != req->mq_hctx->driver_data)
-			nvme_submit_cmds(nvmeq, &submit_list);
-		nvmeq = req->mq_hctx->driver_data;
+	rq_list_for_each(rqlist, req) {
+		struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
-		if (nvme_prep_rq_batch(nvmeq, req))
-			rq_list_add_tail(&submit_list, req);
-		else
-			rq_list_add_tail(&requeue_list, req);
+		/*
+		 * We should not need to do this, but we're still using this to
+		 * ensure we can drain requests on a dying queue.
+		 */
+		if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)) ||
+		    unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)) ||
+		    unlikely(nvme_prep_rq(nvmeq->dev, req) != BLK_STS_OK))
+			iod->batch_failed = true;
 	}
 
-	if (nvmeq)
-		nvme_submit_cmds(nvmeq, &submit_list);
+	do {
+		nvme_submit_cmds(rqlist, &requeue_list);
+	} while (!rq_list_empty(rqlist));
+
 	*rqlist = requeue_list;
 }
 
