From: Keith Busch <[email protected]>
To: <[email protected]>, <[email protected]>, <[email protected]>,
	<[email protected]>, <[email protected]>,
	<[email protected]>
Cc: <[email protected]>, <[email protected]>,
	Keith Busch <[email protected]>, Keith Busch <[email protected]>
Subject: [PATCHv11 09/10] nvme: register fdp queue limits
Date: Thu, 5 Dec 2024 17:53:07 -0800
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
From: Keith Busch <[email protected]>
Register the device's data placement limits with the block layer if the
feature is supported. This only registers the limits; nothing beyond
reporting these attributes happens in this patch.
Signed-off-by: Keith Busch <[email protected]>
---
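
Note for reviewers (not part of the commit message): once the
max_write_streams and write_stream_granularity queue limits from patches
05 and 06 of this series are registered here, they should surface through
the block queue's sysfs directory. The snippet below is a minimal
userspace sketch under that assumption; the device name and attribute
paths are assumptions based on the limit names, not something this patch
adds:

	/* fdp_limits.c: hypothetical probe of the new queue limits */
	#include <stdio.h>

	int main(void)
	{
		const char *attrs[] = {
			"/sys/block/nvme0n1/queue/max_write_streams",
			"/sys/block/nvme0n1/queue/write_stream_granularity",
		};
		char buf[64];

		for (int i = 0; i < 2; i++) {
			FILE *f = fopen(attrs[i], "r");

			if (!f)
				continue;	/* kernel without this series */
			if (fgets(buf, sizeof(buf), f))
				printf("%s: %s", attrs[i], buf);
			fclose(f);
		}
		return 0;
	}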
 drivers/nvme/host/core.c | 116 +++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h |   4 ++
 include/linux/nvme.h     |  73 ++++++++++++++++++++++++
 3 files changed, 193 insertions(+)
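
A worked illustration of the Get Features (FDP) result decode used by
nvme_query_fdp_info() below, as a standalone sketch: the constants mirror
the NVME_FDP_* values added in this patch, and the sample result value is
made up:

	#include <stdio.h>
	#include <stdint.h>

	#define NVME_FDP_FDPE		(1 << 0)
	#define NVME_FDP_FDPCIDX_SHIFT	8
	#define NVME_FDP_FDPCIDX_MASK	0xff

	int main(void)
	{
		/* example feature result: FDP enabled, config index 2 */
		uint32_t fdp = 0x201;

		if (fdp & NVME_FDP_FDPE)
			printf("FDP enabled, config index %u\n",
			       (fdp >> NVME_FDP_FDPCIDX_SHIFT) &
			       NVME_FDP_FDPCIDX_MASK);
		return 0;
	}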
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 36c44be98e38c..410a77de92f88 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -38,6 +38,8 @@ struct nvme_ns_info {
 	u32 nsid;
 	__le32 anagrpid;
 	u8 pi_offset;
+	u16 endgid;
+	u64 runs;
 	bool is_shared;
 	bool is_readonly;
 	bool is_ready;
@@ -1613,6 +1615,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 	info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
 	info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
 	info->is_ready = true;
+	info->endgid = le16_to_cpu(id->endgid);
 	if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
 		dev_info(ctrl->device,
 			 "Ignoring bogus Namespace Identifiers\n");
@@ -1653,6 +1656,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
 		info->is_ready = id->nstat & NVME_NSTAT_NRDY;
 		info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
 		info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
+		info->endgid = le16_to_cpu(id->endgid);
 	}
 	kfree(id);
 	return ret;
@@ -2147,6 +2151,101 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
 	return ret;
 }
 
+static int nvme_check_fdp(struct nvme_ns *ns, struct nvme_ns_info *info,
+		u8 fdp_idx)
+{
+	struct nvme_fdp_config_log hdr, *h;
+	size_t size = sizeof(hdr);
+	int i, n, ret;
+	void *log;
+
+	info->runs = 0;
+	ret = nvme_get_log_lsi(ns->ctrl, 0, NVME_LOG_FDP_CONFIG, 0, NVME_CSI_NVM,
+			(void *)&hdr, size, 0, info->endgid);
+	if (ret)
+		return ret;
+
+	size = le32_to_cpu(hdr.sze);
+	log = kzalloc(size, GFP_KERNEL);
+	if (!log)
+		return -ENOMEM;
+
+	h = log;
+	ret = nvme_get_log_lsi(ns->ctrl, 0, NVME_LOG_FDP_CONFIG, 0, NVME_CSI_NVM,
+			log, size, 0, info->endgid);
+	if (ret)
+		goto out;
+
+	n = le16_to_cpu(h->numfdpc) + 1;
+	if (fdp_idx >= n)
+		goto out;
+
+	log = h->configs;
+	for (i = 0; i < n; i++) {
+		struct nvme_fdp_config_desc *config = log;
+
+		if (i == fdp_idx) {
+			info->runs = le64_to_cpu(config->runs);
+			break;
+		}
+		log += le16_to_cpu(config->size);
+	}
+out:
+	kfree(h);
+	return ret;
+}
+
+static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+	struct nvme_ns_head *head = ns->head;
+	struct nvme_fdp_ruh_status *ruhs;
+	struct nvme_command c = {};
+	u32 fdp, fdp_idx;
+	int size, ret;
+
+	ret = nvme_get_features(ns->ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
+			&fdp);
+	if (ret)
+		goto err;
+
+	if (!(fdp & NVME_FDP_FDPE))
+		goto err;
+
+	fdp_idx = (fdp >> NVME_FDP_FDPCIDX_SHIFT) & NVME_FDP_FDPCIDX_MASK;
+	ret = nvme_check_fdp(ns, info, fdp_idx);
+	if (ret || !info->runs)
+		goto err;
+
+	size = struct_size(ruhs, ruhsd, NVME_MAX_PLIDS);
+	ruhs = kzalloc(size, GFP_KERNEL);
+	if (!ruhs) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	c.imr.opcode = nvme_cmd_io_mgmt_recv;
+	c.imr.nsid = cpu_to_le32(head->ns_id);
+	c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS;
+	c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size));
+	ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+	if (ret)
+		goto free;
+
+	head->nr_plids = min_t(u16, le16_to_cpu(ruhs->nruhsd), NVME_MAX_PLIDS);
+	if (!head->nr_plids)
+		goto free;
+
+	kfree(ruhs);
+	return 0;
+
+free:
+	kfree(ruhs);
+err:
+	head->nr_plids = 0;
+	info->runs = 0;
+	return ret;
+}
+
 static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		struct nvme_ns_info *info)
 {
@@ -2183,6 +2282,15 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		goto out;
 	}
 
+	if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+		ret = nvme_query_fdp_info(ns, info);
+		if (ret)
+			dev_warn(ns->ctrl->device,
+				 "FDP failure status:0x%x\n", ret);
+		if (ret < 0)
+			goto out;
+	}
+
 	blk_mq_freeze_queue(ns->disk->queue);
 	ns->head->lba_shift = id->lbaf[lbaf].ds;
 	ns->head->nuse = le64_to_cpu(id->nuse);
@@ -2216,6 +2324,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (!nvme_init_integrity(ns->head, &lim, info))
 		capacity = 0;
 
+	lim.max_write_streams = ns->head->nr_plids;
+	if (lim.max_write_streams)
+		lim.write_stream_granularity = info->runs;
+	else
+		lim.write_stream_granularity = 0;
+
 	ret = queue_limits_commit_update(ns->disk->queue, &lim);
 	if (ret) {
 		blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2318,6 +2432,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 			ns->head->disk->flags |= GENHD_FL_HIDDEN;
 		else
 			nvme_init_integrity(ns->head, &lim, info);
+		lim.max_write_streams = ns_lim->max_write_streams;
+		lim.write_stream_granularity = ns_lim->write_stream_granularity;
 		ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
 
 		set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 611b02c8a8b37..5c8bdaa2c8824 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -454,6 +454,8 @@ struct nvme_ns_ids {
 	u8	csi;
 };
 
+#define NVME_MAX_PLIDS (S8_MAX - 1)
+
 /*
  * Anchor structure for namespaces. There is one for each namespace in a
  * NVMe subsystem that any of our controllers can see, and the namespace
@@ -491,6 +493,8 @@ struct nvme_ns_head {
 	struct device		cdev_device;
 
 	struct gendisk		*disk;
+
+	u16			nr_plids;
 #ifdef CONFIG_NVME_MULTIPATH
 	struct bio_list		requeue_list;
 	spinlock_t		requeue_lock;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 13377dde4527b..78657a8e39561 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -275,6 +275,7 @@ enum nvme_ctrl_attr {
 	NVME_CTRL_ATTR_HID_128_BIT	= (1 << 0),
 	NVME_CTRL_ATTR_TBKAS		= (1 << 6),
 	NVME_CTRL_ATTR_ELBAS		= (1 << 15),
+	NVME_CTRL_ATTR_FDPS		= (1 << 19),
 };
 
 struct nvme_id_ctrl {
@@ -761,6 +762,34 @@ struct nvme_zone_report {
 	struct nvme_zone_descriptor entries[];
 };
 
+struct nvme_fdp_ruh_desc {
+	__u8			ruht;
+	__u8			rsvd1[3];
+};
+
+struct nvme_fdp_config_desc {
+	__le16			size;
+	__u8			fdpa;
+	__u8			vss;
+	__le32			nrg;
+	__le16			nruh;
+	__le16			maxpids;
+	__le32			nnss;
+	__le64			runs;
+	__le32			erutl;
+	__u8			rsvd28[36];
+	struct nvme_fdp_ruh_desc ruhs[];
+};
+
+struct nvme_fdp_config_log {
+	__le16			numfdpc;
+	__u8			ver;
+	__u8			rsvd3;
+	__le32			sze;
+	__u8			rsvd8[8];
+	struct nvme_fdp_config_desc configs[];
+};
+
 enum {
 	NVME_SMART_CRIT_SPARE		= 1 << 0,
 	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
@@ -887,6 +916,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_register	= 0x0d,
 	nvme_cmd_resv_report	= 0x0e,
 	nvme_cmd_resv_acquire	= 0x11,
+	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
@@ -908,6 +938,7 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_register),	\
 		nvme_opcode_name(nvme_cmd_resv_report),		\
 		nvme_opcode_name(nvme_cmd_resv_acquire),	\
+		nvme_opcode_name(nvme_cmd_io_mgmt_recv),	\
 		nvme_opcode_name(nvme_cmd_resv_release),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
@@ -1059,6 +1090,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
 	NVME_RW_DTYPE_STREAMS		= 1 << 4,
+	NVME_RW_DTYPE_DPLCMT		= 2 << 4,
 	NVME_WZ_DEAC			= 1 << 9,
 };
 
@@ -1146,6 +1178,38 @@ struct nvme_zone_mgmt_recv_cmd {
 	__le32			cdw14[2];
 };
 
+struct nvme_io_mgmt_recv_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__u8			mo;
+	__u8			rsvd11;
+	__u16			mos;
+	__le32			numd;
+	__le32			cdw12[4];
+};
+
+enum {
+	NVME_IO_MGMT_RECV_MO_RUHS	= 1,
+};
+
+struct nvme_fdp_ruh_status_desc {
+	__le16			pid;
+	__le16			ruhid;
+	__le32			earutr;
+	__le64			ruamw;
+	__u8			rsvd16[16];
+};
+
+struct nvme_fdp_ruh_status {
+	__u8			rsvd0[14];
+	__le16			nruhsd;
+	struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
 enum {
 	NVME_ZRA_ZONE_REPORT		= 0,
 	NVME_ZRASF_ZONE_REPORT_ALL	= 0,
@@ -1281,6 +1345,7 @@ enum {
 	NVME_FEAT_PLM_WINDOW	= 0x14,
 	NVME_FEAT_HOST_BEHAVIOR	= 0x16,
 	NVME_FEAT_SANITIZE	= 0x17,
+	NVME_FEAT_FDP		= 0x1d,
 	NVME_FEAT_SW_PROGRESS	= 0x80,
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
@@ -1301,6 +1366,7 @@ enum {
 	NVME_LOG_ANA		= 0x0c,
 	NVME_LOG_FEATURES	= 0x12,
 	NVME_LOG_RMI		= 0x16,
+	NVME_LOG_FDP_CONFIG	= 0x20,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1326,6 +1392,12 @@ enum {
 	NVME_FIS_CSCPE	= 1 << 21,
 };
 
+enum {
+	NVME_FDP_FDPE		= 1 << 0,
+	NVME_FDP_FDPCIDX_SHIFT	= 8,
+	NVME_FDP_FDPCIDX_MASK	= 0xff,
+};
+
 /* NVMe Namespace Write Protect State */
 enum {
 	NVME_NS_NO_WRITE_PROTECT = 0,
@@ -1888,6 +1960,7 @@ struct nvme_command {
 		struct nvmf_auth_receive_command auth_receive;
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
+		struct nvme_io_mgmt_recv_cmd imr;
 	};
 };
 
--
2.43.5