mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-08-27 15:50:48 +00:00
nvme-multipath: Add visibility for round-robin io-policy
This patch adds nvme native multipath visibility for the round-robin io-policy. It creates a "multipath" sysfs directory under the head gendisk device node directory, and from that "multipath" directory it adds a link to each namespace path device that the head node refers to.

For instance, if we have a shared namespace accessible from two different controllers/paths, then we create a soft link to each path device from the head disk node, as shown below:

$ ls -l /sys/block/nvme1n1/multipath/
nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1
nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1

In the above example, nvme1n1 is the head gendisk node created for a shared namespace, and the namespace is accessible through the nvme1c1n1 and nvme1c3n1 paths. For the round-robin I/O policy, we can easily infer from the above output that I/O workload targeted at nvme1n1 would alternate across the paths nvme1c1n1 and nvme1c3n1.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
This commit is contained in:
parent
316dabe608
commit
4dbd2b2ebe
@ -4020,6 +4020,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
|
|||||||
|
|
||||||
if (!nvme_ns_head_multipath(ns->head))
|
if (!nvme_ns_head_multipath(ns->head))
|
||||||
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
|
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
|
||||||
|
|
||||||
|
nvme_mpath_remove_sysfs_link(ns);
|
||||||
|
|
||||||
del_gendisk(ns->disk);
|
del_gendisk(ns->disk);
|
||||||
|
|
||||||
mutex_lock(&ns->ctrl->namespaces_lock);
|
mutex_lock(&ns->ctrl->namespaces_lock);
|
||||||
|
@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
|
|||||||
kblockd_schedule_work(&head->partition_scan_work);
|
kblockd_schedule_work(&head->partition_scan_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvme_mpath_add_sysfs_link(ns->head);
|
||||||
|
|
||||||
mutex_lock(&head->lock);
|
mutex_lock(&head->lock);
|
||||||
if (nvme_path_is_optimized(ns)) {
|
if (nvme_path_is_optimized(ns)) {
|
||||||
int node, srcu_idx;
|
int node, srcu_idx;
|
||||||
@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
|
|||||||
if (nvme_state_is_live(ns->ana_state) &&
|
if (nvme_state_is_live(ns->ana_state) &&
|
||||||
nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
|
nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
|
||||||
nvme_mpath_set_live(ns);
|
nvme_mpath_set_live(ns);
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* Add sysfs link from multipath head gendisk node to path
|
||||||
|
* device gendisk node.
|
||||||
|
* If path's ana state is live (i.e. state is either optimized
|
||||||
|
* or non-optimized) while we alloc the ns then sysfs link would
|
||||||
|
* be created from nvme_mpath_set_live(). In that case we would
|
||||||
|
* not fallthrough this code path. However for the path's ana
|
||||||
|
* state other than live, we call nvme_mpath_set_live() only
|
||||||
|
* after ana state transitioned to the live state. But we still
|
||||||
|
* want to create the sysfs link from head node to a path device
|
||||||
|
* irrespctive of the path's ana state.
|
||||||
|
* If we reach through here then it means that path's ana state
|
||||||
|
* is not live but still create the sysfs link to this path from
|
||||||
|
* head node if head node of the path has already come alive.
|
||||||
|
*/
|
||||||
|
if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags))
|
||||||
|
nvme_mpath_add_sysfs_link(ns->head);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
|
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
|
||||||
@ -967,6 +988,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
|
|||||||
return -ENXIO; /* just break out of the loop */
|
return -ENXIO; /* just break out of the loop */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
|
||||||
|
{
|
||||||
|
struct device *target;
|
||||||
|
int rc, srcu_idx;
|
||||||
|
struct nvme_ns *ns;
|
||||||
|
struct kobject *kobj;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure head disk node is already added otherwise we may get invalid
|
||||||
|
* kobj for head disk node
|
||||||
|
*/
|
||||||
|
if (!test_bit(GD_ADDED, &head->disk->state))
|
||||||
|
return;
|
||||||
|
|
||||||
|
kobj = &disk_to_dev(head->disk)->kobj;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* loop through each ns chained through the head->list and create the
|
||||||
|
* sysfs link from head node to the ns path node
|
||||||
|
*/
|
||||||
|
srcu_idx = srcu_read_lock(&head->srcu);
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||||
|
/*
|
||||||
|
* Avoid creating link if it already exists for the given path.
|
||||||
|
* When path ana state transitions from optimized to non-
|
||||||
|
* optimized or vice-versa, the nvme_mpath_set_live() is
|
||||||
|
* invoked which in truns call this function. Now if the sysfs
|
||||||
|
* link already exists for the given path and we attempt to re-
|
||||||
|
* create the link then sysfs code would warn about it loudly.
|
||||||
|
* So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure
|
||||||
|
* that we're not creating duplicate link.
|
||||||
|
* The test_and_set_bit() is used because it is protecting
|
||||||
|
* against multiple nvme paths being simultaneously added.
|
||||||
|
*/
|
||||||
|
if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure that ns path disk node is already added otherwise we
|
||||||
|
* may get invalid kobj name for target
|
||||||
|
*/
|
||||||
|
if (!test_bit(GD_ADDED, &ns->disk->state))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
target = disk_to_dev(ns->disk);
|
||||||
|
/*
|
||||||
|
* Create sysfs link from head gendisk kobject @kobj to the
|
||||||
|
* ns path gendisk kobject @target->kobj.
|
||||||
|
*/
|
||||||
|
rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name,
|
||||||
|
&target->kobj, dev_name(target));
|
||||||
|
if (unlikely(rc)) {
|
||||||
|
dev_err(disk_to_dev(ns->head->disk),
|
||||||
|
"failed to create link to %s\n",
|
||||||
|
dev_name(target));
|
||||||
|
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srcu_read_unlock(&head->srcu, srcu_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
|
||||||
|
{
|
||||||
|
struct device *target;
|
||||||
|
struct kobject *kobj;
|
||||||
|
|
||||||
|
if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
|
||||||
|
return;
|
||||||
|
|
||||||
|
target = disk_to_dev(ns->disk);
|
||||||
|
kobj = &disk_to_dev(ns->head->disk)->kobj;
|
||||||
|
sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name,
|
||||||
|
dev_name(target));
|
||||||
|
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
|
||||||
|
}
|
||||||
|
|
||||||
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
|
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
|
||||||
{
|
{
|
||||||
if (nvme_ctrl_use_ana(ns->ctrl)) {
|
if (nvme_ctrl_use_ana(ns->ctrl)) {
|
||||||
|
@ -538,6 +538,7 @@ struct nvme_ns {
|
|||||||
#define NVME_NS_ANA_PENDING 2
|
#define NVME_NS_ANA_PENDING 2
|
||||||
#define NVME_NS_FORCE_RO 3
|
#define NVME_NS_FORCE_RO 3
|
||||||
#define NVME_NS_READY 4
|
#define NVME_NS_READY 4
|
||||||
|
#define NVME_NS_SYSFS_ATTR_LINK 5
|
||||||
|
|
||||||
struct cdev cdev;
|
struct cdev cdev;
|
||||||
struct device cdev_device;
|
struct device cdev_device;
|
||||||
@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
|||||||
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
|
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
|
||||||
|
|
||||||
extern const struct attribute_group *nvme_ns_attr_groups[];
|
extern const struct attribute_group *nvme_ns_attr_groups[];
|
||||||
|
extern const struct attribute_group nvme_ns_mpath_attr_group;
|
||||||
extern const struct pr_ops nvme_pr_ops;
|
extern const struct pr_ops nvme_pr_ops;
|
||||||
extern const struct block_device_operations nvme_ns_head_ops;
|
extern const struct block_device_operations nvme_ns_head_ops;
|
||||||
extern const struct attribute_group nvme_dev_attrs_group;
|
extern const struct attribute_group nvme_dev_attrs_group;
|
||||||
@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
|
|||||||
void nvme_failover_req(struct request *req);
|
void nvme_failover_req(struct request *req);
|
||||||
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
|
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
|
||||||
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
|
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
|
||||||
|
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
|
||||||
|
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns);
|
||||||
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
|
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
|
||||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
|
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
|
||||||
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
|
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
|
||||||
@ -1009,6 +1013,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
|
|||||||
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
/*
 * No-op stubs for !CONFIG_NVME_MULTIPATH builds.
 *
 * Fix vs. original: the add stub took a struct nvme_ns *, while the real
 * implementation's prototype takes a struct nvme_ns_head * — the stub's
 * parameter type is aligned so both configurations expose the same
 * interface.
 */
static inline void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
{
}
|
||||||
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
|
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -299,8 +299,22 @@ static const struct attribute_group nvme_ns_attr_group = {
|
|||||||
.is_visible = nvme_ns_attrs_are_visible,
|
.is_visible = nvme_ns_attrs_are_visible,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVME_MULTIPATH
/*
 * The "multipath" group carries no static attributes of its own; it
 * exists so that per-path links can be added to it at runtime via
 * sysfs_add_link_to_group().
 */
static struct attribute *nvme_ns_mpath_attrs[] = {
	NULL,
};

/* Creates the /sys/block/<head-disk>/multipath/ directory. */
const struct attribute_group nvme_ns_mpath_attr_group = {
	.name		= "multipath",
	.attrs		= nvme_ns_mpath_attrs,
};
#endif
|
||||||
|
|
||||||
const struct attribute_group *nvme_ns_attr_groups[] = {
|
const struct attribute_group *nvme_ns_attr_groups[] = {
|
||||||
&nvme_ns_attr_group,
|
&nvme_ns_attr_group,
|
||||||
|
#ifdef CONFIG_NVME_MULTIPATH
|
||||||
|
&nvme_ns_mpath_attr_group,
|
||||||
|
#endif
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user