mirror of
https://git.proxmox.com/git/mirror_zfs
synced 2025-04-28 11:40:17 +00:00
zed: Ensure spare activation after kernel-initiated device removal
In addition to hotplug events, the kernel may also mark a failing vdev as REMOVED. This was observed in a customer report and reproduced by forcing the NVMe host driver to disable the device after a failed reset due to a command timeout. In such cases, the spare was not activated because the device had already transitioned to a REMOVED state before zed processed the event. To address this, explicitly attempt hot spare activation when the kernel marks a device as REMOVED.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #17187
This commit is contained in:
parent
dd2a46b5e6
commit
30cc2331f4
@ -404,6 +404,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
|||||||
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
|
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
|
||||||
const char *devtype;
|
const char *devtype;
|
||||||
char *devname;
|
char *devname;
|
||||||
|
boolean_t skip_removal = B_FALSE;
|
||||||
|
|
||||||
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
|
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
|
||||||
&devtype) == 0) {
|
&devtype) == 0) {
|
||||||
@ -441,18 +442,28 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
|||||||
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||||
(uint64_t **)&vs, &c);
|
(uint64_t **)&vs, &c);
|
||||||
|
|
||||||
|
if (vs->vs_state == VDEV_STATE_OFFLINE)
|
||||||
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If state removed is requested for already removed vdev,
|
* If state removed is requested for already removed vdev,
|
||||||
* it's a loopback event from spa_async_remove(). Just
|
* it's a loopback event from spa_async_remove(). Just
|
||||||
* ignore it.
|
* ignore it.
|
||||||
*/
|
*/
|
||||||
if ((vs->vs_state == VDEV_STATE_REMOVED && state ==
|
if ((vs->vs_state == VDEV_STATE_REMOVED &&
|
||||||
VDEV_STATE_REMOVED) || vs->vs_state == VDEV_STATE_OFFLINE)
|
state == VDEV_STATE_REMOVED)) {
|
||||||
return;
|
if (strcmp(class, "resource.fs.zfs.removed") == 0 &&
|
||||||
|
nvlist_exists(nvl, "by_kernel")) {
|
||||||
|
skip_removal = B_TRUE;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Remove the vdev since device is unplugged */
|
/* Remove the vdev since device is unplugged */
|
||||||
int remove_status = 0;
|
int remove_status = 0;
|
||||||
if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
|
if (!skip_removal && (l2arc ||
|
||||||
|
(strcmp(class, "resource.fs.zfs.removed") == 0))) {
|
||||||
remove_status = zpool_vdev_remove_wanted(zhp, devname);
|
remove_status = zpool_vdev_remove_wanted(zhp, devname);
|
||||||
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
|
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
|
||||||
", err:%d", devname, libzfs_errno(zhdl));
|
", err:%d", devname, libzfs_errno(zhdl));
|
||||||
|
@ -784,6 +784,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
|
|||||||
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
|
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
|
||||||
#define SPA_ASYNC_REBUILD_DONE 0x2000
|
#define SPA_ASYNC_REBUILD_DONE 0x2000
|
||||||
#define SPA_ASYNC_DETACH_SPARE 0x4000
|
#define SPA_ASYNC_DETACH_SPARE 0x4000
|
||||||
|
#define SPA_ASYNC_REMOVE_BY_USER 0x8000
|
||||||
|
|
||||||
/* device manipulation */
|
/* device manipulation */
|
||||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
|
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
|
||||||
@ -1179,7 +1180,7 @@ extern void zfs_ereport_taskq_fini(void);
|
|||||||
extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
|
extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
|
||||||
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
|
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
|
||||||
const char *name, nvlist_t *aux);
|
const char *name, nvlist_t *aux);
|
||||||
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
|
extern void zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel);
|
||||||
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
|
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
|
||||||
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
||||||
extern uint64_t spa_approx_errlog_size(spa_t *spa);
|
extern uint64_t spa_approx_errlog_size(spa_t *spa);
|
||||||
|
@ -8921,7 +8921,7 @@ spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
spa_async_remove(spa_t *spa, vdev_t *vd)
|
spa_async_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
|
||||||
{
|
{
|
||||||
if (vd->vdev_remove_wanted) {
|
if (vd->vdev_remove_wanted) {
|
||||||
vd->vdev_remove_wanted = B_FALSE;
|
vd->vdev_remove_wanted = B_FALSE;
|
||||||
@ -8941,11 +8941,11 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
|
|||||||
vdev_state_dirty(vd->vdev_top);
|
vdev_state_dirty(vd->vdev_top);
|
||||||
|
|
||||||
/* Tell userspace that the vdev is gone. */
|
/* Tell userspace that the vdev is gone. */
|
||||||
zfs_post_remove(spa, vd);
|
zfs_post_remove(spa, vd, by_kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int c = 0; c < vd->vdev_children; c++)
|
for (int c = 0; c < vd->vdev_children; c++)
|
||||||
spa_async_remove(spa, vd->vdev_child[c]);
|
spa_async_remove(spa, vd->vdev_child[c], by_kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -9039,13 +9039,18 @@ spa_async_thread(void *arg)
|
|||||||
/*
|
/*
|
||||||
* See if any devices need to be marked REMOVED.
|
* See if any devices need to be marked REMOVED.
|
||||||
*/
|
*/
|
||||||
if (tasks & SPA_ASYNC_REMOVE) {
|
if (tasks & (SPA_ASYNC_REMOVE | SPA_ASYNC_REMOVE_BY_USER)) {
|
||||||
|
boolean_t by_kernel = B_TRUE;
|
||||||
|
if (tasks & SPA_ASYNC_REMOVE_BY_USER)
|
||||||
|
by_kernel = B_FALSE;
|
||||||
spa_vdev_state_enter(spa, SCL_NONE);
|
spa_vdev_state_enter(spa, SCL_NONE);
|
||||||
spa_async_remove(spa, spa->spa_root_vdev);
|
spa_async_remove(spa, spa->spa_root_vdev, by_kernel);
|
||||||
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
|
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
|
||||||
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
|
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i],
|
||||||
|
by_kernel);
|
||||||
for (int i = 0; i < spa->spa_spares.sav_count; i++)
|
for (int i = 0; i < spa->spa_spares.sav_count; i++)
|
||||||
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
|
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i],
|
||||||
|
by_kernel);
|
||||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4271,7 +4271,7 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
|
|||||||
return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
|
return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
|
||||||
|
|
||||||
vd->vdev_remove_wanted = B_TRUE;
|
vd->vdev_remove_wanted = B_TRUE;
|
||||||
spa_async_request(spa, SPA_ASYNC_REMOVE);
|
spa_async_request(spa, SPA_ASYNC_REMOVE_BY_USER);
|
||||||
|
|
||||||
return (spa_vdev_state_exit(spa, vd, 0));
|
return (spa_vdev_state_exit(spa, vd, 0));
|
||||||
}
|
}
|
||||||
|
@ -1433,9 +1433,23 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name,
|
|||||||
* removal.
|
* removal.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zfs_post_remove(spa_t *spa, vdev_t *vd)
|
zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
|
||||||
{
|
{
|
||||||
zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL);
|
nvlist_t *aux = NULL;
|
||||||
|
|
||||||
|
if (by_kernel) {
|
||||||
|
/*
|
||||||
|
* Add optional supplemental keys to payload
|
||||||
|
*/
|
||||||
|
aux = fm_nvlist_create(NULL);
|
||||||
|
if (aux)
|
||||||
|
fnvlist_add_boolean(aux, "by_kernel");
|
||||||
|
}
|
||||||
|
|
||||||
|
zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, aux);
|
||||||
|
|
||||||
|
if (by_kernel && aux)
|
||||||
|
fm_nvlist_destroy(aux, FM_NVA_FREE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user