mirror of
https://git.proxmox.com/git/mirror_zfs
synced 2025-04-29 23:03:35 +00:00
arc: avoid possible deadlock in arc_read
l2arc_evict() may acquire locks in the opposite order from arc_read(): in scenarios like L2ARC device removal it first takes the config lock (as writer) and then a hash lock, whereas arc_read() takes the hash lock first and then the config lock (as reader). To avoid deadlocks, arc_read() now only tries to acquire the config lock (as reader) while it holds the hash lock; if that attempt fails, it releases the hash lock, waits for the config lock, and retries from the beginning. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Ameer Hamza <ahamza@ixsystems.com> Closes #17071
This commit is contained in:
parent
7e72312eff
commit
637f918211
@ -9043,7 +9043,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
|||||||
const blkptr_t *b = (const blkptr_t *)(void *)
|
const blkptr_t *b = (const blkptr_t *)(void *)
|
||||||
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
||||||
if (zfs_blkptr_verify(spa, b,
|
if (zfs_blkptr_verify(spa, b,
|
||||||
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
|
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY)) {
|
||||||
abd_return_buf_copy(pabd, buf, lsize);
|
abd_return_buf_copy(pabd, buf, lsize);
|
||||||
borrowed = B_FALSE;
|
borrowed = B_FALSE;
|
||||||
buf = lbuf;
|
buf = lbuf;
|
||||||
@ -9052,7 +9052,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
|||||||
b = (const blkptr_t *)(void *)
|
b = (const blkptr_t *)(void *)
|
||||||
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
||||||
if (lsize == -1 || zfs_blkptr_verify(spa, b,
|
if (lsize == -1 || zfs_blkptr_verify(spa, b,
|
||||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
|
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||||
printf("invalid block pointer at this DVA\n");
|
printf("invalid block pointer at this DVA\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -545,6 +545,7 @@ enum blk_verify_flag {
|
|||||||
enum blk_config_flag {
|
enum blk_config_flag {
|
||||||
BLK_CONFIG_HELD, // SCL_VDEV held for writer
|
BLK_CONFIG_HELD, // SCL_VDEV held for writer
|
||||||
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
|
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
|
||||||
|
BLK_CONFIG_NEEDED_TRY, // Try with SCL_VDEV for reader
|
||||||
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
|
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -662,7 +663,7 @@ extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
|
|||||||
extern int zio_resume(spa_t *spa);
|
extern int zio_resume(spa_t *spa);
|
||||||
extern void zio_resume_wait(spa_t *spa);
|
extern void zio_resume_wait(spa_t *spa);
|
||||||
|
|
||||||
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||||
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
|
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5568,6 +5568,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|||||||
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
|
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
|
||||||
arc_buf_t *buf = NULL;
|
arc_buf_t *buf = NULL;
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
boolean_t bp_validation = B_FALSE;
|
||||||
|
|
||||||
ASSERT(!embedded_bp ||
|
ASSERT(!embedded_bp ||
|
||||||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
|
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
|
||||||
@ -5610,7 +5611,7 @@ top:
|
|||||||
* should always be the case since the blkptr is protected by
|
* should always be the case since the blkptr is protected by
|
||||||
* a checksum.
|
* a checksum.
|
||||||
*/
|
*/
|
||||||
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
|
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
|
||||||
BLK_VERIFY_LOG)) {
|
BLK_VERIFY_LOG)) {
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
rc = SET_ERROR(ECKSUM);
|
rc = SET_ERROR(ECKSUM);
|
||||||
@ -5762,6 +5763,8 @@ top:
|
|||||||
abd_t *hdr_abd;
|
abd_t *hdr_abd;
|
||||||
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
|
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
|
||||||
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
|
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
|
||||||
|
int config_lock;
|
||||||
|
int error;
|
||||||
|
|
||||||
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
|
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
|
||||||
if (hash_lock != NULL)
|
if (hash_lock != NULL)
|
||||||
@ -5770,16 +5773,31 @@ top:
|
|||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zio_flags & ZIO_FLAG_CONFIG_WRITER) {
|
||||||
|
config_lock = BLK_CONFIG_HELD;
|
||||||
|
} else if (hash_lock != NULL) {
|
||||||
|
/*
|
||||||
|
* Prevent lock order reversal
|
||||||
|
*/
|
||||||
|
config_lock = BLK_CONFIG_NEEDED_TRY;
|
||||||
|
} else {
|
||||||
|
config_lock = BLK_CONFIG_NEEDED;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify the block pointer contents are reasonable. This
|
* Verify the block pointer contents are reasonable. This
|
||||||
* should always be the case since the blkptr is protected by
|
* should always be the case since the blkptr is protected by
|
||||||
* a checksum.
|
* a checksum.
|
||||||
*/
|
*/
|
||||||
if (!zfs_blkptr_verify(spa, bp,
|
if (!bp_validation && (error = zfs_blkptr_verify(spa, bp,
|
||||||
(zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
|
config_lock, BLK_VERIFY_LOG))) {
|
||||||
BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
|
||||||
if (hash_lock != NULL)
|
if (hash_lock != NULL)
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
|
if (error == EBUSY && !zfs_blkptr_verify(spa, bp,
|
||||||
|
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||||
|
bp_validation = B_TRUE;
|
||||||
|
goto top;
|
||||||
|
}
|
||||||
rc = SET_ERROR(ECKSUM);
|
rc = SET_ERROR(ECKSUM);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -2305,7 +2305,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|||||||
DMU_USERUSED_OBJECT, tx);
|
DMU_USERUSED_OBJECT, tx);
|
||||||
}
|
}
|
||||||
arc_buf_destroy(buf, &buf);
|
arc_buf_destroy(buf, &buf);
|
||||||
} else if (!zfs_blkptr_verify(spa, bp,
|
} else if (zfs_blkptr_verify(spa, bp,
|
||||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||||
/*
|
/*
|
||||||
* Sanity check the block pointer contents, this is handled
|
* Sanity check the block pointer contents, this is handled
|
||||||
|
@ -2778,7 +2778,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
* When damaged consider it to be a metadata error since we cannot
|
* When damaged consider it to be a metadata error since we cannot
|
||||||
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
|
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
|
||||||
*/
|
*/
|
||||||
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||||
atomic_inc_64(&sle->sle_meta_count);
|
atomic_inc_64(&sle->sle_meta_count);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
@ -1164,7 +1164,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
|
|||||||
* it only contains known object types, checksum/compression identifiers,
|
* it only contains known object types, checksum/compression identifiers,
|
||||||
* block sizes within the maximum allowed limits, valid DVAs, etc.
|
* block sizes within the maximum allowed limits, valid DVAs, etc.
|
||||||
*
|
*
|
||||||
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
|
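* If everything checks out, 0 is returned; otherwise ECKSUM is returned for an invalid block pointer, or EBUSY if BLK_CONFIG_NEEDED_TRY could not obtain SCL_VDEV. The blk_verify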
|
||||||
* argument controls the behavior when an invalid field is detected.
|
* argument controls the behavior when an invalid field is detected.
|
||||||
*
|
*
|
||||||
* Values for blk_verify_flag:
|
* Values for blk_verify_flag:
|
||||||
@ -1179,7 +1179,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
|
|||||||
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
|
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
|
||||||
* performance
|
* performance
|
||||||
*/
|
*/
|
||||||
boolean_t
|
int
|
||||||
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||||
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
|
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
|
||||||
{
|
{
|
||||||
@ -1211,7 +1211,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
|||||||
"blkptr at %px has invalid PSIZE %llu",
|
"blkptr at %px has invalid PSIZE %llu",
|
||||||
bp, (longlong_t)BPE_GET_PSIZE(bp));
|
bp, (longlong_t)BPE_GET_PSIZE(bp));
|
||||||
}
|
}
|
||||||
return (errors == 0);
|
return (errors ? ECKSUM : 0);
|
||||||
}
|
}
|
||||||
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
|
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
|
||||||
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
|
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
|
||||||
@ -1229,7 +1229,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
|||||||
* will be done once the zio is executed in vdev_mirror_map_alloc.
|
* will be done once the zio is executed in vdev_mirror_map_alloc.
|
||||||
*/
|
*/
|
||||||
if (unlikely(!spa->spa_trust_config))
|
if (unlikely(!spa->spa_trust_config))
|
||||||
return (errors == 0);
|
return (errors ? ECKSUM : 0);
|
||||||
|
|
||||||
switch (blk_config) {
|
switch (blk_config) {
|
||||||
case BLK_CONFIG_HELD:
|
case BLK_CONFIG_HELD:
|
||||||
@ -1238,8 +1238,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
|||||||
case BLK_CONFIG_NEEDED:
|
case BLK_CONFIG_NEEDED:
|
||||||
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
|
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
|
||||||
break;
|
break;
|
||||||
|
case BLK_CONFIG_NEEDED_TRY:
|
||||||
|
if (!spa_config_tryenter(spa, SCL_VDEV, bp, RW_READER))
|
||||||
|
return (EBUSY);
|
||||||
|
break;
|
||||||
case BLK_CONFIG_SKIP:
|
case BLK_CONFIG_SKIP:
|
||||||
return (errors == 0);
|
return (errors ? ECKSUM : 0);
|
||||||
default:
|
default:
|
||||||
panic("invalid blk_config %u", blk_config);
|
panic("invalid blk_config %u", blk_config);
|
||||||
}
|
}
|
||||||
@ -1294,10 +1298,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
|||||||
bp, i, (longlong_t)offset);
|
bp, i, (longlong_t)offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (blk_config == BLK_CONFIG_NEEDED)
|
if (blk_config == BLK_CONFIG_NEEDED || blk_config ==
|
||||||
|
BLK_CONFIG_NEEDED_TRY)
|
||||||
spa_config_exit(spa, SCL_VDEV, bp);
|
spa_config_exit(spa, SCL_VDEV, bp);
|
||||||
|
|
||||||
return (errors == 0);
|
return (errors ? ECKSUM : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean_t
|
boolean_t
|
||||||
|
Loading…
Reference in New Issue
Block a user