mirror of
https://git.proxmox.com/git/mirror_zfs
synced 2025-04-28 11:40:17 +00:00
arc: avoid possible deadlock in arc_read
In l2arc_evict(), the config lock may be acquired in reverse order (e.g., first the config lock (writer), then a hash lock) unlike in arc_read() during scenarios like L2ARC device removal. To avoid deadlocks, if the attempt to acquire the config lock (reader) fails in arc_read(), release the hash lock, wait for the config lock, and retry from the beginning. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Ameer Hamza <ahamza@ixsystems.com> Closes #17071
This commit is contained in:
parent
701093c44f
commit
ab3db6d15d
@ -9043,7 +9043,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
||||
const blkptr_t *b = (const blkptr_t *)(void *)
|
||||
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
||||
if (zfs_blkptr_verify(spa, b,
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY)) {
|
||||
abd_return_buf_copy(pabd, buf, lsize);
|
||||
borrowed = B_FALSE;
|
||||
buf = lbuf;
|
||||
@ -9052,7 +9052,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
||||
b = (const blkptr_t *)(void *)
|
||||
((uintptr_t)buf + (uintptr_t)blkptr_offset);
|
||||
if (lsize == -1 || zfs_blkptr_verify(spa, b,
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
printf("invalid block pointer at this DVA\n");
|
||||
goto out;
|
||||
}
|
||||
|
@ -545,6 +545,7 @@ enum blk_verify_flag {
|
||||
enum blk_config_flag {
|
||||
BLK_CONFIG_HELD, // SCL_VDEV held for writer
|
||||
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
|
||||
BLK_CONFIG_NEEDED_TRY, // Try with SCL_VDEV for reader
|
||||
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
|
||||
};
|
||||
|
||||
@ -662,7 +663,7 @@ extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
|
||||
extern int zio_resume(spa_t *spa);
|
||||
extern void zio_resume_wait(spa_t *spa);
|
||||
|
||||
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
|
||||
|
||||
/*
|
||||
|
@ -5683,6 +5683,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
|
||||
arc_buf_t *buf = NULL;
|
||||
int rc = 0;
|
||||
boolean_t bp_validation = B_FALSE;
|
||||
|
||||
ASSERT(!embedded_bp ||
|
||||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
|
||||
@ -5725,7 +5726,7 @@ top:
|
||||
* should always be the case since the blkptr is protected by
|
||||
* a checksum.
|
||||
*/
|
||||
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
|
||||
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
|
||||
BLK_VERIFY_LOG)) {
|
||||
mutex_exit(hash_lock);
|
||||
rc = SET_ERROR(ECKSUM);
|
||||
@ -5877,6 +5878,8 @@ top:
|
||||
abd_t *hdr_abd;
|
||||
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
|
||||
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
|
||||
int config_lock;
|
||||
int error;
|
||||
|
||||
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
|
||||
if (hash_lock != NULL)
|
||||
@ -5885,16 +5888,31 @@ top:
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (zio_flags & ZIO_FLAG_CONFIG_WRITER) {
|
||||
config_lock = BLK_CONFIG_HELD;
|
||||
} else if (hash_lock != NULL) {
|
||||
/*
|
||||
* Prevent lock order reversal
|
||||
*/
|
||||
config_lock = BLK_CONFIG_NEEDED_TRY;
|
||||
} else {
|
||||
config_lock = BLK_CONFIG_NEEDED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify the block pointer contents are reasonable. This
|
||||
* should always be the case since the blkptr is protected by
|
||||
* a checksum.
|
||||
*/
|
||||
if (!zfs_blkptr_verify(spa, bp,
|
||||
(zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
|
||||
BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
if (!bp_validation && (error = zfs_blkptr_verify(spa, bp,
|
||||
config_lock, BLK_VERIFY_LOG))) {
|
||||
if (hash_lock != NULL)
|
||||
mutex_exit(hash_lock);
|
||||
if (error == EBUSY && !zfs_blkptr_verify(spa, bp,
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
bp_validation = B_TRUE;
|
||||
goto top;
|
||||
}
|
||||
rc = SET_ERROR(ECKSUM);
|
||||
goto done;
|
||||
}
|
||||
|
@ -2305,7 +2305,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
||||
DMU_USERUSED_OBJECT, tx);
|
||||
}
|
||||
arc_buf_destroy(buf, &buf);
|
||||
} else if (!zfs_blkptr_verify(spa, bp,
|
||||
} else if (zfs_blkptr_verify(spa, bp,
|
||||
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
/*
|
||||
* Sanity check the block pointer contents, this is handled
|
||||
|
@ -2778,7 +2778,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
* When damaged consider it to be a metadata error since we cannot
|
||||
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
|
||||
*/
|
||||
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
|
||||
atomic_inc_64(&sle->sle_meta_count);
|
||||
return (0);
|
||||
}
|
||||
|
@ -1164,7 +1164,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
|
||||
* it only contains known object types, checksum/compression identifiers,
|
||||
* block sizes within the maximum allowed limits, valid DVAs, etc.
|
||||
*
|
||||
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
|
||||
* If everything checks out 0 is returned. The zfs_blkptr_verify
|
||||
* argument controls the behavior when an invalid field is detected.
|
||||
*
|
||||
* Values for blk_verify_flag:
|
||||
@ -1179,7 +1179,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
|
||||
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
|
||||
* performance
|
||||
*/
|
||||
boolean_t
|
||||
int
|
||||
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
|
||||
{
|
||||
@ -1211,7 +1211,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
"blkptr at %px has invalid PSIZE %llu",
|
||||
bp, (longlong_t)BPE_GET_PSIZE(bp));
|
||||
}
|
||||
return (errors == 0);
|
||||
return (errors ? ECKSUM : 0);
|
||||
}
|
||||
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
|
||||
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
|
||||
@ -1229,7 +1229,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
* will be done once the zio is executed in vdev_mirror_map_alloc.
|
||||
*/
|
||||
if (unlikely(!spa->spa_trust_config))
|
||||
return (errors == 0);
|
||||
return (errors ? ECKSUM : 0);
|
||||
|
||||
switch (blk_config) {
|
||||
case BLK_CONFIG_HELD:
|
||||
@ -1238,8 +1238,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
case BLK_CONFIG_NEEDED:
|
||||
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
|
||||
break;
|
||||
case BLK_CONFIG_NEEDED_TRY:
|
||||
if (!spa_config_tryenter(spa, SCL_VDEV, bp, RW_READER))
|
||||
return (EBUSY);
|
||||
break;
|
||||
case BLK_CONFIG_SKIP:
|
||||
return (errors == 0);
|
||||
return (errors ? ECKSUM : 0);
|
||||
default:
|
||||
panic("invalid blk_config %u", blk_config);
|
||||
}
|
||||
@ -1294,10 +1298,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
|
||||
bp, i, (longlong_t)offset);
|
||||
}
|
||||
}
|
||||
if (blk_config == BLK_CONFIG_NEEDED)
|
||||
if (blk_config == BLK_CONFIG_NEEDED || blk_config ==
|
||||
BLK_CONFIG_NEEDED_TRY)
|
||||
spa_config_exit(spa, SCL_VDEV, bp);
|
||||
|
||||
return (errors == 0);
|
||||
return (errors ? ECKSUM : 0);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
|
Loading…
Reference in New Issue
Block a user