Fix deduplication of overridden blocks

Implementation of DDT pruning introduced verification of DVAs in
a block pointer during ddt_lookup() to not by mistake free previous
pruned incarnation of the entry.  But when writing a new block in
zio_ddt_write() we might have the DVAs only from override pointer,
which may never have "D" flag to be confused with pruned DDT entry,
and we'll abandon those DVAs if we find a matching entry in DDT.

This fixes deduplication for blocks written via dmu_sync() for
purposes of indirect ZIL write records, that I have tested.  And
I suspect it might actually allow deduplication for Direct I/O,
even though in an odd way -- first write block directly and then
delete it later during TXG commit if found duplicate, which part
I haven't tested.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #17120
This commit is contained in:
Alexander Motin 2025-03-13 13:27:57 -04:00 committed by GitHub
parent 09f4dd06c3
commit 0ea44e576b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 19 additions and 11 deletions

View File

@ -5864,7 +5864,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
* Find the block. This will create the entry in memory, but
* we'll know if that happened by its refcount.
*/
ddt_entry_t *dde = ddt_lookup(ddt, bp);
ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);
/*
* ddt_lookup() can return NULL if this block didn't exist

View File

@ -378,7 +378,8 @@ extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);
extern void ddt_fini(void);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp,
boolean_t verify);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_prefetch_all(spa_t *spa);

View File

@ -1106,7 +1106,7 @@ ddt_entry_lookup_is_valid(ddt_t *ddt, const blkptr_t *bp, ddt_entry_t *dde)
}
ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t verify)
{
spa_t *spa = ddt->ddt_spa;
ddt_key_t search;
@ -1141,7 +1141,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* If it's already loaded, we can just return it. */
DDT_KSTAT_BUMP(ddt, dds_lookup_live_hit);
if (dde->dde_flags & DDE_FLAG_LOADED) {
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
if (!verify || ddt_entry_lookup_is_valid(ddt, bp, dde))
return (dde);
return (NULL);
}
@ -1165,7 +1165,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
/* Make sure the loaded entry matches the BP */
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
if (!verify || ddt_entry_lookup_is_valid(ddt, bp, dde))
return (dde);
return (NULL);
} else
@ -1194,7 +1194,8 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,
DDT_PHYS_SIZE(ddt));
/* Whatever we found isn't valid for this BP, eject */
if (!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
if (verify &&
!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(ddt, dde);
return (NULL);
@ -1276,7 +1277,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
* worth the effort to deal with without taking an entry
* update.
*/
valid = ddt_entry_lookup_is_valid(ddt, bp, dde);
valid = !verify || ddt_entry_lookup_is_valid(ddt, bp, dde);
if (!valid && dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(ddt, dde);
@ -2469,7 +2470,7 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
ddt = ddt_select(spa, bp);
ddt_enter(ddt);
dde = ddt_lookup(ddt, bp);
dde = ddt_lookup(ddt, bp, B_TRUE);
/* Can be NULL if the entry for this block was pruned. */
if (dde == NULL) {
@ -2551,7 +2552,7 @@ prune_candidates_sync(void *arg, dmu_tx_t *tx)
ddt_bp_create(ddt->ddt_checksum, &dpe->dpe_key,
dpe->dpe_phys, DDT_PHYS_FLAT, &blk);
ddt_entry_t *dde = ddt_lookup(ddt, &blk);
ddt_entry_t *dde = ddt_lookup(ddt, &blk, B_TRUE);
if (dde != NULL && !(dde->dde_flags & DDE_FLAG_LOGGED)) {
ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
/*

View File

@ -3717,7 +3717,13 @@ zio_ddt_write(zio_t *zio)
ASSERT3B(zio->io_prop.zp_direct_write, ==, B_FALSE);
ddt_enter(ddt);
dde = ddt_lookup(ddt, bp);
/*
* Search DDT for matching entry. Skip DVAs verification here, since
* they can go only from override, and once we get here the override
* pointer can't have "D" flag to be confused with pruned DDT entries.
*/
IMPLY(zio->io_bp_override, !BP_GET_DEDUP(zio->io_bp_override));
dde = ddt_lookup(ddt, bp, B_FALSE);
if (dde == NULL) {
/* DDT size is over its quota so no new entries */
zp->zp_dedup = B_FALSE;
@ -3993,7 +3999,7 @@ zio_ddt_free(zio_t *zio)
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
ddt_enter(ddt);
freedde = dde = ddt_lookup(ddt, bp);
freedde = dde = ddt_lookup(ddt, bp, B_TRUE);
if (dde) {
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
if (v != DDT_PHYS_NONE)