From 9250403ba68d4c9917acf071e40ea9d0bf2b531c Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 19 Mar 2025 15:58:29 -0700 Subject: [PATCH] Make ganging redundancy respect redundant_metadata property (#17073) The redundant_metadata setting in ZFS allows users to trade resilience for performance and space savings. This applies to all data and metadata blocks in ZFS, with one exception: gang blocks. Gang blocks currently just take the copies property of the IO being ganged and, if it's 1, set it to 2. This means that we always make at least two copies of a gang header, which is good for resilience. However, if users care more about performance than resilience, their gang blocks will be even more of a penalty than usual. We add logic to calculate the number of gang header copies directly, and store it as a separate IO property. This is stored in the IO properties and not calculated when we decide to gang because by that point we may not have easy access to the relevant information about what kind of block is being stored. We also check the redundant_metadata property when doing so, and use that to decide whether to store an extra copy of the gang headers, compared to the underlying blocks. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. 
Signed-off-by: Paul Dagnelie Co-authored-by: Paul Dagnelie Reviewed-by: Alexander Motin Reviewed-by: Tony Hutter --- cmd/zdb/zdb.c | 12 +- include/sys/dbuf.h | 1 + include/sys/zio.h | 3 +- module/zfs/arc.c | 2 + module/zfs/dbuf.c | 4 +- module/zfs/dmu.c | 21 ++- module/zfs/dmu_recv.c | 3 + module/zfs/zio.c | 23 ++-- tests/runfiles/common.run | 4 + tests/zfs-tests/include/tunables.cfg | 1 + tests/zfs-tests/tests/Makefile.am | 4 + .../tests/functional/gang_blocks/cleanup.ksh | 31 +++++ .../functional/gang_blocks/gang_blocks.kshlib | 120 ++++++++++++++++++ .../gang_blocks/gang_blocks_redundant.ksh | 88 +++++++++++++ .../tests/functional/gang_blocks/setup.ksh | 30 +++++ 15 files changed, 327 insertions(+), 20 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/gang_blocks/cleanup.ksh create mode 100644 tests/zfs-tests/tests/functional/gang_blocks/gang_blocks.kshlib create mode 100755 tests/zfs-tests/tests/functional/gang_blocks/gang_blocks_redundant.ksh create mode 100755 tests/zfs-tests/tests/functional/gang_blocks/setup.ksh diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 1ca97d5c1..45eb9c783 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2545,12 +2545,14 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, blkbuf[0] = '\0'; - for (i = 0; i < ndvas; i++) + for (i = 0; i < ndvas; i++) { (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), "%llu:%llx:%llx ", + buflen - strlen(blkbuf), "%llu:%llx:%llx%s ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), - (u_longlong_t)DVA_GET_ASIZE(&dva[i])); + (u_longlong_t)DVA_GET_ASIZE(&dva[i]), + (DVA_GET_GANG(&dva[i]) ? 
"G" : "")); + } if (BP_IS_HOLE(bp)) { (void) snprintf(blkbuf + strlen(blkbuf), @@ -8981,7 +8983,7 @@ zdb_read_block(char *thing, spa_t *spa) DVA_SET_VDEV(&dva[0], vd->vdev_id); DVA_SET_OFFSET(&dva[0], offset); - DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); + DVA_SET_GANG(&dva[0], 0); DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); @@ -8996,7 +8998,7 @@ zdb_read_block(char *thing, spa_t *spa) BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - zio = zio_root(spa, NULL, NULL, 0); + zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); if (vd == vd->vdev_top) { /* diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 2e9b7edf8..285e02484 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -174,6 +174,7 @@ typedef struct dbuf_dirty_record { arc_buf_t *dr_data; override_states_t dr_override_state; uint8_t dr_copies; + uint8_t dr_gang_copies; boolean_t dr_nopwrite; boolean_t dr_brtwrite; boolean_t dr_diowrite; diff --git a/include/sys/zio.h b/include/sys/zio.h index af47d6f87..78adca4d7 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -350,6 +350,7 @@ typedef struct zio_prop { uint8_t zp_complevel; uint8_t zp_level; uint8_t zp_copies; + uint8_t zp_gang_copies; dmu_object_type_t zp_type; boolean_t zp_dedup; boolean_t zp_dedup_verify; @@ -575,7 +576,7 @@ extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb); extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, - boolean_t nopwrite, boolean_t brtwrite); + int gang_copies, boolean_t nopwrite, boolean_t brtwrite); extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index e97d588b4..d07a5f076 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -6887,6 +6887,8 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, localprop.zp_nopwrite = 
B_FALSE; localprop.zp_copies = MIN(localprop.zp_copies, SPA_DVAS_PER_BP - 1); + localprop.zp_gang_copies = + MIN(localprop.zp_gang_copies, SPA_DVAS_PER_BP - 1); } zio_flags |= ZIO_FLAG_RAW; } else if (ARC_BUF_COMPRESSED(buf)) { diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 01f92411b..0a243a242 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -5352,8 +5352,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) mutex_enter(&db->db_mtx); dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, - dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite, - dr->dt.dl.dr_brtwrite); + dr->dt.dl.dr_copies, dr->dt.dl.dr_gang_copies, + dr->dt.dl.dr_nopwrite, dr->dt.dl.dr_brtwrite); mutex_exit(&db->db_mtx); } else if (data == NULL) { ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF || diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index bddb90f29..2b52ae139 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1916,6 +1916,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) dr->dt.dl.dr_overridden_by = *zio->io_bp; dr->dt.dl.dr_override_state = DR_OVERRIDDEN; dr->dt.dl.dr_copies = zio->io_prop.zp_copies; + dr->dt.dl.dr_gang_copies = zio->io_prop.zp_gang_copies; /* * Old style holes are filled with all zeros, whereas @@ -2322,6 +2323,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) boolean_t dedup_verify = os->os_dedup_verify; boolean_t encrypt = B_FALSE; int copies = os->os_copies; + int gang_copies = os->os_copies; /* * We maintain different write policies for each of the following @@ -2354,15 +2356,24 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) switch (os->os_redundant_metadata) { case ZFS_REDUNDANT_METADATA_ALL: copies++; + gang_copies++; break; case ZFS_REDUNDANT_METADATA_MOST: if (level >= zfs_redundant_metadata_most_ditto_level || DMU_OT_IS_METADATA(type) || (wp & WP_SPILL)) copies++; + if (level + 1 >= + 
zfs_redundant_metadata_most_ditto_level || + DMU_OT_IS_METADATA(type) || (wp & WP_SPILL)) + gang_copies++; break; case ZFS_REDUNDANT_METADATA_SOME: - if (DMU_OT_IS_CRITICAL(type)) + if (DMU_OT_IS_CRITICAL(type)) { copies++; + gang_copies++; + } else if (DMU_OT_IS_METADATA(type)) { + gang_copies++; + } break; case ZFS_REDUNDANT_METADATA_NONE: break; @@ -2436,6 +2447,12 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags & ZCHECKSUM_FLAG_NOPWRITE) && compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); + + if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL || + (os->os_redundant_metadata == + ZFS_REDUNDANT_METADATA_MOST && + zfs_redundant_metadata_most_ditto_level <= 1)) + gang_copies++; } /* @@ -2452,6 +2469,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) if (DMU_OT_IS_ENCRYPTED(type)) { copies = MIN(copies, SPA_DVAS_PER_BP - 1); + gang_copies = MIN(gang_copies, SPA_DVAS_PER_BP - 1); nopwrite = B_FALSE; } else { dedup = B_FALSE; @@ -2469,6 +2487,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) zp->zp_type = (wp & WP_SPILL) ? 
dn->dn_bonustype : type; zp->zp_level = level; zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa)); + zp->zp_gang_copies = MIN(gang_copies, spa_max_replication(os->os_spa)); zp->zp_dedup = dedup; zp->zp_dedup_verify = dedup && dedup_verify; zp->zp_nopwrite = nopwrite; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 91e3ca1cf..a636ae73b 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -2300,6 +2300,9 @@ flush_write_batch_impl(struct receive_writer_arg *rwa) zp.zp_nopwrite = B_FALSE; zp.zp_copies = MIN(zp.zp_copies, SPA_DVAS_PER_BP - 1); + zp.zp_gang_copies = + MIN(zp.zp_gang_copies, + SPA_DVAS_PER_BP - 1); } zio_flags |= ZIO_FLAG_RAW; } else if (DRR_WRITE_COMPRESSED(drrw)) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 50dbafa09..63f57cf26 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1415,8 +1415,8 @@ zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, } void -zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite, - boolean_t brtwrite) +zio_write_override(zio_t *zio, blkptr_t *bp, int copies, int gang_copies, + boolean_t nopwrite, boolean_t brtwrite) { ASSERT(zio->io_type == ZIO_TYPE_WRITE); ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); @@ -1433,6 +1433,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite, zio->io_prop.zp_nopwrite = nopwrite; zio->io_prop.zp_brtwrite = brtwrite; zio->io_prop.zp_copies = copies; + zio->io_prop.zp_gang_copies = gang_copies; zio->io_bp_override = bp; } @@ -3140,15 +3141,13 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA); /* - * If one copy was requested, store 2 copies of the GBH, so that we - * can still traverse all the data (e.g. to free or scrub) even if a - * block is damaged. Note that we can't store 3 copies of the GBH in - * all cases, e.g. with encryption, which uses DVA[2] for the IV+salt. 
+ * Store multiple copies of the GBH, so that we can still traverse + * all the data (e.g. to free or scrub) even if a block is damaged. + * This value respects the redundant_metadata property. */ - int gbh_copies = copies; - if (gbh_copies == 1) { - gbh_copies = MIN(2, spa_max_replication(spa)); - } + int gbh_copies = gio->io_prop.zp_gang_copies; + ASSERT3S(gbh_copies, >, 0); + ASSERT3S(gbh_copies, <=, SPA_DVAS_PER_BP); ASSERT(ZIO_HAS_ALLOCATOR(pio)); int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; @@ -3168,6 +3167,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) * since metaslab_class_throttle_reserve() always allows * additional reservations for gang blocks. */ + ASSERT3U(gbh_copies, >=, copies); VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies, pio->io_allocator, pio, flags)); } @@ -3230,6 +3230,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) zp.zp_type = zp.zp_storage_type = DMU_OT_NONE; zp.zp_level = 0; zp.zp_copies = gio->io_prop.zp_copies; + zp.zp_gang_copies = gio->io_prop.zp_gang_copies; zp.zp_dedup = B_FALSE; zp.zp_dedup_verify = B_FALSE; zp.zp_nopwrite = B_FALSE; @@ -3950,7 +3951,7 @@ zio_ddt_write(zio_t *zio) * grow the DDT entry by to satisfy the request. 
*/ zio_prop_t czp = *zp; - czp.zp_copies = need_dvas; + czp.zp_copies = czp.zp_gang_copies = need_dvas; zio_t *cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, zio->io_orig_size, zio->io_orig_size, &czp, zio_ddt_child_write_ready, NULL, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 70b35e30e..1f8aca0d9 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -724,6 +724,10 @@ tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos'] tags = ['functional', 'features', 'large_dnode'] +[tests/functional/gang_blocks] +tests = ['gang_blocks_redundant'] +tags = ['functional', 'gang_blocks'] + [tests/functional/grow] pre = post = diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index 0a546dd44..79dc64ad9 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -64,6 +64,7 @@ MAX_DATASET_NESTING max_dataset_nesting zfs_max_dataset_nesting MAX_MISSING_TVDS max_missing_tvds zfs_max_missing_tvds METASLAB_DEBUG_LOAD metaslab.debug_load metaslab_debug_load METASLAB_FORCE_GANGING metaslab.force_ganging metaslab_force_ganging +METASLAB_FORCE_GANGING_PCT metaslab.force_ganging_pct metaslab_force_ganging_pct MULTIHOST_FAIL_INTERVALS multihost.fail_intervals zfs_multihost_fail_intervals MULTIHOST_HISTORY multihost.history zfs_multihost_history MULTIHOST_IMPORT_INTERVALS multihost.import_intervals zfs_multihost_import_intervals diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 0942082cf..bce546d06 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -275,6 +275,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/events/events.cfg \ functional/events/events_common.kshlib \ functional/fault/fault.cfg \ + functional/gang_blocks/gang_blocks.kshlib \ functional/grow/grow.cfg \ 
functional/history/history.cfg \ functional/history/history_common.kshlib \ @@ -1558,6 +1559,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/features/large_dnode/large_dnode_008_pos.ksh \ functional/features/large_dnode/large_dnode_009_pos.ksh \ functional/features/large_dnode/setup.ksh \ + functional/gang_blocks/cleanup.ksh \ + functional/gang_blocks/gang_blocks_redundant.ksh \ + functional/gang_blocks/setup.ksh \ functional/grow/grow_pool_001_pos.ksh \ functional/grow/grow_replicas_001_pos.ksh \ functional/history/cleanup.ksh \ diff --git a/tests/zfs-tests/tests/functional/gang_blocks/cleanup.ksh b/tests/zfs-tests/tests/functional/gang_blocks/cleanup.ksh new file mode 100755 index 000000000..4ae6ec16f --- /dev/null +++ b/tests/zfs-tests/tests/functional/gang_blocks/cleanup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 by Klara Inc. +# + +. 
$STF_SUITE/include/libtest.shlib + +restore_tunable METASLAB_FORCE_GANGING +restore_tunable METASLAB_FORCE_GANGING_PCT +default_cleanup diff --git a/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks.kshlib b/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks.kshlib new file mode 100644 index 000000000..8799a1436 --- /dev/null +++ b/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks.kshlib @@ -0,0 +1,120 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 By Klara Inc. +# + +. 
$STF_SUITE/include/libtest.shlib + +# +# Print the verbose zdb output (zdb -dddddd) for an object +# +# $1 filesystem +# $2 object number +# +function get_object_info +{ + typeset fs=$1 + typeset obj=$2 + + zdb -dddddd $fs $obj +} + +# +# $1 filesystem +# $2 path to file +# $3 block filter +# +function get_blocks_filter +{ + typeset fs=$1 + typeset path=$2 + + typeset full_path="$(get_prop mountpoint $fs)/$path" + typeset obj="$(ls -i $full_path | awk '{print $1}')" + + get_object_info $fs $obj | grep $3 | grep -v Dataset +} + +function get_first_block +{ + get_blocks_filter $1 $2 L0 | head -n 1 +} + +function get_first_block_dva +{ + get_first_block $1 $2 | sed 's/.*L0 \([^ ]*\).*/\1/' +} + +# Takes a zdb compressed blkptr line on stdin +function get_num_dvas +{ + sed 's/.*L[0-9] \(.*\) [a-f0-9]*L.*/\1/' | awk '{print NF}' +} + +function check_gang_dva +{ + typeset last_byte="$(echo -n $1 | tail -c 1)" + [[ "$last_byte" == "G" ]] || return 1 + return 0 +} + +function check_is_gang_dva +{ + check_gang_dva $1 || log_fail "Not a gang DVA: \"$1\"" +} + +function check_not_gang_dva +{ + check_gang_dva $1 && log_fail "Gang DVA: \"$1\"" +} + +# +# Get the gang header contents of the given dva in the given pool +# +# $1 pool +# $2 dva +# $3 size (in hexadecimal) +# +function read_gang_header +{ + typeset pool=$1 + typeset dva=$2 + typeset size=$3 + + check_is_gang_dva $dva + + zdb -R $pool "${dva%:*}:$size:g" 2>&1 | grep -v "Found vdev:" +} + +function preamble +{ + save_tunable METASLAB_FORCE_GANGING + save_tunable METASLAB_FORCE_GANGING_PCT +} + +function cleanup +{ + destroy_pool $TESTPOOL + restore_tunable METASLAB_FORCE_GANGING + restore_tunable METASLAB_FORCE_GANGING_PCT +} diff --git a/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks_redundant.ksh b/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks_redundant.ksh new file mode 100755 index 000000000..1c44a7c5e --- /dev/null +++ b/tests/zfs-tests/tests/functional/gang_blocks/gang_blocks_redundant.ksh @@ -0,0 +1,88 @@ 
+#!/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2025 by Klara Inc. +# + +# +# Description: +# Verify that the redundant_metadata setting is respected by gang headers +# +# Strategy: +# 1. Create a filesystem with redundant_metadata={all,most,some,none} +# 2. Verify that gang blocks at different levels have the right amount of redundancy +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib + +log_assert "Verify that gang blocks at different levels have the right amount of redundancy." + +function cleanup2 +{ + for red in all most some none; do zfs destroy $TESTPOOL/$TESTFS-$red; done + cleanup +} + +preamble +log_onexit cleanup2 + +log_must zpool create -f -o ashift=9 $TESTPOOL $DISKS +set_tunable64 METASLAB_FORCE_GANGING 1500 +set_tunable32 METASLAB_FORCE_GANGING_PCT 100 +for red in all most some none; do + log_must zfs create -o redundant_metadata=$red -o recordsize=512 \ + $TESTPOOL/$TESTFS-$red + if [[ "$red" == "all" ]]; then + log_must zfs set recordsize=8k $TESTPOOL/$TESTFS-$red + fi + mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS-$red) + + path="${mountpoint}/file" + log_must dd if=/dev/urandom of=$path bs=1M count=1 + log_must zpool sync $TESTPOOL + num_l0_dvas=$(get_first_block $TESTPOOL/$TESTFS-$red file | get_num_dvas) + if [[ "$red" == "all" ]]; then + [[ "$num_l0_dvas" -eq 2 ]] || \ + log_fail "wrong number of DVAs for L0 in $red: $num_l0_dvas" + else + [[ "$num_l0_dvas" -eq 1 ]] || \ + log_fail "wrong number of DVAs for L0 in $red: $num_l0_dvas" + fi + + num_l1_dvas=$(get_blocks_filter $TESTPOOL/$TESTFS-$red 
file L1 | head -n 1 | get_num_dvas) + if [[ "$red" == "all" || "$red" == "most" ]]; then + [[ "$num_l1_dvas" -eq 2 ]] || \ + log_fail "wrong number of DVAs for L1 in $red: $num_l1_dvas" + else + [[ "$num_l1_dvas" -eq 1 ]] || \ + log_fail "wrong number of DVAs for L1 in $red: $num_l1_dvas" + fi + + for i in `seq 1 80`; do + dd if=/dev/urandom of=/$mountpoint/f$i bs=512 count=1 2>/dev/null || log_fail "dd failed" + done + log_must zpool sync $TESTPOOL + obj_0_gangs=$(get_object_info $TESTPOOL/$TESTFS-$red 0 L0 | grep G) + num_obj_0_dvas=$(echo "$obj_0_gangs" | head -n 1 | get_num_dvas) + if [[ "$red" != "none" ]]; then + [[ "$num_obj_0_dvas" -eq 2 ]] || \ + log_fail "wrong number of DVAs for obj 0 in $red: $num_obj_0_dvas" + else + [[ "$num_obj_0_dvas" -eq 1 ]] || \ + log_fail "wrong number of DVAs for obj 0 in $red: $num_obj_0_dvas" + fi + log_note "Level $red passed" +done + +log_pass "Gang blocks at different levels have the right amount of redundancy." diff --git a/tests/zfs-tests/tests/functional/gang_blocks/setup.ksh b/tests/zfs-tests/tests/functional/gang_blocks/setup.ksh new file mode 100755 index 000000000..0d2b239a0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/gang_blocks/setup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 by Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib + +set_tunable64 METASLAB_FORCE_GANGING 16777217 +set_tunable32 METASLAB_FORCE_GANGING_PCT 0