Add kstats tracking gang allocations

Gang blocks have a significant impact on the long and short term
performance of a zpool, but there is not a lot of observability into
whether they're being used.  This change adds gang-specific kstats to
ZFS, to better allow users to see whether ganging is happening.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Closes #17003
This commit is contained in:
Paul Dagnelie 2025-02-06 12:42:50 -08:00 committed by Ameer Hamza
parent 2adca179b6
commit a0f8d3c584

View File

@ -145,10 +145,53 @@ static const int zio_buf_debug_limit = 16384;
static const int zio_buf_debug_limit = 0; static const int zio_buf_debug_limit = 0;
#endif #endif
typedef struct zio_stats {
kstat_named_t ziostat_total_allocations;
kstat_named_t ziostat_alloc_class_fallbacks;
kstat_named_t ziostat_gang_writes;
kstat_named_t ziostat_gang_multilevel;
} zio_stats_t;
static zio_stats_t zio_stats = {
{ "total_allocations", KSTAT_DATA_UINT64 },
{ "alloc_class_fallbacks", KSTAT_DATA_UINT64 },
{ "gang_writes", KSTAT_DATA_UINT64 },
{ "gang_multilevel", KSTAT_DATA_UINT64 },
};
struct {
wmsum_t ziostat_total_allocations;
wmsum_t ziostat_alloc_class_fallbacks;
wmsum_t ziostat_gang_writes;
wmsum_t ziostat_gang_multilevel;
} ziostat_sums;
#define ZIOSTAT_BUMP(stat) wmsum_add(&ziostat_sums.stat, 1);
static kstat_t *zio_ksp;
static inline void __zio_execute(zio_t *zio); static inline void __zio_execute(zio_t *zio);
static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t); static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);
static int
zio_kstats_update(kstat_t *ksp, int rw)
{
zio_stats_t *zs = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zs->ziostat_total_allocations.value.ui64 =
wmsum_value(&ziostat_sums.ziostat_total_allocations);
zs->ziostat_alloc_class_fallbacks.value.ui64 =
wmsum_value(&ziostat_sums.ziostat_alloc_class_fallbacks);
zs->ziostat_gang_writes.value.ui64 =
wmsum_value(&ziostat_sums.ziostat_gang_writes);
zs->ziostat_gang_multilevel.value.ui64 =
wmsum_value(&ziostat_sums.ziostat_gang_multilevel);
return (0);
}
void void
zio_init(void) zio_init(void)
{ {
@ -159,6 +202,19 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache", zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
wmsum_init(&ziostat_sums.ziostat_total_allocations, 0);
wmsum_init(&ziostat_sums.ziostat_alloc_class_fallbacks, 0);
wmsum_init(&ziostat_sums.ziostat_gang_writes, 0);
wmsum_init(&ziostat_sums.ziostat_gang_multilevel, 0);
zio_ksp = kstat_create("zfs", 0, "zio_stats",
"misc", KSTAT_TYPE_NAMED, sizeof (zio_stats) /
sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (zio_ksp != NULL) {
zio_ksp->ks_data = &zio_stats;
zio_ksp->ks_update = zio_kstats_update;
kstat_install(zio_ksp);
}
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
size_t align, cflags, data_cflags; size_t align, cflags, data_cflags;
@ -286,6 +342,16 @@ zio_fini(void)
VERIFY3P(zio_data_buf_cache[i], ==, NULL); VERIFY3P(zio_data_buf_cache[i], ==, NULL);
} }
if (zio_ksp != NULL) {
kstat_delete(zio_ksp);
zio_ksp = NULL;
}
wmsum_fini(&ziostat_sums.ziostat_total_allocations);
wmsum_fini(&ziostat_sums.ziostat_alloc_class_fallbacks);
wmsum_fini(&ziostat_sums.ziostat_gang_writes);
wmsum_fini(&ziostat_sums.ziostat_gang_multilevel);
kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_link_cache);
kmem_cache_destroy(zio_cache); kmem_cache_destroy(zio_cache);
@ -4053,6 +4119,7 @@ zio_dva_allocate(zio_t *zio)
mc = spa_preferred_class(spa, zio); mc = spa_preferred_class(spa, zio);
zio->io_metaslab_class = mc; zio->io_metaslab_class = mc;
} }
ZIOSTAT_BUMP(ziostat_total_allocations);
/* /*
* Try allocating the block in the usual metaslab class. * Try allocating the block in the usual metaslab class.
@ -4118,6 +4185,7 @@ zio_dva_allocate(zio_t *zio)
error); error);
} }
ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks);
error = metaslab_alloc(spa, mc, zio->io_size, bp, error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags, zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator); &zio->io_alloc_list, zio, zio->io_allocator);
@ -4130,6 +4198,9 @@ zio_dva_allocate(zio_t *zio)
spa_name(spa), zio, (u_longlong_t)zio->io_size, spa_name(spa), zio, (u_longlong_t)zio->io_size,
error); error);
} }
ZIOSTAT_BUMP(ziostat_gang_writes);
if (flags & METASLAB_GANG_CHILD)
ZIOSTAT_BUMP(ziostat_gang_multilevel);
return (zio_write_gang_block(zio, mc)); return (zio_write_gang_block(zio, mc));
} }
if (error != 0) { if (error != 0) {
@ -4221,6 +4292,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
int flags = METASLAB_ZIL; int flags = METASLAB_ZIL;
int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object) int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object)
% spa->spa_alloc_count; % spa->spa_alloc_count;
ZIOSTAT_BUMP(ziostat_total_allocations);
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, NULL, allocator); txg, NULL, flags, &io_alloc_list, NULL, allocator);
*slog = (error == 0); *slog = (error == 0);
@ -4230,6 +4302,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
&io_alloc_list, NULL, allocator); &io_alloc_list, NULL, allocator);
} }
if (error != 0) { if (error != 0) {
ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks);
error = metaslab_alloc(spa, spa_normal_class(spa), size, error = metaslab_alloc(spa, spa_normal_class(spa), size,
new_bp, 1, txg, NULL, flags, new_bp, 1, txg, NULL, flags,
&io_alloc_list, NULL, allocator); &io_alloc_list, NULL, allocator);