mirror of
https://git.proxmox.com/git/mirror_zfs
synced 2025-04-28 11:40:17 +00:00
Expand fragmentation table to reflect larger possible allocation sizes
When you are using large recordsizes in conjunction with raidz, with incompressible data, you can quite reliably end up making 21 MB allocations. Unfortunately, the fragmentation metric in ZFS considers any metaslab with 16 MB free chunks completely unfragmented, so a metaslab can report 0% fragmentation and still be unable to satisfy an allocation. When using the segment-based metaslab weight, this is inconvenient; when using the space-based one, it can seriously degrade performance. We expand the fragmentation table to extend up to 512 MB, and redefine the table size based on the actual table, rather than having a static define. We also tweak the one variable that depends on fragmentation directly (zfs_metaslab_fragmentation_threshold, raised from 70 to 77). Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Allan Jude <allan@klarasystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Closes #16986
This commit is contained in:
parent
2cccbacefc
commit
40496514b8
@ -1778,7 +1778,7 @@ Normally disabled because these datasets may be missing key data.
|
||||
.It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq u64
|
||||
Minimum number of metaslabs to flush per dirty TXG.
|
||||
.
|
||||
.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq uint
|
||||
.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 77 Ns % Pq uint
|
||||
Allow metaslabs to keep their active state as long as their fragmentation
|
||||
percentage is no more than this value.
|
||||
An active metaslab that exceeds this threshold
|
||||
|
@ -146,7 +146,7 @@ static uint_t zfs_mg_fragmentation_threshold = 95;
|
||||
* active metaslab that exceeds this threshold will no longer keep its active
|
||||
* status allowing better metaslabs to be selected.
|
||||
*/
|
||||
static uint_t zfs_metaslab_fragmentation_threshold = 70;
|
||||
static uint_t zfs_metaslab_fragmentation_threshold = 77;
|
||||
|
||||
/*
|
||||
* When set will load all metaslabs when pool is first opened.
|
||||
@ -2889,8 +2889,6 @@ metaslab_fini(metaslab_t *msp)
|
||||
kmem_free(msp, sizeof (metaslab_t));
|
||||
}
|
||||
|
||||
#define FRAGMENTATION_TABLE_SIZE 17
|
||||
|
||||
/*
|
||||
* This table defines a segment size based fragmentation metric that will
|
||||
* allow each metaslab to derive its own fragmentation value. This is done
|
||||
@ -2901,33 +2899,40 @@ metaslab_fini(metaslab_t *msp)
|
||||
* us the fragmentation metric. This means that a high fragmentation metric
|
||||
* equates to most of the free space being comprised of small segments.
|
||||
* Conversely, if the metric is low, then most of the free space is in
|
||||
* large segments. A 10% change in fragmentation equates to approximately
|
||||
* double the number of segments.
|
||||
* large segments.
|
||||
*
|
||||
* This table defines 0% fragmented space using 16MB segments. Testing has
|
||||
* shown that segments that are greater than or equal to 16MB do not suffer
|
||||
* from drastic performance problems. Using this value, we derive the rest
|
||||
* of the table. Since the fragmentation value is never stored on disk, it
|
||||
* is possible to change these calculations in the future.
|
||||
* This table defines 0% fragmented space using 512M segments. Using this value,
|
||||
* we derive the rest of the table. This table originally went up to 16MB, but
|
||||
* with larger recordsizes, larger ashifts, and use of raidz3, it is possible
|
||||
* to have significantly larger allocations than were previously possible.
|
||||
* Since the fragmentation value is never stored on disk, it is possible to
|
||||
* change these calculations in the future.
|
||||
*/
|
||||
static const int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = {
|
||||
static const int zfs_frag_table[] = {
|
||||
100, /* 512B */
|
||||
100, /* 1K */
|
||||
98, /* 2K */
|
||||
95, /* 4K */
|
||||
90, /* 8K */
|
||||
80, /* 16K */
|
||||
70, /* 32K */
|
||||
60, /* 64K */
|
||||
50, /* 128K */
|
||||
40, /* 256K */
|
||||
30, /* 512K */
|
||||
20, /* 1M */
|
||||
15, /* 2M */
|
||||
10, /* 4M */
|
||||
5, /* 8M */
|
||||
0 /* 16M */
|
||||
99, /* 1K */
|
||||
97, /* 2K */
|
||||
93, /* 4K */
|
||||
88, /* 8K */
|
||||
83, /* 16K */
|
||||
77, /* 32K */
|
||||
71, /* 64K */
|
||||
64, /* 128K */
|
||||
57, /* 256K */
|
||||
50, /* 512K */
|
||||
43, /* 1M */
|
||||
36, /* 2M */
|
||||
29, /* 4M */
|
||||
23, /* 8M */
|
||||
17, /* 16M */
|
||||
12, /* 32M */
|
||||
7, /* 64M */
|
||||
3, /* 128M */
|
||||
1, /* 256M */
|
||||
0, /* 512M */
|
||||
};
|
||||
#define FRAGMENTATION_TABLE_SIZE \
|
||||
(sizeof (zfs_frag_table)/(sizeof (zfs_frag_table[0])))
|
||||
|
||||
/*
|
||||
* Calculate the metaslab's fragmentation metric and set ms_fragmentation.
|
||||
|
Loading…
Reference in New Issue
Block a user