mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-24 22:22:17 +00:00 
			
		
		
		
	 92dc4ad83d
			
		
	
	
		92dc4ad83d
		
			
		
	
	
	
	
		
			
			Entries in the dbuf cache contribute only the size of the dbuf data to the cache size. Attached "user" data is not counted. This can lead to the data currently "owned" by the cache consuming more memory than accounting appears to show. In some cases (e.g. a metadnode data block with all child dnode_t slots allocated), the actual size can be as much as 3x what the cache believes it to be. This is arguably correct behaviour, as the cache is only tracking the size of the dbuf data, not even the overhead of the dbuf_t. On the other hand, in the above case of dnodes, evicting cached metadnode dbufs is the only current way to reclaim the dnode objects, and can lead to the situation where the dbuf cache appears to be comfortably within its target memory window and yet is holding enormous amounts of slab memory that cannot be reclaimed. This commit adds a facility for a dbuf user to artificially inflate the apparent size of the dbuf for caching purposes. This at least allows for cache tuning to be adjusted to match something closer to the real memory overhead. metadnode dbufs carry a >1KiB allocation per dnode in their user data. This informs the dbuf cache machinery of that fact, allowing it to make better decisions when evicting dbufs. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #15511
		
			
				
	
	
		
			232 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			232 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * CDDL HEADER START
 | |
|  *
 | |
|  * The contents of this file are subject to the terms of the
 | |
|  * Common Development and Distribution License (the "License").
 | |
|  * You may not use this file except in compliance with the License.
 | |
|  *
 | |
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | |
|  * or https://opensource.org/licenses/CDDL-1.0.
 | |
|  * See the License for the specific language governing permissions
 | |
|  * and limitations under the License.
 | |
|  *
 | |
|  * When distributing Covered Code, include this CDDL HEADER in each
 | |
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | |
|  * If applicable, add the following below this CDDL HEADER, with the
 | |
|  * fields enclosed by brackets "[]" replaced with your own identifying
 | |
|  * information: Portions Copyright [yyyy] [name of copyright owner]
 | |
|  *
 | |
|  * CDDL HEADER END
 | |
|  */
 | |
| 
 | |
| #include <sys/zfs_context.h>
 | |
| #include <sys/dbuf.h>
 | |
| #include <sys/dmu_objset.h>
 | |
| 
 | |
/*
 * Calculate the index of the arc header for the state; disabled (0) by
 * default.  The value is handed unchanged to arc_buf_info() each time a
 * dbuf record is formatted, and is exported as a read-write module
 * parameter at the bottom of this file.
 */
int zfs_dbuf_state_index = 0;
 | |
| 
 | |
| /*
 | |
|  * ==========================================================================
 | |
|  * Dbuf Hash Read Routines
 | |
|  * ==========================================================================
 | |
|  */
 | |
| typedef struct dbuf_stats_t {
 | |
| 	kmutex_t		lock;
 | |
| 	kstat_t			*kstat;
 | |
| 	dbuf_hash_table_t	*hash;
 | |
| 	int			idx;
 | |
| } dbuf_stats_t;
 | |
| 
 | |
| static dbuf_stats_t dbuf_stats_hash_table;
 | |
| 
 | |
/*
 * Raw kstat "headers" op: write the two-line column header into buf.
 *
 * The first line labels the three column groups (dbuf, arcbuf, dnode),
 * padded so that the '|' separators line up with those on the second
 * line, which names every individual column.  Output is bounded by size
 * through snprintf().
 *
 * Always returns 0.
 */
static int
dbuf_stats_hash_table_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    /* line 1: group labels */
	    "%-105s | %-119s | %s\n"
	    /* line 2: dbuf columns */
	    "%-16s %-8s %-8s %-8s %-8s %-10s %-8s %-8s %-5s %-5s %-7s %3s | "
	    /* line 2: arcbuf columns */
	    "%-5s %-5s %-9s %-6s %-8s %-12s "
	    "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-6s | "
	    /* line 2: dnode columns */
	    "%-6s %-6s %-8s %-8s %-6s %-6s %-6s %-8s %-8s\n",
	    /* group labels */
	    "dbuf", "arcbuf", "dnode",
	    /* dbuf columns */
	    "pool", "objset", "object", "level", "blkid", "offset",
	    "dbsize", "usize", "meta", "state", "dbholds", "dbc",
	    /* arcbuf columns */
	    "list", "atype", "flags", "count", "asize", "access",
	    "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
	    "l2_comp", "aholds",
	    /* dnode columns */
	    "dtype", "btype", "data_bs", "meta_bs", "bsize", "lvls",
	    "dholds", "blocks", "dsize");

	return (0);
}
 | |
| 
 | |
| static int
 | |
| __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
 | |
| {
 | |
| 	arc_buf_info_t abi = { 0 };
 | |
| 	dmu_object_info_t doi = { 0 };
 | |
| 	dnode_t *dn = DB_DNODE(db);
 | |
| 	size_t nwritten;
 | |
| 
 | |
| 	if (db->db_buf)
 | |
| 		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);
 | |
| 
 | |
| 	__dmu_object_info_from_dnode(dn, &doi);
 | |
| 
 | |
| 	nwritten = snprintf(buf, size,
 | |
| 	    "%-16s %-8llu %-8lld %-8lld %-8lld %-10llu %-8llu %-8llu "
 | |
| 	    "%-5d %-5d %-7lu %-3d | %-5d %-5d 0x%-7x %-6lu %-8llu %-12llu "
 | |
| 	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-6lu | "
 | |
| 	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-6lu %-8llu %-8llu\n",
 | |
| 	    /* dmu_buf_impl_t */
 | |
| 	    spa_name(dn->dn_objset->os_spa),
 | |
| 	    (u_longlong_t)dmu_objset_id(db->db_objset),
 | |
| 	    (longlong_t)db->db.db_object,
 | |
| 	    (longlong_t)db->db_level,
 | |
| 	    (longlong_t)db->db_blkid,
 | |
| 	    (u_longlong_t)db->db.db_offset,
 | |
| 	    (u_longlong_t)db->db.db_size,
 | |
| 	    (u_longlong_t)dmu_buf_user_size(&db->db),
 | |
| 	    !!dbuf_is_metadata(db),
 | |
| 	    db->db_state,
 | |
| 	    (ulong_t)zfs_refcount_count(&db->db_holds),
 | |
| 	    multilist_link_active(&db->db_cache_link),
 | |
| 	    /* arc_buf_info_t */
 | |
| 	    abi.abi_state_type,
 | |
| 	    abi.abi_state_contents,
 | |
| 	    abi.abi_flags,
 | |
| 	    (ulong_t)abi.abi_bufcnt,
 | |
| 	    (u_longlong_t)abi.abi_size,
 | |
| 	    (u_longlong_t)abi.abi_access,
 | |
| 	    (ulong_t)abi.abi_mru_hits,
 | |
| 	    (ulong_t)abi.abi_mru_ghost_hits,
 | |
| 	    (ulong_t)abi.abi_mfu_hits,
 | |
| 	    (ulong_t)abi.abi_mfu_ghost_hits,
 | |
| 	    (ulong_t)abi.abi_l2arc_hits,
 | |
| 	    (u_longlong_t)abi.abi_l2arc_dattr,
 | |
| 	    (u_longlong_t)abi.abi_l2arc_asize,
 | |
| 	    abi.abi_l2arc_compress,
 | |
| 	    (ulong_t)abi.abi_holds,
 | |
| 	    /* dmu_object_info_t */
 | |
| 	    doi.doi_type,
 | |
| 	    doi.doi_bonus_type,
 | |
| 	    (ulong_t)doi.doi_data_block_size,
 | |
| 	    (ulong_t)doi.doi_metadata_block_size,
 | |
| 	    (u_longlong_t)doi.doi_bonus_size,
 | |
| 	    (ulong_t)doi.doi_indirection,
 | |
| 	    (ulong_t)zfs_refcount_count(&dn->dn_holds),
 | |
| 	    (u_longlong_t)doi.doi_fill_count,
 | |
| 	    (u_longlong_t)doi.doi_max_offset);
 | |
| 
 | |
| 	if (nwritten >= size)
 | |
| 		return (size);
 | |
| 
 | |
| 	return (nwritten + 1);
 | |
| }
 | |
| 
 | |
| static int
 | |
| dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
 | |
| {
 | |
| 	dbuf_stats_t *dsh = (dbuf_stats_t *)data;
 | |
| 	dbuf_hash_table_t *h = dsh->hash;
 | |
| 	dmu_buf_impl_t *db;
 | |
| 	int length, error = 0;
 | |
| 
 | |
| 	ASSERT3S(dsh->idx, >=, 0);
 | |
| 	ASSERT3S(dsh->idx, <=, h->hash_table_mask);
 | |
| 	if (size)
 | |
| 		buf[0] = 0;
 | |
| 
 | |
| 	mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
 | |
| 	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
 | |
| 		/*
 | |
| 		 * Returning ENOMEM will cause the data and header functions
 | |
| 		 * to be called with a larger scratch buffers.
 | |
| 		 */
 | |
| 		if (size < 512) {
 | |
| 			error = SET_ERROR(ENOMEM);
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		mutex_enter(&db->db_mtx);
 | |
| 
 | |
| 		if (db->db_state != DB_EVICTING) {
 | |
| 			length = __dbuf_stats_hash_table_data(buf, size, db);
 | |
| 			buf += length;
 | |
| 			size -= length;
 | |
| 		}
 | |
| 
 | |
| 		mutex_exit(&db->db_mtx);
 | |
| 	}
 | |
| 	mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
 | |
| 
 | |
| 	return (error);
 | |
| }
 | |
| 
 | |
| static void *
 | |
| dbuf_stats_hash_table_addr(kstat_t *ksp, loff_t n)
 | |
| {
 | |
| 	dbuf_stats_t *dsh = ksp->ks_private;
 | |
| 
 | |
| 	ASSERT(MUTEX_HELD(&dsh->lock));
 | |
| 
 | |
| 	if (n <= dsh->hash->hash_table_mask) {
 | |
| 		dsh->idx = n;
 | |
| 		return (dsh);
 | |
| 	}
 | |
| 
 | |
| 	return (NULL);
 | |
| }
 | |
| 
 | |
| static void
 | |
| dbuf_stats_hash_table_init(dbuf_hash_table_t *hash)
 | |
| {
 | |
| 	dbuf_stats_t *dsh = &dbuf_stats_hash_table;
 | |
| 	kstat_t *ksp;
 | |
| 
 | |
| 	mutex_init(&dsh->lock, NULL, MUTEX_DEFAULT, NULL);
 | |
| 	dsh->hash = hash;
 | |
| 
 | |
| 	ksp = kstat_create("zfs", 0, "dbufs", "misc",
 | |
| 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
 | |
| 	dsh->kstat = ksp;
 | |
| 
 | |
| 	if (ksp) {
 | |
| 		ksp->ks_lock = &dsh->lock;
 | |
| 		ksp->ks_ndata = UINT32_MAX;
 | |
| 		ksp->ks_private = dsh;
 | |
| 		kstat_set_raw_ops(ksp, dbuf_stats_hash_table_headers,
 | |
| 		    dbuf_stats_hash_table_data, dbuf_stats_hash_table_addr);
 | |
| 		kstat_install(ksp);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void
 | |
| dbuf_stats_hash_table_destroy(void)
 | |
| {
 | |
| 	dbuf_stats_t *dsh = &dbuf_stats_hash_table;
 | |
| 	kstat_t *ksp;
 | |
| 
 | |
| 	ksp = dsh->kstat;
 | |
| 	if (ksp)
 | |
| 		kstat_delete(ksp);
 | |
| 
 | |
| 	mutex_destroy(&dsh->lock);
 | |
| }
 | |
| 
 | |
| void
 | |
| dbuf_stats_init(dbuf_hash_table_t *hash)
 | |
| {
 | |
| 	dbuf_stats_hash_table_init(hash);
 | |
| }
 | |
| 
 | |
/*
 * Public entry point: tear down what dbuf_stats_init() set up.
 */
void
dbuf_stats_destroy(void)
{
	dbuf_stats_hash_table_destroy();
}
 | |
| 
 | |
| ZFS_MODULE_PARAM(zfs, zfs_, dbuf_state_index, INT, ZMOD_RW,
 | |
| 	"Calculate arc header index");
 |