mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-26 07:16:23 +00:00 
			
		
		
		
	ddt: add FDT feature and support for legacy and new on-disk formats
This is the supporting infrastructure for the upcoming dedup features. Traditionally, dedup objects live directly in the MOS root. While their details vary (checksum, type and class), they are all the same "kind" of thing - a store of dedup entries. The new features are more varied than that, and are better thought of as a set of related stores for the overall state of a dedup table. This adds a new feature flag, SPA_FEATURE_FAST_DEDUP. Enabling this will cause new DDTs to be created as a ZAP in the MOS root, named DDT-<checksum>. This is used as the root object for the normal type/class store objects, but will also be a place for any storage required by new features. This commit adds two new fields to ddt_t, for version and flags. These are intended to describe the structure and features of the overall dedup table, and are stored as-is in the DDT root. In this commit, flags are always zero, but the intent is that they can be used to hang optional logic or state onto for new dedup features. Version is always 1. For a "legacy" dedup table, where no DDT root directory exists, the version will be 0. ddt_configure() is expected to determine the version and flags features currently in operation based on whether or not the fast_dedup feature is enabled, and from what's available on disk. In this way, it's possible to support both old and new tables. This also provides a migration path. A legacy setup can be upgraded to FDT by creating the DDT root ZAP, moving the existing objects into it, and setting version and flags appropriately. There's no support for that here, but it would be straightforward to add later and allows the possibility that newer features could be applied to existing dedup tables. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15892
This commit is contained in:
		
							parent
							
								
									bdf4d6be1d
								
							
						
					
					
						commit
						db2b1fdb79
					
				| @ -39,6 +39,12 @@ extern "C" { | |||||||
| 
 | 
 | ||||||
| struct abd; | struct abd; | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * DDT-wide feature flags. These are set in ddt_flags by ddt_configure(). | ||||||
|  |  */ | ||||||
|  | /* No flags yet. */ | ||||||
|  | #define	DDT_FLAG_MASK	(0) | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * DDT on-disk storage object types. Each one corresponds to specific |  * DDT on-disk storage object types. Each one corresponds to specific | ||||||
|  * implementation, see ddt_ops_t. The value itself is not stored on disk. |  * implementation, see ddt_ops_t. The value itself is not stored on disk. | ||||||
| @ -191,6 +197,10 @@ typedef struct { | |||||||
| 	spa_t		*ddt_spa;	/* pool this ddt is on */ | 	spa_t		*ddt_spa;	/* pool this ddt is on */ | ||||||
| 	objset_t	*ddt_os;	/* ddt objset (always MOS) */ | 	objset_t	*ddt_os;	/* ddt objset (always MOS) */ | ||||||
| 
 | 
 | ||||||
|  | 	uint64_t	ddt_dir_object;	/* MOS dir holding ddt objects */ | ||||||
|  | 	uint64_t	ddt_version;	/* DDT version */ | ||||||
|  | 	uint64_t	ddt_flags;	/* FDT option flags */ | ||||||
|  | 
 | ||||||
| 	/* per-type/per-class entry store objects */ | 	/* per-type/per-class entry store objects */ | ||||||
| 	uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES]; | 	uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES]; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -33,6 +33,14 @@ | |||||||
| extern "C" { | extern "C" { | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | /* DDT version numbers */ | ||||||
|  | #define	DDT_VERSION_LEGACY	(0) | ||||||
|  | #define	DDT_VERSION_FDT		(1) | ||||||
|  | 
 | ||||||
|  | /* Names of interesting objects in the DDT root dir */ | ||||||
|  | #define	DDT_DIR_VERSION		"version" | ||||||
|  | #define	DDT_DIR_FLAGS		"flags" | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Ops vector to access a specific DDT object type. |  * Ops vector to access a specific DDT object type. | ||||||
|  */ |  */ | ||||||
|  | |||||||
| @ -376,6 +376,7 @@ typedef struct dmu_buf { | |||||||
| #define	DMU_POOL_TMP_USERREFS		"tmp_userrefs" | #define	DMU_POOL_TMP_USERREFS		"tmp_userrefs" | ||||||
| #define	DMU_POOL_DDT			"DDT-%s-%s-%s" | #define	DMU_POOL_DDT			"DDT-%s-%s-%s" | ||||||
| #define	DMU_POOL_DDT_STATS		"DDT-statistics" | #define	DMU_POOL_DDT_STATS		"DDT-statistics" | ||||||
|  | #define	DMU_POOL_DDT_DIR		"DDT-%s" | ||||||
| #define	DMU_POOL_CREATION_VERSION	"creation_version" | #define	DMU_POOL_CREATION_VERSION	"creation_version" | ||||||
| #define	DMU_POOL_SCAN			"scan" | #define	DMU_POOL_SCAN			"scan" | ||||||
| #define	DMU_POOL_ERRORSCRUB		"error_scrub" | #define	DMU_POOL_ERRORSCRUB		"error_scrub" | ||||||
|  | |||||||
| @ -82,6 +82,7 @@ typedef enum spa_feature { | |||||||
| 	SPA_FEATURE_AVZ_V2, | 	SPA_FEATURE_AVZ_V2, | ||||||
| 	SPA_FEATURE_REDACTION_LIST_SPILL, | 	SPA_FEATURE_REDACTION_LIST_SPILL, | ||||||
| 	SPA_FEATURE_RAIDZ_EXPANSION, | 	SPA_FEATURE_RAIDZ_EXPANSION, | ||||||
|  | 	SPA_FEATURE_FAST_DEDUP, | ||||||
| 	SPA_FEATURES | 	SPA_FEATURES | ||||||
| } spa_feature_t; | } spa_feature_t; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -616,7 +616,7 @@ | |||||||
|     <elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='spa_feature_table' size='2296' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='spa_feature_table' size='2352' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
|     <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> |     <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> | ||||||
| @ -6006,7 +6006,8 @@ | |||||||
|       <enumerator name='SPA_FEATURE_AVZ_V2' value='38'/> |       <enumerator name='SPA_FEATURE_AVZ_V2' value='38'/> | ||||||
|       <enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/> |       <enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/> | ||||||
|       <enumerator name='SPA_FEATURE_RAIDZ_EXPANSION' value='40'/> |       <enumerator name='SPA_FEATURE_RAIDZ_EXPANSION' value='40'/> | ||||||
|       <enumerator name='SPA_FEATURES' value='41'/> |       <enumerator name='SPA_FEATURE_FAST_DEDUP' value='41'/> | ||||||
|  |       <enumerator name='SPA_FEATURES' value='42'/> | ||||||
|     </enum-decl> |     </enum-decl> | ||||||
|     <typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/> |     <typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/> | ||||||
|     <qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/> |     <qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/> | ||||||
| @ -9131,8 +9132,8 @@ | |||||||
|     </function-decl> |     </function-decl> | ||||||
|   </abi-instr> |   </abi-instr> | ||||||
|   <abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'> |   <abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'> | ||||||
|     <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='18368' id='b93e4d14'> |     <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='18816' id='b937914f'> | ||||||
|       <subrange length='41' type-id='7359adad' id='cb834f44'/> |       <subrange length='42' type-id='7359adad' id='cb7c937f'/> | ||||||
|     </array-type-def> |     </array-type-def> | ||||||
|     <enum-decl name='zfeature_flags' id='6db816a4'> |     <enum-decl name='zfeature_flags' id='6db816a4'> | ||||||
|       <underlying-type type-id='9cac1fee'/> |       <underlying-type type-id='9cac1fee'/> | ||||||
| @ -9209,7 +9210,7 @@ | |||||||
|     <pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/> |     <pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/> | ||||||
|     <qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/> |     <qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/> | ||||||
|     <pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/> |     <pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/> | ||||||
|     <var-decl name='spa_feature_table' type-id='b93e4d14' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/> |     <var-decl name='spa_feature_table' type-id='b937914f' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/> | ||||||
|     <var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/> |     <var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/> | ||||||
|     <function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'> |     <function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'> | ||||||
|       <parameter type-id='80f4b756'/> |       <parameter type-id='80f4b756'/> | ||||||
|  | |||||||
| @ -17,8 +17,9 @@ | |||||||
| .\" Copyright (c) 2019, Klara Inc. | .\" Copyright (c) 2019, Klara Inc. | ||||||
| .\" Copyright (c) 2019, Allan Jude | .\" Copyright (c) 2019, Allan Jude | ||||||
| .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> | .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> | ||||||
|  | .\" Copyright (c) 2023, Klara Inc. | ||||||
| .\" | .\" | ||||||
| .Dd June 23, 2022 | .Dd February 14, 2024 | ||||||
| .Dt ZPOOL-FEATURES 7 | .Dt ZPOOL-FEATURES 7 | ||||||
| .Os | .Os | ||||||
| . | . | ||||||
| @ -550,6 +551,20 @@ when an encrypted dataset is created and will be returned to the | |||||||
| .Sy enabled | .Sy enabled | ||||||
| state when all datasets that use this feature are destroyed. | state when all datasets that use this feature are destroyed. | ||||||
| . | . | ||||||
|  | .feature com.klarasystems fast_dedup yes | ||||||
|  | This feature allows more advanced deduplication features to be enabled on new | ||||||
|  | dedup tables. | ||||||
|  | .Pp | ||||||
|  | This feature will be | ||||||
|  | .Sy active | ||||||
|  | when the first deduplicated block is written after a new dedup table is created | ||||||
|  | (i.e. after a new pool creation, or new checksum used on a dataset with | ||||||
|  | .Sy dedup | ||||||
|  | enabled). | ||||||
|  | It will be returned to the | ||||||
|  | .Sy enabled | ||||||
|  | state when all deduplicated blocks using it are freed. | ||||||
|  | . | ||||||
| .feature com.delphix extensible_dataset no | .feature com.delphix extensible_dataset no | ||||||
| This feature allows more flexible use of internal ZFS data structures, | This feature allows more flexible use of internal ZFS data structures, | ||||||
| and exists for other features to depend on. | and exists for other features to depend on. | ||||||
|  | |||||||
| @ -754,6 +754,12 @@ zpool_feature_init(void) | |||||||
| 	    "Support for raidz expansion", | 	    "Support for raidz expansion", | ||||||
| 	    ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); | 	    ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); | ||||||
| 
 | 
 | ||||||
|  | 	zfeature_register(SPA_FEATURE_FAST_DEDUP, | ||||||
|  | 	    "com.klarasystems:fast_dedup", "fast_dedup", | ||||||
|  | 	    "Support for advanced deduplication", | ||||||
|  | 	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, | ||||||
|  | 	    sfeatures); | ||||||
|  | 
 | ||||||
| 	zfs_mod_list_supported_free(sfeatures); | 	zfs_mod_list_supported_free(sfeatures); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										258
									
								
								module/zfs/ddt.c
									
									
									
									
									
								
							
							
						
						
									
										258
									
								
								module/zfs/ddt.c
									
									
									
									
									
								
							| @ -39,6 +39,7 @@ | |||||||
| #include <sys/zio_checksum.h> | #include <sys/zio_checksum.h> | ||||||
| #include <sys/dsl_scan.h> | #include <sys/dsl_scan.h> | ||||||
| #include <sys/abd.h> | #include <sys/abd.h> | ||||||
|  | #include <sys/zfeature.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * # DDT: Deduplication tables |  * # DDT: Deduplication tables | ||||||
| @ -185,6 +186,18 @@ static const char *const ddt_class_name[DDT_CLASSES] = { | |||||||
| 	"unique", | 	"unique", | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * DDT feature flags automatically enabled for each on-disk version. Note that | ||||||
|  |  * versions >0 cannot exist on disk without SPA_FEATURE_FAST_DEDUP enabled. | ||||||
|  |  */ | ||||||
|  | static const uint64_t ddt_version_flags[] = { | ||||||
|  | 	[DDT_VERSION_LEGACY] = 0, | ||||||
|  | 	[DDT_VERSION_FDT] = 0, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* Dummy version to signal that configure is still necessary */ | ||||||
|  | #define	DDT_VERSION_UNCONFIGURED	(UINT64_MAX) | ||||||
|  | 
 | ||||||
| static void | static void | ||||||
| ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class, | ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class, | ||||||
|     dmu_tx_t *tx) |     dmu_tx_t *tx) | ||||||
| @ -196,14 +209,18 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class, | |||||||
| 	    ZCHECKSUM_FLAG_DEDUP; | 	    ZCHECKSUM_FLAG_DEDUP; | ||||||
| 	char name[DDT_NAMELEN]; | 	char name[DDT_NAMELEN]; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT3U(ddt->ddt_dir_object, >, 0); | ||||||
|  | 
 | ||||||
| 	ddt_object_name(ddt, type, class, name); | 	ddt_object_name(ddt, type, class, name); | ||||||
| 
 | 
 | ||||||
| 	ASSERT3U(*objectp, ==, 0); | 	ASSERT3U(*objectp, ==, 0); | ||||||
| 	VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash)); | 	VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash)); | ||||||
| 	ASSERT3U(*objectp, !=, 0); | 	ASSERT3U(*objectp, !=, 0); | ||||||
| 
 | 
 | ||||||
| 	VERIFY0(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, | 	ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED); | ||||||
| 	    sizeof (uint64_t), 1, objectp, tx)); | 
 | ||||||
|  | 	VERIFY0(zap_add(os, ddt->ddt_dir_object, name, sizeof (uint64_t), 1, | ||||||
|  | 	    objectp, tx)); | ||||||
| 
 | 
 | ||||||
| 	VERIFY0(zap_add(os, spa->spa_ddt_stat_object, name, | 	VERIFY0(zap_add(os, spa->spa_ddt_stat_object, name, | ||||||
| 	    sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), | 	    sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), | ||||||
| @ -220,13 +237,15 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class, | |||||||
| 	uint64_t count; | 	uint64_t count; | ||||||
| 	char name[DDT_NAMELEN]; | 	char name[DDT_NAMELEN]; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT3U(ddt->ddt_dir_object, >, 0); | ||||||
|  | 
 | ||||||
| 	ddt_object_name(ddt, type, class, name); | 	ddt_object_name(ddt, type, class, name); | ||||||
| 
 | 
 | ||||||
| 	ASSERT3U(*objectp, !=, 0); | 	ASSERT3U(*objectp, !=, 0); | ||||||
| 	ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); | 	ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); | ||||||
| 	VERIFY0(ddt_object_count(ddt, type, class, &count)); | 	VERIFY0(ddt_object_count(ddt, type, class, &count)); | ||||||
| 	VERIFY0(count); | 	VERIFY0(count); | ||||||
| 	VERIFY0(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx)); | 	VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx)); | ||||||
| 	VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx)); | 	VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx)); | ||||||
| 	VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx)); | 	VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx)); | ||||||
| 	memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t)); | 	memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t)); | ||||||
| @ -243,9 +262,18 @@ ddt_object_load(ddt_t *ddt, ddt_type_t type, ddt_class_t class) | |||||||
| 	char name[DDT_NAMELEN]; | 	char name[DDT_NAMELEN]; | ||||||
| 	int error; | 	int error; | ||||||
| 
 | 
 | ||||||
|  | 	if (ddt->ddt_dir_object == 0) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * If we're configured but the containing dir doesn't exist | ||||||
|  | 		 * yet, then this object can't possibly exist either. | ||||||
|  | 		 */ | ||||||
|  | 		ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED); | ||||||
|  | 		return (SET_ERROR(ENOENT)); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	ddt_object_name(ddt, type, class, name); | 	ddt_object_name(ddt, type, class, name); | ||||||
| 
 | 
 | ||||||
| 	error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, | 	error = zap_lookup(ddt->ddt_os, ddt->ddt_dir_object, name, | ||||||
| 	    sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); | 	    sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); | ||||||
| 	if (error != 0) | 	if (error != 0) | ||||||
| 		return (error); | 		return (error); | ||||||
| @ -684,6 +712,8 @@ ddt_prefetch_all(spa_t *spa) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int ddt_configure(ddt_t *ddt, boolean_t new); | ||||||
|  | 
 | ||||||
| ddt_entry_t * | ddt_entry_t * | ||||||
| ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) | ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) | ||||||
| { | { | ||||||
| @ -697,6 +727,15 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) | |||||||
| 
 | 
 | ||||||
| 	ASSERT(MUTEX_HELD(&ddt->ddt_lock)); | 	ASSERT(MUTEX_HELD(&ddt->ddt_lock)); | ||||||
| 
 | 
 | ||||||
|  | 	if (ddt->ddt_version == DDT_VERSION_UNCONFIGURED) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * This is the first use of this DDT since the pool was | ||||||
|  | 		 * created; finish getting it ready for use. | ||||||
|  | 		 */ | ||||||
|  | 		VERIFY0(ddt_configure(ddt, B_TRUE)); | ||||||
|  | 		ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	ddt_key_fill(&search, bp); | 	ddt_key_fill(&search, bp); | ||||||
| 
 | 
 | ||||||
| 	/* Find an existing live entry */ | 	/* Find an existing live entry */ | ||||||
| @ -837,6 +876,181 @@ ddt_key_compare(const void *x1, const void *x2) | |||||||
| 	return (TREE_ISIGN(cmp)); | 	return (TREE_ISIGN(cmp)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Create the containing dir for this DDT and bump the feature count */ | ||||||
|  | static void | ||||||
|  | ddt_create_dir(ddt_t *ddt, dmu_tx_t *tx) | ||||||
|  | { | ||||||
|  | 	ASSERT3U(ddt->ddt_dir_object, ==, 0); | ||||||
|  | 	ASSERT3U(ddt->ddt_version, ==, DDT_VERSION_FDT); | ||||||
|  | 
 | ||||||
|  | 	char name[DDT_NAMELEN]; | ||||||
|  | 	snprintf(name, DDT_NAMELEN, DMU_POOL_DDT_DIR, | ||||||
|  | 	    zio_checksum_table[ddt->ddt_checksum].ci_name); | ||||||
|  | 
 | ||||||
|  | 	ddt->ddt_dir_object = zap_create_link(ddt->ddt_os, | ||||||
|  | 	    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, name, tx); | ||||||
|  | 
 | ||||||
|  | 	VERIFY0(zap_add(ddt->ddt_os, ddt->ddt_dir_object, DDT_DIR_VERSION, | ||||||
|  | 	    sizeof (uint64_t), 1, &ddt->ddt_version, tx)); | ||||||
|  | 	VERIFY0(zap_add(ddt->ddt_os, ddt->ddt_dir_object, DDT_DIR_FLAGS, | ||||||
|  | 	    sizeof (uint64_t), 1, &ddt->ddt_flags, tx)); | ||||||
|  | 
 | ||||||
|  | 	spa_feature_incr(ddt->ddt_spa, SPA_FEATURE_FAST_DEDUP, tx); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Destroy the containing dir and deactivate the feature */ | ||||||
|  | static void | ||||||
|  | ddt_destroy_dir(ddt_t *ddt, dmu_tx_t *tx) | ||||||
|  | { | ||||||
|  | 	ASSERT3U(ddt->ddt_dir_object, !=, 0); | ||||||
|  | 	ASSERT3U(ddt->ddt_dir_object, !=, DMU_POOL_DIRECTORY_OBJECT); | ||||||
|  | 	ASSERT3U(ddt->ddt_version, ==, DDT_VERSION_FDT); | ||||||
|  | 
 | ||||||
|  | 	char name[DDT_NAMELEN]; | ||||||
|  | 	snprintf(name, DDT_NAMELEN, DMU_POOL_DDT_DIR, | ||||||
|  | 	    zio_checksum_table[ddt->ddt_checksum].ci_name); | ||||||
|  | 
 | ||||||
|  | 	for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | ||||||
|  | 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | ||||||
|  | 			ASSERT(!ddt_object_exists(ddt, type, class)); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	uint64_t count; | ||||||
|  | 	ASSERT0(zap_count(ddt->ddt_os, ddt->ddt_dir_object, &count)); | ||||||
|  | 	ASSERT0(zap_contains(ddt->ddt_os, ddt->ddt_dir_object, | ||||||
|  | 	    DDT_DIR_VERSION)); | ||||||
|  | 	ASSERT0(zap_contains(ddt->ddt_os, ddt->ddt_dir_object, DDT_DIR_FLAGS)); | ||||||
|  | 	ASSERT3U(count, ==, 2); | ||||||
|  | 
 | ||||||
|  | 	VERIFY0(zap_remove(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, tx)); | ||||||
|  | 	VERIFY0(zap_destroy(ddt->ddt_os, ddt->ddt_dir_object, tx)); | ||||||
|  | 
 | ||||||
|  | 	ddt->ddt_dir_object = 0; | ||||||
|  | 
 | ||||||
|  | 	spa_feature_decr(ddt->ddt_spa, SPA_FEATURE_FAST_DEDUP, tx); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Determine the version, flags and on-disk layout from what's already stored. If there's | ||||||
|  |  * nothing stored, then if new is false, returns ENOENT, and if true, selects | ||||||
|  |  * based on pool config. | ||||||
|  |  */ | ||||||
|  | static int | ||||||
|  | ddt_configure(ddt_t *ddt, boolean_t new) | ||||||
|  | { | ||||||
|  | 	spa_t *spa = ddt->ddt_spa; | ||||||
|  | 	char name[DDT_NAMELEN]; | ||||||
|  | 	int error; | ||||||
|  | 
 | ||||||
|  | 	ASSERT3U(spa_load_state(spa), !=, SPA_LOAD_CREATE); | ||||||
|  | 
 | ||||||
|  | 	boolean_t fdt_enabled = | ||||||
|  | 	    spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP); | ||||||
|  | 	boolean_t fdt_active = | ||||||
|  | 	    spa_feature_is_active(spa, SPA_FEATURE_FAST_DEDUP); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * First, look for the global DDT stats object. If it's not there, then | ||||||
|  | 	 * there's never been a DDT written before ever, and we know we're | ||||||
|  | 	 * starting from scratch. | ||||||
|  | 	 */ | ||||||
|  | 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | ||||||
|  | 	    DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, | ||||||
|  | 	    &spa->spa_ddt_stat_object); | ||||||
|  | 	if (error != 0) { | ||||||
|  | 		if (error != ENOENT) | ||||||
|  | 			return (error); | ||||||
|  | 		goto not_found; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (fdt_active) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * Now look for a DDT directory. If it exists, then it has | ||||||
|  | 		 * everything we need. | ||||||
|  | 		 */ | ||||||
|  | 		snprintf(name, DDT_NAMELEN, DMU_POOL_DDT_DIR, | ||||||
|  | 		    zio_checksum_table[ddt->ddt_checksum].ci_name); | ||||||
|  | 
 | ||||||
|  | 		error = zap_lookup(spa->spa_meta_objset, | ||||||
|  | 		    DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), 1, | ||||||
|  | 		    &ddt->ddt_dir_object); | ||||||
|  | 		if (error == 0) { | ||||||
|  | 			ASSERT3U(spa->spa_meta_objset, ==, ddt->ddt_os); | ||||||
|  | 
 | ||||||
|  | 			error = zap_lookup(ddt->ddt_os, ddt->ddt_dir_object, | ||||||
|  | 			    DDT_DIR_VERSION, sizeof (uint64_t), 1, | ||||||
|  | 			    &ddt->ddt_version); | ||||||
|  | 			if (error != 0) | ||||||
|  | 				return (error); | ||||||
|  | 
 | ||||||
|  | 			error = zap_lookup(ddt->ddt_os, ddt->ddt_dir_object, | ||||||
|  | 			    DDT_DIR_FLAGS, sizeof (uint64_t), 1, | ||||||
|  | 			    &ddt->ddt_flags); | ||||||
|  | 			if (error != 0) | ||||||
|  | 				return (error); | ||||||
|  | 
 | ||||||
|  | 			if (ddt->ddt_version != DDT_VERSION_FDT) { | ||||||
|  | 				zfs_dbgmsg("ddt_configure: spa=%s ddt_dir=%s " | ||||||
|  | 				    "unknown version %llu", spa_name(spa), | ||||||
|  | 				    name, (u_longlong_t)ddt->ddt_version); | ||||||
|  | 				return (SET_ERROR(EINVAL)); | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			if ((ddt->ddt_flags & ~DDT_FLAG_MASK) != 0) { | ||||||
|  | 				zfs_dbgmsg("ddt_configure: spa=%s ddt_dir=%s " | ||||||
|  | 				    "version=%llu unknown flags %llx", | ||||||
|  | 				    spa_name(spa), name, | ||||||
|  | 				    (u_longlong_t)ddt->ddt_flags, | ||||||
|  | 				    (u_longlong_t)ddt->ddt_version); | ||||||
|  | 				return (SET_ERROR(EINVAL)); | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			return (0); | ||||||
|  | 		} | ||||||
|  | 		if (error != ENOENT) | ||||||
|  | 			return (error); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Any object in the root indicates a traditional setup. */ | ||||||
|  | 	for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | ||||||
|  | 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | ||||||
|  | 			ddt_object_name(ddt, type, class, name); | ||||||
|  | 			uint64_t obj; | ||||||
|  | 			error = zap_lookup(spa->spa_meta_objset, | ||||||
|  | 			    DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), | ||||||
|  | 			    1, &obj); | ||||||
|  | 			if (error == ENOENT) | ||||||
|  | 				continue; | ||||||
|  | 			if (error != 0) | ||||||
|  | 				return (error); | ||||||
|  | 
 | ||||||
|  | 			ddt->ddt_version = DDT_VERSION_LEGACY; | ||||||
|  | 			ddt->ddt_flags = ddt_version_flags[ddt->ddt_version]; | ||||||
|  | 			ddt->ddt_dir_object = DMU_POOL_DIRECTORY_OBJECT; | ||||||
|  | 
 | ||||||
|  | 			return (0); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | not_found: | ||||||
|  | 	if (!new) | ||||||
|  | 		return (SET_ERROR(ENOENT)); | ||||||
|  | 
 | ||||||
|  | 	/* Nothing on disk, so set up for the best version we can */ | ||||||
|  | 	if (fdt_enabled) { | ||||||
|  | 		ddt->ddt_version = DDT_VERSION_FDT; | ||||||
|  | 		ddt->ddt_flags = ddt_version_flags[ddt->ddt_version]; | ||||||
|  | 		ddt->ddt_dir_object = 0; /* create on first use */ | ||||||
|  | 	} else { | ||||||
|  | 		ddt->ddt_version = DDT_VERSION_LEGACY; | ||||||
|  | 		ddt->ddt_flags = ddt_version_flags[ddt->ddt_version]; | ||||||
|  | 		ddt->ddt_dir_object = DMU_POOL_DIRECTORY_OBJECT; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (0); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static ddt_t * | static ddt_t * | ||||||
| ddt_table_alloc(spa_t *spa, enum zio_checksum c) | ddt_table_alloc(spa_t *spa, enum zio_checksum c) | ||||||
| { | { | ||||||
| @ -853,6 +1067,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c) | |||||||
| 	ddt->ddt_checksum = c; | 	ddt->ddt_checksum = c; | ||||||
| 	ddt->ddt_spa = spa; | 	ddt->ddt_spa = spa; | ||||||
| 	ddt->ddt_os = spa->spa_meta_objset; | 	ddt->ddt_os = spa->spa_meta_objset; | ||||||
|  | 	ddt->ddt_version = DDT_VERSION_UNCONFIGURED; | ||||||
| 
 | 
 | ||||||
| 	return (ddt); | 	return (ddt); | ||||||
| } | } | ||||||
| @ -889,7 +1104,6 @@ ddt_load(spa_t *spa) | |||||||
| 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | ||||||
| 	    DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, | 	    DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, | ||||||
| 	    &spa->spa_ddt_stat_object); | 	    &spa->spa_ddt_stat_object); | ||||||
| 
 |  | ||||||
| 	if (error) | 	if (error) | ||||||
| 		return (error == ENOENT ? 0 : error); | 		return (error == ENOENT ? 0 : error); | ||||||
| 
 | 
 | ||||||
| @ -898,6 +1112,12 @@ ddt_load(spa_t *spa) | |||||||
| 			continue; | 			continue; | ||||||
| 
 | 
 | ||||||
| 		ddt_t *ddt = spa->spa_ddt[c]; | 		ddt_t *ddt = spa->spa_ddt[c]; | ||||||
|  | 		error = ddt_configure(ddt, B_FALSE); | ||||||
|  | 		if (error == ENOENT) | ||||||
|  | 			continue; | ||||||
|  | 		if (error != 0) | ||||||
|  | 			return (error); | ||||||
|  | 
 | ||||||
| 		for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | 		for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | ||||||
| 			for (ddt_class_t class = 0; class < DDT_CLASSES; | 			for (ddt_class_t class = 0; class < DDT_CLASSES; | ||||||
| 			    class++) { | 			    class++) { | ||||||
| @ -912,9 +1132,10 @@ ddt_load(spa_t *spa) | |||||||
| 		 */ | 		 */ | ||||||
| 		memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, | 		memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, | ||||||
| 		    sizeof (ddt->ddt_histogram)); | 		    sizeof (ddt->ddt_histogram)); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	spa->spa_dedup_dspace = ~0ULL; | 	spa->spa_dedup_dspace = ~0ULL; | ||||||
| 	spa->spa_dedup_dsize = ~0ULL; | 	spa->spa_dedup_dsize = ~0ULL; | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	return (0); | 	return (0); | ||||||
| } | } | ||||||
| @ -1147,25 +1368,44 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) | |||||||
| 		    DMU_POOL_DDT_STATS, tx); | 		    DMU_POOL_DDT_STATS, tx); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	if (ddt->ddt_version == DDT_VERSION_FDT && ddt->ddt_dir_object == 0) | ||||||
|  | 		ddt_create_dir(ddt, tx); | ||||||
|  | 
 | ||||||
| 	while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { | 	while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { | ||||||
| 		ddt_sync_entry(ddt, dde, tx, txg); | 		ddt_sync_entry(ddt, dde, tx, txg); | ||||||
| 		ddt_free(dde); | 		ddt_free(dde); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	uint64_t count = 0; | ||||||
| 	for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | 	for (ddt_type_t type = 0; type < DDT_TYPES; type++) { | ||||||
| 		uint64_t add, count = 0; | 		uint64_t add, tcount = 0; | ||||||
| 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | ||||||
| 			if (ddt_object_exists(ddt, type, class)) { | 			if (ddt_object_exists(ddt, type, class)) { | ||||||
| 				ddt_object_sync(ddt, type, class, tx); | 				ddt_object_sync(ddt, type, class, tx); | ||||||
| 				VERIFY0(ddt_object_count(ddt, type, class, | 				VERIFY0(ddt_object_count(ddt, type, class, | ||||||
| 				    &add)); | 				    &add)); | ||||||
| 				count += add; | 				tcount += add; | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | 		for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { | ||||||
| 			if (count == 0 && ddt_object_exists(ddt, type, class)) | 			if (tcount == 0 && ddt_object_exists(ddt, type, class)) | ||||||
| 				ddt_object_destroy(ddt, type, class, tx); | 				ddt_object_destroy(ddt, type, class, tx); | ||||||
| 		} | 		} | ||||||
|  | 		count += tcount; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (count == 0) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * No entries left on the DDT, so reset the version for next | ||||||
|  | 		 * time. This allows us to handle the feature being changed | ||||||
|  | 		 * since the DDT was originally created. New entries should get | ||||||
|  | 		 * whatever the feature currently demands. | ||||||
|  | 		 */ | ||||||
|  | 		if (ddt->ddt_version == DDT_VERSION_FDT) | ||||||
|  | 			ddt_destroy_dir(ddt, tx); | ||||||
|  | 
 | ||||||
|  | 		ddt->ddt_version = DDT_VERSION_UNCONFIGURED; | ||||||
|  | 		ddt->ddt_flags = 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, | 	memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, | ||||||
|  | |||||||
| @ -48,6 +48,10 @@ static unsigned long zio_decompress_fail_fraction = 0; | |||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Compression vectors. |  * Compression vectors. | ||||||
|  |  * | ||||||
|  |  * NOTE: DO NOT CHANGE THE NAMES OF THESE COMPRESSION FUNCTIONS. | ||||||
|  |  * THEY ARE USED AS ZAP KEY NAMES BY FAST DEDUP AND THEREFORE | ||||||
|  |  * PART OF THE ON-DISK FORMAT. | ||||||
|  */ |  */ | ||||||
| zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { | zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { | ||||||
| 	{"inherit",	0,	NULL,		NULL, NULL}, | 	{"inherit",	0,	NULL,		NULL, NULL}, | ||||||
|  | |||||||
| @ -109,5 +109,6 @@ if is_linux || is_freebsd; then | |||||||
| 	    "feature@block_cloning" | 	    "feature@block_cloning" | ||||||
| 	    "feature@vdev_zaps_v2" | 	    "feature@vdev_zaps_v2" | ||||||
| 	    "feature@raidz_expansion" | 	    "feature@raidz_expansion" | ||||||
|  | 	    "feature@fast_dedup" | ||||||
| 	) | 	) | ||||||
| fi | fi | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Rob Norris
						Rob Norris