mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-31 04:00:50 +00:00 
			
		
		
		
	Remove fastwrite mechanism.
Fastwrite was introduced many years ago to improve ZIL writes spread between multiple top-level vdevs by tracking the number of allocated but not yet written blocks and choosing the vdev with the smaller count. It was supposed to reduce the ZIL's knowledge about allocation, but it actually made the ZIL report its allocations to the allocation code even more actively, complicating both the ZIL and metaslab code. On top of that, it seems the ZIO_FLAG_FASTWRITE setting in dmu_sync() was lost many years ago, and that was one of the declared benefits. Plus, the introduction of the embedded log metaslab class solved another problem, with the allocation rotor accounting for both normal and log allocations, since in most cases those are now in different metaslab classes. After all that, I'd prefer to simplify the already too complicated ZIL, ZIO, and metaslab code if the benefit of the complexity is not obvious. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: George Wilson <george.wilson@delphix.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15107
This commit is contained in:
		
							parent
							
								
									02ce9030e6
								
							
						
					
					
						commit
						ffaedf0a44
					
				| @ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *); | ||||
| #define	METASLAB_ASYNC_ALLOC		0x8 | ||||
| #define	METASLAB_DONT_THROTTLE		0x10 | ||||
| #define	METASLAB_MUST_RESERVE		0x20 | ||||
| #define	METASLAB_FASTWRITE		0x40 | ||||
| #define	METASLAB_ZIL			0x80 | ||||
| 
 | ||||
| int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, | ||||
| @ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t); | ||||
| int metaslab_claim(spa_t *, const blkptr_t *, uint64_t); | ||||
| int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t); | ||||
| void metaslab_check_free(spa_t *, const blkptr_t *); | ||||
| void metaslab_fastwrite_mark(spa_t *, const blkptr_t *); | ||||
| void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *); | ||||
| 
 | ||||
| void metaslab_stat_init(void); | ||||
| void metaslab_stat_fini(void); | ||||
|  | ||||
| @ -266,7 +266,6 @@ struct vdev { | ||||
| 	metaslab_group_t *vdev_mg;	/* metaslab group		*/ | ||||
| 	metaslab_group_t *vdev_log_mg;	/* embedded slog metaslab group	*/ | ||||
| 	metaslab_t	**vdev_ms;	/* metaslab array		*/ | ||||
| 	uint64_t	vdev_pending_fastwrite; /* allocated fastwrites */ | ||||
| 	txg_list_t	vdev_ms_list;	/* per-txg dirty metaslab lists	*/ | ||||
| 	txg_list_t	vdev_dtl_list;	/* per-txg dirty DTL lists	*/ | ||||
| 	txg_node_t	vdev_txg_node;	/* per-txg dirty vdev linkage	*/ | ||||
|  | ||||
| @ -91,7 +91,6 @@ typedef enum { | ||||
| typedef struct lwb { | ||||
| 	zilog_t		*lwb_zilog;	/* back pointer to log struct */ | ||||
| 	blkptr_t	lwb_blk;	/* on disk address of this log blk */ | ||||
| 	boolean_t	lwb_fastwrite;	/* is blk marked for fastwrite? */ | ||||
| 	boolean_t	lwb_slog;	/* lwb_blk is on SLOG device */ | ||||
| 	boolean_t	lwb_indirect;	/* do not postpone zil_lwb_commit() */ | ||||
| 	int		lwb_nused;	/* # used bytes in buffer */ | ||||
|  | ||||
| @ -222,7 +222,6 @@ typedef uint64_t zio_flag_t; | ||||
| #define	ZIO_FLAG_NOPWRITE	(1ULL << 28) | ||||
| #define	ZIO_FLAG_REEXECUTED	(1ULL << 29) | ||||
| #define	ZIO_FLAG_DELEGATED	(1ULL << 30) | ||||
| #define	ZIO_FLAG_FASTWRITE	(1ULL << 31) | ||||
| 
 | ||||
| #define	ZIO_FLAG_MUSTSUCCEED		0 | ||||
| #define	ZIO_FLAG_RAW	(ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) | ||||
|  | ||||
| @ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, | ||||
|     zio_alloc_list_t *zal, int allocator) | ||||
| { | ||||
| 	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator]; | ||||
| 	metaslab_group_t *mg, *fast_mg, *rotor; | ||||
| 	metaslab_group_t *mg, *rotor; | ||||
| 	vdev_t *vd; | ||||
| 	boolean_t try_hard = B_FALSE; | ||||
| 
 | ||||
| @ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, | ||||
| 	} else if (d != 0) { | ||||
| 		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); | ||||
| 		mg = vd->vdev_mg->mg_next; | ||||
| 	} else if (flags & METASLAB_FASTWRITE) { | ||||
| 		mg = fast_mg = mca->mca_rotor; | ||||
| 
 | ||||
| 		do { | ||||
| 			if (fast_mg->mg_vd->vdev_pending_fastwrite < | ||||
| 			    mg->mg_vd->vdev_pending_fastwrite) | ||||
| 				mg = fast_mg; | ||||
| 		} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor); | ||||
| 
 | ||||
| 	} else { | ||||
| 		ASSERT(mca->mca_rotor != NULL); | ||||
| 		mg = mca->mca_rotor; | ||||
| @ -5297,7 +5288,7 @@ top: | ||||
| 				mg->mg_bias = 0; | ||||
| 			} | ||||
| 
 | ||||
| 			if ((flags & METASLAB_FASTWRITE) || | ||||
| 			if ((flags & METASLAB_ZIL) || | ||||
| 			    atomic_add_64_nv(&mca->mca_aliquot, asize) >= | ||||
| 			    mg->mg_aliquot + mg->mg_bias) { | ||||
| 				mca->mca_rotor = mg->mg_next; | ||||
| @ -5310,11 +5301,6 @@ top: | ||||
| 			    ((flags & METASLAB_GANG_HEADER) ? 1 : 0)); | ||||
| 			DVA_SET_ASIZE(&dva[d], asize); | ||||
| 
 | ||||
| 			if (flags & METASLAB_FASTWRITE) { | ||||
| 				atomic_add_64(&vd->vdev_pending_fastwrite, | ||||
| 				    psize); | ||||
| 			} | ||||
| 
 | ||||
| 			return (0); | ||||
| 		} | ||||
| next: | ||||
| @ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) | ||||
| 	return (error); | ||||
| } | ||||
| 
 | ||||
| void | ||||
| metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp) | ||||
| { | ||||
| 	const dva_t *dva = bp->blk_dva; | ||||
| 	int ndvas = BP_GET_NDVAS(bp); | ||||
| 	uint64_t psize = BP_GET_PSIZE(bp); | ||||
| 	int d; | ||||
| 	vdev_t *vd; | ||||
| 
 | ||||
| 	ASSERT(!BP_IS_HOLE(bp)); | ||||
| 	ASSERT(!BP_IS_EMBEDDED(bp)); | ||||
| 	ASSERT(psize > 0); | ||||
| 
 | ||||
| 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); | ||||
| 
 | ||||
| 	for (d = 0; d < ndvas; d++) { | ||||
| 		if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) | ||||
| 			continue; | ||||
| 		atomic_add_64(&vd->vdev_pending_fastwrite, psize); | ||||
| 	} | ||||
| 
 | ||||
| 	spa_config_exit(spa, SCL_VDEV, FTAG); | ||||
| } | ||||
| 
 | ||||
| void | ||||
| metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp) | ||||
| { | ||||
| 	const dva_t *dva = bp->blk_dva; | ||||
| 	int ndvas = BP_GET_NDVAS(bp); | ||||
| 	uint64_t psize = BP_GET_PSIZE(bp); | ||||
| 	int d; | ||||
| 	vdev_t *vd; | ||||
| 
 | ||||
| 	ASSERT(!BP_IS_HOLE(bp)); | ||||
| 	ASSERT(!BP_IS_EMBEDDED(bp)); | ||||
| 	ASSERT(psize > 0); | ||||
| 
 | ||||
| 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); | ||||
| 
 | ||||
| 	for (d = 0; d < ndvas; d++) { | ||||
| 		if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) | ||||
| 			continue; | ||||
| 		ASSERT3U(vd->vdev_pending_fastwrite, >=, psize); | ||||
| 		atomic_sub_64(&vd->vdev_pending_fastwrite, psize); | ||||
| 	} | ||||
| 
 | ||||
| 	spa_config_exit(spa, SCL_VDEV, FTAG); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset, | ||||
|     uint64_t size, void *arg) | ||||
|  | ||||
| @ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) | ||||
| 
 | ||||
| 	ASSERT(tvd == tvd->vdev_top); | ||||
| 
 | ||||
| 	tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite; | ||||
| 	tvd->vdev_ms_array = svd->vdev_ms_array; | ||||
| 	tvd->vdev_ms_shift = svd->vdev_ms_shift; | ||||
| 	tvd->vdev_ms_count = svd->vdev_ms_count; | ||||
| @ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd) | ||||
| 		} | ||||
| 	} | ||||
| 	ASSERT0(vd->vdev_ms_count); | ||||
| 	ASSERT3U(vd->vdev_pending_fastwrite, ==, 0); | ||||
| } | ||||
| 
 | ||||
| typedef struct vdev_probe_stats { | ||||
|  | ||||
| @ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2) | ||||
| } | ||||
| 
 | ||||
| static lwb_t * | ||||
| zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg, | ||||
|     boolean_t fastwrite) | ||||
| zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg) | ||||
| { | ||||
| 	lwb_t *lwb; | ||||
| 
 | ||||
| 	lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); | ||||
| 	lwb->lwb_zilog = zilog; | ||||
| 	lwb->lwb_blk = *bp; | ||||
| 	lwb->lwb_fastwrite = fastwrite; | ||||
| 	lwb->lwb_slog = slog; | ||||
| 	lwb->lwb_indirect = B_FALSE; | ||||
| 	if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { | ||||
| @ -916,7 +914,6 @@ zil_create(zilog_t *zilog) | ||||
| 	dmu_tx_t *tx = NULL; | ||||
| 	blkptr_t blk; | ||||
| 	int error = 0; | ||||
| 	boolean_t fastwrite = FALSE; | ||||
| 	boolean_t slog = FALSE; | ||||
| 	dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); | ||||
| 
 | ||||
| @ -949,8 +946,6 @@ zil_create(zilog_t *zilog) | ||||
| 
 | ||||
| 		error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, | ||||
| 		    ZIL_MIN_BLKSZ, &slog); | ||||
| 		fastwrite = TRUE; | ||||
| 
 | ||||
| 		if (error == 0) | ||||
| 			zil_init_log_chain(zilog, &blk); | ||||
| 	} | ||||
| @ -959,7 +954,7 @@ zil_create(zilog_t *zilog) | ||||
| 	 * Allocate a log write block (lwb) for the first log block. | ||||
| 	 */ | ||||
| 	if (error == 0) | ||||
| 		lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite); | ||||
| 		lwb = zil_alloc_lwb(zilog, &blk, slog, txg); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we just allocated the first log block, commit our transaction | ||||
| @ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) | ||||
| 		ASSERT(zh->zh_claim_txg == 0); | ||||
| 		VERIFY(!keep_first); | ||||
| 		while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) { | ||||
| 			if (lwb->lwb_fastwrite) | ||||
| 				metaslab_fastwrite_unmark(zilog->zl_spa, | ||||
| 				    &lwb->lwb_blk); | ||||
| 			if (lwb->lwb_buf != NULL) | ||||
| 				zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); | ||||
| 			zio_free(zilog->zl_spa, txg, &lwb->lwb_blk); | ||||
| @ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio) | ||||
| 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED); | ||||
| 	lwb->lwb_state = LWB_STATE_WRITE_DONE; | ||||
| 	lwb->lwb_write_zio = NULL; | ||||
| 	lwb->lwb_fastwrite = FALSE; | ||||
| 	nlwb = list_next(&zilog->zl_lwb_list, lwb); | ||||
| 	mutex_exit(&zilog->zl_lock); | ||||
| 
 | ||||
| @ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) | ||||
| 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, | ||||
| 	    lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); | ||||
| 
 | ||||
| 	/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */ | ||||
| 	mutex_enter(&zilog->zl_lock); | ||||
| 	if (!lwb->lwb_fastwrite) { | ||||
| 		metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk); | ||||
| 		lwb->lwb_fastwrite = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0, | ||||
| 	    &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk), | ||||
| 	    zil_lwb_write_done, lwb, prio, | ||||
| 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb); | ||||
| 	    zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb); | ||||
| 
 | ||||
| 	mutex_enter(&zilog->zl_lock); | ||||
| 	lwb->lwb_state = LWB_STATE_OPENED; | ||||
| 
 | ||||
| 	zil_lwb_set_zio_dependency(zilog, lwb); | ||||
| 	zilog->zl_last_lwb_opened = lwb; | ||||
| 	mutex_exit(&zilog->zl_lock); | ||||
| @ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) | ||||
| 		/*
 | ||||
| 		 * Allocate a new log write block (lwb). | ||||
| 		 */ | ||||
| 		nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE); | ||||
| 		nlwb = zil_alloc_lwb(zilog, bp, slog, txg); | ||||
| 	} | ||||
| 
 | ||||
| 	lwb->lwb_state = LWB_STATE_ISSUED; | ||||
| @ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) | ||||
| 			BP_ZERO(&zh->zh_log); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Remove fastwrite on any blocks that have been pre-allocated for | ||||
| 	 * the next commit. This prevents fastwrite counter pollution by | ||||
| 	 * unused, long-lived LWBs. | ||||
| 	 */ | ||||
| 	for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) { | ||||
| 		if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) { | ||||
| 			metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); | ||||
| 			lwb->lwb_fastwrite = 0; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	mutex_exit(&zilog->zl_lock); | ||||
| } | ||||
| 
 | ||||
| @ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog) | ||||
| 		ASSERT(list_is_empty(&zilog->zl_lwb_list)); | ||||
| 		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); | ||||
| 
 | ||||
| 		if (lwb->lwb_fastwrite) | ||||
| 			metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); | ||||
| 
 | ||||
| 		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); | ||||
| 		zil_free_lwb(zilog, lwb); | ||||
| 	} | ||||
|  | ||||
| @ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) | ||||
| 	 */ | ||||
| 	pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We didn't allocate this bp, so make sure it doesn't get unmarked. | ||||
| 	 */ | ||||
| 	pio->io_flags &= ~ZIO_FLAG_FASTWRITE; | ||||
| 
 | ||||
| 	zio_nowait(zio); | ||||
| 
 | ||||
| 	return (pio); | ||||
| @ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio) | ||||
| 	ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); | ||||
| 	ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); | ||||
| 
 | ||||
| 	flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0; | ||||
| 	if (zio->io_flags & ZIO_FLAG_NODATA) | ||||
| 		flags |= METASLAB_DONT_THROTTLE; | ||||
| 	if (zio->io_flags & ZIO_FLAG_GANG_CHILD) | ||||
| @ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, | ||||
| 	 * of, so we just hash the objset ID to pick the allocator to get | ||||
| 	 * some parallelism. | ||||
| 	 */ | ||||
| 	int flags = METASLAB_FASTWRITE | METASLAB_ZIL; | ||||
| 	int flags = METASLAB_ZIL; | ||||
| 	int allocator = (uint_t)cityhash4(0, 0, 0, | ||||
| 	    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; | ||||
| 	error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, | ||||
| @ -4931,12 +4925,6 @@ zio_done(zio_t *zio) | ||||
| 		zfs_ereport_free_checksum(zcr); | ||||
| 	} | ||||
| 
 | ||||
| 	if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && | ||||
| 	    !BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) && | ||||
| 	    !(zio->io_flags & ZIO_FLAG_NOPWRITE)) { | ||||
| 		metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * It is the responsibility of the done callback to ensure that this | ||||
| 	 * particular zio is no longer discoverable for adoption, and as | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Alexander Motin
						Alexander Motin