mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-26 11:02:35 +00:00 
			
		
		
		
	vdev_disk: try harder to ensure IO alignment rules
It seems out our notion of "properly" aligned IO was incomplete. In particular, dm-crypt does its own splitting, and assumes that a logical block will never cross an order-0 page boundary (ie, the physical page size, not compound size). This effectively means that it needs to be possible to split a BIO at any page or block size boundary and have it work correctly. This updates the alignment check function to enforce these rules (to the extent possible). Our response to misaligned data is to make some new allocation that is properly aligned, and copy the data into it. It turns out that linearising (via abd_borrow_buf()) is not enough, because we allocate eg 4K blocks from a general purpose slab, and so may receive (or already have) a 4K block that crosses pages. So instead, we allocate a new ABD, which is guaranteed to be aligned properly to block sizes, and then copy everything into it, and back out on the way back. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #16687 #16631 #15646 #15533 #14533
This commit is contained in:
		
							parent
							
								
									8ac70aade7
								
							
						
					
					
						commit
						19a8dd48e1
					
				| @ -805,14 +805,11 @@ vbio_completion(struct bio *bio) | ||||
| 	 * to the ADB, with changes if appropriate. | ||||
| 	 */ | ||||
| 	if (vbio->vbio_abd != NULL) { | ||||
| 		void *buf = abd_to_buf(vbio->vbio_abd); | ||||
| 		if (zio->io_type == ZIO_TYPE_READ) | ||||
| 			abd_copy(zio->io_abd, vbio->vbio_abd, zio->io_size); | ||||
| 
 | ||||
| 		abd_free(vbio->vbio_abd); | ||||
| 		vbio->vbio_abd = NULL; | ||||
| 
 | ||||
| 		if (zio->io_type == ZIO_TYPE_READ) | ||||
| 			abd_return_buf_copy(zio->io_abd, buf, zio->io_size); | ||||
| 		else | ||||
| 			abd_return_buf(zio->io_abd, buf, zio->io_size); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Final cleanup */ | ||||
| @ -834,38 +831,61 @@ vbio_completion(struct bio *bio) | ||||
|  * NOTE: if you change this function, change the copy in | ||||
|  * tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c, and add test | ||||
|  * data there to validate the change you're making. | ||||
|  * | ||||
|  */ | ||||
| typedef struct { | ||||
| 	uint_t  bmask; | ||||
| 	uint_t  npages; | ||||
| 	uint_t  end; | ||||
| } vdev_disk_check_pages_t; | ||||
| 	size_t	blocksize; | ||||
| 	int	seen_first; | ||||
| 	int	seen_last; | ||||
| } vdev_disk_check_alignment_t; | ||||
| 
 | ||||
| static int | ||||
| vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) | ||||
| vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, | ||||
|     void *priv) | ||||
| { | ||||
| 	(void) page; | ||||
| 	vdev_disk_check_pages_t *s = priv; | ||||
| 	vdev_disk_check_alignment_t *s = priv; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we didn't finish on a block size boundary last time, then there | ||||
| 	 * would be a gap if we tried to use this ABD as-is, so abort. | ||||
| 	 * The cardinal rule: a single on-disk block must never cross an | ||||
| 	 * physical (order-0) page boundary, as the kernel expects to be able | ||||
| 	 * to split at both LBS and page boundaries. | ||||
| 	 * | ||||
| 	 * This implies various alignment rules for the blocks in this | ||||
| 	 * (possibly compound) page, which we can check for. | ||||
| 	 */ | ||||
| 	if (s->end != 0) | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the previous page did not end on a page boundary, then we | ||||
| 	 * can't proceed without creating a hole. | ||||
| 	 */ | ||||
| 	if (s->seen_last) | ||||
| 		return (1); | ||||
| 
 | ||||
| 	/* This page must contain only whole LBS-sized blocks. */ | ||||
| 	if (!IS_P2ALIGNED(len, s->blocksize)) | ||||
| 		return (1); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Note if we're taking less than a full block, so we can check it | ||||
| 	 * above on the next call. | ||||
| 	 * If this is not the first page in the ABD, then the data must start | ||||
| 	 * on a page-aligned boundary (so the kernel can split on page | ||||
| 	 * boundaries without having to deal with a hole). If it is, then | ||||
| 	 * it can start on LBS-alignment. | ||||
| 	 */ | ||||
| 	s->end = (off+len) & s->bmask; | ||||
| 	if (s->seen_first) { | ||||
| 		if (!IS_P2ALIGNED(off, PAGESIZE)) | ||||
| 			return (1); | ||||
| 	} else { | ||||
| 		if (!IS_P2ALIGNED(off, s->blocksize)) | ||||
| 			return (1); | ||||
| 		s->seen_first = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* All blocks after the first must start on a block size boundary. */ | ||||
| 	if (s->npages != 0 && (off & s->bmask) != 0) | ||||
| 		return (1); | ||||
| 	/*
 | ||||
| 	 * If this data does not end on a page-aligned boundary, then this | ||||
| 	 * must be the last page in the ABD, for the same reason. | ||||
| 	 */ | ||||
| 	s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); | ||||
| 
 | ||||
| 	s->npages++; | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| @ -874,15 +894,14 @@ vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) | ||||
|  * the number of pages, or 0 if it can't be submitted like this. | ||||
|  */ | ||||
| static boolean_t | ||||
| vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev) | ||||
| vdev_disk_check_alignment(abd_t *abd, uint64_t size, struct block_device *bdev) | ||||
| { | ||||
| 	vdev_disk_check_pages_t s = { | ||||
| 	    .bmask = bdev_logical_block_size(bdev)-1, | ||||
| 	    .npages = 0, | ||||
| 	    .end = 0, | ||||
| 	vdev_disk_check_alignment_t s = { | ||||
| 	    .blocksize = bdev_logical_block_size(bdev), | ||||
| 	}; | ||||
| 
 | ||||
| 	if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s)) | ||||
| 	if (abd_iterate_page_func(abd, 0, size, | ||||
| 	    vdev_disk_check_alignment_cb, &s)) | ||||
| 		return (B_FALSE); | ||||
| 
 | ||||
| 	return (B_TRUE); | ||||
| @ -916,37 +935,32 @@ vdev_disk_io_rw(zio_t *zio) | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Check alignment of the incoming ABD. If any part of it would require | ||||
| 	 * submitting a page that is not aligned to the logical block size, | ||||
| 	 * then we take a copy into a linear buffer and submit that instead. | ||||
| 	 * This should be impossible on a 512b LBS, and fairly rare on 4K, | ||||
| 	 * usually requiring abnormally-small data blocks (eg gang blocks) | ||||
| 	 * mixed into the same ABD as larger ones (eg aggregated). | ||||
| 	 * submitting a page that is not aligned to both the logical block size | ||||
| 	 * and the page size, then we take a copy into a new memory region with | ||||
| 	 * correct alignment.  This should be impossible on a 512b LBS. On | ||||
| 	 * larger blocks, this can happen at least when a small number of | ||||
| 	 * blocks (usually 1) are allocated from a shared slab, or when | ||||
| 	 * abnormally-small data regions (eg gang headers) are mixed into the | ||||
| 	 * same ABD as larger allocations (eg aggregations). | ||||
| 	 */ | ||||
| 	abd_t *abd = zio->io_abd; | ||||
| 	if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) { | ||||
| 		void *buf; | ||||
| 		if (zio->io_type == ZIO_TYPE_READ) | ||||
| 			buf = abd_borrow_buf(zio->io_abd, zio->io_size); | ||||
| 		else | ||||
| 			buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); | ||||
| 	if (!vdev_disk_check_alignment(abd, zio->io_size, bdev)) { | ||||
| 		/* Allocate a new memory region with guaranteed alignment */ | ||||
| 		abd = abd_alloc_for_io(zio->io_size, | ||||
| 		    zio->io_abd->abd_flags & ABD_FLAG_META); | ||||
| 
 | ||||
| 		/* If we're writing copy our data into it */ | ||||
| 		if (zio->io_type == ZIO_TYPE_WRITE) | ||||
| 			abd_copy(abd, zio->io_abd, zio->io_size); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Wrap the copy in an abd_t, so we can use the same iterators | ||||
| 		 * to count and fill the vbio later. | ||||
| 		 */ | ||||
| 		abd = abd_get_from_buf(buf, zio->io_size); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * False here would mean the borrowed copy has an invalid | ||||
| 		 * alignment too, which would mean we've somehow been passed a | ||||
| 		 * linear ABD with an interior page that has a non-zero offset | ||||
| 		 * or a size not a multiple of PAGE_SIZE. This is not possible. | ||||
| 		 * It would mean either zio_buf_alloc() or its underlying | ||||
| 		 * allocators have done something extremely strange, or our | ||||
| 		 * math in vdev_disk_check_pages() is wrong. In either case, | ||||
| 		 * False here would mean the new allocation has an invalid | ||||
| 		 * alignment too, which would mean that abd_alloc() is not | ||||
| 		 * guaranteeing this, or our logic in | ||||
| 		 * vdev_disk_check_alignment() is wrong. In either case, | ||||
| 		 * something in seriously wrong and its not safe to continue. | ||||
| 		 */ | ||||
| 		VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev)); | ||||
| 		VERIFY(vdev_disk_check_alignment(abd, zio->io_size, bdev)); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Allocate vbio, with a pointer to the borrowed ABD if necessary */ | ||||
|  | ||||
| @ -30,7 +30,7 @@ | ||||
| 
 | ||||
| /*
 | ||||
|  * This tests the vdev_disk page alignment check callback | ||||
|  * vdev_disk_check_pages_cb(). For now, this test includes a copy of that | ||||
|  * vdev_disk_check_alignment_cb(). For now, this test includes a copy of that | ||||
|  * function from module/os/linux/zfs/vdev_disk.c. If you change it here, | ||||
|  * remember to change it there too, and add tests data here to validate the | ||||
|  * change you're making. | ||||
| @ -38,36 +38,69 @@ | ||||
| 
 | ||||
| struct page; | ||||
| 
 | ||||
| /*
 | ||||
|  * This is spl_pagesize() in userspace, which requires linking libspl, but | ||||
|  * would also then use the platform page size, which isn't what we want for | ||||
|  * a test. To keep the check callback the same as the real one, we just | ||||
|  * redefine it. | ||||
|  */ | ||||
| #undef	PAGESIZE | ||||
| #define	PAGESIZE	(4096) | ||||
| 
 | ||||
| typedef struct { | ||||
| 	uint32_t  bmask; | ||||
| 	uint32_t  npages; | ||||
| 	uint32_t  end; | ||||
| } vdev_disk_check_pages_t; | ||||
| 	size_t	blocksize; | ||||
| 	int	seen_first; | ||||
| 	int	seen_last; | ||||
| } vdev_disk_check_alignment_t; | ||||
| 
 | ||||
| static int | ||||
| vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) | ||||
| vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, | ||||
|     void *priv) | ||||
| { | ||||
| 	(void) page; | ||||
| 	vdev_disk_check_pages_t *s = priv; | ||||
| 	vdev_disk_check_alignment_t *s = priv; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we didn't finish on a block size boundary last time, then there | ||||
| 	 * would be a gap if we tried to use this ABD as-is, so abort. | ||||
| 	 * The cardinal rule: a single on-disk block must never cross an | ||||
| 	 * physical (order-0) page boundary, as the kernel expects to be able | ||||
| 	 * to split at both LBS and page boundaries. | ||||
| 	 * | ||||
| 	 * This implies various alignment rules for the blocks in this | ||||
| 	 * (possibly compound) page, which we can check for. | ||||
| 	 */ | ||||
| 	if (s->end != 0) | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the previous page did not end on a page boundary, then we | ||||
| 	 * can't proceed without creating a hole. | ||||
| 	 */ | ||||
| 	if (s->seen_last) | ||||
| 		return (1); | ||||
| 
 | ||||
| 	/* This page must contain only whole LBS-sized blocks. */ | ||||
| 	if (!IS_P2ALIGNED(len, s->blocksize)) | ||||
| 		return (1); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Note if we're taking less than a full block, so we can check it | ||||
| 	 * above on the next call. | ||||
| 	 * If this is not the first page in the ABD, then the data must start | ||||
| 	 * on a page-aligned boundary (so the kernel can split on page | ||||
| 	 * boundaries without having to deal with a hole). If it is, then | ||||
| 	 * it can start on LBS-alignment. | ||||
| 	 */ | ||||
| 	s->end = (off+len) & s->bmask; | ||||
| 	if (s->seen_first) { | ||||
| 		if (!IS_P2ALIGNED(off, PAGESIZE)) | ||||
| 			return (1); | ||||
| 	} else { | ||||
| 		if (!IS_P2ALIGNED(off, s->blocksize)) | ||||
| 			return (1); | ||||
| 		s->seen_first = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* All blocks after the first must start on a block size boundary. */ | ||||
| 	if (s->npages != 0 && (off & s->bmask) != 0) | ||||
| 		return (1); | ||||
| 	/*
 | ||||
| 	 * If this data does not end on a page-aligned boundary, then this | ||||
| 	 * must be the last page in the ABD, for the same reason. | ||||
| 	 */ | ||||
| 	s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); | ||||
| 
 | ||||
| 	s->npages++; | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| @ -75,8 +108,8 @@ typedef struct { | ||||
| 	/* test name */ | ||||
| 	const char	*name; | ||||
| 
 | ||||
| 	/* blocks size mask */ | ||||
| 	uint32_t	mask; | ||||
| 	/* stored block size */ | ||||
| 	uint32_t	blocksize; | ||||
| 
 | ||||
| 	/* amount of data to take */ | ||||
| 	size_t		size; | ||||
| @ -89,39 +122,39 @@ static const page_test_t valid_tests[] = { | ||||
| 	/* 512B block tests */ | ||||
| 	{ | ||||
| 		"512B blocks, 4K single page", | ||||
| 		0x1ff, 0x1000, { | ||||
| 		512, 0x1000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 1K at start of page", | ||||
| 		0x1ff, 0x400, { | ||||
| 		512, 0x400, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 1K at end of page", | ||||
| 		0x1ff, 0x400, { | ||||
| 		512, 0x400, { | ||||
| 			{ 0x0c00, 0x0400 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 1K within page, 512B start offset", | ||||
| 		0x1ff, 0x400, { | ||||
| 		512, 0x400, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 8K across 2x4K pages", | ||||
| 		0x1ff, 0x2000, { | ||||
| 		512, 0x2000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 4K across two pages, 2K start offset", | ||||
| 		0x1ff, 0x1000, { | ||||
| 		512, 0x1000, { | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 16K across 5x4K pages, 512B start offset", | ||||
| 		0x1ff, 0x4000, { | ||||
| 		512, 0x4000, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| @ -130,7 +163,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 64K data, 8x8K compound pages", | ||||
| 		0x1ff, 0x10000, { | ||||
| 		512, 0x10000, { | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| @ -142,7 +175,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 64K data, 9x8K compound pages, 512B start offset", | ||||
| 		0x1ff, 0x10000, { | ||||
| 		512, 0x10000, { | ||||
| 			{ 0x0200, 0x1e00 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| @ -155,7 +188,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 64K data, 2x16K compound pages, 8x4K pages", | ||||
| 		0x1ff, 0x10000, { | ||||
| 		512, 0x10000, { | ||||
| 			{ 0x0, 0x8000 }, | ||||
| 			{ 0x0, 0x8000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| @ -169,7 +202,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 64K data, mixed 4K/8K/16K pages", | ||||
| 		0x1ff, 0x10000, { | ||||
| 		512, 0x10000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| @ -183,7 +216,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"512B blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", | ||||
| 		0x1ff, 0x10000, { | ||||
| 		512, 0x10000, { | ||||
| 			{ 0x0400, 0x0c00 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| @ -200,48 +233,18 @@ static const page_test_t valid_tests[] = { | ||||
| 	/* 4K block tests */ | ||||
| 	{ | ||||
| 		"4K blocks, 4K single page", | ||||
| 		0xfff, 0x1000, { | ||||
| 		4096, 0x1000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 1K at start of page", | ||||
| 		0xfff, 0x400, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 1K at end of page", | ||||
| 		0xfff, 0x400, { | ||||
| 			{ 0x0c00, 0x0400 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 1K within page, 512B start offset", | ||||
| 		0xfff, 0x400, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 8K across 2x4K pages", | ||||
| 		0xfff, 0x2000, { | ||||
| 		4096, 0x2000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 4K across two pages, 2K start offset", | ||||
| 		0xfff, 0x1000, { | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 16K across 5x4K pages, 512B start offset", | ||||
| 		0xfff, 0x4000, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0200 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, 8x8K compound pages", | ||||
| 		0xfff, 0x10000, { | ||||
| 		4096, 0x10000, { | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| @ -251,22 +254,9 @@ static const page_test_t valid_tests[] = { | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, 9x8K compound pages, 512B start offset", | ||||
| 		0xfff, 0x10000, { | ||||
| 			{ 0x0200, 0x1e00 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x0200 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, 2x16K compound pages, 8x4K pages", | ||||
| 		0xfff, 0x10000, { | ||||
| 		4096, 0x10000, { | ||||
| 			{ 0x0, 0x8000 }, | ||||
| 			{ 0x0, 0x8000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| @ -280,7 +270,7 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, mixed 4K/8K/16K pages", | ||||
| 		0xfff, 0x10000, { | ||||
| 		4096, 0x10000, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| @ -292,9 +282,78 @@ static const page_test_t valid_tests[] = { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x2000 }, | ||||
| 		}, | ||||
| 	}, | ||||
| 
 | ||||
| 	{ 0 }, | ||||
| }; | ||||
| 
 | ||||
| static const page_test_t invalid_tests[] = { | ||||
| 	/*
 | ||||
| 	 * Gang tests. Composed of lots of smaller allocations, rarely properly | ||||
| 	 * aligned. | ||||
| 	 */ | ||||
| 	{ | ||||
| 		"512B blocks, 16K data, 512 leader (gang block simulation)", | ||||
| 		512, 0x8000, { | ||||
| 			{ 0x0, 0x0200 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x0c00 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 32K data, 2 incompatible spans " | ||||
| 		"(gang abd simulation)", | ||||
| 		4096, 0x8000, { | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 		}, | ||||
| 	}, | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Blocks must not span multiple physical pages. These tests used to | ||||
| 	 * be considered valid, but were since found to be invalid and were | ||||
| 	 * moved here. | ||||
| 	 */ | ||||
| 	{ | ||||
| 		"4K blocks, 4K across two pages, 2K start offset", | ||||
| 		4096, 0x1000, { | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 16K across 5x4K pages, 512B start offset", | ||||
| 		4096, 0x4000, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0200 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, 9x8K compound pages, 512B start offset", | ||||
| 		4096, 0x10000, { | ||||
| 			{ 0x0200, 0x1e00 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x2000 }, | ||||
| 			{ 0x0,    0x0200 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", | ||||
| 		0xfff, 0x10000, { | ||||
| 		4096, 0x10000, { | ||||
| 			{ 0x0400, 0x0c00 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| @ -308,35 +367,40 @@ static const page_test_t valid_tests[] = { | ||||
| 		}, | ||||
| 	}, | ||||
| 
 | ||||
| 	{ 0 }, | ||||
| }; | ||||
| 
 | ||||
| static const page_test_t invalid_tests[] = { | ||||
| 	/*
 | ||||
| 	 * This is the very typical case of a 4K block being allocated from | ||||
| 	 * the middle of a mixed-used slab backed by a higher-order compound | ||||
| 	 * page. | ||||
| 	 */ | ||||
| 	{ | ||||
| 		"512B blocks, 16K data, 512 leader (gang block simulation)", | ||||
| 		0x1ff, 0x8000, { | ||||
| 			{ 0x0, 0x0200 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 			{ 0x0, 0x0c00 }, | ||||
| 		"4K blocks, 4K data from compound slab, 2K-align offset", | ||||
| 		4096, 0x1000, { | ||||
| 			{ 0x1800, 0x6800 } | ||||
| 		} | ||||
| 	}, | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Blocks smaller than LBS should never be possible, but used to be by | ||||
| 	 * accident (see GH#16990). We test for and reject them just to be | ||||
| 	 * sure. | ||||
| 	 */ | ||||
| 	{ | ||||
| 		"4K blocks, 1K at end of page", | ||||
| 		4096, 0x400, { | ||||
| 			{ 0x0c00, 0x0400 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 32K data, 2 incompatible spans " | ||||
| 		"(gang abd simulation)", | ||||
| 		0xfff, 0x8000, { | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 			{ 0x0800, 0x0800 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x1000 }, | ||||
| 			{ 0x0,    0x0800 }, | ||||
| 		"4K blocks, 1K at start of page", | ||||
| 		4096, 0x400, { | ||||
| 			{ 0x0, 0x1000 }, | ||||
| 		}, | ||||
| 	}, { | ||||
| 		"4K blocks, 1K within page, 512B start offset", | ||||
| 		4096, 0x400, { | ||||
| 			{ 0x0200, 0x0e00 }, | ||||
| 		}, | ||||
| 	}, | ||||
| 
 | ||||
| 	{ 0 }, | ||||
| }; | ||||
| 
 | ||||
| @ -345,10 +409,8 @@ run_test(const page_test_t *test, bool verbose) | ||||
| { | ||||
| 	size_t rem = test->size; | ||||
| 
 | ||||
| 	vdev_disk_check_pages_t s = { | ||||
| 	    .bmask = 0xfff, | ||||
| 	    .npages = 0, | ||||
| 	    .end = 0, | ||||
| 	vdev_disk_check_alignment_t s = { | ||||
| 	    .blocksize = test->blocksize, | ||||
| 	}; | ||||
| 
 | ||||
| 	for (int i = 0; test->pages[i][1] > 0; i++) { | ||||
| @ -362,7 +424,7 @@ run_test(const page_test_t *test, bool verbose) | ||||
| 			    "rem %lx, take %lx\n", | ||||
| 			    i, off, len, rem, take); | ||||
| 
 | ||||
| 		if (vdev_disk_check_pages_cb(NULL, off, take, &s)) { | ||||
| 		if (vdev_disk_check_alignment_cb(NULL, off, take, &s)) { | ||||
| 			if (verbose) | ||||
| 				printf("  ABORT: misalignment detected, " | ||||
| 				    "rem %lx\n", rem); | ||||
| @ -389,7 +451,7 @@ run_test_set(const page_test_t *tests, bool want, int *ntests, int *npassed) | ||||
| 	for (const page_test_t *test = &tests[0]; test->name; test++) { | ||||
| 		bool pass = (run_test(test, false) == want); | ||||
| 		if (pass) { | ||||
| 			printf("%s: PASS\n", test->name); | ||||
| 			printf("%c %s: PASS\n", want ? '+' : '-', test->name); | ||||
| 			(*npassed)++; | ||||
| 		} else { | ||||
| 			printf("%s: FAIL [expected %s, got %s]\n", test->name, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Rob Norris
						Rob Norris