fs/dax: always remove DAX page-cache entries when breaking layouts
Prior to any truncation operation, file systems call dax_break_mapping() to ensure pages in the range are not undergoing DMA. Later, DAX page-cache entries are removed by truncate_folio_batch_exceptionals() in the generic page-cache code. However, this makes it possible for folios to be removed from the page-cache even though they are still DMA busy if the file system hasn't called dax_break_mapping(). It also means they can never be waited on in the future because FS DAX will lose track of them once the page-cache entry has been deleted.

Instead it is better to delete the FS DAX entry when the file system calls dax_break_mapping() as part of its truncate operation. This ensures only idle pages can be removed from the FS DAX page-cache and makes it easy to detect if a file system hasn't called dax_break_mapping() prior to a truncate operation.

Link: https://lkml.kernel.org/r/3be6115eaaa8d28fee37fcba3287be4f226a7d24.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
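For context, the call pattern this change assumes is that a filesystem breaks DAX layouts before it truncates the page cache, so that dax_break_layout() both waits for any DMA and removes the now-idle FS DAX entries. The sketch below is illustrative only: the example_* names are hypothetical, and only dax_break_layout(), IS_DAX() and truncate_pagecache() are real kernel API.

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/mm.h>

/*
 * Hypothetical filesystem truncate path (the example_* names are made
 * up). With this patch, the dax_break_layout() call below unmaps the
 * range, waits for DMA to complete and deletes the idle FS DAX
 * page-cache entries, so truncate_folio_batch_exceptionals() should
 * never encounter one.
 */
static void example_wait_dax_page(struct inode *inode)
{
	/* Drop and retake filesystem locks while waiting, as needed. */
}

static int example_dax_setsize(struct inode *inode, loff_t newsize)
{
	int error;

	if (!IS_DAX(inode))
		return 0;

	/* Break layouts from the new EOF to the end of the file. */
	error = dax_break_layout(inode, newsize, LLONG_MAX,
				 example_wait_dax_page);
	if (error)
		return error;

	truncate_pagecache(inode, newsize);
	return 0;
}

With this ordering in place, the WARN_ON_ONCE() added to truncate_folio_batch_exceptionals() in the diff below flags any filesystem that truncates without breaking layouts first.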
commit bde708f1a6
parent d5b3afea22
fs/dax.c
@@ -846,6 +846,36 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	return ret;
 }
 
+void dax_delete_mapping_range(struct address_space *mapping,
+				loff_t start, loff_t end)
+{
+	void *entry;
+	pgoff_t start_idx = start >> PAGE_SHIFT;
+	pgoff_t end_idx;
+	XA_STATE(xas, &mapping->i_pages, start_idx);
+
+	/* If end == LLONG_MAX, all pages from start to till end of file */
+	if (end == LLONG_MAX)
+		end_idx = ULONG_MAX;
+	else
+		end_idx = end >> PAGE_SHIFT;
+
+	xas_lock_irq(&xas);
+	xas_for_each(&xas, entry, end_idx) {
+		if (!xa_is_value(entry))
+			continue;
+		entry = wait_entry_unlocked_exclusive(&xas, entry);
+		if (!entry)
+			continue;
+		dax_disassociate_entry(entry, mapping, true);
+		xas_store(&xas, NULL);
+		mapping->nrpages -= 1UL << dax_entry_order(entry);
+		put_unlocked_entry(&xas, entry, WAKE_ALL);
+	}
+	xas_unlock_irq(&xas);
+}
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
+
 static int wait_page_idle(struct page *page,
 			void (cb)(struct inode *),
 			struct inode *inode)
@@ -857,6 +887,9 @@ static int wait_page_idle(struct page *page,
 /*
  * Unmaps the inode and waits for any DMA to complete prior to deleting the
  * DAX mapping entries for the range.
+ *
+ * For NOWAIT behavior, pass @cb as NULL to early-exit on first found
+ * busy page
  */
 int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
 		void (cb)(struct inode *))
@@ -871,10 +904,17 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
 		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
 		if (!page)
 			break;
 
+		if (!cb) {
+			error = -ERESTARTSYS;
+			break;
+		}
+
 		error = wait_page_idle(page, cb, inode);
 	} while (error == 0);
 
+	if (!page)
+		dax_delete_mapping_range(inode->i_mapping, start, end);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(dax_break_layout);
fs/xfs/xfs_inode.c
@@ -2735,7 +2735,6 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
 	struct xfs_inode	*ip2)
 {
 	int			error;
-	struct page		*page;
 
 	if (ip1->i_ino > ip2->i_ino)
 		swap(ip1, ip2);
@@ -2759,8 +2758,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
 	 * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
 	 * for this nested lock case.
 	 */
-	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
-	if (!dax_page_is_idle(page)) {
+	error = dax_break_layout(VFS_I(ip2), 0, -1, NULL);
+	if (error) {
 		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
 		goto again;
include/linux/dax.h
@@ -255,6 +255,8 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
 vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 		unsigned int order, pfn_t pfn);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_delete_mapping_range(struct address_space *mapping,
+				loff_t start, loff_t end);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
 int __must_check dax_break_layout(struct inode *inode, loff_t start,
mm/truncate.c
@@ -78,8 +78,22 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping,
 
 	if (dax_mapping(mapping)) {
 		for (i = j; i < nr; i++) {
-			if (xa_is_value(fbatch->folios[i]))
+			if (xa_is_value(fbatch->folios[i])) {
+				/*
+				 * File systems should already have called
+				 * dax_break_layout_entry() to remove all DAX
+				 * entries while holding a lock to prevent
+				 * establishing new entries. Therefore we
+				 * shouldn't find any here.
+				 */
+				WARN_ON_ONCE(1);
+
+				/*
+				 * Delete the mapping so truncate_pagecache()
+				 * doesn't loop forever.
+				 */
 				dax_delete_mapping_entry(mapping, indices[i]);
+			}
 		}
 		goto out;
 	}
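As a usage note, the NULL-callback (NOWAIT) path added to dax_break_layout() above is what the XFS hunk relies on when lock ordering prevents sleeping: the first busy page makes the call return -ERESTARTSYS, so the caller can drop its locks and retry. A hedged sketch of that pattern follows; example_lock()/example_unlock() are hypothetical placeholders for the caller's real locks (XFS uses XFS_MMAPLOCK_EXCL here), and only dax_break_layout() is real API from this patch.

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/limits.h>

/* Placeholders for locks the caller must not sleep under. */
static void example_lock(struct inode *inode) { }
static void example_unlock(struct inode *inode) { }

static int example_break_layout_nowait(struct inode *inode)
{
	int error;

retry:
	example_lock(inode);

	/* NULL callback: fail with -ERESTARTSYS on the first busy page. */
	error = dax_break_layout(inode, 0, LLONG_MAX, NULL);
	if (error == -ERESTARTSYS) {
		example_unlock(inode);
		goto retry;
	}

	/* Layouts broken (or a hard error); the caller still holds the lock. */
	return error;
}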