linux/fs/xfs/scrub/rtbitmap_repair.c
Darrick J. Wong 6470ceef32 xfs: check new rtbitmap records against rt refcount btree
When we're rebuilding the realtime bitmap, check the proposed free
extents against the rt refcount btree to make sure we don't commit any
grievous errors.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2024-12-23 13:06:16 -08:00

636 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2020-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "xfs_extent_busy.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
#include "scrub/rtbitmap.h"
/* rt bitmap content repairs */
/*
 * Prepare to repair the realtime bitmap of this group: create the temporary
 * file, create the xfile that will hold the rebuilt bitmap, and add our block
 * reservation estimate to @rtb->resblks.
 *
 * Returns 0 on success, -EOPNOTSUPP if the reservation estimate does not fit
 * in an unsigned int, or the error from tempfile/xfile creation.
 */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	unsigned long long	nr_bmblocks = mp->m_sb.sb_rbmblocks;
	unsigned long long	resblks;
	char			*xfile_name;
	int			ret;

	ret = xrep_tempfile_create(sc, S_IFREG);
	if (ret)
		return ret;

	/* The rebuilt bitmap is staged in an xfile sized for sb_rbmblocks. */
	xfile_name = xchk_xfile_rtgroup_descr(sc, "bitmap file");
	ret = xfile_create(xfile_name, nr_bmblocks * mp->m_sb.sb_blocksize,
			&sc->xfile);
	kfree(xfile_name);
	if (ret)
		return ret;

	/*
	 * The reservation must cover writing a whole new bitmap file, plus
	 * double the bmbt blocks needed in the worst case of one block per
	 * data fork mapping.  That accounts for preallocating the temporary
	 * file and for exchanging the mappings afterwards.
	 *
	 * xfs_exchmaps_estimate is of no use here: the replacement bitmap has
	 * not been built yet, so its extent count is unknown.  Once it has
	 * been built the transaction is dirty and cannot be dropped (that
	 * would mean dropping the rtbitmap ILOCK), so asking for more
	 * reservation at that point is not possible either.
	 */
	resblks = nr_bmblocks + (xfs_bmbt_calc_size(mp, nr_bmblocks) * 2);
	if (resblks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += resblks;
	return 0;
}
/*
 * Convert an rt extent number into the offset of the bitmap word that holds
 * its bit.  @mp is accepted but not consulted.
 */
static inline xrep_wordoff_t
rtx_to_wordoff(
	struct xfs_mount	*mp,
	xfs_rtxnum_t		rtx)
{
	xrep_wordoff_t		wordoff = rtx >> XFS_NBWORDLOG;

	return wordoff;
}
/* Convert a length in rt extents into a count of whole bitmap words. */
static inline xrep_wordcnt_t
rtxlen_to_wordcnt(
	xfs_rtxlen_t		rtxlen)
{
	xrep_wordcnt_t		wordcnt = rtxlen >> XFS_NBWORDLOG;

	return wordcnt;
}
/* Helper functions to record rtwords in an xfile. */
/*
 * Read the bitmap word at @wordoff from the xfile and hand it back in CPU
 * byte order through @word.  @word is only written on success.
 */
static inline int
xfbmp_load(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		*word)
{
	union xfs_rtword_raw	raw;
	int			ret;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	ret = xfile_load(rtb->sc->xfile, &raw, sizeof(raw),
			wordoff << XFS_WORDLOG);
	if (!ret)
		*word = be32_to_cpu(raw.rtg);
	return ret;
}
/*
 * Write @word to the xfile at word offset @wordoff, converting it to
 * big-endian first.
 */
static inline int
xfbmp_store(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const xfs_rtword_t	word)
{
	union xfs_rtword_raw	raw;
	int			ret;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	raw.rtg = cpu_to_be32(word);
	ret = xfile_store(rtb->sc->xfile, &raw, sizeof(raw),
			wordoff << XFS_WORDLOG);
	return ret;
}
/*
 * Copy @nr_words raw bitmap words from the buffer at @word into the xfile,
 * starting at word offset @wordoff.
 */
static inline int
xfbmp_copyin(
	struct xchk_rtbitmap		*rtb,
	xrep_wordoff_t			wordoff,
	const union xfs_rtword_raw	*word,
	xrep_wordcnt_t			nr_words)
{
	int				ret;

	ret = xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
	return ret;
}
/*
 * Copy @nr_words raw bitmap words out of the xfile, starting at word offset
 * @wordoff, into the buffer at @word.
 */
static inline int
xfbmp_copyout(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	int			ret;

	ret = xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
	return ret;
}
/*
 * OR @mask into the word at @wordoff of the bitmap that we are staging in
 * the xfile.  This is a read-modify-write of a single word.
 */
static int
xrep_rtbitmap_or(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		mask)
{
	xfs_rtword_t		old;
	int			ret;

	ret = xfbmp_load(rtb, wordoff, &old);
	if (ret)
		return ret;

	/* The tracepoint sees the word as it was before the update. */
	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, old);

	return xfbmp_store(rtb, wordoff, old | mask);
}
/*
 * Mark as free every rt extent between the next rt block we expected to see
 * in the rtrmap records and the given rt block.
 *
 * The range marked is [rtb->next_rgbno, rgbno).  This function does not
 * advance rtb->next_rgbno; the caller is responsible for that.
 *
 * Returns 0 on success, -EFSCORRUPTED if the range is invalid, not aligned
 * to rt extent boundaries, or has rt refcount records overlapping it, or an
 * error from the refcount btree query or the xfile.
 */
STATIC int
xrep_rtbitmap_mark_free(
struct xchk_rtbitmap *rtb,
xfs_rgblock_t rgbno)
{
struct xfs_mount *mp = rtb->sc->mp;
struct xchk_rt *sr = &rtb->sc->sr;
struct xfs_rtgroup *rtg = sr->rtg;
xfs_rtxnum_t startrtx;
xfs_rtxnum_t nextrtx;
xrep_wordoff_t wordoff, nextwordoff;
unsigned int bit;
unsigned int bufwsize;
xfs_extlen_t mod;
xfs_rtword_t mask;
enum xbtree_recpacking outcome;
int error;
/* The gap itself must be a valid block extent for this rt group. */
if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
return -EFSCORRUPTED;
/*
 * Convert rt blocks to rt extents.  The block range we find must be
 * aligned to an rtextent boundary on both ends.
 */
startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
if (mod)
return -EFSCORRUPTED;
/*
 * The end is checked via the last block of the gap (rgbno - 1), which
 * must be the final block of its rt extent.  nextrtx is therefore one
 * past the last rt extent to be marked free.
 */
nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
if (mod != mp->m_sb.sb_rextsize - 1)
return -EFSCORRUPTED;
/*
 * Must not be shared or CoW staging.  If a refcount cursor is available,
 * any result other than "no records" in either domain for this range
 * is treated as corruption.
 */
if (sr->refc_cur) {
error = xfs_refcount_has_records(sr->refc_cur,
XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
rgbno - rtb->next_rgbno, &outcome);
if (error)
return error;
if (outcome != XBTREE_RECPACKING_EMPTY)
return -EFSCORRUPTED;
error = xfs_refcount_has_records(sr->refc_cur,
XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
rgbno - rtb->next_rgbno, &outcome);
if (error)
return error;
if (outcome != XBTREE_RECPACKING_EMPTY)
return -EFSCORRUPTED;
}
trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
/*
 * Set bits as needed to round startrtx up to the nearest word.
 *
 * The mask covers bits [bit, lastbit) of the word holding startrtx,
 * where lastbit is capped at XFS_NBWORD.  If that already covers the
 * whole range (lastbit - bit == len) we are done; otherwise startrtx
 * moves up to the first bit of the following word.
 */
bit = startrtx & XREP_RTBMP_WORDMASK;
if (bit) {
xfs_rtblock_t len = nextrtx - startrtx;
unsigned int lastbit;
lastbit = min(bit + len, XFS_NBWORD);
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
mask);
if (error || lastbit - bit == len)
return error;
startrtx += XFS_NBWORD - bit;
}
/*
 * Set bits as needed to round nextrtx down to the nearest word.
 *
 * The mask covers the low @bit bits of the word holding nextrtx.  If
 * startrtx + bit == nextrtx there are no whole words left in between,
 * so we are done; otherwise nextrtx moves down to the start of its word.
 */
bit = nextrtx & XREP_RTBMP_WORDMASK;
if (bit) {
mask = ((xfs_rtword_t)1 << bit) - 1;
error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
mask);
if (error || startrtx + bit == nextrtx)
return error;
nextrtx -= bit;
}
trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
/*
 * Set all the words in between, up to a whole fs block at once.  The
 * source of each copy is rtb->words, which
 * xrep_rtbitmap_find_freespace fills with 0xFF bytes before the walk
 * starts; bufwsize is the number of words in one fs block.
 */
wordoff = rtx_to_wordoff(mp, startrtx);
nextwordoff = rtx_to_wordoff(mp, nextrtx);
bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
while (wordoff < nextwordoff) {
xrep_wordoff_t rem;
xrep_wordcnt_t wordcnt;
wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
bufwsize);
/*
 * Try to keep us aligned to the rtwords buffer to reduce the
 * number of xfile writes.  If wordoff is not a multiple of
 * bufwsize, this copy is shortened so that the next one starts
 * on a bufwsize boundary.
 */
rem = wordoff & (bufwsize - 1);
if (rem)
wordcnt = min_t(xrep_wordcnt_t, wordcnt,
bufwsize - rem);
error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
if (error)
return error;
wordoff += wordcnt;
}
return 0;
}
/*
 * rtrmapbt query callback.  If there is a gap between the next block we
 * expected to see and the start of this record, mark that gap free in the new
 * bitmap.  Then move the expected next block past the end of this record
 * unless it is already beyond it.
 */
STATIC int
xrep_rtbitmap_walk_rtrmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rtbitmap		*rtb = priv;
	int				ret = 0;

	if (xchk_should_terminate(rtb->sc, &ret))
		return ret;

	if (rec->rm_startblock > rtb->next_rgbno) {
		ret = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
		if (ret)
			return ret;
	}

	/* max() keeps next_rgbno from moving backwards. */
	rtb->next_rgbno = max(rtb->next_rgbno,
			      rec->rm_startblock + rec->rm_blockcount);
	return 0;
}
/*
 * Build the free space information for the new realtime bitmap: every gap
 * between rtrmapbt records, plus whatever lies between the last record and
 * the end of the rt group, is marked free.
 */
STATIC int
xrep_rtbitmap_find_freespace(
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_scrub	*sc = rtb->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	uint64_t		rg_blocks;
	int			ret;

	/* A block's worth of set bits speeds up marking large free ranges. */
	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);

	xrep_rtgroup_btcur_init(sc, &sc->sr);

	ret = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
			rtb);
	if (!ret) {
		/*
		 * Everything from the end of the last record seen up to the
		 * end of the rt group is free as well.
		 */
		rg_blocks = rtg->rtg_extents * mp->m_sb.sb_rextsize;
		if (rtb->next_rgbno < rg_blocks)
			ret = xrep_rtbitmap_mark_free(rtb, rg_blocks);
	}

	/* The cursors are released on success and failure alike. */
	xchk_rtgroup_btcur_free(&sc->sr);
	return ret;
}
/*
 * Callback that fills one tempfile buffer with the next block's worth of
 * words from the staged bitmap (@data is the struct xchk_rtbitmap), sets up
 * the block header when rtgroups are enabled, and attaches the buffer ops
 * and log item type.  Each call advances rtb->prep_wordoff by one block's
 * worth of words.
 */
static int
xrep_rtbitmap_prep_buf(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp,
	void			*data)
{
	struct xchk_rtbitmap	*rtb = data;
	struct xfs_mount	*mp = sc->mp;
	union xfs_rtword_raw	*dest;
	int			ret;

	/*
	 * Borrow rtb->args just long enough to locate the first bitmap word
	 * in this buffer, then detach the buffer from it again.
	 */
	rtb->args.mp = mp;
	rtb->args.tp = sc->tp;
	rtb->args.rbmbp = bp;
	dest = xfs_rbmblock_wordptr(&rtb->args, 0);
	rtb->args.rbmbp = NULL;

	ret = xfbmp_copyout(rtb, rtb->prep_wordoff, dest, mp->m_blockwsize);
	if (ret)
		return ret;

	if (!xfs_has_rtgroups(mp)) {
		bp->b_ops = &xfs_rtbuf_ops;
	} else {
		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;

		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr->rt_lsn = 0;
		uuid_copy(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid);
		bp->b_ops = &xfs_rtbitmap_buf_ops;
	}

	rtb->prep_wordoff += mp->m_blockwsize;
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
	return 0;
}
/*
 * Walk the first @len blocks of the data fork of the file being repaired and
 * convert every unwritten mapping into a written, zeroed one.  Written
 * mappings and holes are left alone.  The inode must already be joined to the
 * scrub transaction.
 *
 * Returns -EFSCORRUPTED for a delalloc reservation or any other unexpected
 * mapping state.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		next_off;
	int			nmaps;
	int			ret;

	ASSERT(sc->ip != NULL);

	/* Each pass resumes just past the mapping that was handled last. */
	for (next_off = 0;
	     next_off < len;
	     next_off = map.br_startoff + map.br_blockcount) {
		nmaps = 1;
		ret = xfs_bmapi_read(sc->ip, next_off, len - next_off, &map,
				&nmaps, XFS_DATA_FORK);
		if (ret)
			return ret;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Nothing to do for written extents.  Holes are skipped too,
		 * since we do not know the freespace information needed to
		 * fill them.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK)
			continue;

		/* A delalloc reservation here means something is badly off. */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* The only state left that we can deal with is unwritten. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Convert the mapping to a real, zeroed extent. */
		nmaps = 1;
		ret = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (ret)
			return ret;

		/* Finish the conversion and any deferred work it queued. */
		ret = xrep_defer_finish(sc);
		if (ret)
			return ret;
	}

	return 0;
}
/* Fix broken rt volume geometry. */
STATIC int
xrep_rtbitmap_geometry(
struct xfs_scrub *sc,
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = sc->tp;
/* Superblock fields */
if (mp->m_sb.sb_rextents != rtb->rextents)
xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
rtb->rextents - mp->m_sb.sb_rextents);
if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
rtb->rbmblocks - mp->m_sb.sb_rbmblocks);
if (mp->m_sb.sb_rextslog != rtb->rextslog)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
rtb->rextslog - mp->m_sb.sb_rextslog);
/* Fix broken isize */
sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
mp->m_sb.sb_blocksize);
if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);
xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
return xrep_roll_trans(sc);
}
/*
 * Repair the realtime bitmap file metadata.
 *
 * In order, this function:
 *  1. bails out if the filesystem lacks the rt rmap btree or exchange-range
 *     features, or if the computed bitmap size is impossibly large;
 *  2. tops up the transaction's block reservation if needed;
 *  3. fixes the bitmap inode's forks, unwritten mappings and geometry;
 *  4. flushes the group's busy extent list if it is not empty;
 *  5. computes the new bitmap contents from the rtrmapbt into the xfile;
 *  6. copies those contents into the temporary file, exchanges the data
 *     fork contents with the bitmap file, and reaps the old blocks.
 *
 * Returns 0 on success or a negative errno.
 */
int
xrep_rtbitmap(
struct xfs_scrub *sc)
{
struct xchk_rtbitmap *rtb = sc->buf;
struct xfs_mount *mp = sc->mp;
struct xfs_group *xg = rtg_group(sc->sr.rtg);
unsigned long long blocks = 0;
unsigned int busy_gen;
int error;
/* We require the realtime rmapbt to rebuild anything. */
if (!xfs_has_rtrmapbt(sc->mp))
return -EOPNOTSUPP;
/* We require atomic file exchange range to rebuild anything. */
if (!xfs_has_exchange_range(sc->mp))
return -EOPNOTSUPP;
/*
 * Impossibly large rtbitmap means we can't touch the filesystem.  Note
 * that this returns 0 without having repaired anything.
 */
if (rtb->rbmblocks > U32_MAX)
return 0;
/*
 * If the size of the rt bitmap file is larger than what we reserved,
 * figure out if we need to adjust the block reservation in the
 * transaction.
 */
blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
if (blocks > UINT_MAX)
return -EOPNOTSUPP;
if (blocks > rtb->resblks) {
error = xfs_trans_reserve_more(sc->tp, blocks, 0);
if (error)
return error;
rtb->resblks += blocks;
}
/* Fix inode core and forks. */
error = xrep_metadata_inode_forks(sc);
if (error)
return error;
/* Join the bitmap inode; xrep_rtbitmap_data_mappings requires this. */
xfs_trans_ijoin(sc->tp, sc->ip, 0);
/* Ensure no unwritten extents. */
error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
if (error)
return error;
/*
 * Fix inconsistent bitmap geometry. This function returns with a
 * clean scrub transaction.
 */
error = xrep_rtbitmap_geometry(sc, rtb);
if (error)
return error;
/*
 * Make sure the busy extent list is clear because we can't put extents
 * on there twice.
 */
if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
if (error)
return error;
}
/*
 * Generate the new rtbitmap data. We don't need the rtbmp information
 * once this call is finished.
 */
error = xrep_rtbitmap_find_freespace(rtb);
if (error)
return error;
/*
 * Try to take ILOCK_EXCL of the temporary file. We had better be the
 * only ones holding onto this inode, but we can't block while holding
 * the rtbitmap file's ILOCK_EXCL.  So poll with a short delay, checking
 * each time whether the scrub should terminate.
 */
while (!xrep_tempfile_ilock_nowait(sc)) {
if (xchk_should_terminate(sc, &error))
return error;
delay(1);
}
/*
 * Make sure we have space allocated for the part of the bitmap
 * file that corresponds to this group. We already joined sc->ip.
 */
xfs_trans_ijoin(sc->tp, sc->tempip, 0);
error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
if (error)
return error;
/* Last chance to abort before we start committing fixes. */
if (xchk_should_terminate(sc, &error))
return error;
/*
 * Copy the bitmap file that we generated.  xrep_rtbitmap_prep_buf is
 * the callback that fills each tempfile buffer from the xfile.
 */
error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
xrep_rtbitmap_prep_buf, rtb);
if (error)
return error;
/*
 * NOTE(review): the size comes from sb_rbmblocks here, whereas the
 * surrounding calls use rtb->rbmblocks.  Presumably the two agree once
 * xrep_rtbitmap_geometry has run -- confirm.
 */
error = xrep_tempfile_set_isize(sc,
XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
if (error)
return error;
/*
 * Now exchange the data fork contents. We're done with the temporary
 * buffer, so we can reuse it for the tempfile exchmaps information.
 */
error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
rtb->rbmblocks, &rtb->tempexch);
if (error)
return error;
error = xrep_tempexch_contents(sc, &rtb->tempexch);
if (error)
return error;
/*
 * Free the old rtbitmap blocks if they're not in use.  They are reaped
 * from the temporary file's data fork.
 */
return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}