mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-11-08 23:31:31 +00:00
jbd2: kill t_handle_lock transaction spinlock
This patch kills t_handle_lock transaction spinlock completely from
jbd2.
To explain the reasoning, currently there were three sites at which
this spinlock was used.
1. jbd2_journal_wait_updates()
a. Based on careful code review it can be seen that we don't need this
lock here. This is because we wait for any currently ongoing updates
based on an atomic variable t_updates. And we anyway don't take any
t_handle_lock while in stop_this_handle().
i.e.
write_lock(&journal->j_state_lock)
jbd2_journal_wait_updates()                     stop_this_handle()
  while (atomic_read(txn->t_updates)) {           |
    DEFINE_WAIT(wait);                            |
    prepare_to_wait();                            |
    if (atomic_read(txn->t_updates))              if (atomic_dec_and_test(txn->t_updates))
      write_unlock(&journal->j_state_lock);
      schedule();                                   wake_up()
      write_lock(&journal->j_state_lock);
    finish_wait();
  }
txn->t_state = T_COMMIT
write_unlock(&journal->j_state_lock);
b. Also note that between atomic_inc(&txn->t_updates) in
start_this_handle() and jbd2_journal_wait_updates(), the
synchronization happens via read_lock(journal->j_state_lock) in
start_this_handle();
2. jbd2_journal_extend()
a. jbd2_journal_extend() is called with the handle of each process from
task_struct. So no lock is required when updating member fields of handle_t.
b. For member fields of h_transaction, all updates happen only via
atomic APIs (which is also within read_lock()).
So, there is no need for this transaction spinlock.
3. update_t_max_wait()
Based on Jan's suggestion, this can be carefully removed using the atomic
cmpxchg API.
Note that there can be several processes which are waiting for a new
transaction to be allocated and started. For doing this, only one
process will succeed in taking write_lock() and allocating a new txn.
After that, all of the processes will be updating t_max_wait (the max
transaction wait time). This can be done via the method below, without
taking any locks, using atomic cmpxchg.
For more details refer to [1].
new = get_new_val();
old = READ_ONCE(ptr->max_val);
while (old < new)
old = cmpxchg(&ptr->max_val, old, new);
[1]: https://lwn.net/Articles/849237/
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
cc16eecae6
commit
f7f497cb70
@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
|
|||||||
transaction->t_start_time = ktime_get();
|
transaction->t_start_time = ktime_get();
|
||||||
transaction->t_tid = journal->j_transaction_sequence++;
|
transaction->t_tid = journal->j_transaction_sequence++;
|
||||||
transaction->t_expires = jiffies + journal->j_commit_interval;
|
transaction->t_expires = jiffies + journal->j_commit_interval;
|
||||||
spin_lock_init(&transaction->t_handle_lock);
|
|
||||||
atomic_set(&transaction->t_updates, 0);
|
atomic_set(&transaction->t_updates, 0);
|
||||||
atomic_set(&transaction->t_outstanding_credits,
|
atomic_set(&transaction->t_outstanding_credits,
|
||||||
jbd2_descriptor_blocks_per_trans(journal) +
|
jbd2_descriptor_blocks_per_trans(journal) +
|
||||||
@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
|
|||||||
/*
|
/*
|
||||||
* Update transaction's maximum wait time, if debugging is enabled.
|
* Update transaction's maximum wait time, if debugging is enabled.
|
||||||
*
|
*
|
||||||
* In order for t_max_wait to be reliable, it must be protected by a
|
* t_max_wait is carefully updated here with use of atomic compare exchange.
|
||||||
* lock. But doing so will mean that start_this_handle() can not be
|
* Note that there could be multiple threads trying to do this simultaneously
|
||||||
* run in parallel on SMP systems, which limits our scalability. So
|
* hence using cmpxchg to avoid any use of locks in this case.
|
||||||
* unless debugging is enabled, we no longer update t_max_wait, which
|
|
||||||
* means that maximum wait time reported by the jbd2_run_stats
|
|
||||||
* tracepoint will always be zero.
|
|
||||||
*/
|
*/
|
||||||
static inline void update_t_max_wait(transaction_t *transaction,
|
static inline void update_t_max_wait(transaction_t *transaction,
|
||||||
unsigned long ts)
|
unsigned long ts)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_JBD2_DEBUG
|
#ifdef CONFIG_JBD2_DEBUG
|
||||||
|
unsigned long oldts, newts;
|
||||||
if (jbd2_journal_enable_debug &&
|
if (jbd2_journal_enable_debug &&
|
||||||
time_after(transaction->t_start, ts)) {
|
time_after(transaction->t_start, ts)) {
|
||||||
ts = jbd2_time_diff(ts, transaction->t_start);
|
newts = jbd2_time_diff(ts, transaction->t_start);
|
||||||
spin_lock(&transaction->t_handle_lock);
|
oldts = READ_ONCE(transaction->t_max_wait);
|
||||||
if (ts > transaction->t_max_wait)
|
while (oldts < newts)
|
||||||
transaction->t_max_wait = ts;
|
oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
|
||||||
spin_unlock(&transaction->t_handle_lock);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
|
|||||||
DIV_ROUND_UP(
|
DIV_ROUND_UP(
|
||||||
handle->h_revoke_credits_requested,
|
handle->h_revoke_credits_requested,
|
||||||
journal->j_revoke_records_per_block);
|
journal->j_revoke_records_per_block);
|
||||||
spin_lock(&transaction->t_handle_lock);
|
|
||||||
wanted = atomic_add_return(nblocks,
|
wanted = atomic_add_return(nblocks,
|
||||||
&transaction->t_outstanding_credits);
|
&transaction->t_outstanding_credits);
|
||||||
|
|
||||||
@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
|
|||||||
jbd_debug(3, "denied handle %p %d blocks: "
|
jbd_debug(3, "denied handle %p %d blocks: "
|
||||||
"transaction too large\n", handle, nblocks);
|
"transaction too large\n", handle, nblocks);
|
||||||
atomic_sub(nblocks, &transaction->t_outstanding_credits);
|
atomic_sub(nblocks, &transaction->t_outstanding_credits);
|
||||||
goto unlock;
|
goto error_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
|
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
|
||||||
@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
|
|||||||
result = 0;
|
result = 0;
|
||||||
|
|
||||||
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
|
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
|
||||||
unlock:
|
|
||||||
spin_unlock(&transaction->t_handle_lock);
|
|
||||||
error_out:
|
error_out:
|
||||||
read_unlock(&journal->j_state_lock);
|
read_unlock(&journal->j_state_lock);
|
||||||
return result;
|
return result;
|
||||||
@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal)
|
|||||||
if (!transaction)
|
if (!transaction)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
spin_lock(&transaction->t_handle_lock);
|
|
||||||
prepare_to_wait(&journal->j_wait_updates, &wait,
|
prepare_to_wait(&journal->j_wait_updates, &wait,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
if (!atomic_read(&transaction->t_updates)) {
|
if (!atomic_read(&transaction->t_updates)) {
|
||||||
spin_unlock(&transaction->t_handle_lock);
|
|
||||||
finish_wait(&journal->j_wait_updates, &wait);
|
finish_wait(&journal->j_wait_updates, &wait);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
spin_unlock(&transaction->t_handle_lock);
|
|
||||||
write_unlock(&journal->j_state_lock);
|
write_unlock(&journal->j_state_lock);
|
||||||
schedule();
|
schedule();
|
||||||
finish_wait(&journal->j_wait_updates, &wait);
|
finish_wait(&journal->j_wait_updates, &wait);
|
||||||
|
|||||||
@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
|
|||||||
* ->j_list_lock
|
* ->j_list_lock
|
||||||
*
|
*
|
||||||
* j_state_lock
|
* j_state_lock
|
||||||
* ->t_handle_lock
|
|
||||||
*
|
|
||||||
* j_state_lock
|
|
||||||
* ->j_list_lock (journal_unmap_buffer)
|
* ->j_list_lock (journal_unmap_buffer)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user