s390/mm: Reimplement lazy ASCE handling

Reduce system call overhead time (round trip time for invoking a
non-existent system call) by 25%.

With the removal of set_fs() [1], lazy control register handling was removed
in order to keep kernel entry and exit simple. However, this made system
calls slower.

With the conversion to generic entry [2] and numerous follow-up changes,
which simplified the entry code significantly, adding support for lazy asce
handling no longer adds much complexity to the entry code.

In particular this means:

- On kernel entry the primary asce is not modified and contains the user
  asce.

- Kernel accesses which require secondary-space mode (for example futex
  operations) are surrounded by enable_sacf_uaccess() and
  disable_sacf_uaccess() calls, as sketched below. enable_sacf_uaccess()
  sets the primary asce to the kernel asce so that the sacf instruction can
  be used to switch to secondary-space mode. The primary asce is changed
  back to the user asce with disable_sacf_uaccess().
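
A minimal sketch of this bracket pattern, assuming the helpers introduced
in asm/asce.h below (the caller itself is hypothetical):

/* Hypothetical caller illustrating the sacf uaccess bracket. */
static int do_secondary_space_access(void)
{
        bool sacf_flag;

        sacf_flag = enable_sacf_uaccess();      /* primary asce := kernel asce */
        /*
         * Inline assembly may now execute "sacf 256" to switch to
         * secondary-space mode and access user memory via the user
         * asce, which remains in %cr7.
         */
        disable_sacf_uaccess(sacf_flag);        /* primary asce := user asce */
        return 0;
}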

The state of the control register which contains the primary asce is
tracked with a new TIF_ASCE_PRIMARY bit. This is required on context
switch so that the correct asce is restored for the scheduled-in process.

As a result, address spaces are now set up like this:

CPU running in               | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE
-----------------------------|-----------|-----------|-----------
user space                   |  user     |  user     |  kernel
kernel (no sacf)             |  user     |  user     |  kernel
kernel (during sacf uaccess) |  kernel   |  user     |  kernel
kernel (kvm guest execution) |  guest    |  user     |  kernel

As a result, the cr1 control register content is not changed, except for:
- futex system calls
- legacy s390 PCI system calls
- the kvm specific cmpxchg_user_key() uaccess helper

This leads to faster system call execution.
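
The quoted 25% refers to exactly this round trip. A minimal user space
measurement sketch (the invalid system call number -1 is an assumption;
any unused number that fails with ENOSYS exercises pure entry/exit cost):

/* Rough measurement of system call entry/exit cost. */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
        enum { ITERATIONS = 10 * 1000 * 1000 };
        struct timespec start, end;
        double ns;
        long i;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (i = 0; i < ITERATIONS; i++)
                syscall(-1);    /* fails with ENOSYS */
        clock_gettime(CLOCK_MONOTONIC, &end);
        ns = (end.tv_sec - start.tv_sec) * 1e9 + (end.tv_nsec - start.tv_nsec);
        printf("%.1f ns per round trip\n", ns / ITERATIONS);
        return 0;
}

Running this once on a kernel with and once without this commit gives the
relative overhead difference.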

[1] 87d5986345 ("s390/mm: remove set_fs / rework address space handling")
[2] 56e62a7370 ("s390: convert to generic entry")

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Heiko Carstens, 2025-04-09 15:01:50 +02:00
commit 8b72f5a97b, parent 8ffd015db8
12 changed files with 97 additions and 25 deletions

arch/s390/include/asm/asce.h (new file)

@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ASCE_H
#define _ASM_S390_ASCE_H

#include <linux/thread_info.h>
#include <linux/irqflags.h>
#include <asm/lowcore.h>
#include <asm/ctlreg.h>

static inline bool enable_sacf_uaccess(void)
{
        unsigned long flags;

        if (test_thread_flag(TIF_ASCE_PRIMARY))
                return true;
        local_irq_save(flags);
        local_ctl_load(1, &get_lowcore()->kernel_asce);
        set_thread_flag(TIF_ASCE_PRIMARY);
        local_irq_restore(flags);
        return false;
}

static inline void disable_sacf_uaccess(bool previous)
{
        unsigned long flags;

        if (previous)
                return;
        local_irq_save(flags);
        local_ctl_load(1, &get_lowcore()->user_asce);
        clear_thread_flag(TIF_ASCE_PRIMARY);
        local_irq_restore(flags);
}

#endif /* _ASM_S390_ASCE_H */
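
The bool returned by enable_sacf_uaccess() makes the bracket safely
nestable: only the outermost caller, which actually switched the primary
asce, switches it back. A hypothetical nested use of the helpers above:

/* Hypothetical nesting example for the helpers above. */
static void nested_sacf_sections(void)
{
        bool outer = enable_sacf_uaccess();     /* loads kernel asce, returns false */
        bool inner = enable_sacf_uaccess();     /* TIF_ASCE_PRIMARY already set, returns true */

        disable_sacf_uaccess(inner);            /* no-op, kernel asce stays loaded */
        disable_sacf_uaccess(outer);            /* restores the user asce */
}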

arch/s390/include/asm/futex.h

@ -13,9 +13,11 @@
static uaccess_kmsan_or_inline int \
__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
{ \
bool sacf_flag; \
int rc, new; \
\
instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
sacf_flag = enable_sacf_uaccess(); \
asm_inline volatile( \
" sacf 256\n" \
"0: l %[old],%[uaddr]\n" \
@ -32,6 +34,7 @@ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
[new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
: [oparg] "d" (oparg) \
: "cc"); \
disable_sacf_uaccess(sacf_flag); \
if (!rc) \
instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
return rc; \
@ -75,9 +78,11 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
static uaccess_kmsan_or_inline
int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
bool sacf_flag;
int rc;
instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" sacf 256\n"
"0: cs %[old],%[new],%[uaddr]\n"
@ -88,6 +93,7 @@ int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32
: [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
: [new] "d" (newval)
: "cc", "memory");
disable_sacf_uaccess(sacf_flag);
*uval = oldval;
instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
return rc;

arch/s390/include/asm/mmu_context.h

@ -13,6 +13,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctlreg.h>
#include <asm/asce.h>
#include <asm-generic/mm_hooks.h>
#define init_new_context init_new_context
@ -77,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
else
get_lowcore()->user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-/* Clear previous user-ASCE from CR7 */
+/* Clear previous user-ASCE from CR1 and CR7 */
+local_ctl_load(1, &s390_invalid_asce);
local_ctl_load(7, &s390_invalid_asce);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
@ -99,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
unsigned long flags;
if (mm) {
preempt_disable();
@ -108,16 +111,30 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
local_irq_save(flags);
if (test_thread_flag(TIF_ASCE_PRIMARY))
local_ctl_load(1, &get_lowcore()->kernel_asce);
else
local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
local_irq_restore(flags);
}
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
unsigned long flags;
switch_mm(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
local_irq_save(flags);
if (test_thread_flag(TIF_ASCE_PRIMARY))
local_ctl_load(1, &get_lowcore()->kernel_asce);
else
local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
local_irq_restore(flags);
}
#include <asm-generic/mmu_context.h>
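
finish_arch_post_lock_switch() and activate_mm() now repeat the same
interrupt-protected cr1/cr7 reload. A hypothetical helper showing the
factored-out logic (not part of this patch, which open-codes it in both
places):

/* Hypothetical helper equivalent to the duplicated reload above. */
static inline void reload_asces(void)
{
        unsigned long flags;

        local_irq_save(flags);
        /* cr1 follows the TIF_ASCE_PRIMARY state, cr7 always holds the user asce */
        if (test_thread_flag(TIF_ASCE_PRIMARY))
                local_ctl_load(1, &get_lowcore()->kernel_asce);
        else
                local_ctl_load(1, &get_lowcore()->user_asce);
        local_ctl_load(7, &get_lowcore()->user_asce);
        local_irq_restore(flags);
}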

arch/s390/include/asm/ptrace.h

@ -126,7 +126,6 @@ struct pt_regs {
struct tpi_info tpi_info;
};
unsigned long flags;
-unsigned long cr1;
unsigned long last_break;
};

arch/s390/include/asm/thread_info.h

@ -64,6 +64,7 @@ void arch_setup_new_exec(void);
#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
#define TIF_UPROBE 4 /* breakpointed or single-stepping */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
@ -85,6 +86,7 @@ void arch_setup_new_exec(void);
#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE BIT(TIF_UPROBE)
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)

arch/s390/include/asm/uaccess.h

@ -19,6 +19,7 @@
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
#include <asm/asce.h>
#include <linux/instrumented.h>
void debug_user_asce(int exit);
@ -478,6 +479,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
__uint128_t old, __uint128_t new,
unsigned long key, int size)
{
bool sacf_flag;
int rc = 0;
switch (size) {
@ -490,6 +492,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
_old = ((unsigned int)old & 0xff) << shift;
_new = ((unsigned int)new & 0xff) << shift;
mask = ~(0xff << shift);
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@ -524,6 +527,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[default_key] "J" (PAGE_DEFAULT_KEY),
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*(unsigned char *)uval = prev >> shift;
if (!count)
rc = -EAGAIN;
@ -538,6 +542,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
_old = ((unsigned int)old & 0xffff) << shift;
_new = ((unsigned int)new & 0xffff) << shift;
mask = ~(0xffff << shift);
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@ -572,6 +577,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[default_key] "J" (PAGE_DEFAULT_KEY),
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*(unsigned short *)uval = prev >> shift;
if (!count)
rc = -EAGAIN;
@ -580,6 +586,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
case 4: {
unsigned int prev = old;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@ -595,12 +602,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*(unsigned int *)uval = prev;
return rc;
}
case 8: {
unsigned long prev = old;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@ -616,12 +625,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*(unsigned long *)uval = prev;
return rc;
}
case 16: {
__uint128_t prev = old;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@ -637,6 +648,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*(__uint128_t *)uval = prev;
return rc;
}

arch/s390/kernel/asm-offsets.c

@ -50,7 +50,6 @@ int main(void)
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_FLAGS, pt_regs, flags);
-OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();

arch/s390/kernel/entry.S

@ -116,7 +116,7 @@ _LPP_OFFSET = __LC_LPP
.macro SIEEXIT sie_control,lowcore
lg %r9,\sie_control # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
-lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
lg %r9,__LC_CURRENT(\lowcore)
mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
@ -208,7 +208,7 @@ SYM_FUNC_START(__sie64a)
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
GET_LC %r14
-lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
lg %r14,__LC_CURRENT(%r14)
mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
@ -240,7 +240,6 @@ SYM_CODE_START(system_call)
lghi %r14,0
.Lsysc_per:
STBEAR __LC_LAST_BREAK(%r13)
-lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
@ -261,7 +260,6 @@ SYM_CODE_START(system_call)
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
-lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@ -278,7 +276,6 @@ SYM_CODE_START(ret_from_fork)
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
GET_LC %r13
-lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@ -299,10 +296,7 @@ SYM_CODE_START(pgm_check_handler)
lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
-jno .Lpgm_skip_asce
-lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
-j 3f # -> fault in user space
-.Lpgm_skip_asce:
+jo 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
lg %r11,__LC_CURRENT(%r13)
tm __TI_sie(%r11),0xff
@ -340,7 +334,6 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
-lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
@ -384,8 +377,7 @@ SYM_CODE_START(\name)
#endif
0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
-lg %r15,__LC_KERNEL_STACK(%r13)
+1: lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@ -408,7 +400,6 @@ SYM_CODE_START(\name)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
-lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
@ -476,8 +467,6 @@ SYM_CODE_START(mcck_int_handler)
.Lmcck_user:
lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
-stctg %c1,%c1,__PT_CR1(%r11)
-lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
mvc __PT_R0(128,%r11),0(%r14)
@ -495,7 +484,6 @@ SYM_CODE_START(mcck_int_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
-lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?

arch/s390/kernel/smp.c

@ -263,7 +263,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
abs_lc = get_abs_lowcore();
memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
put_abs_lowcore(abs_lc);
-lc->cregs_save_area[1] = lc->kernel_asce;
+lc->cregs_save_area[1] = lc->user_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
arch_spin_lock_setup(cpu);

arch/s390/lib/uaccess.c

@ -17,17 +17,18 @@
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
{
struct lowcore *lc = get_lowcore();
struct ctlreg cr1, cr7;
local_ctl_store(1, &cr1);
local_ctl_store(7, &cr7);
-if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
"kernel: %016lx user: %016lx\n",
exit ? "exit" : "entry", cr1.val, cr7.val,
-get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+lc->kernel_asce.val, lc->user_asce.val);
}
#endif /* CONFIG_DEBUG_ENTRY */

arch/s390/mm/pgalloc.c

@ -38,11 +38,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
struct ctlreg asce;
asce.val = mm->context.asce;
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
-get_lowcore()->user_asce.val = mm->context.asce;
-local_ctl_load(7, &get_lowcore()->user_asce);
+get_lowcore()->user_asce = asce;
+local_ctl_load(7, &asce);
if (!test_thread_flag(TIF_ASCE_PRIMARY))
local_ctl_load(1, &asce);
}
__tlb_flush_local();
}

arch/s390/pci/pci_mmio.c

@ -32,8 +32,10 @@ static inline int __pcistb_mio_inuser(
u64 len, u8 *status)
{
int cc, exception;
bool sacf_flag;
exception = 1;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser(
: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
: CC_CLOBBER_LIST("memory"));
disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
return exception ? -ENXIO : CC_TRANSFORM(cc);
}
@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser(
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
int cc, exception;
bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@ -64,6 +68,7 @@ static inline int __pcistg_mio_inuser(
* address space. pcistg then uses the user mappings.
*/
exception = 1;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser(
CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
: CC_CLOBBER_LIST("memory"));
disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
@ -204,6 +210,7 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
int cc, exception;
@ -215,6 +222,7 @@ static inline int __pcilg_mio_inuser(
* user address @dst
*/
exception = 1;
sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
@ -239,7 +247,7 @@ static inline int __pcilg_mio_inuser(
[shift] "+d" (shift)
:
: CC_CLOBBER_LIST("memory"));
disable_sacf_uaccess(sacf_flag);
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)