mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-08-16 23:51:44 +00:00

- Consolidate the VDSO storage The VDSO data storage and data layout has been largely architecture specific for historical reasons. That increases the maintenance effort and causes inconsistencies over and over. There is no real technical reason for architecture specific layouts and implementations. The architecture specific details can easily be integrated into a generic layout, which also reduces the amount of duplicated code for managing the mappings. Convert all architectures over to a unified layout and common mapping infrastructure. This splits the VDSO data layout into subsystem specific blocks, timekeeping, random and architecture parts, which provides a better structure and allows to improve and update the functionalities without conflict and interaction. - Rework the timekeeping data storage The current implementation is designed for exposing system timekeeping accessors, which was good enough at the time when it was designed. PTP and Time Sensitive Networking (TSN) change that as there are requirements to expose independent PTP clocks, which are not related to system timekeeping. Replace the monolithic data storage by a structured layout, which allows to add support for independent PTP clocks on top while reusing both the data structures and the time accessor implementations. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmfgSWUTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoYGED/0f/M8YyacAyErDYW4ufW+zh2sUidSf GVlK0Jn5BMljOoye+y2XfTxuvvXxEDjJNYiJm2uKGPdV29tjNXreGK39XyNqXPu5 jwR4f/IN/QVSM2nCO6jyydMz8ympJ2k6M4RewwmxXBL2KsUzzJWSKTgRNqM5Tdjs 1RhJMjkQVTiiSYerBpHXYCeZLM7/VEfZ120uuzVAYPXo0/R6zuyF7IBgIao9hbfO IQeCMLLfpDQHQhwquTA8ZbWqQusiEoSYHT+kTDa3eXDDbE/2UklAUs9gaatI979x 73zs0Yqxyx2iIGaghACWOAbKdcBWBeCYDw5fFwYVKn4VMQi1+wcxbtOYL767jp9o vfkLXGilXcVkvDjv4fH+e1NoJXXBxq1Ug1silKdOeJzenQF8Q1i3tavkWUVCNfwH qyOIM72NiCEWbYBDcz0lwBxEAyO4o0E6NP1bDc4y50VedEYIbXwSh0QGrdev1abn rjY9vsuUR9oznmZ6BRPPxMTY87gOSHoKvqydgSZUACEgLV9346f5qZf341OReYai MXUmXOM4+LdyaM1+Mec8ppvjMbLw+736NZyZtT2InusEBE+Ddp25L3hYiWnklJu8 2uwv0AoyrwaJ8y6ADOX4thcLZq0gND0Z/Ayz/XvpeI30eftsGUCt5KOVlqwfwOkI 4EQKvk2fAixPxg== =rwei -----END PGP SIGNATURE----- Merge tag 'timers-vdso-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull VDSO infrastructure updates from Thomas Gleixner: - Consolidate the VDSO storage The VDSO data storage and data layout has been largely architecture specific for historical reasons. That increases the maintenance effort and causes inconsistencies over and over. There is no real technical reason for architecture specific layouts and implementations. The architecture specific details can easily be integrated into a generic layout, which also reduces the amount of duplicated code for managing the mappings. Convert all architectures over to a unified layout and common mapping infrastructure. This splits the VDSO data layout into subsystem specific blocks, timekeeping, random and architecture parts, which provides a better structure and allows to improve and update the functionalities without conflict and interaction. - Rework the timekeeping data storage The current implementation is designed for exposing system timekeeping accessors, which was good enough at the time when it was designed. PTP and Time Sensitive Networking (TSN) change that as there are requirements to expose independent PTP clocks, which are not related to system timekeeping. Replace the monolithic data storage by a structured layout, which allows to add support for independent PTP clocks on top while reusing both the data structures and the time accessor implementations. * tag 'timers-vdso-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits) sparc/vdso: Always reject undefined references during linking x86/vdso: Always reject undefined references during linking vdso: Rework struct vdso_time_data and introduce struct vdso_clock vdso: Move architecture related data before basetime data powerpc/vdso: Prepare introduction of struct vdso_clock arm64/vdso: Prepare introduction of struct vdso_clock x86/vdso: Prepare introduction of struct vdso_clock time/namespace: Prepare introduction of struct vdso_clock vdso/namespace: Rename timens_setup_vdso_data() to reflect new vdso_clock struct vdso/vsyscall: Prepare introduction of struct vdso_clock vdso/gettimeofday: Prepare helper functions for introduction of struct vdso_clock vdso/gettimeofday: Prepare do_coarse_timens() for introduction of struct vdso_clock vdso/gettimeofday: Prepare do_coarse() for introduction of struct vdso_clock vdso/gettimeofday: Prepare do_hres_timens() for introduction of struct vdso_clock vdso/gettimeofday: Prepare do_hres() for introduction of struct vdso_clock vdso/gettimeofday: Prepare introduction of struct vdso_clock vdso/helpers: Prepare introduction of struct vdso_clock vdso/datapage: Define vdso_clock to prepare for multiple PTP clocks vdso: Make vdso_time_data cacheline aligned arm64: Make asm/cache.h compatible with vDSO ...
301 lines
7.4 KiB
C
301 lines
7.4 KiB
C
/*
|
|
* parse_vdso.c: Linux reference vDSO parser
|
|
* Written by Andrew Lutomirski, 2011-2014.
|
|
*
|
|
* This code is meant to be linked in to various programs that run on Linux.
|
|
* As such, it is available with as few restrictions as possible. This file
|
|
* is licensed under the Creative Commons Zero License, version 1.0,
|
|
* available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
|
|
*
|
|
* The vDSO is a regular ELF DSO that the kernel maps into user space when
|
|
* it starts a program. It works equally well in statically and dynamically
|
|
* linked binaries.
|
|
*
|
|
* This code is tested on x86. In principle it should work on any
|
|
* architecture that has a vDSO.
|
|
*/
|
|
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <linux/auxvec.h>
|
|
#include <linux/elf.h>
|
|
|
|
#include "parse_vdso.h"
|
|
|
|
/* And here's the code. */
|
|
#ifndef ELF_BITS
|
|
# if __SIZEOF_LONG__ >= 8
|
|
# define ELF_BITS 64
|
|
# else
|
|
# define ELF_BITS 32
|
|
# endif
|
|
#endif
|
|
|
|
#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
|
|
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
|
|
#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
|
|
|
|
#ifdef __s390x__
|
|
#define ELF_HASH_ENTRY ELF(Xword)
|
|
#else
|
|
#define ELF_HASH_ENTRY ELF(Word)
|
|
#endif
|
|
|
|
static struct vdso_info
|
|
{
|
|
bool valid;
|
|
|
|
/* Load information */
|
|
uintptr_t load_addr;
|
|
uintptr_t load_offset; /* load_addr - recorded vaddr */
|
|
|
|
/* Symbol table */
|
|
ELF(Sym) *symtab;
|
|
const char *symstrings;
|
|
ELF(Word) *gnu_hash, *gnu_bucket;
|
|
ELF_HASH_ENTRY *bucket, *chain;
|
|
ELF_HASH_ENTRY nbucket, nchain;
|
|
|
|
/* Version table */
|
|
ELF(Versym) *versym;
|
|
ELF(Verdef) *verdef;
|
|
} vdso_info;
|
|
|
|
/*
|
|
* Straight from the ELF specification...and then tweaked slightly, in order to
|
|
* avoid a few clang warnings.
|
|
*/
|
|
static unsigned long elf_hash(const char *name)
|
|
{
|
|
unsigned long h = 0, g;
|
|
const unsigned char *uch_name = (const unsigned char *)name;
|
|
|
|
while (*uch_name)
|
|
{
|
|
h = (h << 4) + *uch_name++;
|
|
g = h & 0xf0000000;
|
|
if (g)
|
|
h ^= g >> 24;
|
|
h &= ~g;
|
|
}
|
|
return h;
|
|
}
|
|
|
|
static uint32_t gnu_hash(const char *name)
|
|
{
|
|
const unsigned char *s = (void *)name;
|
|
uint32_t h = 5381;
|
|
|
|
for (; *s; s++)
|
|
h += h * 32 + *s;
|
|
return h;
|
|
}
|
|
|
|
void vdso_init_from_sysinfo_ehdr(uintptr_t base)
|
|
{
|
|
size_t i;
|
|
bool found_vaddr = false;
|
|
|
|
vdso_info.valid = false;
|
|
|
|
vdso_info.load_addr = base;
|
|
|
|
ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
|
|
if (hdr->e_ident[EI_CLASS] !=
|
|
(ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
|
|
return; /* Wrong ELF class -- check ELF_BITS */
|
|
}
|
|
|
|
ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
|
|
ELF(Dyn) *dyn = 0;
|
|
|
|
/*
|
|
* We need two things from the segment table: the load offset
|
|
* and the dynamic table.
|
|
*/
|
|
for (i = 0; i < hdr->e_phnum; i++)
|
|
{
|
|
if (pt[i].p_type == PT_LOAD && !found_vaddr) {
|
|
found_vaddr = true;
|
|
vdso_info.load_offset = base
|
|
+ (uintptr_t)pt[i].p_offset
|
|
- (uintptr_t)pt[i].p_vaddr;
|
|
} else if (pt[i].p_type == PT_DYNAMIC) {
|
|
dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
|
|
}
|
|
}
|
|
|
|
if (!found_vaddr || !dyn)
|
|
return; /* Failed */
|
|
|
|
/*
|
|
* Fish out the useful bits of the dynamic table.
|
|
*/
|
|
ELF_HASH_ENTRY *hash = 0;
|
|
vdso_info.symstrings = 0;
|
|
vdso_info.gnu_hash = 0;
|
|
vdso_info.symtab = 0;
|
|
vdso_info.versym = 0;
|
|
vdso_info.verdef = 0;
|
|
for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
|
|
switch (dyn[i].d_tag) {
|
|
case DT_STRTAB:
|
|
vdso_info.symstrings = (const char *)
|
|
((uintptr_t)dyn[i].d_un.d_ptr
|
|
+ vdso_info.load_offset);
|
|
break;
|
|
case DT_SYMTAB:
|
|
vdso_info.symtab = (ELF(Sym) *)
|
|
((uintptr_t)dyn[i].d_un.d_ptr
|
|
+ vdso_info.load_offset);
|
|
break;
|
|
case DT_HASH:
|
|
hash = (ELF_HASH_ENTRY *)
|
|
((uintptr_t)dyn[i].d_un.d_ptr
|
|
+ vdso_info.load_offset);
|
|
break;
|
|
case DT_GNU_HASH:
|
|
vdso_info.gnu_hash =
|
|
(ELF(Word) *)((uintptr_t)dyn[i].d_un.d_ptr +
|
|
vdso_info.load_offset);
|
|
break;
|
|
case DT_VERSYM:
|
|
vdso_info.versym = (ELF(Versym) *)
|
|
((uintptr_t)dyn[i].d_un.d_ptr
|
|
+ vdso_info.load_offset);
|
|
break;
|
|
case DT_VERDEF:
|
|
vdso_info.verdef = (ELF(Verdef) *)
|
|
((uintptr_t)dyn[i].d_un.d_ptr
|
|
+ vdso_info.load_offset);
|
|
break;
|
|
}
|
|
}
|
|
if (!vdso_info.symstrings || !vdso_info.symtab ||
|
|
(!hash && !vdso_info.gnu_hash))
|
|
return; /* Failed */
|
|
|
|
if (!vdso_info.verdef)
|
|
vdso_info.versym = 0;
|
|
|
|
/* Parse the hash table header. */
|
|
if (vdso_info.gnu_hash) {
|
|
vdso_info.nbucket = vdso_info.gnu_hash[0];
|
|
/* The bucket array is located after the header (4 uint32) and the bloom
|
|
* filter (size_t array of gnu_hash[2] elements).
|
|
*/
|
|
vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 +
|
|
sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
|
|
} else {
|
|
vdso_info.nbucket = hash[0];
|
|
vdso_info.nchain = hash[1];
|
|
vdso_info.bucket = &hash[2];
|
|
vdso_info.chain = &hash[vdso_info.nbucket + 2];
|
|
}
|
|
|
|
/* That's all we need. */
|
|
vdso_info.valid = true;
|
|
}
|
|
|
|
static bool vdso_match_version(ELF(Versym) ver,
|
|
const char *name, ELF(Word) hash)
|
|
{
|
|
/*
|
|
* This is a helper function to check if the version indexed by
|
|
* ver matches name (which hashes to hash).
|
|
*
|
|
* The version definition table is a mess, and I don't know how
|
|
* to do this in better than linear time without allocating memory
|
|
* to build an index. I also don't know why the table has
|
|
* variable size entries in the first place.
|
|
*
|
|
* For added fun, I can't find a comprehensible specification of how
|
|
* to parse all the weird flags in the table.
|
|
*
|
|
* So I just parse the whole table every time.
|
|
*/
|
|
|
|
/* First step: find the version definition */
|
|
ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
|
|
ELF(Verdef) *def = vdso_info.verdef;
|
|
while(true) {
|
|
if ((def->vd_flags & VER_FLG_BASE) == 0
|
|
&& (def->vd_ndx & 0x7fff) == ver)
|
|
break;
|
|
|
|
if (def->vd_next == 0)
|
|
return false; /* No definition. */
|
|
|
|
def = (ELF(Verdef) *)((char *)def + def->vd_next);
|
|
}
|
|
|
|
/* Now figure out whether it matches. */
|
|
ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
|
|
return def->vd_hash == hash
|
|
&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
|
|
}
|
|
|
|
static bool check_sym(ELF(Sym) *sym, ELF(Word) i, const char *name,
|
|
const char *version, unsigned long ver_hash)
|
|
{
|
|
/* Check for a defined global or weak function w/ right name. */
|
|
if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
|
|
return false;
|
|
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
|
|
ELF64_ST_BIND(sym->st_info) != STB_WEAK)
|
|
return false;
|
|
if (strcmp(name, vdso_info.symstrings + sym->st_name))
|
|
return false;
|
|
|
|
/* Check symbol version. */
|
|
if (vdso_info.versym &&
|
|
!vdso_match_version(vdso_info.versym[i], version, ver_hash))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void *vdso_sym(const char *version, const char *name)
|
|
{
|
|
unsigned long ver_hash;
|
|
if (!vdso_info.valid)
|
|
return 0;
|
|
|
|
ver_hash = elf_hash(version);
|
|
ELF(Word) i;
|
|
|
|
if (vdso_info.gnu_hash) {
|
|
uint32_t h1 = gnu_hash(name), h2, *hashval;
|
|
|
|
i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket];
|
|
if (i == 0)
|
|
return 0;
|
|
h1 |= 1;
|
|
hashval = vdso_info.gnu_bucket + vdso_info.nbucket +
|
|
(i - vdso_info.gnu_hash[1]);
|
|
for (;; i++) {
|
|
ELF(Sym) *sym = &vdso_info.symtab[i];
|
|
h2 = *hashval++;
|
|
if (h1 == (h2 | 1) &&
|
|
check_sym(sym, i, name, version, ver_hash))
|
|
return (void *)(vdso_info.load_offset +
|
|
sym->st_value);
|
|
if (h2 & 1)
|
|
break;
|
|
}
|
|
} else {
|
|
i = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
|
|
for (; i; i = vdso_info.chain[i]) {
|
|
ELF(Sym) *sym = &vdso_info.symtab[i];
|
|
if (sym->st_shndx != SHN_UNDEF &&
|
|
check_sym(sym, i, name, version, ver_hash))
|
|
return (void *)(vdso_info.load_offset +
|
|
sym->st_value);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|