aboutsummaryrefslogtreecommitdiff
path: root/sys/amd64/linux/linux_sysvec.c
diff options
context:
space:
mode:
authorDmitry Chagin <dchagin@FreeBSD.org>2022-06-17 19:33:07 +0000
committerDmitry Chagin <dchagin@FreeBSD.org>2022-06-17 19:33:07 +0000
commita340b5b4bd4814ad2010c5e7bfaa51082427c4ae (patch)
tree8c9a413c2df5d174e0d27bcae2cb9792104ba1f3 /sys/amd64/linux/linux_sysvec.c
parent54689a282aee8075063228881ee577de181967b6 (diff)
downloadsrc-a340b5b4bd4814ad2010c5e7bfaa51082427c4ae.tar.gz
src-a340b5b4bd4814ad2010c5e7bfaa51082427c4ae.zip
linux(4): Almost complete the vDSO.
The vDSO (virtual dynamic shared object) is a small shared library that the kernel maps R/O into the address space of all Linux processes on image activation. The vDSO is a fully formed ELF image, shared by all processes with the same ABI, and has no process-private data. The primary purposes of the vDSO are: - non-executable stack, signal trampolines are not copied to the stack; - signal trampoline unwinding, mandatory for the NPTL; - to avoid context-switch overhead, frequently used system calls can be implemented in the vDSO: for now gettimeofday, clock_gettime. The first two have been implemented, so add the implementation of system calls. The system call implementation is based on the native timekeeping code, with some limitations: - ifunc can't be used, as the vDSO is mapped r/o into the process VA and rtld can't relocate symbols; - reading HPET memory is not implemented for now (TODO). In case of any error, the vDSO system calls fall back to the kernel system calls. For unimplemented vDSO system calls, prototypes were added which call the corresponding kernel system call. Relnotes: yes Tested by: trasz (arm64) Differential revision: https://reviews.freebsd.org/D30900 MFC after: 2 weeks (cherry picked from commit 9931033bbfbe56a037723638cf3712366c6d943f)
Diffstat (limited to 'sys/amd64/linux/linux_sysvec.c')
-rw-r--r--sys/amd64/linux/linux_sysvec.c161
1 files changed, 131 insertions, 30 deletions
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index bcc8cbf0b0bd..f13526b00d85 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/stddef.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@@ -72,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/trap.h>
+#include <x86/linux/linux_x86.h>
#include <amd64/linux/linux.h>
#include <amd64/linux/linux_proto.h>
#include <compat/linux/linux_emul.h>
@@ -85,11 +87,24 @@ __FBSDID("$FreeBSD$");
MODULE_VERSION(linux64, 1);
+#define LINUX_VDSOPAGE_SIZE PAGE_SIZE * 2
+#define LINUX_VDSOPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - \
+ LINUX_VDSOPAGE_SIZE)
+#define LINUX_SHAREDPAGE_LA48 (LINUX_VDSOPAGE_LA48 - PAGE_SIZE)
+ /*
+ * PAGE_SIZE - the size
+ * of the native SHAREDPAGE
+ */
+#define LINUX_USRSTACK_LA48 LINUX_SHAREDPAGE_LA48
+#define LINUX_PS_STRINGS_LA48 (LINUX_USRSTACK_LA48 - \
+ sizeof(struct ps_strings))
+
static int linux_szsigcode;
-static vm_object_t linux_shared_page_obj;
-static char *linux_shared_page_mapping;
-extern char _binary_linux_locore_o_start;
-extern char _binary_linux_locore_o_end;
+static vm_object_t linux_vdso_obj;
+static char *linux_vdso_mapping;
+extern char _binary_linux_vdso_so_o_start;
+extern char _binary_linux_vdso_so_o_end;
+static vm_offset_t linux_vdso_base;
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
@@ -102,10 +117,12 @@ static int linux_fixup_elf(uintptr_t *stack_base,
static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(void *param);
static void linux_vdso_deinstall(void *param);
+static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
static void linux_set_syscall_retval(struct thread *td, int error);
static int linux_fetch_syscall_args(struct thread *td);
static void linux_exec_setregs(struct thread *td, struct image_params *imgp,
uintptr_t stack);
+static void linux_exec_sysvec_init(void *param);
static int linux_on_exec_vmspace(struct proc *p,
struct image_params *imgp);
static int linux_vsyscall(struct thread *td);
@@ -151,6 +168,8 @@ static int _bsd_to_linux_trapcode[] = {
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_CHAR(linux_platform);
+LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
+LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
/*
* If FreeBSD & Linux have a difference of opinion about what a trap
@@ -263,8 +282,7 @@ linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
M_WAITOK | M_ZERO);
issetugid = p->p_flag & P_SUGID ? 1 : 0;
- AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
- imgp->proc->p_sysent->sv_shared_page_base);
+ AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
@@ -732,7 +750,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_fixup_elf,
.sv_sendsig = linux_rt_sendsig,
- .sv_sigcode = &_binary_linux_locore_o_start,
+ .sv_sigcode = &_binary_linux_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux ELF64",
.sv_coredump = elf64_coredump,
@@ -743,8 +761,8 @@ struct sysentvec elf_linux_sysvec = {
.sv_minsigstksz = LINUX_MINSIGSTKSZ,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
- .sv_usrstack = USRSTACK_LA48,
- .sv_psstrings = PS_STRINGS_LA48,
+ .sv_usrstack = LINUX_USRSTACK_LA48,
+ .sv_psstrings = LINUX_PS_STRINGS_LA48,
.sv_psstringssz = sizeof(struct ps_strings),
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_auxargs = linux_copyout_auxargs,
@@ -753,11 +771,11 @@ struct sysentvec elf_linux_sysvec = {
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
- SV_SIG_WAITNDQ,
+ SV_SIG_WAITNDQ | SV_TIMEKEEP,
.sv_set_syscall_retval = linux_set_syscall_retval,
.sv_fetch_syscall_args = linux_fetch_syscall_args,
.sv_syscallnames = NULL,
- .sv_shared_page_base = SHAREDPAGE_LA48,
+ .sv_shared_page_base = LINUX_SHAREDPAGE_LA48,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = linux_schedtail,
.sv_thread_detach = linux_thread_detach,
@@ -771,47 +789,130 @@ struct sysentvec elf_linux_sysvec = {
static int
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
{
+ int error;
- linux_on_exec(p, imgp);
- return (0);
+ error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
+ LINUX_VDSOPAGE_SIZE, imgp);
+ if (error == 0)
+ linux_on_exec(p, imgp);
+ return (error);
}
static void
-linux_vdso_install(void *param)
+linux_exec_sysvec_init(void *param)
{
+ l_uintptr_t *ktimekeep_base, *ktsc_selector;
+ struct sysentvec *sv;
+ ptrdiff_t tkoff;
+
+ sv = param;
+ amd64_lower_shared_page(sv);
+ /* Fill timekeep_base */
+ exec_sysvec_init(sv);
+
+ tkoff = kern_timekeep_base - linux_vdso_base;
+ ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
+ *ktimekeep_base = sv->sv_timekeep_base;
+
+ tkoff = kern_tsc_selector - linux_vdso_base;
+ ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
+ *ktsc_selector = linux_vdso_tsc_selector_idx();
+ if (bootverbose)
+ printf("Linux x86-64 vDSO tsc_selector: %lu\n", *ktsc_selector);
+}
+SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY,
+ linux_exec_sysvec_init, &elf_linux_sysvec);
- amd64_lower_shared_page(&elf_linux_sysvec);
-
- linux_szsigcode = (&_binary_linux_locore_o_end -
- &_binary_linux_locore_o_start);
+static void
+linux_vdso_install(void *param)
+{
+ char *vdso_start = &_binary_linux_vdso_so_o_start;
+ char *vdso_end = &_binary_linux_vdso_so_o_end;
- if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
- panic("Linux invalid vdso size\n");
+ linux_szsigcode = vdso_end - vdso_start;
+ MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
- __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
+ linux_vdso_base = LINUX_VDSOPAGE_LA48;
+ if (hw_lower_amd64_sharedpage != 0)
+ linux_vdso_base -= PAGE_SIZE;
- linux_shared_page_obj = __elfN(linux_shared_page_init)
- (&linux_shared_page_mapping);
+ __elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
- __elfN(linux_vdso_reloc)(&elf_linux_sysvec);
+ linux_vdso_obj = __elfN(linux_shared_page_init)
+ (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
+ bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
- bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
- linux_szsigcode);
- elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
+ linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
}
-SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
+SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_install, NULL);
static void
linux_vdso_deinstall(void *param)
{
- __elfN(linux_shared_page_fini)(linux_shared_page_obj,
- linux_shared_page_mapping);
+ __elfN(linux_shared_page_fini)(linux_vdso_obj,
+ linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_deinstall, NULL);
+static void
+linux_vdso_reloc(char *mapping, Elf_Addr offset)
+{
+ const Elf_Ehdr *ehdr;
+ const Elf_Shdr *shdr;
+ Elf64_Addr *where, val;
+ Elf_Size rtype, symidx;
+ const Elf_Rela *rela;
+ Elf_Addr addr, addend;
+ int relacnt;
+ int i, j;
+
+ MPASS(offset != 0);
+
+ relacnt = 0;
+ ehdr = (const Elf_Ehdr *)mapping;
+ shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
+ for (i = 0; i < ehdr->e_shnum; i++)
+ {
+ switch (shdr[i].sh_type) {
+ case SHT_REL:
+ printf("Linux x86_64 vDSO: unexpected Rel section\n");
+ break;
+ case SHT_RELA:
+ rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset);
+ relacnt = shdr[i].sh_size / sizeof(*rela);
+ }
+ }
+
+ for (j = 0; j < relacnt; j++, rela++) {
+ where = (Elf_Addr *)(mapping + rela->r_offset);
+ addend = rela->r_addend;
+ rtype = ELF_R_TYPE(rela->r_info);
+ symidx = ELF_R_SYM(rela->r_info);
+
+ switch (rtype) {
+ case R_X86_64_NONE: /* none */
+ break;
+
+ case R_X86_64_RELATIVE: /* B + A */
+ addr = (Elf_Addr)(offset + addend);
+ val = addr;
+ if (*where != val)
+ *where = val;
+ break;
+ case R_X86_64_IRELATIVE:
+ printf("Linux x86_64 vDSO: unexpected ifunc relocation, "
+ "symbol index %ld\n", symidx);
+ break;
+ default:
+ printf("Linux x86_64 vDSO: unexpected relocation type %ld, "
+ "symbol index %ld\n", rtype, symidx);
+ }
+ }
+}
+
static char GNULINUX_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;