diff options
Diffstat (limited to 'lib/libsys/x86/__vdso_gettc.c')
-rw-r--r-- | lib/libsys/x86/__vdso_gettc.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/lib/libsys/x86/__vdso_gettc.c b/lib/libsys/x86/__vdso_gettc.c new file mode 100644 index 000000000000..ea05f5abf62a --- /dev/null +++ b/lib/libsys/x86/__vdso_gettc.c @@ -0,0 +1,453 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> + * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include "namespace.h" +#include <sys/capsicum.h> +#include <sys/elf.h> +#include <sys/fcntl.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <sys/vdso.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include "un-namespace.h" +#include <machine/atomic.h> +#include <machine/cpufunc.h> +#include <machine/pvclock.h> +#include <machine/specialreg.h> +#include <dev/acpica/acpi_hpet.h> +#ifdef WANT_HYPERV +#include <dev/hyperv/hyperv.h> +#endif +#include <x86/ifunc.h> +#include "libc_private.h" + +static inline u_int +rdtsc_low(const struct vdso_timehands *th) +{ + u_int rv; + + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); + return (rv); +} + +static inline u_int +rdtscp_low(const struct vdso_timehands *th) +{ + u_int rv; + + __asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0" + : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx"); + return (rv); +} + +static u_int +rdtsc_low_mb_lfence(const struct vdso_timehands *th) +{ + lfence(); + return (rdtsc_low(th)); +} + +static u_int +rdtsc_low_mb_mfence(const struct vdso_timehands *th) +{ + mfence(); + return (rdtsc_low(th)); +} + +static u_int +rdtsc_low_mb_none(const struct vdso_timehands *th) +{ + return (rdtsc_low(th)); +} + +static u_int +rdtsc32_mb_lfence(void) +{ + lfence(); + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_lfence(void) +{ + lfence(); + return (rdtsc()); +} + +static u_int +rdtsc32_mb_mfence(void) +{ + mfence(); + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_mfence(void) +{ + mfence(); + return (rdtsc()); +} + +static u_int +rdtsc32_mb_none(void) +{ + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_none(void) +{ + return (rdtsc()); +} + +static u_int +rdtscp32_(void) +{ + return (rdtscp32()); +} + +static uint64_t +rdtscp_(void) +{ + return (rdtscp()); +} + +struct tsc_selector_tag { + u_int (*ts_rdtsc32)(void); + uint64_t (*ts_rdtsc)(void); + u_int (*ts_rdtsc_low)(const struct vdso_timehands *); +}; + +static const struct tsc_selector_tag tsc_selector[] = { + [0] = { /* Intel, LFENCE */ + .ts_rdtsc32 = rdtsc32_mb_lfence, + .ts_rdtsc = rdtsc_mb_lfence, + .ts_rdtsc_low = rdtsc_low_mb_lfence, + }, + [1] = { /* AMD, MFENCE */ + .ts_rdtsc32 = rdtsc32_mb_mfence, + .ts_rdtsc = rdtsc_mb_mfence, + .ts_rdtsc_low = rdtsc_low_mb_mfence, + }, + [2] = { /* No SSE2 */ + .ts_rdtsc32 = rdtsc32_mb_none, + .ts_rdtsc = rdtsc_mb_none, + .ts_rdtsc_low = rdtsc_low_mb_none, + }, + [3] = { /* RDTSCP */ + .ts_rdtsc32 = rdtscp32_, + .ts_rdtsc = rdtscp_, + .ts_rdtsc_low = rdtscp_low, + }, +}; + +static int +tsc_selector_idx(u_int cpu_feature) +{ + u_int amd_feature, cpu_exthigh, p[4], v[3]; + static const char amd_id[] = "AuthenticAMD"; + static const char hygon_id[] = "HygonGenuine"; + bool amd_cpu; + + if (cpu_feature == 0) + return (2); /* should not happen due to RDTSC */ + + do_cpuid(0, p); + v[0] = p[1]; + v[1] = p[3]; + v[2] = p[2]; + amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 || + memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0; + + if (cpu_feature != 0) { + do_cpuid(0x80000000, p); + cpu_exthigh = p[0]; + } else { + cpu_exthigh = 0; + } + if (cpu_exthigh >= 0x80000001) { + do_cpuid(0x80000001, p); + amd_feature = p[3]; + } else { + amd_feature = 0; + } + + if ((amd_feature & AMDID_RDTSCP) != 0) + return (3); + if ((cpu_feature & CPUID_SSE2) == 0) + return (2); + return (amd_cpu ? 1 : 0); +} + +DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low, + (const struct vdso_timehands *th)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low); +} + +DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32); +} + +DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc); +} + +#define HPET_DEV_MAP_MAX 10 +static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX]; + +static void +__vdso_init_hpet(uint32_t u) +{ + static const char devprefix[] = "/dev/hpet"; + char devname[64], *c, *c1, t; + volatile char *new_map, *old_map; + unsigned int mode; + uint32_t u1; + int fd; + + c1 = c = stpcpy(devname, devprefix); + u1 = u; + do { + *c++ = u1 % 10 + '0'; + u1 /= 10; + } while (u1 != 0); + *c = '\0'; + for (c--; c1 != c; c1++, c--) { + t = *c1; + *c1 = *c; + *c = t; + } + + old_map = hpet_dev_map[u]; + if (old_map != NULL) + return; + + /* + * Explicitely check for the capability mode to avoid + * triggering trap_enocap on the device open by absolute path. + */ + if ((cap_getmode(&mode) == 0 && mode != 0) || + (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) { + /* Prevent the caller from re-entering. */ + atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], + (uintptr_t)old_map, (uintptr_t)MAP_FAILED); + return; + } + + new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + _close(fd); + if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], + (uintptr_t)old_map, (uintptr_t)new_map) == 0 && + new_map != MAP_FAILED) + munmap((void *)new_map, PAGE_SIZE); +} + +#ifdef WANT_HYPERV + +#define HYPERV_REFTSC_DEVPATH "/dev/" HYPERV_REFTSC_DEVNAME + +/* + * NOTE: + * We use 'NULL' for this variable to indicate that initialization + * is required. And if this variable is 'MAP_FAILED', then Hyper-V + * reference TSC can not be used, e.g. in misconfigured jail. + */ +static struct hyperv_reftsc *hyperv_ref_tsc; + +static void +__vdso_init_hyperv_tsc(void) +{ + int fd; + unsigned int mode; + + if (cap_getmode(&mode) == 0 && mode != 0) + goto fail; + + fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC); + if (fd < 0) + goto fail; + hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ, + MAP_SHARED, fd, 0); + _close(fd); + + return; +fail: + /* Prevent the caller from re-entering. */ + hyperv_ref_tsc = MAP_FAILED; +} + +static int +__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc) +{ + uint64_t disc, ret, tsc, scale; + uint32_t seq; + int64_t ofs; + + while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { + scale = tsc_ref->tsc_scale; + ofs = tsc_ref->tsc_ofs; + + mfence(); /* XXXKIB */ + tsc = rdtsc(); + + /* ret = ((tsc * scale) >> 64) + ofs */ + __asm__ __volatile__ ("mulq %3" : + "=d" (ret), "=a" (disc) : + "a" (tsc), "r" (scale)); + ret += ofs; + + atomic_thread_fence_acq(); + if (tsc_ref->tsc_seq == seq) { + *tc = ret; + return (0); + } + + /* Sequence changed; re-sync. */ + } + return (ENOSYS); +} + +#endif /* WANT_HYPERV */ + +static struct pvclock_vcpu_time_info *pvclock_timeinfos; + +static int +__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc) +{ + uint64_t delta, ns, tsc; + struct pvclock_vcpu_time_info *ti; + uint32_t cpuid_ti, cpuid_tsc, version; + bool stable; + + do { + ti = &pvclock_timeinfos[0]; + version = atomic_load_acq_32(&ti->version); + stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0; + if (stable) { + tsc = __vdso_gettc_rdtsc(); + } else { + (void)rdtscp_aux(&cpuid_ti); + ti = &pvclock_timeinfos[cpuid_ti]; + version = atomic_load_acq_32(&ti->version); + tsc = rdtscp_aux(&cpuid_tsc); + } + delta = tsc - ti->tsc_timestamp; + ns = ti->system_time + pvclock_scale_delta(delta, + ti->tsc_to_system_mul, ti->tsc_shift); + atomic_thread_fence_acq(); + } while ((ti->version & 1) != 0 || ti->version != version || + (!stable && cpuid_ti != cpuid_tsc)); + *tc = MAX(ns, th->th_x86_pvc_last_systime); + return (0); +} + +static void +__vdso_init_pvclock_timeinfos(void) +{ + struct pvclock_vcpu_time_info *timeinfos; + size_t len; + int fd, ncpus; + unsigned int mode; + + timeinfos = MAP_FAILED; + if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 || + (cap_getmode(&mode) == 0 && mode != 0) || + (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0) + goto leave; + len = ncpus * sizeof(*pvclock_timeinfos); + timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + _close(fd); +leave: + if (atomic_cmpset_rel_ptr( + (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL, + (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED) + (void)munmap((void *)timeinfos, len); +} + +#pragma weak __vdso_gettc +int +__vdso_gettc(const struct vdso_timehands *th, u_int *tc) +{ + volatile char *map; + uint32_t idx; + + switch (th->th_algo) { + case VDSO_TH_ALGO_X86_TSC: + *tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) : + __vdso_gettc_rdtsc32(); + return (0); + case VDSO_TH_ALGO_X86_HPET: + idx = th->th_x86_hpet_idx; + if (idx >= HPET_DEV_MAP_MAX) + return (ENOSYS); + map = (volatile char *)atomic_load_acq_ptr( + (volatile uintptr_t *)&hpet_dev_map[idx]); + if (map == NULL) { + __vdso_init_hpet(idx); + map = (volatile char *)atomic_load_acq_ptr( + (volatile uintptr_t *)&hpet_dev_map[idx]); + } + if (map == MAP_FAILED) + return (ENOSYS); + *tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER); + return (0); +#ifdef WANT_HYPERV + case VDSO_TH_ALGO_X86_HVTSC: + if (hyperv_ref_tsc == NULL) + __vdso_init_hyperv_tsc(); + if (hyperv_ref_tsc == MAP_FAILED) + return (ENOSYS); + return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc)); +#endif + case VDSO_TH_ALGO_X86_PVCLK: + if (pvclock_timeinfos == NULL) + __vdso_init_pvclock_timeinfos(); + if (pvclock_timeinfos == MAP_FAILED) + return (ENOSYS); + return (__vdso_pvclock_gettc(th, tc)); + default: + return (ENOSYS); + } +} + +#pragma weak __vdso_gettimekeep +int +__vdso_gettimekeep(struct vdso_timekeep **tk) +{ + + return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk))); +} |