diff options
Diffstat (limited to 'lib/libsys/x86')
-rw-r--r-- | lib/libsys/x86/Makefile.sys | 24 | ||||
-rw-r--r-- | lib/libsys/x86/__vdso_gettc.c | 453 | ||||
-rw-r--r-- | lib/libsys/x86/pkru.3 | 203 | ||||
-rw-r--r-- | lib/libsys/x86/pkru.c | 134 | ||||
-rw-r--r-- | lib/libsys/x86/sched_getcpu_x86.c | 81 |
5 files changed, 895 insertions, 0 deletions
diff --git a/lib/libsys/x86/Makefile.sys b/lib/libsys/x86/Makefile.sys new file mode 100644 index 000000000000..483d1784bdc9 --- /dev/null +++ b/lib/libsys/x86/Makefile.sys @@ -0,0 +1,24 @@ +.PATH: ${LIBSYS_SRCTOP}/x86 + +SRCS+= \ + __vdso_gettc.c \ + pkru.c \ + sched_getcpu_x86.c + +.if ${LIB} == "sys" +MAN+= \ + pkru.3 +.endif # ${LIB} == "sys" + +# Note: vdso support for hyperv only on amd64 +.if ${MACHINE_CPUARCH} == "amd64" && ${MK_HYPERV} != "no" +CFLAGS+= -DWANT_HYPERV +.endif +# We can't use sanitizer instrumentation on ifuncs called during sanitizer +# runtime startup. +.if ${MK_ASAN} != "no" +CFLAGS.__vdso_gettc.c+=-fno-sanitize=address +.endif +.if ${MK_UBSAN} != "no" +CFLAGS.__vdso_gettc.c+=-fno-sanitize=undefined +.endif diff --git a/lib/libsys/x86/__vdso_gettc.c b/lib/libsys/x86/__vdso_gettc.c new file mode 100644 index 000000000000..ea05f5abf62a --- /dev/null +++ b/lib/libsys/x86/__vdso_gettc.c @@ -0,0 +1,453 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> + * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include "namespace.h" +#include <sys/capsicum.h> +#include <sys/elf.h> +#include <sys/fcntl.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <sys/vdso.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include "un-namespace.h" +#include <machine/atomic.h> +#include <machine/cpufunc.h> +#include <machine/pvclock.h> +#include <machine/specialreg.h> +#include <dev/acpica/acpi_hpet.h> +#ifdef WANT_HYPERV +#include <dev/hyperv/hyperv.h> +#endif +#include <x86/ifunc.h> +#include "libc_private.h" + +static inline u_int +rdtsc_low(const struct vdso_timehands *th) +{ + u_int rv; + + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); + return (rv); +} + +static inline u_int +rdtscp_low(const struct vdso_timehands *th) +{ + u_int rv; + + __asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0" + : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx"); + return (rv); +} + +static u_int +rdtsc_low_mb_lfence(const struct vdso_timehands *th) +{ + lfence(); + return (rdtsc_low(th)); +} + +static u_int +rdtsc_low_mb_mfence(const struct vdso_timehands *th) +{ + mfence(); + return (rdtsc_low(th)); +} + +static u_int +rdtsc_low_mb_none(const struct vdso_timehands *th) +{ + return (rdtsc_low(th)); +} + +static u_int +rdtsc32_mb_lfence(void) +{ + lfence(); + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_lfence(void) +{ + lfence(); + return (rdtsc()); 
+} + +static u_int +rdtsc32_mb_mfence(void) +{ + mfence(); + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_mfence(void) +{ + mfence(); + return (rdtsc()); +} + +static u_int +rdtsc32_mb_none(void) +{ + return (rdtsc32()); +} + +static uint64_t +rdtsc_mb_none(void) +{ + return (rdtsc()); +} + +static u_int +rdtscp32_(void) +{ + return (rdtscp32()); +} + +static uint64_t +rdtscp_(void) +{ + return (rdtscp()); +} + +struct tsc_selector_tag { + u_int (*ts_rdtsc32)(void); + uint64_t (*ts_rdtsc)(void); + u_int (*ts_rdtsc_low)(const struct vdso_timehands *); +}; + +static const struct tsc_selector_tag tsc_selector[] = { + [0] = { /* Intel, LFENCE */ + .ts_rdtsc32 = rdtsc32_mb_lfence, + .ts_rdtsc = rdtsc_mb_lfence, + .ts_rdtsc_low = rdtsc_low_mb_lfence, + }, + [1] = { /* AMD, MFENCE */ + .ts_rdtsc32 = rdtsc32_mb_mfence, + .ts_rdtsc = rdtsc_mb_mfence, + .ts_rdtsc_low = rdtsc_low_mb_mfence, + }, + [2] = { /* No SSE2 */ + .ts_rdtsc32 = rdtsc32_mb_none, + .ts_rdtsc = rdtsc_mb_none, + .ts_rdtsc_low = rdtsc_low_mb_none, + }, + [3] = { /* RDTSCP */ + .ts_rdtsc32 = rdtscp32_, + .ts_rdtsc = rdtscp_, + .ts_rdtsc_low = rdtscp_low, + }, +}; + +static int +tsc_selector_idx(u_int cpu_feature) +{ + u_int amd_feature, cpu_exthigh, p[4], v[3]; + static const char amd_id[] = "AuthenticAMD"; + static const char hygon_id[] = "HygonGenuine"; + bool amd_cpu; + + if (cpu_feature == 0) + return (2); /* should not happen due to RDTSC */ + + do_cpuid(0, p); + v[0] = p[1]; + v[1] = p[3]; + v[2] = p[2]; + amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 || + memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0; + + if (cpu_feature != 0) { + do_cpuid(0x80000000, p); + cpu_exthigh = p[0]; + } else { + cpu_exthigh = 0; + } + if (cpu_exthigh >= 0x80000001) { + do_cpuid(0x80000001, p); + amd_feature = p[3]; + } else { + amd_feature = 0; + } + + if ((amd_feature & AMDID_RDTSCP) != 0) + return (3); + if ((cpu_feature & CPUID_SSE2) == 0) + return (2); + return (amd_cpu ? 
1 : 0); +} + +DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low, + (const struct vdso_timehands *th)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low); +} + +DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32); +} + +DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void)) +{ + return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc); +} + +#define HPET_DEV_MAP_MAX 10 +static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX]; + +static void +__vdso_init_hpet(uint32_t u) +{ + static const char devprefix[] = "/dev/hpet"; + char devname[64], *c, *c1, t; + volatile char *new_map, *old_map; + unsigned int mode; + uint32_t u1; + int fd; + + c1 = c = stpcpy(devname, devprefix); + u1 = u; + do { + *c++ = u1 % 10 + '0'; /* digits are emitted least-significant first */ + u1 /= 10; + } while (u1 != 0); + *c = '\0'; + for (c--; c1 < c; c1++, c--) { /* reverse in place; '<' also stops when the pointers cross */ + t = *c1; + *c1 = *c; + *c = t; + } + + old_map = hpet_dev_map[u]; + if (old_map != NULL) + return; + + /* + * Explicitly check for the capability mode to avoid + * triggering trap_enocap on the device open by absolute path. + */ + if ((cap_getmode(&mode) == 0 && mode != 0) || + (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) { + /* Prevent the caller from re-entering. */ + atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], + (uintptr_t)old_map, (uintptr_t)MAP_FAILED); + return; + } + + new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + _close(fd); + if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], + (uintptr_t)old_map, (uintptr_t)new_map) == 0 && + new_map != MAP_FAILED) + munmap((void *)new_map, PAGE_SIZE); +} + +#ifdef WANT_HYPERV + +#define HYPERV_REFTSC_DEVPATH "/dev/" HYPERV_REFTSC_DEVNAME + +/* + * NOTE: + * We use 'NULL' for this variable to indicate that initialization + * is required. And if this variable is 'MAP_FAILED', then Hyper-V + * reference TSC can not be used, e.g. in misconfigured jail. 
+ */ +static struct hyperv_reftsc *hyperv_ref_tsc; + +static void +__vdso_init_hyperv_tsc(void) +{ + int fd; + unsigned int mode; + + if (cap_getmode(&mode) == 0 && mode != 0) + goto fail; + + fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC); + if (fd < 0) + goto fail; + hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ, + MAP_SHARED, fd, 0); + _close(fd); + + return; +fail: + /* Prevent the caller from re-entering. */ + hyperv_ref_tsc = MAP_FAILED; +} + +static int +__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc) +{ + uint64_t disc, ret, tsc, scale; + uint32_t seq; + int64_t ofs; + + while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { + scale = tsc_ref->tsc_scale; + ofs = tsc_ref->tsc_ofs; + + mfence(); /* XXXKIB */ + tsc = rdtsc(); + + /* ret = ((tsc * scale) >> 64) + ofs */ + __asm__ __volatile__ ("mulq %3" : + "=d" (ret), "=a" (disc) : + "a" (tsc), "r" (scale)); + ret += ofs; + + atomic_thread_fence_acq(); + if (tsc_ref->tsc_seq == seq) { + *tc = ret; + return (0); + } + + /* Sequence changed; re-sync. 
*/ + } + return (ENOSYS); +} + +#endif /* WANT_HYPERV */ + +static struct pvclock_vcpu_time_info *pvclock_timeinfos; + +static int +__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc) +{ + uint64_t delta, ns, tsc; + struct pvclock_vcpu_time_info *ti; + uint32_t cpuid_ti, cpuid_tsc, version; + bool stable; + + do { + ti = &pvclock_timeinfos[0]; + version = atomic_load_acq_32(&ti->version); + stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0; + if (stable) { + tsc = __vdso_gettc_rdtsc(); + } else { + (void)rdtscp_aux(&cpuid_ti); + ti = &pvclock_timeinfos[cpuid_ti]; + version = atomic_load_acq_32(&ti->version); + tsc = rdtscp_aux(&cpuid_tsc); + } + delta = tsc - ti->tsc_timestamp; + ns = ti->system_time + pvclock_scale_delta(delta, + ti->tsc_to_system_mul, ti->tsc_shift); + atomic_thread_fence_acq(); + } while ((ti->version & 1) != 0 || ti->version != version || + (!stable && cpuid_ti != cpuid_tsc)); + *tc = MAX(ns, th->th_x86_pvc_last_systime); + return (0); +} + +static void +__vdso_init_pvclock_timeinfos(void) +{ + struct pvclock_vcpu_time_info *timeinfos; + size_t len; + int fd, ncpus; + unsigned int mode; + + timeinfos = MAP_FAILED; + if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 || + (cap_getmode(&mode) == 0 && mode != 0) || + (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0) + goto leave; + len = ncpus * sizeof(*pvclock_timeinfos); + timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + _close(fd); +leave: + if (atomic_cmpset_rel_ptr( + (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL, + (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED) + (void)munmap((void *)timeinfos, len); +} + +#pragma weak __vdso_gettc +int +__vdso_gettc(const struct vdso_timehands *th, u_int *tc) +{ + volatile char *map; + uint32_t idx; + + switch (th->th_algo) { + case VDSO_TH_ALGO_X86_TSC: + *tc = th->th_x86_shift > 0 ? 
__vdso_gettc_rdtsc_low(th) : + __vdso_gettc_rdtsc32(); + return (0); + case VDSO_TH_ALGO_X86_HPET: + idx = th->th_x86_hpet_idx; + if (idx >= HPET_DEV_MAP_MAX) + return (ENOSYS); + map = (volatile char *)atomic_load_acq_ptr( + (volatile uintptr_t *)&hpet_dev_map[idx]); + if (map == NULL) { + __vdso_init_hpet(idx); + map = (volatile char *)atomic_load_acq_ptr( + (volatile uintptr_t *)&hpet_dev_map[idx]); + } + if (map == MAP_FAILED) + return (ENOSYS); + *tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER); + return (0); +#ifdef WANT_HYPERV + case VDSO_TH_ALGO_X86_HVTSC: + if (hyperv_ref_tsc == NULL) + __vdso_init_hyperv_tsc(); + if (hyperv_ref_tsc == MAP_FAILED) + return (ENOSYS); + return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc)); +#endif + case VDSO_TH_ALGO_X86_PVCLK: + if (pvclock_timeinfos == NULL) + __vdso_init_pvclock_timeinfos(); + if (pvclock_timeinfos == MAP_FAILED) + return (ENOSYS); + return (__vdso_pvclock_gettc(th, tc)); + default: + return (ENOSYS); + } +} + +#pragma weak __vdso_gettimekeep +int +__vdso_gettimekeep(struct vdso_timekeep **tk) +{ + + return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk))); +} diff --git a/lib/libsys/x86/pkru.3 b/lib/libsys/x86/pkru.3 new file mode 100644 index 000000000000..95bc66c979ac --- /dev/null +++ b/lib/libsys/x86/pkru.3 @@ -0,0 +1,203 @@ +.\" Copyright (c) 2019 The FreeBSD Foundation +.\" +.\" This documentation was written by +.\" Konstantin Belousov <kib@FreeBSD.org> under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd February 16, 2019 +.Dt PKRU 3 +.Os +.Sh NAME +.Nm Protection Key Rights for User pages +.Nd provide fast user-managed key-based access control for pages +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In machine/sysarch.h +.Ft int +.Fn x86_pkru_get_perm "unsigned int keyidx" "int *access" "int *modify" +.Ft int +.Fn x86_pkru_set_perm "unsigned int keyidx" "int access" "int modify" +.Ft int +.Fo x86_pkru_protect_range +.Fa "void *addr" +.Fa "unsigned long len" +.Fa "unsigned int keyidx" +.Fa "int flags" +.Fc +.Ft int +.Fn x86_pkru_unprotect_range "void *addr" "unsigned long len" +.Sh DESCRIPTION +The protection keys feature provides an additional mechanism, besides the +normal page permissions as established by +.Xr mmap 2 +and +.Xr mprotect 2 , +to control access to user-mode addresses. +The mechanism gives safety measures which can be used to avoid +incidental read or modification of sensitive memory, +or as a debugging feature. 
+It cannot guard against conscious accesses since permissions +are user-controllable. +.Pp +If supported by hardware, each mapped user linear address +has an associated 4-bit protection key. +A new per-thread PKRU hardware register determines, for each protection +key, whether user-mode addresses with that protection key may be +read or written. +.Pp +Only one key may apply to a given range at a time. +The default protection key index is zero; it is used even if no key +was explicitly assigned to the address, or if the key was removed. +.Pp +The protection prevents the system from accessing user addresses as well +as the user applications. +When a system call is unable to read or write user memory due to key +protection, it returns the +.Er EFAULT +error code. +Note that some side effects may have occurred if this error is reported. +.Pp +Protection keys require that the system uses 4-level paging +(also called long mode), +which means that they are only available on amd64 systems. +Both 64-bit and 32-bit applications can use protection keys. +More information about the hardware feature is provided in the IA32 Software +Developer's Manual published by Intel Corp. +.Pp +The key indexes written into the page table entries are managed by the +.Fn sysarch +syscall. +Per-key permissions are managed using the user-mode instructions +.Em RDPKRU +and +.Em WRPKRU . +The system provides convenient library helpers for both the syscall and +the instructions, described below. +.Pp +The +.Fn x86_pkru_protect_range +function assigns key +.Fa keyidx +to the range starting at +.Fa addr +and having length +.Fa len . +The starting address is truncated to the page start, +and the end is rounded up to the end of the page. +After a successful call, the range has the specified key assigned, +even if the key is zero and it did not change the page table entries. 
+.Pp +The +.Fa flags +argument takes the logical OR of the following values: +.Bl -tag -width +.It Bq Va AMD64_PKRU_EXCL +Only assign the key if the range does not have any other keys assigned +(including the zero key). +You must first remove any existing key with +.Fn x86_pkru_unprotect_range +in order for this request to succeed. +If the +.Va AMD64_PKRU_EXCL +flag is not specified, +.Fn x86_pkru_protect_range +replaces any existing key. +.It Bq Va AMD64_PKRU_PERSIST +The keys assigned to the range are persistent. +They are re-established when the current mapping is destroyed +and a new mapping is created in any sub-range of the specified range. +You must use a +.Fn x86_pkru_unprotect_range +call to forget the key. +.El +.Pp +The +.Fn x86_pkru_unprotect_range +function removes any keys assigned to the specified range. +Existing mappings are changed to use key index zero in page table entries. +Keys are no longer considered installed for all mappings in the range, +for the purposes of +.Fn x86_pkru_protect_range +with the +.Va AMD64_PKRU_EXCL +flag. +.Pp +The +.Fn x86_pkru_get_perm +function returns access rights for the key specified by the +.Fa keyidx +argument. +If the value pointed to by +.Fa access +is zero after the call, no read or write permission is granted for +mappings which are assigned the key +.Fa keyidx . +If +.Fa access +is not zero, read access is permitted. +The non-zero value of the variable pointed to by the +.Fa modify +argument indicates that write access is permitted. +.Pp +Conversely, the +.Fn x86_pkru_set_perm +function establishes the access and modify permissions for the given key index +as specified by its arguments. +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er EOPNOTSUPP +The hardware does not support protection keys. +.It Bq Er EINVAL +The supplied key index is invalid (greater than 15). +.It Bq Er EINVAL +The supplied +.Fa flags +argument for +.Fn x86_pkru_protect_range +has reserved bits set. 
+.It Bq Er EFAULT +The supplied address range does not completely fit into the user-managed +address range. +.It Bq Er ENOMEM +The memory shortage prevents the completion of the operation. +.It Bq Er EBUSY +The +.Va AMD64_PKRU_EXCL +flag was specified for +.Fn x86_pkru_protect_range +and the range already has defined protection keys. +.El +.Sh SEE ALSO +.Xr mmap 2 , +.Xr mprotect 2 , +.Xr munmap 2 , +.Xr sysarch 2 +.Sh STANDARDS +The +.Nm +functions are non-standard and first appeared in +.Fx 13.0 . diff --git a/lib/libsys/x86/pkru.c b/lib/libsys/x86/pkru.c new file mode 100644 index 000000000000..b6ae181c131f --- /dev/null +++ b/lib/libsys/x86/pkru.c @@ -0,0 +1,134 @@ +/*- + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <machine/cpufunc.h> +#include <machine/specialreg.h> +#include <machine/sysarch.h> +#include <x86/ifunc.h> +#include <errno.h> +#include <string.h> + +#define MAX_PKRU_IDX 0xf +#ifdef __i386__ +#define X86_SET_PKRU I386_SET_PKRU +#define X86_CLEAR_PKRU I386_CLEAR_PKRU +#else +#define X86_SET_PKRU AMD64_SET_PKRU +#define X86_CLEAR_PKRU AMD64_CLEAR_PKRU +#endif + +static int +x86_pkru_get_perm_unsup(u_int keyidx, int *access, int *modify) +{ + + errno = EOPNOTSUPP; + return (-1); +} + +static int +x86_pkru_get_perm_hw(u_int keyidx, int *access, int *modify) +{ + uint32_t pkru; + + if (keyidx > MAX_PKRU_IDX) { + errno = EINVAL; + return (-1); + } + keyidx *= 2; /* each key owns two adjacent PKRU bits */ + pkru = rdpkru(); + *access = (pkru & (1u << keyidx)) == 0; /* low bit clear: access permitted */ + *modify = (pkru & (2u << keyidx)) == 0; /* unsigned: key 15 reaches bit 31 */ + return (0); +} + +DEFINE_UIFUNC(, int, x86_pkru_get_perm, (u_int, int *, int *)) +{ + + return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ? 
+ x86_pkru_get_perm_unsup : x86_pkru_get_perm_hw); +} + +static int +x86_pkru_set_perm_unsup(u_int keyidx, int access, int modify) +{ + + errno = EOPNOTSUPP; + return (-1); +} + +static int +x86_pkru_set_perm_hw(u_int keyidx, int access, int modify) +{ + uint32_t pkru; + + if (keyidx > MAX_PKRU_IDX) { + errno = EINVAL; + return (-1); + } + keyidx *= 2; /* each key owns two adjacent PKRU bits */ + pkru = rdpkru(); + pkru &= ~(3u << keyidx); /* unsigned: key 15 reaches bit 31 */ + if (!access) + pkru |= 1u << keyidx; + if (!modify) + pkru |= 2u << keyidx; + wrpkru(pkru); + return (0); +} + +DEFINE_UIFUNC(, int, x86_pkru_set_perm, (u_int, int, int)) +{ + + return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ? + x86_pkru_set_perm_unsup : x86_pkru_set_perm_hw); +} + +int +x86_pkru_protect_range(void *addr, unsigned long len, u_int keyidx, int flags) +{ + struct amd64_set_pkru a64pkru; + + memset(&a64pkru, 0, sizeof(a64pkru)); + a64pkru.addr = addr; + a64pkru.len = len; + a64pkru.keyidx = keyidx; + a64pkru.flags = flags; + return (sysarch(X86_SET_PKRU, &a64pkru)); +} + +int +x86_pkru_unprotect_range(void *addr, unsigned long len) +{ + struct amd64_set_pkru a64pkru; + + memset(&a64pkru, 0, sizeof(a64pkru)); + a64pkru.addr = addr; + a64pkru.len = len; + return (sysarch(X86_CLEAR_PKRU, &a64pkru)); +} diff --git a/lib/libsys/x86/sched_getcpu_x86.c b/lib/libsys/x86/sched_getcpu_x86.c new file mode 100644 index 000000000000..13ba18ef5e36 --- /dev/null +++ b/lib/libsys/x86/sched_getcpu_x86.c @@ -0,0 +1,81 @@ +/*- + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software were developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <machine/cpufunc.h> +#include <machine/specialreg.h> +#include <machine/sysarch.h> +#include <x86/ifunc.h> +#include <errno.h> +#include <sched.h> +#include "libc_private.h" + +static int +sched_getcpu_sys(void) +{ + return (__sys_sched_getcpu()); +} + +static int +sched_getcpu_rdpid(void) +{ + register_t res; + + __asm __volatile("rdpid %0" : "=r" (res)); /* volatile: no inputs, result must not be reused */ + return ((int)res); +} + +static int +sched_getcpu_rdtscp(void) +{ + int res; + + __asm __volatile("rdtscp" : "=c" (res) : : "eax", "edx"); /* volatile: no inputs, result must not be reused */ + return (res); +} + +DEFINE_UIFUNC(, int, sched_getcpu, (void)) +{ + u_int amd_feature, cpu_exthigh, p[4]; + + if ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0) + return (sched_getcpu_rdpid); + + amd_feature = 0; + if (cpu_feature != 0) { + do_cpuid(0x80000000, p); + cpu_exthigh = p[0]; + if (cpu_exthigh >= 0x80000001) { + do_cpuid(0x80000001, p); + amd_feature = p[3]; + } + } + + return ((amd_feature & AMDID_RDTSCP) == 0 ? + sched_getcpu_sys : sched_getcpu_rdtscp); +} |