aboutsummaryrefslogtreecommitdiff
path: root/lib/libsys/x86
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libsys/x86')
-rw-r--r--lib/libsys/x86/Makefile.sys24
-rw-r--r--lib/libsys/x86/__vdso_gettc.c453
-rw-r--r--lib/libsys/x86/pkru.3203
-rw-r--r--lib/libsys/x86/pkru.c134
-rw-r--r--lib/libsys/x86/sched_getcpu_x86.c81
5 files changed, 895 insertions, 0 deletions
diff --git a/lib/libsys/x86/Makefile.sys b/lib/libsys/x86/Makefile.sys
new file mode 100644
index 000000000000..483d1784bdc9
--- /dev/null
+++ b/lib/libsys/x86/Makefile.sys
@@ -0,0 +1,24 @@
+# Look for the sources listed below in the x86 subdirectory.
+.PATH: ${LIBSYS_SRCTOP}/x86
+
+SRCS+= \
+ __vdso_gettc.c \
+ pkru.c \
+ sched_getcpu_x86.c
+
+# The manual page is added only when LIB is "sys".
+.if ${LIB} == "sys"
+MAN+= \
+ pkru.3
+.endif # ${LIB} == "sys"
+
+# Note: vdso support for hyperv only on amd64
+.if ${MACHINE_CPUARCH} == "amd64" && ${MK_HYPERV} != "no"
+CFLAGS+= -DWANT_HYPERV
+.endif
+# We can't use sanitizer instrumentation on ifuncs called during sanitizer
+# runtime startup.
+.if ${MK_ASAN} != "no"
+CFLAGS.__vdso_gettc.c+=-fno-sanitize=address
+.endif
+.if ${MK_UBSAN} != "no"
+CFLAGS.__vdso_gettc.c+=-fno-sanitize=undefined
+.endif
diff --git a/lib/libsys/x86/__vdso_gettc.c b/lib/libsys/x86/__vdso_gettc.c
new file mode 100644
index 000000000000..ea05f5abf62a
--- /dev/null
+++ b/lib/libsys/x86/__vdso_gettc.c
@@ -0,0 +1,453 @@
+/*-
+ * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
+ * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include "namespace.h"
+#include <sys/capsicum.h>
+#include <sys/elf.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/vdso.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include "un-namespace.h"
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/pvclock.h>
+#include <machine/specialreg.h>
+#include <dev/acpica/acpi_hpet.h>
+#ifdef WANT_HYPERV
+#include <dev/hyperv/hyperv.h>
+#endif
+#include <x86/ifunc.h>
+#include "libc_private.h"
+
+/*
+ * Execute RDTSC and return the low 32 bits of the EDX:EAX result after
+ * shifting it right by th->th_x86_shift.  No barrier is issued here.
+ */
+static inline u_int
+rdtsc_low(const struct vdso_timehands *th)
+{
+ u_int rv;
+
+ /* The shift count is supplied in ECX; SHRD shifts EDX bits into EAX. */
+ __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
+ : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
+ return (rv);
+}
+
+/*
+ * RDTSCP flavour of rdtsc_low(): return the low 32 bits of the counter
+ * shifted right by th->th_x86_shift.
+ */
+static inline u_int
+rdtscp_low(const struct vdso_timehands *th)
+{
+ u_int rv;
+
+ /*
+ * ECX is clobbered by this sequence, so the shift count is passed
+ * in EDI and copied into ECX only after RDTSCP has executed.
+ */
+ __asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
+ : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
+ return (rv);
+}
+
+/* rdtsc_low() preceded by LFENCE; used by the "Intel, LFENCE" selector. */
+static u_int
+rdtsc_low_mb_lfence(const struct vdso_timehands *th)
+{
+ lfence();
+ return (rdtsc_low(th));
+}
+
+/* rdtsc_low() preceded by MFENCE; used by the "AMD, MFENCE" selector. */
+static u_int
+rdtsc_low_mb_mfence(const struct vdso_timehands *th)
+{
+ mfence();
+ return (rdtsc_low(th));
+}
+
+/* rdtsc_low() with no barrier; used by the "No SSE2" selector. */
+static u_int
+rdtsc_low_mb_none(const struct vdso_timehands *th)
+{
+ return (rdtsc_low(th));
+}
+
+/* rdtsc32() preceded by LFENCE. */
+static u_int
+rdtsc32_mb_lfence(void)
+{
+ lfence();
+ return (rdtsc32());
+}
+
+/* rdtsc() preceded by LFENCE. */
+static uint64_t
+rdtsc_mb_lfence(void)
+{
+ lfence();
+ return (rdtsc());
+}
+
+/* rdtsc32() preceded by MFENCE. */
+static u_int
+rdtsc32_mb_mfence(void)
+{
+ mfence();
+ return (rdtsc32());
+}
+
+/* rdtsc() preceded by MFENCE. */
+static uint64_t
+rdtsc_mb_mfence(void)
+{
+ mfence();
+ return (rdtsc());
+}
+
+/* rdtsc32() with no barrier. */
+static u_int
+rdtsc32_mb_none(void)
+{
+ return (rdtsc32());
+}
+
+/* rdtsc() with no barrier. */
+static uint64_t
+rdtsc_mb_none(void)
+{
+ return (rdtsc());
+}
+
+/*
+ * Wrapper around rdtscp32() with the signature required by the
+ * ts_rdtsc32 member of tsc_selector[]; used for the RDTSCP slot.
+ */
+static u_int
+rdtscp32_(void)
+{
+ return (rdtscp32());
+}
+
+/*
+ * Wrapper around rdtscp() with the signature required by the
+ * ts_rdtsc member of tsc_selector[]; used for the RDTSCP slot.
+ */
+static uint64_t
+rdtscp_(void)
+{
+ return (rdtscp());
+}
+
+/*
+ * One set of TSC read routines.  The ifunc resolvers below pick a single
+ * entry of tsc_selector[] and bind each accessor to the matching member.
+ */
+struct tsc_selector_tag {
+ u_int (*ts_rdtsc32)(void); /* 32-bit counter read */
+ uint64_t (*ts_rdtsc)(void); /* 64-bit counter read */
+ u_int (*ts_rdtsc_low)(const struct vdso_timehands *); /* shifted read */
+};
+
+/*
+ * Table of TSC read routine sets, indexed by the value returned from
+ * tsc_selector_idx().
+ */
+static const struct tsc_selector_tag tsc_selector[] = {
+ [0] = { /* Intel, LFENCE */
+ .ts_rdtsc32 = rdtsc32_mb_lfence,
+ .ts_rdtsc = rdtsc_mb_lfence,
+ .ts_rdtsc_low = rdtsc_low_mb_lfence,
+ },
+ [1] = { /* AMD, MFENCE */
+ .ts_rdtsc32 = rdtsc32_mb_mfence,
+ .ts_rdtsc = rdtsc_mb_mfence,
+ .ts_rdtsc_low = rdtsc_low_mb_mfence,
+ },
+ [2] = { /* No SSE2 */
+ .ts_rdtsc32 = rdtsc32_mb_none,
+ .ts_rdtsc = rdtsc_mb_none,
+ .ts_rdtsc_low = rdtsc_low_mb_none,
+ },
+ [3] = { /* RDTSCP */
+ .ts_rdtsc32 = rdtscp32_,
+ .ts_rdtsc = rdtscp_,
+ .ts_rdtsc_low = rdtscp_low,
+ },
+};
+
+/*
+ * Pick the tsc_selector[] slot to use on the running CPU.
+ *
+ * cpu_feature is the CPUID feature word handed to the ifunc resolvers.
+ * Returns 3 when RDTSCP is advertised in the extended feature word,
+ * 2 when SSE2 is not available (or cpu_feature is zero), 1 for CPUs
+ * whose vendor string is "AuthenticAMD" or "HygonGenuine", and 0 for
+ * everything else.
+ */
+static int
+tsc_selector_idx(u_int cpu_feature)
+{
+	u_int amd_feature, cpu_exthigh, p[4], v[3];
+	static const char amd_id[] = "AuthenticAMD";
+	static const char hygon_id[] = "HygonGenuine";
+	bool amd_cpu;
+
+	if (cpu_feature == 0)
+		return (2);	/* should not happen due to RDTSC */
+
+	/* Assemble the 12-byte vendor string from words 1, 3 and 2. */
+	do_cpuid(0, p);
+	v[0] = p[1];
+	v[1] = p[3];
+	v[2] = p[2];
+	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
+	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;
+
+	/*
+	 * cpu_feature is known to be non-zero at this point, so the
+	 * highest extended leaf is queried unconditionally.
+	 */
+	do_cpuid(0x80000000, p);
+	cpu_exthigh = p[0];
+	if (cpu_exthigh >= 0x80000001) {
+		do_cpuid(0x80000001, p);
+		amd_feature = p[3];
+	} else {
+		amd_feature = 0;
+	}
+
+	if ((amd_feature & AMDID_RDTSCP) != 0)
+		return (3);
+	if ((cpu_feature & CPUID_SSE2) == 0)
+		return (2);
+	return (amd_cpu ? 1 : 0);
+}
+
+/*
+ * Resolver for __vdso_gettc_rdtsc_low(): returns the ts_rdtsc_low routine
+ * of the tsc_selector[] entry chosen by tsc_selector_idx().
+ * NOTE(review): DEFINE_UIFUNC comes from <x86/ifunc.h> and presumably
+ * supplies cpu_feature to this body - confirm against that header.
+ */
+DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
+ (const struct vdso_timehands *th))
+{
+ return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
+}
+
+/*
+ * Resolver for __vdso_gettc_rdtsc32(): returns the ts_rdtsc32 routine of
+ * the tsc_selector[] entry chosen by tsc_selector_idx().
+ */
+DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
+{
+ return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
+}
+
+/*
+ * Resolver for __vdso_gettc_rdtsc(): returns the ts_rdtsc routine of the
+ * tsc_selector[] entry chosen by tsc_selector_idx().
+ */
+DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void))
+{
+ return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc);
+}
+
+/*
+ * Per-HPET-unit mapping of the device registers, indexed by unit number.
+ * An entry is NULL until __vdso_init_hpet() has run for that unit and
+ * MAP_FAILED when the device could not be opened or mapped.
+ */
+#define HPET_DEV_MAP_MAX 10
+static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
+
+/*
+ * Map the registers of HPET unit u ("/dev/hpet<u>") read-only and record
+ * the mapping in hpet_dev_map[u].
+ *
+ * u must be below HPET_DEV_MAP_MAX; the caller checks this.  On failure
+ * (capability mode, open error or mmap error) MAP_FAILED is recorded so
+ * that the caller does not try again.  When another thread wins the race
+ * to fill the slot, the mapping created here is released.
+ */
+static void
+__vdso_init_hpet(uint32_t u)
+{
+	static const char devprefix[] = "/dev/hpet";
+	char devname[64], *c, *c1, t;
+	volatile char *new_map, *old_map;
+	unsigned int mode;
+	uint32_t u1;
+	int fd;
+
+	/* Nothing to do when a result has already been recorded. */
+	old_map = hpet_dev_map[u];
+	if (old_map != NULL)
+		return;
+
+	/* Append the decimal digits of u, least significant first ... */
+	c1 = c = stpcpy(devname, devprefix);
+	u1 = u;
+	do {
+		*c++ = u1 % 10 + '0';
+		u1 /= 10;
+	} while (u1 != 0);
+	*c = '\0';
+	/*
+	 * ... and reverse them in place.  The comparison is '<' rather
+	 * than '!=' so the loop also stops when the two pointers cross,
+	 * which is what happens for an even number of digits.
+	 */
+	for (c--; c1 < c; c1++, c--) {
+		t = *c1;
+		*c1 = *c;
+		*c = t;
+	}
+
+	/*
+	 * Explicitly check for the capability mode to avoid
+	 * triggering trap_enocap on the device open by absolute path.
+	 */
+	if ((cap_getmode(&mode) == 0 && mode != 0) ||
+	    (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) {
+		/* Prevent the caller from re-entering. */
+		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
+		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
+		return;
+	}
+
+	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	_close(fd);
+	/* Publish the mapping; drop ours if somebody else got there first. */
+	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
+	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
+	    new_map != MAP_FAILED)
+		munmap((void *)new_map, PAGE_SIZE);
+}
+
+#ifdef WANT_HYPERV
+
+/* Path of the device opened by __vdso_init_hyperv_tsc(). */
+#define HYPERV_REFTSC_DEVPATH "/dev/" HYPERV_REFTSC_DEVNAME
+
+/*
+ * Mapping of the Hyper-V reference TSC page.
+ *
+ * NULL means that initialization is still required.  MAP_FAILED means
+ * that the Hyper-V reference TSC cannot be used, e.g. in a misconfigured
+ * jail.
+ */
+static struct hyperv_reftsc *hyperv_ref_tsc;
+
+/*
+ * Set hyperv_ref_tsc to a read-only shared mapping of
+ * HYPERV_REFTSC_DEVPATH.  When the process is in capability mode or the
+ * device cannot be opened, MAP_FAILED is stored instead.
+ */
+static void
+__vdso_init_hyperv_tsc(void)
+{
+	unsigned int capmode;
+	int devfd;
+
+	/* Opening a device by absolute path is not attempted in capmode. */
+	if (cap_getmode(&capmode) != 0 || capmode == 0) {
+		devfd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC);
+		if (devfd >= 0) {
+			hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc),
+			    PROT_READ, MAP_SHARED, devfd, 0);
+			_close(devfd);
+			return;
+		}
+	}
+
+	/* Prevent the caller from re-entering. */
+	hyperv_ref_tsc = MAP_FAILED;
+}
+
+/*
+ * Compute the timecounter value from the Hyper-V reference TSC page
+ * tsc_ref and store it in *tc.
+ *
+ * Returns 0 on success.  Returns ENOSYS when the page's sequence number
+ * reads as zero.
+ */
+static int
+__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
+{
+ uint64_t disc, ret, tsc, scale;
+ uint32_t seq;
+ int64_t ofs;
+
+ /* Retry for as long as the sequence number changes under us. */
+ while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
+ scale = tsc_ref->tsc_scale;
+ ofs = tsc_ref->tsc_ofs;
+
+ mfence(); /* XXXKIB */
+ tsc = rdtsc();
+
+ /* ret = ((tsc * scale) >> 64) + ofs */
+ /* MULQ leaves the high half in RDX (ret); the low half is unused. */
+ __asm__ __volatile__ ("mulq %3" :
+ "=d" (ret), "=a" (disc) :
+ "a" (tsc), "r" (scale));
+ ret += ofs;
+
+ /* Order the reads above before re-checking the sequence. */
+ atomic_thread_fence_acq();
+ if (tsc_ref->tsc_seq == seq) {
+ *tc = ret;
+ return (0);
+ }
+
+ /* Sequence changed; re-sync. */
+ }
+ return (ENOSYS);
+}
+
+#endif /* WANT_HYPERV */
+
+/*
+ * Mapping of the pvclock time info array.  NULL until
+ * __vdso_init_pvclock_timeinfos() has run; MAP_FAILED when the array
+ * is not available.
+ */
+static struct pvclock_vcpu_time_info *pvclock_timeinfos;
+
+/*
+ * Compute the timecounter value from the pvclock time info array and
+ * store it in *tc.  Always returns 0.
+ *
+ * The value stored is never below th->th_x86_pvc_last_systime.
+ */
+static int
+__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc)
+{
+ uint64_t delta, ns, tsc;
+ struct pvclock_vcpu_time_info *ti;
+ uint32_t cpuid_ti, cpuid_tsc, version;
+ bool stable;
+
+ do {
+ ti = &pvclock_timeinfos[0];
+ version = atomic_load_acq_32(&ti->version);
+ stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0;
+ if (stable) {
+ /* Entry 0 is used together with a plain TSC read. */
+ tsc = __vdso_gettc_rdtsc();
+ } else {
+ /*
+ * Use the aux value from rdtscp_aux() as the array index,
+ * then read the counter again; the loop condition retries
+ * when the two aux values differ.
+ */
+ (void)rdtscp_aux(&cpuid_ti);
+ ti = &pvclock_timeinfos[cpuid_ti];
+ version = atomic_load_acq_32(&ti->version);
+ tsc = rdtscp_aux(&cpuid_tsc);
+ }
+ delta = tsc - ti->tsc_timestamp;
+ ns = ti->system_time + pvclock_scale_delta(delta,
+ ti->tsc_to_system_mul, ti->tsc_shift);
+ atomic_thread_fence_acq();
+ /* Retry on an odd or changed version, or on an aux mismatch. */
+ } while ((ti->version & 1) != 0 || ti->version != version ||
+ (!stable && cpuid_ti != cpuid_tsc));
+ *tc = MAX(ns, th->th_x86_pvc_last_systime);
+ return (0);
+}
+
+/*
+ * One-time setup of pvclock_timeinfos: map AT_NCPUS entries of the
+ * pvclock device read-only and publish the mapping.  MAP_FAILED is
+ * published instead when the CPU count cannot be obtained, the process
+ * is in capability mode, or the device cannot be opened or mapped.
+ */
+static void
+__vdso_init_pvclock_timeinfos(void)
+{
+	struct pvclock_vcpu_time_info *map;
+	unsigned int capmode;
+	size_t maplen;
+	int devfd, ncpus;
+
+	map = MAP_FAILED;
+	if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) == 0 &&
+	    (cap_getmode(&capmode) != 0 || capmode == 0)) {
+		devfd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC);
+		if (devfd >= 0) {
+			maplen = ncpus * sizeof(*pvclock_timeinfos);
+			map = mmap(NULL, maplen, PROT_READ, MAP_SHARED,
+			    devfd, 0);
+			_close(devfd);
+		}
+	}
+
+	/*
+	 * Publish only while the pointer is still NULL; if that fails,
+	 * release the mapping made here.
+	 */
+	if (atomic_cmpset_rel_ptr(
+	    (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL,
+	    (uintptr_t)map) == 0 && map != MAP_FAILED)
+		(void)munmap((void *)map, maplen);
+}
+
+/*
+ * Read the hardware timecounter selected by th->th_algo and store its
+ * value in *tc.
+ *
+ * Returns 0 on success and ENOSYS when the algorithm is not handled
+ * here or the backing device is not available.  The HPET, Hyper-V and
+ * pvclock mappings are set up on first use.
+ */
+#pragma weak __vdso_gettc
+int
+__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
+{
+ volatile char *map;
+ uint32_t idx;
+
+ switch (th->th_algo) {
+ case VDSO_TH_ALGO_X86_TSC:
+ *tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
+ __vdso_gettc_rdtsc32();
+ return (0);
+ case VDSO_TH_ALGO_X86_HPET:
+ idx = th->th_x86_hpet_idx;
+ /* Units beyond the size of hpet_dev_map[] are not supported. */
+ if (idx >= HPET_DEV_MAP_MAX)
+ return (ENOSYS);
+ map = (volatile char *)atomic_load_acq_ptr(
+ (volatile uintptr_t *)&hpet_dev_map[idx]);
+ if (map == NULL) {
+ /* First use of this unit: map it, then reload the slot. */
+ __vdso_init_hpet(idx);
+ map = (volatile char *)atomic_load_acq_ptr(
+ (volatile uintptr_t *)&hpet_dev_map[idx]);
+ }
+ if (map == MAP_FAILED)
+ return (ENOSYS);
+ *tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
+ return (0);
+#ifdef WANT_HYPERV
+ case VDSO_TH_ALGO_X86_HVTSC:
+ if (hyperv_ref_tsc == NULL)
+ __vdso_init_hyperv_tsc();
+ if (hyperv_ref_tsc == MAP_FAILED)
+ return (ENOSYS);
+ return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
+#endif
+ case VDSO_TH_ALGO_X86_PVCLK:
+ if (pvclock_timeinfos == NULL)
+ __vdso_init_pvclock_timeinfos();
+ if (pvclock_timeinfos == MAP_FAILED)
+ return (ENOSYS);
+ return (__vdso_pvclock_gettc(th, tc));
+ default:
+ return (ENOSYS);
+ }
+}
+
+/*
+ * Store the AT_TIMEKEEP auxiliary vector value in *tk and return the
+ * result of _elf_aux_info().
+ */
+#pragma weak __vdso_gettimekeep
+int
+__vdso_gettimekeep(struct vdso_timekeep **tk)
+{
+
+ return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
+}
diff --git a/lib/libsys/x86/pkru.3 b/lib/libsys/x86/pkru.3
new file mode 100644
index 000000000000..95bc66c979ac
--- /dev/null
+++ b/lib/libsys/x86/pkru.3
@@ -0,0 +1,203 @@
+.\" Copyright (c) 2019 The FreeBSD Foundation
+.\"
+.\" This documentation was written by
+.\" Konstantin Belousov <kib@FreeBSD.org> under sponsorship
+.\" from the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd February 16, 2019
+.Dt PKRU 3
+.Os
+.Sh NAME
+.Nm Protection Key Rights for User pages
+.Nd provide fast user-managed key-based access control for pages
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In machine/sysarch.h
+.Ft int
+.Fn x86_pkru_get_perm "unsigned int keyidx" "int *access" "int *modify"
+.Ft int
+.Fn x86_pkru_set_perm "unsigned int keyidx" "int access" "int modify"
+.Ft int
+.Fo x86_pkru_protect_range
+.Fa "void *addr"
+.Fa "unsigned long len"
+.Fa "unsigned int keyidx"
+.Fa "int flags"
+.Fc
+.Ft int
+.Fn x86_pkru_unprotect_range "void *addr" "unsigned long len"
+.Sh DESCRIPTION
+The protection keys feature provides an additional mechanism, besides the
+normal page permissions as established by
+.Xr mmap 2
+and
+.Xr mprotect 2 ,
+to control access to user-mode addresses.
+The mechanism gives safety measures which can be used to avoid
+incidental read or modification of sensitive memory,
+or as a debugging feature.
+It cannot guard against conscious accesses since permissions
+are user-controllable.
+.Pp
+If supported by hardware, each mapped user linear address
+has an associated 4-bit protection key.
+A new per-thread PKRU hardware register determines, for each protection
+key, whether user-mode addresses with that protection key may be
+read or written.
+.Pp
+Only one key may apply to a given range at a time.
+The default protection key index is zero; it is used even if no key
+was explicitly assigned to the address, or if the key was removed.
+.Pp
+The protection prevents the system from accessing user addresses as well
+as the user applications.
+When a system call was unable to read or write user memory due to key
+protection, it returns the
+.Er EFAULT
+error code.
+Note that some side effects may have occurred if this error is reported.
+.Pp
+Protection keys require that the system uses 4-level paging
+(also called long mode),
+which means that they are only available on amd64 systems.
+Both 64-bit and 32-bit applications can use protection keys.
+More information about the hardware feature is provided in the IA32 Software
+Developer's Manual published by Intel Corp.
+.Pp
+The key indexes written into the page table entries are managed by the
+.Fn sysarch
+syscall.
+Per-key permissions are managed using the user-mode instructions
+.Em RDPKRU
+and
+.Em WRPKRU .
+The system provides convenient library helpers for both the syscall and
+the instructions, described below.
+.Pp
+The
+.Fn x86_pkru_protect_range
+function assigns key
+.Fa keyidx
+to the range starting at
+.Fa addr
+and having length
+.Fa len .
+Starting address is truncated to the page start,
+and the end is rounded up to the end of the page.
+After a successful call, the range has the specified key assigned,
+even if the key is zero and it did not change the page table entries.
+.Pp
+The
+.Fa flags
+argument takes the logical OR of the following values:
+.Bl -tag -width indent
+.It Bq Va AMD64_PKRU_EXCL
+Only assign the key if the range does not have any other keys assigned
+(including the zero key).
+You must first remove any existing key with
+.Fn x86_pkru_unprotect_range
+in order for this request to succeed.
+If the
+.Va AMD64_PKRU_EXCL
+flag is not specified,
+.Fn x86_pkru_protect_range
+replaces any existing key.
+.It Bq Va AMD64_PKRU_PERSIST
+The keys assigned to the range are persistent.
+They are re-established when the current mapping is destroyed
+and a new mapping is created in any sub-range of the specified range.
+You must use a
+.Fn x86_pkru_unprotect_range
+call to forget the key.
+.El
+.Pp
+The
+.Fn x86_pkru_unprotect_range
+function removes any keys assigned to the specified range.
+Existing mappings are changed to use key index zero in page table entries.
+Keys are no longer considered installed for all mappings in the range,
+for the purposes of
+.Fn x86_pkru_protect_range
+with the
+.Va AMD64_PKRU_EXCL
+flag.
+.Pp
+The
+.Fn x86_pkru_get_perm
+function returns access rights for the key specified by the
+.Fa keyidx
+argument.
+If the value pointed to by
+.Fa access
+is zero after the call, no read or write permission is granted for
+mappings which are assigned the key
+.Fa keyidx .
+If
+.Fa access
+is not zero, read access is permitted.
+The non-zero value of the variable pointed to by the
+.Fa modify
+argument indicates that write access is permitted.
+.Pp
+Conversely, the
+.Fn x86_pkru_set_perm
+establishes the access and modify permissions for the given key index
+as specified by its arguments.
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EOPNOTSUPP
+The hardware does not support protection keys.
+.It Bq Er EINVAL
+The supplied key index is invalid (greater than 15).
+.It Bq Er EINVAL
+The supplied
+.Fa flags
+argument for
+.Fn x86_pkru_protect_range
+has reserved bits set.
+.It Bq Er EFAULT
+The supplied address range does not completely fit into the user-managed
+address range.
+.It Bq Er ENOMEM
+The memory shortage prevents the completion of the operation.
+.It Bq Er EBUSY
+The
+.Va AMD64_PKRU_EXCL
+flag was specified for
+.Fn x86_pkru_protect_range
+and the range already has defined protection keys.
+.El
+.Sh SEE ALSO
+.Xr mmap 2 ,
+.Xr mprotect 2 ,
+.Xr munmap 2 ,
+.Xr sysarch 2
+.Sh STANDARDS
+The
+.Nm
+functions are non-standard and first appeared in
+.Fx 13.0 .
diff --git a/lib/libsys/x86/pkru.c b/lib/libsys/x86/pkru.c
new file mode 100644
index 000000000000..b6ae181c131f
--- /dev/null
+++ b/lib/libsys/x86/pkru.c
@@ -0,0 +1,134 @@
+/*-
+ * Copyright (c) 2019 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <x86/ifunc.h>
+#include <errno.h>
+#include <string.h>
+
+/* Largest key index accepted by the get/set permission helpers. */
+#define MAX_PKRU_IDX 0xf
+/* Map the sysarch() operation names onto the per-architecture ones. */
+#ifdef __i386__
+#define X86_SET_PKRU I386_SET_PKRU
+#define X86_CLEAR_PKRU I386_CLEAR_PKRU
+#else
+#define X86_SET_PKRU AMD64_SET_PKRU
+#define X86_CLEAR_PKRU AMD64_CLEAR_PKRU
+#endif
+
+/*
+ * x86_pkru_get_perm() implementation selected when OSPKE is not
+ * advertised: always fails with errno set to EOPNOTSUPP and leaves
+ * *access and *modify untouched.
+ */
+static int
+x86_pkru_get_perm_unsup(u_int keyidx, int *access, int *modify)
+{
+
+ errno = EOPNOTSUPP;
+ return (-1);
+}
+
+/*
+ * x86_pkru_get_perm() implementation for hardware with protection keys.
+ *
+ * Reads the PKRU register and reports the rights for key keyidx:
+ * *access is set to 1 when bit (2 * keyidx) is clear and to 0 otherwise,
+ * *modify is set to 1 when bit (2 * keyidx + 1) is clear and to 0
+ * otherwise.
+ *
+ * Returns 0 on success.  Returns -1 with errno set to EINVAL when keyidx
+ * exceeds MAX_PKRU_IDX.
+ */
+static int
+x86_pkru_get_perm_hw(u_int keyidx, int *access, int *modify)
+{
+	uint32_t pkru;
+
+	if (keyidx > MAX_PKRU_IDX) {
+		errno = EINVAL;
+		return (-1);
+	}
+	/* Each key owns two adjacent bits of PKRU. */
+	keyidx *= 2;
+	pkru = rdpkru();
+	/*
+	 * Use unsigned constants: for key 15 the upper bit is bit 31, and
+	 * shifting a signed int into the sign bit is undefined behaviour.
+	 */
+	*access = (pkru & (1u << keyidx)) == 0;
+	*modify = (pkru & (2u << keyidx)) == 0;
+	return (0);
+}
+
+/*
+ * Resolver for x86_pkru_get_perm(): selects the hardware implementation
+ * when CPUID_STDEXT2_OSPKE is set in cpu_stdext_feature2 and the
+ * EOPNOTSUPP stub otherwise.
+ */
+DEFINE_UIFUNC(, int, x86_pkru_get_perm, (u_int, int *, int *))
+{
+
+ return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ?
+ x86_pkru_get_perm_unsup : x86_pkru_get_perm_hw);
+}
+
+/*
+ * x86_pkru_set_perm() implementation selected when OSPKE is not
+ * advertised: always fails with errno set to EOPNOTSUPP.
+ */
+static int
+x86_pkru_set_perm_unsup(u_int keyidx, int access, int modify)
+{
+
+ errno = EOPNOTSUPP;
+ return (-1);
+}
+
+/*
+ * x86_pkru_set_perm() implementation for hardware with protection keys.
+ *
+ * Rewrites the two PKRU bits belonging to key keyidx: bit (2 * keyidx)
+ * is set when access is zero, bit (2 * keyidx + 1) is set when modify
+ * is zero, and both are cleared otherwise.
+ *
+ * Returns 0 on success.  Returns -1 with errno set to EINVAL when keyidx
+ * exceeds MAX_PKRU_IDX.
+ */
+static int
+x86_pkru_set_perm_hw(u_int keyidx, int access, int modify)
+{
+	uint32_t pkru;
+
+	if (keyidx > MAX_PKRU_IDX) {
+		errno = EINVAL;
+		return (-1);
+	}
+	/* Each key owns two adjacent bits of PKRU. */
+	keyidx *= 2;
+	pkru = rdpkru();
+	/*
+	 * Use unsigned constants: for key 15 the upper bit is bit 31, and
+	 * shifting a signed int into the sign bit is undefined behaviour.
+	 */
+	pkru &= ~(3u << keyidx);
+	if (!access)
+		pkru |= 1u << keyidx;
+	if (!modify)
+		pkru |= 2u << keyidx;
+	wrpkru(pkru);
+	return (0);
+}
+
+/*
+ * Resolver for x86_pkru_set_perm(): selects the hardware implementation
+ * when CPUID_STDEXT2_OSPKE is set in cpu_stdext_feature2 and the
+ * EOPNOTSUPP stub otherwise.
+ */
+DEFINE_UIFUNC(, int, x86_pkru_set_perm, (u_int, int, int))
+{
+
+ return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ?
+ x86_pkru_set_perm_unsup : x86_pkru_set_perm_hw);
+}
+
+/*
+ * Ask the kernel, through sysarch(X86_SET_PKRU), to assign protection
+ * key keyidx to the range of len bytes starting at addr, passing flags
+ * along unchanged.  Returns whatever sysarch() returns.
+ */
+int
+x86_pkru_protect_range(void *addr, unsigned long len, u_int keyidx, int flags)
+{
+	struct amd64_set_pkru req;
+
+	/* Start from an all-zero request, then fill in the fields. */
+	(void)memset(&req, 0, sizeof(req));
+	req.flags = flags;
+	req.keyidx = keyidx;
+	req.len = len;
+	req.addr = addr;
+
+	return (sysarch(X86_SET_PKRU, &req));
+}
+
+/*
+ * Ask the kernel, through sysarch(X86_CLEAR_PKRU), to drop the
+ * protection keys from the range of len bytes starting at addr.
+ * Returns whatever sysarch() returns.
+ */
+int
+x86_pkru_unprotect_range(void *addr, unsigned long len)
+{
+	struct amd64_set_pkru req;
+
+	/* Only the range is supplied; every other field stays zero. */
+	(void)memset(&req, 0, sizeof(req));
+	req.len = len;
+	req.addr = addr;
+
+	return (sysarch(X86_CLEAR_PKRU, &req));
+}
diff --git a/lib/libsys/x86/sched_getcpu_x86.c b/lib/libsys/x86/sched_getcpu_x86.c
new file mode 100644
index 000000000000..13ba18ef5e36
--- /dev/null
+++ b/lib/libsys/x86/sched_getcpu_x86.c
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software were developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <x86/ifunc.h>
+#include <errno.h>
+#include <sched.h>
+#include "libc_private.h"
+
+/* sched_getcpu() implementation that calls __sys_sched_getcpu(). */
+static int
+sched_getcpu_sys(void)
+{
+ return (__sys_sched_getcpu());
+}
+
+/*
+ * sched_getcpu() implementation based on the RDPID instruction.
+ * NOTE(review): relies on the value read by RDPID being the CPU id;
+ * that setup is not visible in this file - confirm.
+ */
+static int
+sched_getcpu_rdpid(void)
+{
+ register_t res;
+
+ __asm("rdpid %0" : "=r" (res));
+ return ((int)res);
+}
+
+/*
+ * sched_getcpu() implementation based on the RDTSCP instruction: the
+ * value left in ECX is returned, the counter in EDX:EAX is discarded.
+ * NOTE(review): relies on ECX holding the CPU id after RDTSCP; that
+ * setup is not visible in this file - confirm.
+ */
+static int
+sched_getcpu_rdtscp(void)
+{
+ int res;
+
+ __asm("rdtscp" : "=c" (res) : : "eax", "edx");
+ return (res);
+}
+
+/*
+ * Resolver for sched_getcpu().  Preference order: RDPID when
+ * CPUID_STDEXT2_RDPID is advertised, RDTSCP when the extended feature
+ * word advertises AMDID_RDTSCP, and the system call otherwise.
+ */
+DEFINE_UIFUNC(, int, sched_getcpu, (void))
+{
+	u_int regs[4];
+
+	if ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
+		return (sched_getcpu_rdpid);
+
+	/* Without feature bits the extended leaves are not queried. */
+	if (cpu_feature == 0)
+		return (sched_getcpu_sys);
+
+	/* The extended feature leaf must exist before it can be read. */
+	do_cpuid(0x80000000, regs);
+	if (regs[0] < 0x80000001)
+		return (sched_getcpu_sys);
+
+	do_cpuid(0x80000001, regs);
+	if ((regs[3] & AMDID_RDTSCP) != 0)
+		return (sched_getcpu_rdtscp);
+	return (sched_getcpu_sys);
+}