23 files changed, 527 insertions, 403 deletions
diff --git a/sys/kern/device_if.m b/sys/kern/device_if.m
index c02e5a46f326..ed94b2ccbe1b 100644
--- a/sys/kern/device_if.m
+++ b/sys/kern/device_if.m
@@ -49,25 +49,25 @@ HEADER {
 CODE {
 	static int null_shutdown(device_t dev)
 	{
-	    return 0;
+		return (0);
 	}
 
 	static int null_suspend(device_t dev)
 	{
-	    return 0;
+		return (0);
 	}
 
 	static int null_resume(device_t dev)
 	{
-	    return 0;
+		return (0);
 	}
 
 	static int null_quiesce(device_t dev)
 	{
-	    return 0;
+		return (0);
 	}
 
-	static void * null_register(device_t dev)
+	static void *null_register(device_t dev)
 	{
 		return NULL;
 	}
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 2690ad3b2679..5a53fac50f2c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -84,6 +84,13 @@
 #define ELF_NOTE_ROUNDSIZE	4
 #define OLD_EI_BRAND	8
 
+/*
+ * ELF_ABI_NAME is a string name of the ELF ABI.  ELF_ABI_ID is used
+ * to build variable names.
+ */
+#define	ELF_ABI_NAME	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+#define	ELF_ABI_ID	__CONCAT(elf, __ELF_WORD_SIZE)
+
 static int __elfN(check_header)(const Elf_Ehdr *hdr);
 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
     const char *interp, int32_t *osrel, uint32_t *fctl0);
@@ -104,14 +111,15 @@ static Elf_Word __elfN(untrans_prot)(vm_prot_t);
 static size_t __elfN(prepare_register_notes)(struct thread *td,
     struct note_info_list *list, struct thread *target_td);
 
-SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE),
-    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+SYSCTL_NODE(_kern, OID_AUTO, ELF_ABI_ID, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 
+#define	ELF_NODE_OID	__CONCAT(_kern_, ELF_ABI_ID)
+
 int __elfN(fallback_brand) = -1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
     fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
+    ELF_ABI_NAME " brand of last resort");
 
 static int elf_legacy_coredump = 0;
 SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, 
@@ -126,22 +134,22 @@ int __elfN(nxstack) =
 #else
 	0;
 #endif
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
     nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": support PT_GNU_STACK for non-executable stack control");
+    ELF_ABI_NAME ": support PT_GNU_STACK for non-executable stack control");
 
 #if defined(__amd64__)
 static int __elfN(vdso) = 1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
     vdso, CTLFLAG_RWTUN, &__elfN(vdso), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable vdso preloading");
+    ELF_ABI_NAME ": enable vdso preloading");
 #else
 static int __elfN(vdso) = 0;
 #endif
 
 #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
 int i386_read_exec = 0;
-SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
     "enable execution from readable segments");
 #endif
 
@@ -161,15 +169,15 @@ sysctl_pie_base(SYSCTL_HANDLER_ARGS)
 	__elfN(pie_base) = val;
 	return (0);
 }
-SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base,
+SYSCTL_PROC(ELF_NODE_OID, OID_AUTO, pie_base,
     CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
     sysctl_pie_base, "LU",
     "PIE load base without randomization");
 
-SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr,
+SYSCTL_NODE(ELF_NODE_OID, OID_AUTO, aslr,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
-#define	ASLR_NODE_OID	__CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)
+#define	ASLR_NODE_OID	__CONCAT(ELF_NODE_OID, _aslr)
 
 /*
  * Enable ASLR by default for 64-bit non-PIE binaries.  32-bit architectures
@@ -179,8 +187,7 @@ SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr,
 static int __elfN(aslr_enabled) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
     &__elfN(aslr_enabled), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
-    ": enable address map randomization");
+    ELF_ABI_NAME ": enable address map randomization");
 
 /*
  * Enable ASLR by default for 64-bit PIE binaries.
@@ -188,8 +195,7 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
 static int __elfN(pie_aslr_enabled) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
     &__elfN(pie_aslr_enabled), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
-    ": enable address map randomization for PIE binaries");
+    ELF_ABI_NAME ": enable address map randomization for PIE binaries");
 
 /*
  * Sbrk is deprecated and it can be assumed that in most cases it will not be
@@ -199,27 +205,25 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
 static int __elfN(aslr_honor_sbrk) = 0;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
     &__elfN(aslr_honor_sbrk), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
+    ELF_ABI_NAME ": assume sbrk is used");
 
 static int __elfN(aslr_stack) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
     &__elfN(aslr_stack), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
-    ": enable stack address randomization");
+    ELF_ABI_NAME ": enable stack address randomization");
 
 static int __elfN(aslr_shared_page) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, shared_page, CTLFLAG_RWTUN,
     &__elfN(aslr_shared_page), 0,
-    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
-    ": enable shared page address randomization");
+    ELF_ABI_NAME ": enable shared page address randomization");
 
 static int __elfN(sigfastblock) = 1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO, sigfastblock,
     CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
     "enable sigfastblock for new processes");
 
 static bool __elfN(allow_wx) = true;
-SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx,
+SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx,
     CTLFLAG_RWTUN, &__elfN(allow_wx), 0,
     "Allow pages to be mapped simultaneously writable and executable");
 
@@ -2951,9 +2955,9 @@ __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
  */
 static struct execsw __elfN(execsw) = {
 	.ex_imgact = __CONCAT(exec_, __elfN(imgact)),
-	.ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+	.ex_name = ELF_ABI_NAME
 };
-EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
+EXEC_SET(ELF_ABI_ID, __elfN(execsw));
 
 static vm_prot_t
 __elfN(trans_prot)(Elf_Word flags)
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 91792430d24c..fcd232cde21e 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -67,7 +67,7 @@
 /* The casts are bogus but will do for now. */
 struct sysent sysent[] = {
 	{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 0 = syscall */
-	{ .sy_narg = AS(exit_args), .sy_call = (sy_call_t *)sys_exit, .sy_auevent = AUE_EXIT, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 1 = exit */
+	{ .sy_narg = AS(_exit_args), .sy_call = (sy_call_t *)sys__exit, .sy_auevent = AUE_EXIT, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 1 = _exit */
 	{ .sy_narg = 0, .sy_call = (sy_call_t *)sys_fork, .sy_auevent = AUE_FORK, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 2 = fork */
 	{ .sy_narg = AS(read_args), .sy_call = (sy_call_t *)sys_read, .sy_auevent = AUE_READ, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 3 = read */
 	{ .sy_narg = AS(write_args), .sy_call = (sy_call_t *)sys_write, .sy_auevent = AUE_WRITE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 4 = write */
@@ -145,8 +145,8 @@ struct sysent sysent[] = {
 	{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT },	/* 76 = obsolete vhangup */
 	{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT },	/* 77 = obsolete vlimit */
 	{ .sy_narg = AS(mincore_args), .sy_call = (sy_call_t *)sys_mincore, .sy_auevent = AUE_MINCORE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 78 = mincore */
-	{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 79 = getgroups */
-	{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 80 = setgroups */
+	{ compat14(AS(freebsd14_getgroups_args),getgroups), .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 79 = freebsd14 getgroups */
+	{ compat14(AS(freebsd14_setgroups_args),setgroups), .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 80 = freebsd14 setgroups */
 	{ .sy_narg = 0, .sy_call = (sy_call_t *)sys_getpgrp, .sy_auevent = AUE_GETPGRP, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 81 = getpgrp */
 	{ .sy_narg = AS(setpgid_args), .sy_call = (sy_call_t *)sys_setpgid, .sy_auevent = AUE_SETPGRP, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 82 = setpgid */
 	{ .sy_narg = AS(setitimer_args), .sy_call = (sy_call_t *)sys_setitimer, .sy_auevent = AUE_SETITIMER, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 83 = setitimer */
@@ -661,4 +661,6 @@ struct sysent sysent[] = {
 	{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 592 = exterrctl */
 	{ .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 593 = inotify_add_watch_at */
 	{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 594 = inotify_rm_watch */
+	{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 595 = getgroups */
+	{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 596 = setgroups */
 };
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 54e3044ab093..a32b5a1b3354 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -202,7 +202,7 @@ exit_onexit(struct proc *p)
  * exit -- death of process.
  */
 int
-sys_exit(struct thread *td, struct exit_args *uap)
+sys__exit(struct thread *td, struct _exit_args *uap)
 {
 
 	exit1(td, uap->rval, 0);
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 7ef1d19f0ea8..7c9a15ae18f3 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -115,8 +115,11 @@ struct prison prison0 = {
 #else
 	.pr_flags	= PR_HOST|_PR_IP_SADDRSEL,
 #endif
-	.pr_allow	= PR_ALLOW_ALL_STATIC,
+	.pr_allow	= PR_ALLOW_PRISON0,
 };
+_Static_assert((PR_ALLOW_PRISON0 & ~PR_ALLOW_ALL_STATIC) == 0,
+    "Bits enabled in PR_ALLOW_PRISON0 that are not statically reserved");
+
 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
 
 struct bool_flags {
@@ -232,6 +235,9 @@ static struct bool_flags pr_flag_allow[NBBY * NBPW] = {
 	{"allow.adjtime", "allow.noadjtime", PR_ALLOW_ADJTIME},
 	{"allow.settime", "allow.nosettime", PR_ALLOW_SETTIME},
 	{"allow.routing", "allow.norouting", PR_ALLOW_ROUTING},
+	{"allow.unprivileged_parent_tampering",
+	    "allow.nounprivileged_parent_tampering",
+	    PR_ALLOW_UNPRIV_PARENT_TAMPER},
 };
 static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC;
 const size_t pr_flag_allow_size = sizeof(pr_flag_allow);
@@ -4006,6 +4012,7 @@ prison_priv_check(struct ucred *cred, int priv)
 	case PRIV_DEBUG_DIFFCRED:
 	case PRIV_DEBUG_SUGID:
 	case PRIV_DEBUG_UNPRIV:
+	case PRIV_DEBUG_DIFFJAIL:
 
 		/*
 		 * Allow jail to set various resource limits and login
@@ -4043,8 +4050,10 @@ prison_priv_check(struct ucred *cred, int priv)
 		 */
 	case PRIV_SCHED_DIFFCRED:
 	case PRIV_SCHED_CPUSET:
+	case PRIV_SCHED_DIFFJAIL:
 	case PRIV_SIGNAL_DIFFCRED:
 	case PRIV_SIGNAL_SUGID:
+	case PRIV_SIGNAL_DIFFJAIL:
 
 		/*
 		 * Allow jailed processes to write to sysctls marked as jail
@@ -4688,6 +4697,10 @@ SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Jail may read the kernel message buffer");
 SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Unprivileged processes may use process debugging facilities");
+SYSCTL_JAIL_PARAM(_allow, unprivileged_parent_tampering,
+    CTLTYPE_INT | CTLFLAG_RW, "B",
+    "Unprivileged parent jail processes may tamper with same-uid processes"
+    " (signal/debug/cpuset)");
 SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Processes in jail with uid 0 have privilege");
 #ifdef VIMAGE
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index bbb622547598..0ca42d640767 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -310,6 +310,39 @@ sys_getegid(struct thread *td, struct getegid_args *uap)
 	return (0);
 }
 
+#ifdef COMPAT_FREEBSD14
+int
+freebsd14_getgroups(struct thread *td, struct freebsd14_getgroups_args *uap)
+{
+	struct ucred *cred;
+	int ngrp, error;
+
+	cred = td->td_ucred;
+
+	/*
+	 * For FreeBSD < 15.0, we account for the egid being placed at the
+	 * beginning of the group list prior to all supplementary groups.
+	 */
+	ngrp = cred->cr_ngroups + 1;
+	if (uap->gidsetsize == 0) {
+		error = 0;
+		goto out;
+	} else if (uap->gidsetsize < ngrp) {
+		return (EINVAL);
+	}
+
+	error = copyout(&cred->cr_gid, uap->gidset, sizeof(gid_t));
+	if (error == 0)
+		error = copyout(cred->cr_groups, uap->gidset + 1,
+		    (ngrp - 1) * sizeof(gid_t));
+
+out:
+	td->td_retval[0] = ngrp;
+	return (error);
+
+}
+#endif	/* COMPAT_FREEBSD14 */
+
 #ifndef _SYS_SYSPROTO_H_
 struct getgroups_args {
 	int	gidsetsize;
@@ -320,18 +353,11 @@ int
 sys_getgroups(struct thread *td, struct getgroups_args *uap)
 {
 	struct ucred *cred;
-	gid_t *ugidset;
 	int ngrp, error;
 
 	cred = td->td_ucred;
 
-	/*
-	 * cr_gid has been moved out of cr_groups, but we'll continue exporting
-	 * the egid as groups[0] for the time being until we audit userland for
-	 * any surprises.
-	 */
-	ngrp = cred->cr_ngroups + 1;
-
+	ngrp = cred->cr_ngroups;
 	if (uap->gidsetsize == 0) {
 		error = 0;
 		goto out;
@@ -339,14 +365,7 @@ sys_getgroups(struct thread *td, struct getgroups_args *uap)
 	if (uap->gidsetsize < ngrp)
 		return (EINVAL);
 
-	ugidset = uap->gidset;
-	error = copyout(&cred->cr_gid, ugidset, sizeof(*ugidset));
-	if (error != 0)
-		goto out;
-
-	if (ngrp > 1)
-		error = copyout(cred->cr_groups, ugidset + 1,
-		    (ngrp - 1) * sizeof(*ugidset));
+	error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t));
 out:
 	td->td_retval[0] = ngrp;
 	return (error);
@@ -1186,6 +1205,44 @@ fail:
 	return (error);
 }
 
+#ifdef COMPAT_FREEBSD14
+int
+freebsd14_setgroups(struct thread *td, struct freebsd14_setgroups_args *uap)
+{
+	gid_t smallgroups[CRED_SMALLGROUPS_NB];
+	gid_t *groups;
+	int gidsetsize, error;
+
+	/*
+	 * Before FreeBSD 15.0, we allow one more group to be supplied to
+	 * account for the egid appearing before the supplementary groups.  This
+	 * may technically allow one more supplementary group for systems that
+	 * did use the default NGROUPS_MAX if we round it back up to 1024.
+	 */
+	gidsetsize = uap->gidsetsize;
+	if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
+		return (EINVAL);
+
+	if (gidsetsize > CRED_SMALLGROUPS_NB)
+		groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK);
+	else
+		groups = smallgroups;
+
+	error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
+	if (error == 0) {
+		int ngroups = gidsetsize > 0 ? gidsetsize - 1 /* egid */ : 0;
+
+		error = kern_setgroups(td, &ngroups, groups + 1);
+		if (error == 0 && gidsetsize > 0)
+			td->td_proc->p_ucred->cr_gid = groups[0];
+	}
+
+	if (groups != smallgroups)
+		free(groups, M_TEMP);
+	return (error);
+}
+#endif	/* COMPAT_FREEBSD14 */
+
 #ifndef _SYS_SYSPROTO_H_
 struct setgroups_args {
 	int	gidsetsize;
@@ -1210,8 +1267,7 @@ sys_setgroups(struct thread *td, struct setgroups_args *uap)
 	 * setgroups() differ.
 	 */
 	gidsetsize = uap->gidsetsize;
-	/* XXXKE Limit to ngroups_max when we change the userland interface. */
-	if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
+	if (gidsetsize > ngroups_max || gidsetsize < 0)
 		return (EINVAL);
 
 	if (gidsetsize > CRED_SMALLGROUPS_NB)
@@ -1238,35 +1294,17 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	int ngrp, error;
-	gid_t egid;
 
 	ngrp = *ngrpp;
 	/* Sanity check size. */
-	/* XXXKE Limit to ngroups_max when we change the userland interface. */
-	if (ngrp < 0 || ngrp > ngroups_max + 1)
+	if (ngrp < 0 || ngrp > ngroups_max)
 		return (EINVAL);
 
 	AUDIT_ARG_GROUPSET(groups, ngrp);
-	/*
-	 * setgroups(0, NULL) is a legitimate way of clearing the groups vector
-	 * on non-BSD systems (which generally do not have the egid in the
-	 * groups[0]).  We risk security holes when running non-BSD software if
-	 * we do not do the same.  So we allow and treat 0 for 'ngrp' specially
-	 * below (twice).
-	 */
-	if (ngrp != 0) {
-		/*
-		 * To maintain userland compat for now, we use the first group
-		 * as our egid and we'll use the rest as our supplemental
-		 * groups.
-		 */
-		egid = groups[0];
-		ngrp--;
-		groups++;
 
-		groups_normalize(&ngrp, groups);
-		*ngrpp = ngrp;
-	}
+	groups_normalize(&ngrp, groups);
+	*ngrpp = ngrp;
+
 	newcred = crget();
 	crextend(newcred, ngrp);
 	PROC_LOCK(p);
@@ -1289,15 +1327,7 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
 	if (error)
 		goto fail;
 
-	/*
-	 * If some groups were passed, the first one is currently the desired
-	 * egid.  This code is to be removed (along with some commented block
-	 * above) when setgroups() is changed to take only supplementary groups.
-	 */
-	if (ngrp != 0)
-		newcred->cr_gid = egid;
 	crsetgroups_internal(newcred, ngrp, groups);
-
 	setsugid(p);
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
@@ -1914,6 +1944,38 @@ cr_canseejailproc(struct ucred *u1, struct ucred *u2)
 }
 
 /*
+ * Determine if u1 can tamper with the subject specified by u2, if they are in
+ * different jails and 'unprivileged_parent_tampering' jail policy allows it.
+ *
+ * May be called if u1 and u2 are in the same jail, but it is expected that the
+ * caller has already done a prison_check() prior to calling it.
+ *
+ * Returns: 0 for permitted, EPERM otherwise
+ */
+static int
+cr_can_tamper_with_subjail(struct ucred *u1, struct ucred *u2, int priv)
+{
+
+	MPASS(prison_check(u1, u2) == 0);
+	if (u1->cr_prison == u2->cr_prison)
+		return (0);
+
+	if (priv_check_cred(u1, priv) == 0)
+		return (0);
+
+	/*
+	 * Jails do not maintain a distinct UID space, so process visibility is
+	 * all that would control an unprivileged process' ability to tamper
+	 * with a process in a subjail by default if we did not have the
+	 * allow.unprivileged_parent_tampering knob to restrict it by default.
+	 */
+	if (prison_allow(u2, PR_ALLOW_UNPRIV_PARENT_TAMPER))
+		return (0);
+
+	return (EPERM);
+}
+
+/*
  * Helper for cr_cansee*() functions to abide by system-wide security.bsd.see_*
  * policies.  Determines if u1 "can see" u2 according to these policies.
  * Returns: 0 for permitted, ESRCH otherwise
@@ -2062,6 +2124,19 @@ cr_cansignal(struct ucred *cred, struct proc *proc, int signum)
 			return (error);
 	}
 
+	/*
+	 * At this point, the target may be in a different jail than the
+	 * subject -- the subject must be in a parent jail to the target,
+	 * whether it is prison0 or a subordinate of prison0 that has
+	 * children.  Additional privileges are required to allow this, as
+	 * whether the creds are truly equivalent or not must be determined on
+	 * a case-by-case basis.
+	 */
+	error = cr_can_tamper_with_subjail(cred, proc->p_ucred,
+	    PRIV_SIGNAL_DIFFJAIL);
+	if (error)
+		return (error);
+
 	return (0);
 }
 
@@ -2138,6 +2213,12 @@ p_cansched(struct thread *td, struct proc *p)
 		if (error)
 			return (error);
 	}
+
+	error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
+	    PRIV_SCHED_DIFFJAIL);
+	if (error)
+		return (error);
+
 	return (0);
 }
 
@@ -2258,6 +2339,11 @@ p_candebug(struct thread *td, struct proc *p)
 			return (error);
 	}
 
+	error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
+	    PRIV_DEBUG_DIFFJAIL);
+	if (error)
+		return (error);
+
 	/* Can't trace init when securelevel > 0. */
 	if (p == initproc) {
 		error = securelevel_gt(td->td_ucred, 0);
@@ -2835,7 +2921,8 @@ crextend(struct ucred *cr, int n)
  * Normalizes a set of groups to be applied to a 'struct ucred'.
  *
  * Normalization ensures that the supplementary groups are sorted in ascending
- * order and do not contain duplicates.
+ * order and do not contain duplicates.  This allows group_is_supplementary
+ * to do a binary search.
  */
 static void
 groups_normalize(int *ngrp, gid_t *groups)
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 7ee3b9e2048a..7351e9cb6313 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -96,6 +96,13 @@ static void racct_sub_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
 static void racct_add_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
+static int racct_set_locked(struct proc *p, int resource, uint64_t amount,
+                int force);
+static void racct_updatepcpu_locked(struct proc *p);
+static void racct_updatepcpu_racct_locked(struct racct *racct);
+static void racct_updatepcpu_containers(void);
+static void racct_settime_locked(struct proc *p, bool exit);
+static void racct_zeropcpu_locked(struct proc *p);
 
 SDT_PROVIDER_DEFINE(racct);
 SDT_PROBE_DEFINE3(racct, , rusage, add,
@@ -308,68 +315,6 @@ fixpt_t ccpu_exp[] = {
 
 #define	CCPU_EXP_MAX	110
 
-/*
- * This function is analogical to the getpcpu() function in the ps(1) command.
- * They should both calculate in the same way so that the racct %cpu
- * calculations are consistent with the values shown by the ps(1) tool.
- * The calculations are more complex in the 4BSD scheduler because of the value
- * of the ccpu variable.  In ULE it is defined to be zero which saves us some
- * work.
- */
-static uint64_t
-racct_getpcpu(struct proc *p, u_int pcpu)
-{
-	u_int swtime;
-#ifdef SCHED_4BSD
-	fixpt_t pctcpu, pctcpu_next;
-#endif
-	fixpt_t p_pctcpu;
-	struct thread *td;
-
-	ASSERT_RACCT_ENABLED();
-	KASSERT((p->p_flag & P_IDLEPROC) == 0,
-	    ("racct_getpcpu: idle process %p", p));
-
-	swtime = (ticks - p->p_swtick) / hz;
-
-	/*
-	 * For short-lived processes, the sched_pctcpu() returns small
-	 * values even for cpu intensive processes.  Therefore we use
-	 * our own estimate in this case.
-	 */
-	if (swtime < RACCT_PCPU_SECS)
-		return (pcpu);
-
-	p_pctcpu = 0;
-	FOREACH_THREAD_IN_PROC(p, td) {
-		thread_lock(td);
-#ifdef SCHED_4BSD
-		pctcpu = sched_pctcpu(td);
-		/* Count also the yet unfinished second. */
-		pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
-		pctcpu_next += sched_pctcpu_delta(td);
-		p_pctcpu += max(pctcpu, pctcpu_next);
-#else
-		/*
-		 * In ULE the %cpu statistics are updated on every
-		 * sched_pctcpu() call.  So special calculations to
-		 * account for the latest (unfinished) second are
-		 * not needed.
-		 */
-		p_pctcpu += sched_pctcpu(td);
-#endif
-		thread_unlock(td);
-	}
-
-#ifdef SCHED_4BSD
-	if (swtime <= CCPU_EXP_MAX)
-		return ((100 * (uint64_t)p_pctcpu * 1000000) /
-		    (FSCALE - ccpu_exp[swtime]));
-#endif
-
-	return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
-}
-
 static void
 racct_add_racct(struct racct *dest, const struct racct *src)
 {
@@ -499,19 +444,6 @@ racct_adjust_resource(struct racct *racct, int resource,
 		    ("%s: resource %d usage < 0", __func__, resource));
 		racct->r_resources[resource] = 0;
 	}
-
-	/*
-	 * There are some cases where the racct %cpu resource would grow
-	 * beyond 100% per core.  For example in racct_proc_exit() we add
-	 * the process %cpu usage to the ucred racct containers.  If too
-	 * many processes terminated in a short time span, the ucred %cpu
-	 * resource could grow too much.  Also, the 4BSD scheduler sometimes
-	 * returns for a thread more than 100% cpu usage. So we set a sane
-	 * boundary here to 100% * the maximum number of CPUs.
-	 */
-	if ((resource == RACCT_PCTCPU) &&
-	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
-		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
 }
 
 static int
@@ -635,10 +567,44 @@ racct_add_buf(struct proc *p, const struct buf *bp, int is_write)
 	RACCT_UNLOCK();
 }
 
+static void
+racct_settime_locked(struct proc *p, bool exit)
+{
+	struct thread *td;
+	struct timeval wallclock;
+	uint64_t runtime;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	if (exit) {
+		/*
+		 * proc_reap() has already calculated rux
+		 * and added crux to rux.
+		 */
+		runtime = cputick2usec(p->p_rux.rux_runtime -
+		    p->p_crux.rux_runtime);
+	} else {
+		PROC_STATLOCK(p);
+		FOREACH_THREAD_IN_PROC(p, td)
+			ruxagg(p, td);
+		PROC_STATUNLOCK(p);
+		runtime = cputick2usec(p->p_rux.rux_runtime);
+	}
+	microuptime(&wallclock);
+	timevalsub(&wallclock, &p->p_stats->p_start);
+
+	racct_set_locked(p, RACCT_CPU, runtime, 0);
+	racct_set_locked(p, RACCT_WALLCLOCK,
+	    (uint64_t)wallclock.tv_sec * 1000000 +
+	    wallclock.tv_usec, 0);
+}
+
 static int
 racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
 {
-	int64_t old_amount, decayed_amount, diff_proc, diff_cred;
+	int64_t old_amount, diff_proc, diff_cred;
 #ifdef RCTL
 	int error;
 #endif
@@ -655,17 +621,7 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
 	 * The diffs may be negative.
 	 */
 	diff_proc = amount - old_amount;
-	if (resource == RACCT_PCTCPU) {
-		/*
-		 * Resources in per-credential racct containers may decay.
-		 * If this is the case, we need to calculate the difference
-		 * between the new amount and the proportional value of the
-		 * old amount that has decayed in the ucred racct containers.
-		 */
-		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
-		diff_cred = amount - decayed_amount;
-	} else
-		diff_cred = diff_proc;
+	diff_cred = diff_proc;
 #ifdef notyet
 	KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
 	    ("%s: usage of non-droppable resource %d dropping", __func__,
@@ -908,8 +864,6 @@ racct_proc_fork(struct proc *parent, struct proc *child)
 		goto out;
 #endif
 
-	/* Init process cpu time. */
-	child->p_prev_runtime = 0;
 	child->p_throttled = 0;
 
 	/*
@@ -964,37 +918,16 @@ racct_proc_fork_done(struct proc *child)
 void
 racct_proc_exit(struct proc *p)
 {
-	struct timeval wallclock;
-	uint64_t pct_estimate, pct, runtime;
 	int i;
 
 	if (!racct_enable)
 		return;
 
 	PROC_LOCK(p);
-	/*
-	 * We don't need to calculate rux, proc_reap() has already done this.
-	 */
-	runtime = cputick2usec(p->p_rux.rux_runtime);
-#ifdef notyet
-	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
-#else
-	if (runtime < p->p_prev_runtime)
-		runtime = p->p_prev_runtime;
-#endif
-	microuptime(&wallclock);
-	timevalsub(&wallclock, &p->p_stats->p_start);
-	if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
-		pct_estimate = (1000000 * runtime * 100) /
-		    ((uint64_t)wallclock.tv_sec * 1000000 +
-		    wallclock.tv_usec);
-	} else
-		pct_estimate = 0;
-	pct = racct_getpcpu(p, pct_estimate);
-
 	RACCT_LOCK();
-	racct_set_locked(p, RACCT_CPU, runtime, 0);
-	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
+
+	racct_settime_locked(p, true);
+	racct_zeropcpu_locked(p);
 
 	KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
 	    ("process reaped with %ju allocated for RSS\n",
@@ -1068,6 +1001,10 @@ racct_move(struct racct *dest, struct racct *src)
 	RACCT_LOCK();
 	racct_add_racct(dest, src);
 	racct_sub_racct(src, src);
+	dest->r_runtime = src->r_runtime;
+	dest->r_time = src->r_time;
+	src->r_runtime = 0;
+	timevalsub(&src->r_time, &src->r_time);
 	RACCT_UNLOCK();
 }
 
@@ -1170,8 +1107,6 @@ racct_proc_wakeup(struct proc *p)
 static void
 racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
 {
-	int64_t r_old, r_new;
-
 	ASSERT_RACCT_ENABLED();
 	RACCT_LOCK_ASSERT();
 
@@ -1181,15 +1116,6 @@ racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
 	rctl_throttle_decay(racct, RACCT_READIOPS);
 	rctl_throttle_decay(racct, RACCT_WRITEIOPS);
 #endif
-
-	r_old = racct->r_resources[RACCT_PCTCPU];
-
-	/* If there is nothing to decay, just exit. */
-	if (r_old <= 0)
-		return;
-
-	r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
-	racct->r_resources[RACCT_PCTCPU] = r_new;
 }
 
 static void
@@ -1221,15 +1147,105 @@ racct_decay(void)
 }
 
 static void
+racct_updatepcpu_racct_locked(struct racct *racct)
+{
+	struct timeval diff;
+	uint64_t elapsed;
+	uint64_t runtime;
+	uint64_t newpcpu;
+	uint64_t oldpcpu;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	/* Difference between now and previously-recorded time. */
+	microuptime(&diff);
+	timevalsub(&diff, &racct->r_time);
+	elapsed = (uint64_t)diff.tv_sec * 1000000 + diff.tv_usec;
+
+	/* Difference between current and previously-recorded runtime. */
+	runtime = racct->r_resources[RACCT_CPU] - racct->r_runtime;
+
+	newpcpu = runtime * 100 * 1000000 / elapsed;
+	oldpcpu = racct->r_resources[RACCT_PCTCPU];
+	/*
+	 * This calculation is equivalent to
+	 *    (1 - 0.3) * newpcpu + 0.3 * oldpcpu
+	 * where RACCT_DECAY_FACTOR = 0.3 * FSCALE.
+	 */
+	racct->r_resources[RACCT_PCTCPU] = ((FSCALE - RACCT_DECAY_FACTOR) *
+	    newpcpu + RACCT_DECAY_FACTOR * oldpcpu) / FSCALE;
+	if (racct->r_resources[RACCT_PCTCPU] >
+	    100 * 1000000 * (uint64_t)mp_ncpus)
+		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 *
+		    (uint64_t)mp_ncpus;
+
+	/* Record current times. */
+	racct->r_runtime = racct->r_resources[RACCT_CPU];
+	timevaladd(&racct->r_time, &diff);
+}
+
+static void
+racct_zeropcpu_locked(struct proc *p)
+{
+	ASSERT_RACCT_ENABLED();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	p->p_racct->r_resources[RACCT_PCTCPU] = 0;
+}
+
+static void
+racct_updatepcpu_locked(struct proc *p)
+{
+	ASSERT_RACCT_ENABLED();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	racct_updatepcpu_racct_locked(p->p_racct);
+}
+
+static void
+racct_updatepcpu_pre(void)
+{
+
+	RACCT_LOCK();
+}
+
+static void
+racct_updatepcpu_post(void)
+{
+
+	RACCT_UNLOCK();
+}
+
+static void
+racct_updatepcpu_racct_callback(struct racct *racct, void *dummy1, void *dummy2)
+{
+	racct_updatepcpu_racct_locked(racct);
+}
+
+static void
+racct_updatepcpu_containers(void)
+{
+	ASSERT_RACCT_ENABLED();
+
+	ui_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+	loginclass_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+	prison_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+}
+
+static void
 racctd(void)
 {
-	struct thread *td;
 	struct proc *p;
-	struct timeval wallclock;
-	uint64_t pct, pct_estimate, runtime;
+	struct proc *idle;
 
 	ASSERT_RACCT_ENABLED();
 
+	idle = STAILQ_FIRST(&cpuhead)->pc_idlethread->td_proc;
+
 	for (;;) {
 		racct_decay();
 
@@ -1237,36 +1253,16 @@ racctd(void)
 
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
+			if (p == idle) {
+				PROC_UNLOCK(p);
+				continue;
+			}
 			if (p->p_state != PRS_NORMAL ||
 			    (p->p_flag & P_IDLEPROC) != 0) {
-				if (p->p_state == PRS_ZOMBIE)
-					racct_set(p, RACCT_PCTCPU, 0);
 				PROC_UNLOCK(p);
 				continue;
 			}
 
-			microuptime(&wallclock);
-			timevalsub(&wallclock, &p->p_stats->p_start);
-			PROC_STATLOCK(p);
-			FOREACH_THREAD_IN_PROC(p, td)
-				ruxagg(p, td);
-			runtime = cputick2usec(p->p_rux.rux_runtime);
-			PROC_STATUNLOCK(p);
-#ifdef notyet
-			KASSERT(runtime >= p->p_prev_runtime,
-			    ("runtime < p_prev_runtime"));
-#else
-			if (runtime < p->p_prev_runtime)
-				runtime = p->p_prev_runtime;
-#endif
-			p->p_prev_runtime = runtime;
-			if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
-				pct_estimate = (1000000 * runtime * 100) /
-				    ((uint64_t)wallclock.tv_sec * 1000000 +
-				    wallclock.tv_usec);
-			} else
-				pct_estimate = 0;
-			pct = racct_getpcpu(p, pct_estimate);
 			RACCT_LOCK();
 #ifdef RCTL
 			rctl_throttle_decay(p->p_racct, RACCT_READBPS);
@@ -1274,11 +1270,8 @@ racctd(void)
 			rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
 			rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
 #endif
-			racct_set_locked(p, RACCT_PCTCPU, pct, 1);
-			racct_set_locked(p, RACCT_CPU, runtime, 0);
-			racct_set_locked(p, RACCT_WALLCLOCK,
-			    (uint64_t)wallclock.tv_sec * 1000000 +
-			    wallclock.tv_usec, 0);
+			racct_settime_locked(p, false);
+			racct_updatepcpu_locked(p);
 			RACCT_UNLOCK();
 			PROC_UNLOCK(p);
 		}
@@ -1306,6 +1299,8 @@ racctd(void)
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
+
+		racct_updatepcpu_containers();
 		pause("-", hz);
 	}
 }
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 9830e5093a3a..2a6f0989f6aa 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -571,7 +571,10 @@ kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 				td->td_rtcgen =
 				    atomic_load_acq_int(&rtc_generation);
 			error = kern_clock_gettime(td, clock_id, &now);
-			KASSERT(error == 0, ("kern_clock_gettime: %d", error));
+			if (error != 0) {
+				td->td_rtcgen = 0;
+				return (error);
+			}
 			timespecsub(&ts, &now, &ts);
 		}
 		if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ab47b6ad29a3..a65c3ca128d9 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -57,7 +57,7 @@
  *	  b : public affirmation by word or example of usually
  *	      religious faith or conviction <the heroic witness to divine
  *	      life -- Pilot>
- *	6 capitalized : a member of the Jehovah's Witnesses 
+ *	6 capitalized : a member of the Jehovah's Witnesses
  */
 
 /*
@@ -131,7 +131,7 @@
 #define	LI_SLEEPABLE	0x00040000	/* Lock may be held while sleeping. */
 
 #ifndef WITNESS_COUNT
-#define	WITNESS_COUNT 		1536
+#define	WITNESS_COUNT		1536
 #endif
 #define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
 #define	WITNESS_PENDLIST	(512 + (MAXCPU * 4))
@@ -158,20 +158,18 @@
  * These flags go in the witness relationship matrix and describe the
  * relationship between any two struct witness objects.
  */
-#define	WITNESS_UNRELATED        0x00    /* No lock order relation. */
-#define	WITNESS_PARENT           0x01    /* Parent, aka direct ancestor. */
-#define	WITNESS_ANCESTOR         0x02    /* Direct or indirect ancestor. */
-#define	WITNESS_CHILD            0x04    /* Child, aka direct descendant. */
-#define	WITNESS_DESCENDANT       0x08    /* Direct or indirect descendant. */
-#define	WITNESS_ANCESTOR_MASK    (WITNESS_PARENT | WITNESS_ANCESTOR)
-#define	WITNESS_DESCENDANT_MASK  (WITNESS_CHILD | WITNESS_DESCENDANT)
-#define	WITNESS_RELATED_MASK						\
-	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
-#define	WITNESS_REVERSAL         0x10    /* A lock order reversal has been
-					  * observed. */
-#define	WITNESS_RESERVED1        0x20    /* Unused flag, reserved. */
-#define	WITNESS_RESERVED2        0x40    /* Unused flag, reserved. */
-#define	WITNESS_LOCK_ORDER_KNOWN 0x80    /* This lock order is known. */
+#define	WITNESS_UNRELATED	0x00	/* No lock order relation. */
+#define	WITNESS_PARENT		0x01	/* Parent, aka direct ancestor. */
+#define	WITNESS_ANCESTOR	0x02	/* Direct or indirect ancestor. */
+#define	WITNESS_CHILD		0x04	/* Child, aka direct descendant. */
+#define	WITNESS_DESCENDANT	0x08	/* Direct or indirect descendant. */
+#define	WITNESS_ANCESTOR_MASK	(WITNESS_PARENT | WITNESS_ANCESTOR)
+#define	WITNESS_DESCENDANT_MASK	(WITNESS_CHILD | WITNESS_DESCENDANT)
+#define	WITNESS_RELATED_MASK	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
+#define	WITNESS_REVERSAL	0x10	/* A lock order reversal has been observed. */
+#define	WITNESS_RESERVED1	0x20	/* Unused flag, reserved. */
+#define	WITNESS_RESERVED2	0x40	/* Unused flag, reserved. */
+#define	WITNESS_LOCK_ORDER_KNOWN 0x80	/* This lock order is known. */
 
 /* Descendant to ancestor flags */
 #define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)
@@ -218,20 +216,18 @@ struct lock_list_entry {
  * (for example, "vnode interlock").
  */
 struct witness {
-	char  			w_name[MAX_W_NAME];
-	uint32_t 		w_index;  /* Index in the relationship matrix */
+	char			w_name[MAX_W_NAME];
+	uint32_t		w_index;	/* Index in the relationship matrix */
 	struct lock_class	*w_class;
-	STAILQ_ENTRY(witness) 	w_list;		/* List of all witnesses. */
-	STAILQ_ENTRY(witness) 	w_typelist;	/* Witnesses of a type. */
-	struct witness		*w_hash_next; /* Linked list in hash buckets. */
-	const char		*w_file; /* File where last acquired */
-	uint32_t 		w_line; /* Line where last acquired */
-	uint32_t 		w_refcount;
-	uint16_t 		w_num_ancestors; /* direct/indirect
-						  * ancestor count */
-	uint16_t 		w_num_descendants; /* direct/indirect
-						    * descendant count */
-	int16_t 		w_ddb_level;
+	STAILQ_ENTRY(witness)	w_list;		/* List of all witnesses. */
+	STAILQ_ENTRY(witness)	w_typelist;	/* Witnesses of a type. */
+	struct witness		*w_hash_next;	/* Linked list in hash buckets. */
+	const char		*w_file;	/* File where last acquired */
+	uint32_t		w_line;		/* Line where last acquired */
+	uint32_t		w_refcount;
+	uint16_t		w_num_ancestors;   /* direct/indirect ancestor count */
+	uint16_t		w_num_descendants; /* direct/indirect descendant count */
+	int16_t			w_ddb_level;
 	unsigned		w_displayed:1;
 	unsigned		w_reversed:1;
 };
@@ -265,7 +261,7 @@ struct witness_lock_order_data {
 /*
  * The witness lock order data hash table. Keys are witness index tuples
  * (struct witness_lock_order_key), elements are lock order data objects
- * (struct witness_lock_order_data). 
+ * (struct witness_lock_order_data).
  */
 struct witness_lock_order_hash {
 	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
@@ -295,7 +291,6 @@ struct witness_order_list_entry {
 static __inline int
 witness_lock_type_equal(struct witness *w1, struct witness *w2)
 {
-
 	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
 		(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
 }
@@ -304,7 +299,6 @@ static __inline int
 witness_lock_order_key_equal(const struct witness_lock_order_key *a,
     const struct witness_lock_order_key *b)
 {
-
 	return (a->from == b->from && a->to == b->to);
 }
 
@@ -415,7 +409,7 @@ SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
 int badstack_sbuf_size;
 
 int witness_count = WITNESS_COUNT;
-SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN, 
+SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
     &witness_count, 0, "");
 
 /*
@@ -760,7 +754,6 @@ static int witness_spin_warn = 0;
 static const char *
 fixup_filename(const char *file)
 {
-
 	if (file == NULL)
 		return (NULL);
 	while (strncmp(file, "../", 3) == 0)
@@ -835,7 +828,7 @@ witness_startup(void *mem)
 	w_free_cnt--;
 
 	for (i = 0; i < witness_count; i++) {
-		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 
+		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
 		    (witness_count + 1));
 	}
 
@@ -989,16 +982,16 @@ witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
 {
 	int i;
 
- 	for (i = 0; i < indent; i++)
- 		prnt(" ");
+	for (i = 0; i < indent; i++)
+		prnt(" ");
 	prnt("%s (type: %s, depth: %d, active refs: %d)",
 	     w->w_name, w->w_class->lc_name,
 	     w->w_ddb_level, w->w_refcount);
- 	if (w->w_displayed) {
- 		prnt(" -- (already displayed)\n");
- 		return;
- 	}
- 	w->w_displayed = 1;
+	if (w->w_displayed) {
+		prnt(" -- (already displayed)\n");
+		return;
+	}
+	w->w_displayed = 1;
 	if (w->w_file != NULL && w->w_line != 0)
 		prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file),
 		    w->w_line);
@@ -1079,7 +1072,6 @@ witness_ddb_display(int(*prnt)(const char *fmt, ...))
 int
 witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
 {
-
 	if (witness_watch == -1 || KERNEL_PANICKED())
 		return (0);
 
@@ -1257,7 +1249,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
 			w->w_reversed = 1;
 			mtx_unlock_spin(&w_mtx);
 			witness_output(
-			    "acquiring duplicate lock of same type: \"%s\"\n", 
+			    "acquiring duplicate lock of same type: \"%s\"\n",
 			    w->w_name);
 			witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name,
 			    fixup_filename(plock->li_file), plock->li_line);
@@ -1743,7 +1735,7 @@ found:
 
 	/*
 	 * In order to reduce contention on w_mtx, we want to keep always an
-	 * head object into lists so that frequent allocation from the 
+	 * head object into lists so that frequent allocation from the
 	 * free witness pool (and subsequent locking) is avoided.
 	 * In order to maintain the current code simple, when the head
 	 * object is totally unloaded it means also that we do not have
@@ -1781,7 +1773,7 @@ witness_thread_exit(struct thread *td)
 				n++;
 				witness_list_lock(&lle->ll_children[i],
 				    witness_output);
-				
+
 			}
 		kassert_panic(
 		    "Thread %p cannot exit while holding sleeplocks\n", td);
@@ -1948,7 +1940,6 @@ found:
 static void
 depart(struct witness *w)
 {
-
 	MPASS(w->w_refcount == 0);
 	if (w->w_class->lc_flags & LC_SLEEPLOCK) {
 		w_sleep_cnt--;
@@ -1999,18 +1990,18 @@ adopt(struct witness *parent, struct witness *child)
 		child->w_num_ancestors++;
 	}
 
-	/* 
-	 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 
+	/*
+	 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
 	 * an ancestor of 'pi' during this loop.
 	 */
 	for (i = 1; i <= w_max_used_index; i++) {
-		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 
+		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
 		    (i != pi))
 			continue;
 
 		/* Find each descendant of 'i' and mark it as a descendant. */
 		for (j = 1; j <= w_max_used_index; j++) {
-			/* 
+			/*
 			 * Skip children that are already marked as
 			 * descendants of 'i'.
 			 */
@@ -2021,7 +2012,7 @@ adopt(struct witness *parent, struct witness *child)
 			 * We are only interested in descendants of 'ci'. Note
 			 * that 'ci' itself is counted as a descendant of 'ci'.
 			 */
-			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && 
+			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
 			    (j != ci))
 				continue;
 			w_rmatrix[i][j] |= WITNESS_ANCESTOR;
@@ -2029,16 +2020,16 @@ adopt(struct witness *parent, struct witness *child)
 			w_data[i].w_num_descendants++;
 			w_data[j].w_num_ancestors++;
 
-			/* 
+			/*
 			 * Make sure we aren't marking a node as both an
-			 * ancestor and descendant. We should have caught 
+			 * ancestor and descendant. We should have caught
 			 * this as a lock order reversal earlier.
 			 */
 			if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
 			    (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
 				printf("witness rmatrix paradox! [%d][%d]=%d "
 				    "both ancestor and descendant\n",
-				    i, j, w_rmatrix[i][j]); 
+				    i, j, w_rmatrix[i][j]);
 				kdb_backtrace();
 				printf("Witness disabled.\n");
 				witness_watch = -1;
@@ -2047,7 +2038,7 @@ adopt(struct witness *parent, struct witness *child)
 			    (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
 				printf("witness rmatrix paradox! [%d][%d]=%d "
 				    "both ancestor and descendant\n",
-				    j, i, w_rmatrix[j][i]); 
+				    j, i, w_rmatrix[j][i]);
 				kdb_backtrace();
 				printf("Witness disabled.\n");
 				witness_watch = -1;
@@ -2124,7 +2115,6 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
 static int
 isitmychild(struct witness *parent, struct witness *child)
 {
-
 	return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
 }
 
@@ -2134,7 +2124,6 @@ isitmychild(struct witness *parent, struct witness *child)
 static int
 isitmydescendant(struct witness *ancestor, struct witness *descendant)
 {
-
 	return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
 	    __func__));
 }
@@ -2182,7 +2171,7 @@ witness_get(void)
 	STAILQ_REMOVE_HEAD(&w_free, w_list);
 	w_free_cnt--;
 	index = w->w_index;
-	MPASS(index > 0 && index == w_max_used_index+1 &&
+	MPASS(index > 0 && index == w_max_used_index + 1 &&
 	    index < witness_count);
 	bzero(w, sizeof(*w));
 	w->w_index = index;
@@ -2194,7 +2183,6 @@ witness_get(void)
 static void
 witness_free(struct witness *w)
 {
-
 	STAILQ_INSERT_HEAD(&w_free, w, w_list);
 	w_free_cnt++;
 }
@@ -2219,11 +2207,10 @@ witness_lock_list_get(void)
 	bzero(lle, sizeof(*lle));
 	return (lle);
 }
-		
+
 static void
 witness_lock_list_free(struct lock_list_entry *lle)
 {
-
 	mtx_lock_spin(&w_mtx);
 	lle->ll_next = w_lock_list_free;
 	w_lock_list_free = lle;
@@ -2297,7 +2284,6 @@ witness_voutput(const char *fmt, va_list ap)
 static int
 witness_thread_has_locks(struct thread *td)
 {
-
 	if (td->td_sleeplocks == NULL)
 		return (0);
 	return (td->td_sleeplocks->ll_count != 0);
@@ -2573,14 +2559,12 @@ witness_setflag(struct lock_object *lock, int flag, int set)
 void
 witness_norelease(struct lock_object *lock)
 {
-
 	witness_setflag(lock, LI_NORELEASE, 1);
 }
 
 void
 witness_releaseok(struct lock_object *lock)
 {
-
 	witness_setflag(lock, LI_NORELEASE, 0);
 }
 
@@ -2588,7 +2572,6 @@ witness_releaseok(struct lock_object *lock)
 static void
 witness_ddb_list(struct thread *td)
 {
-
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	KASSERT(kdb_active, ("%s: not in the debugger", __func__));
 
@@ -2653,7 +2636,6 @@ DB_SHOW_ALIAS_FLAGS(alllocks, db_witness_list_all, DB_CMD_MEMSAFE);
 
 DB_SHOW_COMMAND_FLAGS(witness, db_witness_display, DB_CMD_MEMSAFE)
 {
-
 	witness_ddb_display(db_printf);
 }
 #endif
@@ -2673,9 +2655,9 @@ sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx)
 	/* Allocate and init temporary storage space. */
 	tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
 	tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
-	tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, 
+	tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
 	    M_WAITOK | M_ZERO);
-	tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, 
+	tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
 	    M_WAITOK | M_ZERO);
 	stack_zero(&tmp_data1->wlod_stack);
 	stack_zero(&tmp_data2->wlod_stack);
@@ -2750,12 +2732,12 @@ restart:
 
 			sbuf_printf(sb,
 	    "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
-			    tmp_w1->w_name, tmp_w1->w_class->lc_name, 
+			    tmp_w1->w_name, tmp_w1->w_class->lc_name,
 			    tmp_w2->w_name, tmp_w2->w_class->lc_name);
 			if (data1) {
 				sbuf_printf(sb,
 			"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
-				    tmp_w1->w_name, tmp_w1->w_class->lc_name, 
+				    tmp_w1->w_name, tmp_w1->w_class->lc_name,
 				    tmp_w2->w_name, tmp_w2->w_class->lc_name);
 				stack_sbuf_print(sb, &tmp_data1->wlod_stack);
 				sbuf_putc(sb, '\n');
@@ -2763,7 +2745,7 @@ restart:
 			if (data2 && data2 != data1) {
 				sbuf_printf(sb,
 			"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
-				    tmp_w2->w_name, tmp_w2->w_class->lc_name, 
+				    tmp_w2->w_name, tmp_w2->w_class->lc_name,
 				    tmp_w1->w_name, tmp_w1->w_class->lc_name);
 				stack_sbuf_print(sb, &tmp_data2->wlod_stack);
 				sbuf_putc(sb, '\n');
@@ -2823,7 +2805,6 @@ sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
 static int
 sbuf_db_printf_drain(void *arg __unused, const char *data, int len)
 {
-
 	return (db_printf("%.*s", len, data));
 }
 
@@ -3068,7 +3049,7 @@ witness_lock_order_get(struct witness *parent, struct witness *child)
 	    & WITNESS_LOCK_ORDER_KNOWN) == 0)
 		goto out;
 
-	hash = witness_hash_djb2((const char*)&key,
+	hash = witness_hash_djb2((const char *)&key,
 	    sizeof(key)) % w_lohash.wloh_size;
 	data = w_lohash.wloh_array[hash];
 	while (data != NULL) {
@@ -3089,7 +3070,6 @@ out:
 static int
 witness_lock_order_check(struct witness *parent, struct witness *child)
 {
-
 	if (parent != child &&
 	    w_rmatrix[parent->w_index][child->w_index]
 	    & WITNESS_LOCK_ORDER_KNOWN &&
@@ -3115,7 +3095,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
 	    & WITNESS_LOCK_ORDER_KNOWN)
 		return (1);
 
-	hash = witness_hash_djb2((const char*)&key,
+	hash = witness_hash_djb2((const char *)&key,
 	    sizeof(key)) % w_lohash.wloh_size;
 	w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
 	data = w_lofree;
@@ -3134,7 +3114,6 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
 static void
 witness_increment_graph_generation(void)
 {
-
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 	w_generation++;
@@ -3143,7 +3122,6 @@ witness_increment_graph_generation(void)
 static int
 witness_output_drain(void *arg __unused, const char *data, int len)
 {
-
 	witness_output("%.*s", len, data);
 	return (len);
 }
diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c
index ab7e048a2ab1..565ab3ad6ee6 100644
--- a/sys/kern/sys_timerfd.c
+++ b/sys/kern/sys_timerfd.c
@@ -206,7 +206,6 @@ retry:
 			mtx_unlock(&tfd->tfd_lock);
 			return (EAGAIN);
 		}
-		td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
 		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
 		    PCATCH, "tfdrd", 0);
 		if (error == 0) {
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 90a4f3a7dad8..4122f9261871 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -6,7 +6,7 @@
 
 const char *syscallnames[] = {
 	"syscall",			/* 0 = syscall */
-	"exit",			/* 1 = exit */
+	"_exit",			/* 1 = _exit */
 	"fork",			/* 2 = fork */
 	"read",			/* 3 = read */
 	"write",			/* 4 = write */
@@ -84,8 +84,8 @@ const char *syscallnames[] = {
 	"obs_vhangup",			/* 76 = obsolete vhangup */
 	"obs_vlimit",			/* 77 = obsolete vlimit */
 	"mincore",			/* 78 = mincore */
-	"getgroups",			/* 79 = getgroups */
-	"setgroups",			/* 80 = setgroups */
+	"compat14.getgroups",		/* 79 = freebsd14 getgroups */
+	"compat14.setgroups",		/* 80 = freebsd14 setgroups */
 	"getpgrp",			/* 81 = getpgrp */
 	"setpgid",			/* 82 = setpgid */
 	"setitimer",			/* 83 = setitimer */
@@ -600,4 +600,6 @@ const char *syscallnames[] = {
 	"exterrctl",			/* 592 = exterrctl */
 	"inotify_add_watch_at",			/* 593 = inotify_add_watch_at */
 	"inotify_rm_watch",			/* 594 = inotify_rm_watch */
+	"getgroups",			/* 595 = getgroups */
+	"setgroups",			/* 596 = setgroups */
 };
diff --git a/sys/kern/syscalls.conf b/sys/kern/syscalls.conf
index a98d52659832..ae7bd1f87612 100644
--- a/sys/kern/syscalls.conf
+++ b/sys/kern/syscalls.conf
@@ -1,3 +1,4 @@
 libsysmap="../../lib/libsys/syscalls.map"
 libsys_h="../../lib/libsys/_libsys.h"
 sysmk="../sys/syscall.mk"
+syshdr_extra="#define 	SYS_exit	SYS__exit"
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index a8815afee866..fa64597d14a5 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -51,6 +51,7 @@
 ;	SYSMUX	syscall multiplexer.  No prototype, argument struct, or
 ;		handler is declared or used.  Handled in MD syscall code.
 ;	CAPENABLED syscall is allowed in capability mode
+;	NORETURN the syscall does not return
 ;
 ; To support programmatic generation of both the default ABI and 32-bit compat
 ; (freebsd32) we impose a number of restrictions on the types of system calls.
@@ -124,8 +125,8 @@
 		    ...
 		);
 	}
-1	AUE_EXIT	STD|CAPENABLED {
-		void exit(
+1	AUE_EXIT	STD|CAPENABLED|NORETURN {
+		void _exit(
 		    int rval
 		);
 	}
@@ -551,13 +552,13 @@
 		    _Out_writes_bytes_(len/PAGE_SIZE) char *vec
 		);
 	}
-79	AUE_GETGROUPS	STD|CAPENABLED {
+79	AUE_GETGROUPS	STD|CAPENABLED|COMPAT14 {
 		int getgroups(
 		    int gidsetsize,
 		    _Out_writes_opt_(gidsetsize) gid_t *gidset
 		);
 	}
-80	AUE_SETGROUPS	STD {
+80	AUE_SETGROUPS	STD|COMPAT14 {
 		int setgroups(
 		    int gidsetsize,
 		    _In_reads_(gidsetsize) const gid_t *gidset
@@ -3370,5 +3371,17 @@
 		    int wd
 		);
 	}
+595	AUE_GETGROUPS	STD|CAPENABLED {
+		int getgroups(
+		    int gidsetsize,
+		    _Out_writes_opt_(gidsetsize) gid_t *gidset
+		);
+	}
+596	AUE_SETGROUPS	STD {
+		int setgroups(
+		    int gidsetsize,
+		    _In_reads_(gidsetsize) const gid_t *gidset
+		);
+	}
 
 ; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 467caa71f20d..2b1ea9eed8d4 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -17,9 +17,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
 		*n_args = 0;
 		break;
 	}
-	/* exit */
+	/* _exit */
 	case 1: {
-		struct exit_args *p = params;
+		struct _exit_args *p = params;
 		iarg[a++] = p->rval; /* int */
 		*n_args = 1;
 		break;
@@ -454,22 +454,6 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
 		*n_args = 3;
 		break;
 	}
-	/* getgroups */
-	case 79: {
-		struct getgroups_args *p = params;
-		iarg[a++] = p->gidsetsize; /* int */
-		uarg[a++] = (intptr_t)p->gidset; /* gid_t * */
-		*n_args = 2;
-		break;
-	}
-	/* setgroups */
-	case 80: {
-		struct setgroups_args *p = params;
-		iarg[a++] = p->gidsetsize; /* int */
-		uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */
-		*n_args = 2;
-		break;
-	}
 	/* getpgrp */
 	case 81: {
 		*n_args = 0;
@@ -3500,6 +3484,22 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
 		*n_args = 2;
 		break;
 	}
+	/* getgroups */
+	case 595: {
+		struct getgroups_args *p = params;
+		iarg[a++] = p->gidsetsize; /* int */
+		uarg[a++] = (intptr_t)p->gidset; /* gid_t * */
+		*n_args = 2;
+		break;
+	}
+	/* setgroups */
+	case 596: {
+		struct setgroups_args *p = params;
+		iarg[a++] = p->gidsetsize; /* int */
+		uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */
+		*n_args = 2;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -3513,7 +3513,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 	/* syscall */
 	case 0:
 		break;
-	/* exit */
+	/* _exit */
 	case 1:
 		switch (ndx) {
 		case 0:
@@ -4199,32 +4199,6 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 			break;
 		};
 		break;
-	/* getgroups */
-	case 79:
-		switch (ndx) {
-		case 0:
-			p = "int";
-			break;
-		case 1:
-			p = "userland gid_t *";
-			break;
-		default:
-			break;
-		};
-		break;
-	/* setgroups */
-	case 80:
-		switch (ndx) {
-		case 0:
-			p = "int";
-			break;
-		case 1:
-			p = "userland const gid_t *";
-			break;
-		default:
-			break;
-		};
-		break;
 	/* getpgrp */
 	case 81:
 		break;
@@ -9367,6 +9341,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 			break;
 		};
 		break;
+	/* getgroups */
+	case 595:
+		switch (ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "userland gid_t *";
+			break;
+		default:
+			break;
+		};
+		break;
+	/* setgroups */
+	case 596:
+		switch (ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "userland const gid_t *";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -9380,7 +9380,7 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 	switch (sysnum) {
 	/* syscall */
 	case 0:
-	/* exit */
+	/* _exit */
 	case 1:
 		if (ndx == 0 || ndx == 1)
 			p = "void";
@@ -9633,16 +9633,6 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
-	/* getgroups */
-	case 79:
-		if (ndx == 0 || ndx == 1)
-			p = "int";
-		break;
-	/* setgroups */
-	case 80:
-		if (ndx == 0 || ndx == 1)
-			p = "int";
-		break;
 	/* getpgrp */
 	case 81:
 	/* setpgid */
@@ -11365,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
+	/* getgroups */
+	case 595:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* setgroups */
+	case 596:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
 	default:
 		break;
 	};
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 85fe48ddd466..eb1327f7f2de 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -1160,7 +1160,8 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 	if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR)
 		return (EINVAL);
 
-	if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0)
+	if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC |
+	    O_CLOFORK)) != 0)
 		return (EINVAL);
 
 	largepage = (shmflags & SHM_LARGEPAGE) != 0;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 0056dac65c7d..19870e989437 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -154,15 +154,12 @@ static struct task	unp_defer_task;
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
-#ifndef PIPSIZ
-#define	PIPSIZ	8192
-#endif
-static u_long	unpst_sendspace = PIPSIZ;
-static u_long	unpst_recvspace = PIPSIZ;
+static u_long	unpst_sendspace = 64*1024;
+static u_long	unpst_recvspace = 64*1024;
 static u_long	unpdg_maxdgram = 8*1024;	/* support 8KB syslog msgs */
 static u_long	unpdg_recvspace = 16*1024;
-static u_long	unpsp_sendspace = PIPSIZ;
-static u_long	unpsp_recvspace = PIPSIZ;
+static u_long	unpsp_sendspace = 64*1024;
+static u_long	unpsp_recvspace = 64*1024;
 
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Local domain");
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index fa655c43d155..19c39e42bafa 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -5170,7 +5170,7 @@ bufstrategy(struct bufobj *bo, struct buf *bp)
 
 	vp = bp->b_vp;
 	KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy"));
-	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
+	KASSERT(!VN_ISDEV(vp),
 	    ("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp));
 	i = VOP_STRATEGY(vp, bp);
 	KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp));
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index fd6202a1424c..85f67731e1cc 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
 		case _PC_NAMEDATTR_ENABLED:
 		case _PC_HAS_NAMEDATTR:
 		case _PC_HAS_HIDDENSYSTEM:
+		case _PC_CLONE_BLKSIZE:
 			*ap->a_retval = 0;
 			return (0);
 		default:
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index cd30d5cfae47..93ac001af8ad 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -103,6 +103,16 @@ struct vattr va_null;
  * Routines having to do with the management of the vnode table.
  */
 
+void
+vfs_unref_vfsconf(struct vfsconf *vfsp)
+{
+	vfsconf_lock();
+	KASSERT(vfsp->vfc_refcount > 0,
+	    ("vfs %p refcount underflow %d", vfsp, vfsp->vfc_refcount));
+	vfsp->vfc_refcount--;
+	vfsconf_unlock();
+}
+
 static struct vfsconf *
 vfs_byname_locked(const char *name)
 {
@@ -123,9 +133,11 @@ vfs_byname(const char *name)
 {
 	struct vfsconf *vfsp;
 
-	vfsconf_slock();
+	vfsconf_lock();
 	vfsp = vfs_byname_locked(name);
-	vfsconf_sunlock();
+	if (vfsp != NULL)
+		vfsp->vfc_refcount++;
+	vfsconf_unlock();
 	return (vfsp);
 }
 
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
index 746a5a39208e..b265a5ff3a62 100644
--- a/sys/kern/vfs_inotify.c
+++ b/sys/kern/vfs_inotify.c
@@ -801,6 +801,7 @@ vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 			if (error != 0)
 				break;
+			NDFREE_PNBUF(&nd);
 			vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
 			vrele(nd.ni_vp);
 		}
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 8e64a7fe966b..13403acacc08 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -683,7 +683,6 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
 	MPASSERT(mp->mnt_vfs_ops == 1, mp,
 	    ("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
 	(void) vfs_busy(mp, MBF_NOWAIT);
-	atomic_add_acq_int(&vfsp->vfc_refcount, 1);
 	mp->mnt_op = vfsp->vfc_vfsops;
 	mp->mnt_vfc = vfsp;
 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
@@ -731,7 +730,6 @@ vfs_mount_destroy(struct mount *mp)
 	    __FILE__, __LINE__));
 	MPPASS(mp->mnt_writeopcount == 0, mp);
 	MPPASS(mp->mnt_secondary_writes == 0, mp);
-	atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
 		struct vnode *vp;
 
@@ -769,6 +767,9 @@ vfs_mount_destroy(struct mount *mp)
 		vfs_free_addrlist(mp->mnt_export);
 		free(mp->mnt_export, M_MOUNT);
 	}
+	vfsconf_lock();
+	mp->mnt_vfc->vfc_refcount--;
+	vfsconf_unlock();
 	crfree(mp->mnt_cred);
 	uma_zfree(mount_zone, mp);
 }
@@ -1133,6 +1134,7 @@ vfs_domount_first(
 	if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
 	    vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
 		vput(vp);
+		vfs_unref_vfsconf(vfsp);
 		return (EPERM);
 	}
 
@@ -1169,6 +1171,7 @@ vfs_domount_first(
 	}
 	if (error != 0) {
 		vput(vp);
+		vfs_unref_vfsconf(vfsp);
 		return (error);
 	}
 	vn_seqc_write_begin(vp);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 9704e9c160a8..bf3ed9d515dc 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2839,7 +2839,7 @@ setfflags(struct thread *td, struct vnode *vp, u_long flags)
 	 * if they are allowed to set flags and programs assume that
 	 * chown can't fail when done as root.
 	 */
-	if (vp->v_type == VCHR || vp->v_type == VBLK) {
+	if (VN_ISDEV(vp)) {
 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 		if (error != 0)
 			return (error);
@@ -5050,15 +5050,16 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 	size_t retlen;
 	void *rl_rcookie, *rl_wcookie;
 	off_t inoff, outoff, savinoff, savoutoff;
-	bool foffsets_locked;
+	bool foffsets_locked, foffsets_set;
 
 	infp = outfp = NULL;
 	rl_rcookie = rl_wcookie = NULL;
 	foffsets_locked = false;
+	foffsets_set = false;
 	error = 0;
 	retlen = 0;
 
-	if (flags != 0) {
+	if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) {
 		error = EINVAL;
 		goto out;
 	}
@@ -5122,6 +5123,8 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 		}
 		foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0);
 		foffsets_locked = true;
+	} else {
+		foffsets_set = true;
 	}
 	savinoff = inoff;
 	savoutoff = outoff;
@@ -5180,11 +5183,12 @@ out:
 		vn_rangelock_unlock(invp, rl_rcookie);
 	if (rl_wcookie != NULL)
 		vn_rangelock_unlock(outvp, rl_wcookie);
+	if ((foffsets_locked || foffsets_set) &&
+	    (error == EINTR || error == ERESTART)) {
+		inoff = savinoff;
+		outoff = savoutoff;
+	}
 	if (foffsets_locked) {
-		if (error == EINTR || error == ERESTART) {
-			inoff = savinoff;
-			outoff = savoutoff;
-		}
 		if (inoffp == NULL)
 			foffset_unlock(infp, inoff, 0);
 		else
@@ -5193,6 +5197,9 @@ out:
 			foffset_unlock(outfp, outoff, 0);
 		else
 			*outoffp = outoff;
+	} else if (foffsets_set) {
+		*inoffp = inoff;
+		*outoffp = outoff;
 	}
 	if (outfp != NULL)
 		fdrop(outfp, td);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 6451c9e07a60..a4f41192f684 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
 	interrupted = 0;
 	dat = NULL;
 
+	if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+
 	error = vn_lock(invp, LK_SHARED);
 	if (error != 0)
 		goto out;