aboutsummaryrefslogtreecommitdiff
path: root/sys/kern
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/imgact_elf.c54
-rw-r--r--sys/kern/init_sysent.c6
-rw-r--r--sys/kern/kern_prot.c129
-rw-r--r--sys/kern/kern_thread.c6
-rw-r--r--sys/kern/subr_witness.c134
-rw-r--r--sys/kern/syscalls.c6
-rw-r--r--sys/kern/syscalls.master16
-rw-r--r--sys/kern/systrace_args.c104
-rw-r--r--sys/kern/uipc_shm.c3
-rw-r--r--sys/kern/uipc_usrreq.c11
-rw-r--r--sys/kern/vfs_bio.c2
-rw-r--r--sys/kern/vfs_init.c16
-rw-r--r--sys/kern/vfs_inotify.c1
-rw-r--r--sys/kern/vfs_mount.c7
-rw-r--r--sys/kern/vfs_subr.c8
-rw-r--r--sys/kern/vfs_syscalls.c2
-rw-r--r--sys/kern/vfs_vnops.c2
17 files changed, 279 insertions, 228 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 2690ad3b2679..5a53fac50f2c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -84,6 +84,13 @@
#define ELF_NOTE_ROUNDSIZE 4
#define OLD_EI_BRAND 8
+/*
+ * ELF_ABI_NAME is a string name of the ELF ABI. ELF_ABI_ID is used
+ * to build variable names.
+ */
+#define ELF_ABI_NAME __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+#define ELF_ABI_ID __CONCAT(elf, __ELF_WORD_SIZE)
+
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
const char *interp, int32_t *osrel, uint32_t *fctl0);
@@ -104,14 +111,15 @@ static Elf_Word __elfN(untrans_prot)(vm_prot_t);
static size_t __elfN(prepare_register_notes)(struct thread *td,
struct note_info_list *list, struct thread *target_td);
-SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE),
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+SYSCTL_NODE(_kern, OID_AUTO, ELF_ABI_ID, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"");
+#define ELF_NODE_OID __CONCAT(_kern_, ELF_ABI_ID)
+
int __elfN(fallback_brand) = -1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
+ ELF_ABI_NAME " brand of last resort");
static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
@@ -126,22 +134,22 @@ int __elfN(nxstack) =
#else
0;
#endif
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": support PT_GNU_STACK for non-executable stack control");
+ ELF_ABI_NAME ": support PT_GNU_STACK for non-executable stack control");
#if defined(__amd64__)
static int __elfN(vdso) = 1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO,
vdso, CTLFLAG_RWTUN, &__elfN(vdso), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable vdso preloading");
+ ELF_ABI_NAME ": enable vdso preloading");
#else
static int __elfN(vdso) = 0;
#endif
#if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
int i386_read_exec = 0;
-SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
"enable execution from readable segments");
#endif
@@ -161,15 +169,15 @@ sysctl_pie_base(SYSCTL_HANDLER_ARGS)
__elfN(pie_base) = val;
return (0);
}
-SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base,
+SYSCTL_PROC(ELF_NODE_OID, OID_AUTO, pie_base,
CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
sysctl_pie_base, "LU",
"PIE load base without randomization");
-SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr,
+SYSCTL_NODE(ELF_NODE_OID, OID_AUTO, aslr,
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"");
-#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)
+#define ASLR_NODE_OID __CONCAT(ELF_NODE_OID, _aslr)
/*
* Enable ASLR by default for 64-bit non-PIE binaries. 32-bit architectures
@@ -179,8 +187,7 @@ SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr,
static int __elfN(aslr_enabled) = __ELF_WORD_SIZE == 64;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
&__elfN(aslr_enabled), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
- ": enable address map randomization");
+ ELF_ABI_NAME ": enable address map randomization");
/*
* Enable ASLR by default for 64-bit PIE binaries.
@@ -188,8 +195,7 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
static int __elfN(pie_aslr_enabled) = __ELF_WORD_SIZE == 64;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
&__elfN(pie_aslr_enabled), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
- ": enable address map randomization for PIE binaries");
+ ELF_ABI_NAME ": enable address map randomization for PIE binaries");
/*
* Sbrk is deprecated and it can be assumed that in most cases it will not be
@@ -199,27 +205,25 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
static int __elfN(aslr_honor_sbrk) = 0;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
&__elfN(aslr_honor_sbrk), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
+ ELF_ABI_NAME ": assume sbrk is used");
static int __elfN(aslr_stack) = __ELF_WORD_SIZE == 64;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
&__elfN(aslr_stack), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
- ": enable stack address randomization");
+ ELF_ABI_NAME ": enable stack address randomization");
static int __elfN(aslr_shared_page) = __ELF_WORD_SIZE == 64;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, shared_page, CTLFLAG_RWTUN,
&__elfN(aslr_shared_page), 0,
- __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
- ": enable shared page address randomization");
+ ELF_ABI_NAME ": enable shared page address randomization");
static int __elfN(sigfastblock) = 1;
-SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
+SYSCTL_INT(ELF_NODE_OID, OID_AUTO, sigfastblock,
CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
"enable sigfastblock for new processes");
static bool __elfN(allow_wx) = true;
-SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx,
+SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx,
CTLFLAG_RWTUN, &__elfN(allow_wx), 0,
"Allow pages to be mapped simultaneously writable and executable");
@@ -2951,9 +2955,9 @@ __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
*/
static struct execsw __elfN(execsw) = {
.ex_imgact = __CONCAT(exec_, __elfN(imgact)),
- .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+ .ex_name = ELF_ABI_NAME
};
-EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
+EXEC_SET(ELF_ABI_ID, __elfN(execsw));
static vm_prot_t
__elfN(trans_prot)(Elf_Word flags)
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index c0a5479c9634..fcd232cde21e 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -145,8 +145,8 @@ struct sysent sysent[] = {
{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 76 = obsolete vhangup */
{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 77 = obsolete vlimit */
{ .sy_narg = AS(mincore_args), .sy_call = (sy_call_t *)sys_mincore, .sy_auevent = AUE_MINCORE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 78 = mincore */
- { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = getgroups */
- { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = setgroups */
+ { compat14(AS(freebsd14_getgroups_args),getgroups), .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = freebsd14 getgroups */
+ { compat14(AS(freebsd14_setgroups_args),setgroups), .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = freebsd14 setgroups */
{ .sy_narg = 0, .sy_call = (sy_call_t *)sys_getpgrp, .sy_auevent = AUE_GETPGRP, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 81 = getpgrp */
{ .sy_narg = AS(setpgid_args), .sy_call = (sy_call_t *)sys_setpgid, .sy_auevent = AUE_SETPGRP, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 82 = setpgid */
{ .sy_narg = AS(setitimer_args), .sy_call = (sy_call_t *)sys_setitimer, .sy_auevent = AUE_SETITIMER, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 83 = setitimer */
@@ -661,4 +661,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */
{ .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
+ { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
+ { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
};
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index 2cd5b7069023..0ca42d640767 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -310,6 +310,39 @@ sys_getegid(struct thread *td, struct getegid_args *uap)
return (0);
}
+#ifdef COMPAT_FREEBSD14
+int
+freebsd14_getgroups(struct thread *td, struct freebsd14_getgroups_args *uap)
+{
+ struct ucred *cred;
+ int ngrp, error;
+
+ cred = td->td_ucred;
+
+ /*
+ * For FreeBSD < 15.0, we account for the egid being placed at the
+ * beginning of the group list prior to all supplementary groups.
+ */
+ ngrp = cred->cr_ngroups + 1;
+ if (uap->gidsetsize == 0) {
+ error = 0;
+ goto out;
+ } else if (uap->gidsetsize < ngrp) {
+ return (EINVAL);
+ }
+
+ error = copyout(&cred->cr_gid, uap->gidset, sizeof(gid_t));
+ if (error == 0)
+ error = copyout(cred->cr_groups, uap->gidset + 1,
+ (ngrp - 1) * sizeof(gid_t));
+
+out:
+ td->td_retval[0] = ngrp;
+ return (error);
+
+}
+#endif /* COMPAT_FREEBSD14 */
+
#ifndef _SYS_SYSPROTO_H_
struct getgroups_args {
int gidsetsize;
@@ -320,18 +353,11 @@ int
sys_getgroups(struct thread *td, struct getgroups_args *uap)
{
struct ucred *cred;
- gid_t *ugidset;
int ngrp, error;
cred = td->td_ucred;
- /*
- * cr_gid has been moved out of cr_groups, but we'll continue exporting
- * the egid as groups[0] for the time being until we audit userland for
- * any surprises.
- */
- ngrp = cred->cr_ngroups + 1;
-
+ ngrp = cred->cr_ngroups;
if (uap->gidsetsize == 0) {
error = 0;
goto out;
@@ -339,14 +365,7 @@ sys_getgroups(struct thread *td, struct getgroups_args *uap)
if (uap->gidsetsize < ngrp)
return (EINVAL);
- ugidset = uap->gidset;
- error = copyout(&cred->cr_gid, ugidset, sizeof(*ugidset));
- if (error != 0)
- goto out;
-
- if (ngrp > 1)
- error = copyout(cred->cr_groups, ugidset + 1,
- (ngrp - 1) * sizeof(*ugidset));
+ error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t));
out:
td->td_retval[0] = ngrp;
return (error);
@@ -1186,6 +1205,44 @@ fail:
return (error);
}
+#ifdef COMPAT_FREEBSD14
+int
+freebsd14_setgroups(struct thread *td, struct freebsd14_setgroups_args *uap)
+{
+ gid_t smallgroups[CRED_SMALLGROUPS_NB];
+ gid_t *groups;
+ int gidsetsize, error;
+
+ /*
+ * Before FreeBSD 15.0, we allow one more group to be supplied to
+ * account for the egid appearing before the supplementary groups. This
+ * may technically allow one more supplementary group for systems that
+ * did use the default NGROUPS_MAX if we round it back up to 1024.
+ */
+ gidsetsize = uap->gidsetsize;
+ if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
+ return (EINVAL);
+
+ if (gidsetsize > CRED_SMALLGROUPS_NB)
+ groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK);
+ else
+ groups = smallgroups;
+
+ error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
+ if (error == 0) {
+ int ngroups = gidsetsize > 0 ? gidsetsize - 1 /* egid */ : 0;
+
+ error = kern_setgroups(td, &ngroups, groups + 1);
+ if (error == 0 && gidsetsize > 0)
+ td->td_proc->p_ucred->cr_gid = groups[0];
+ }
+
+ if (groups != smallgroups)
+ free(groups, M_TEMP);
+ return (error);
+}
+#endif /* COMPAT_FREEBSD14 */
+
#ifndef _SYS_SYSPROTO_H_
struct setgroups_args {
int gidsetsize;
@@ -1210,8 +1267,7 @@ sys_setgroups(struct thread *td, struct setgroups_args *uap)
* setgroups() differ.
*/
gidsetsize = uap->gidsetsize;
- /* XXXKE Limit to ngroups_max when we change the userland interface. */
- if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
+ if (gidsetsize > ngroups_max || gidsetsize < 0)
return (EINVAL);
if (gidsetsize > CRED_SMALLGROUPS_NB)
@@ -1238,35 +1294,17 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
struct proc *p = td->td_proc;
struct ucred *newcred, *oldcred;
int ngrp, error;
- gid_t egid;
ngrp = *ngrpp;
/* Sanity check size. */
- /* XXXKE Limit to ngroups_max when we change the userland interface. */
- if (ngrp < 0 || ngrp > ngroups_max + 1)
+ if (ngrp < 0 || ngrp > ngroups_max)
return (EINVAL);
AUDIT_ARG_GROUPSET(groups, ngrp);
- /*
- * setgroups(0, NULL) is a legitimate way of clearing the groups vector
- * on non-BSD systems (which generally do not have the egid in the
- * groups[0]). We risk security holes when running non-BSD software if
- * we do not do the same. So we allow and treat 0 for 'ngrp' specially
- * below (twice).
- */
- if (ngrp != 0) {
- /*
- * To maintain userland compat for now, we use the first group
- * as our egid and we'll use the rest as our supplemental
- * groups.
- */
- egid = groups[0];
- ngrp--;
- groups++;
- groups_normalize(&ngrp, groups);
- *ngrpp = ngrp;
- }
+ groups_normalize(&ngrp, groups);
+ *ngrpp = ngrp;
+
newcred = crget();
crextend(newcred, ngrp);
PROC_LOCK(p);
@@ -1289,15 +1327,7 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
if (error)
goto fail;
- /*
- * If some groups were passed, the first one is currently the desired
- * egid. This code is to be removed (along with some commented block
- * above) when setgroups() is changed to take only supplementary groups.
- */
- if (ngrp != 0)
- newcred->cr_gid = egid;
crsetgroups_internal(newcred, ngrp, groups);
-
setsugid(p);
proc_set_cred(p, newcred);
PROC_UNLOCK(p);
@@ -2891,7 +2921,8 @@ crextend(struct ucred *cr, int n)
* Normalizes a set of groups to be applied to a 'struct ucred'.
*
* Normalization ensures that the supplementary groups are sorted in ascending
- * order and do not contain duplicates.
+ * order and do not contain duplicates. This allows group_is_supplementary
+ * to do a binary search.
*/
static void
groups_normalize(int *ngrp, gid_t *groups)
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 50b040132396..3180c66cb42b 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -1694,8 +1694,10 @@ thread_single_end(struct proc *p, int mode)
thread_unlock(td);
}
}
- KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
- ("inconsistent boundary count %d", p->p_boundary_count));
+ KASSERT(mode != SINGLE_BOUNDARY || P_SHOULDSTOP(p) ||
+ p->p_boundary_count == 0,
+ ("pid %d proc %p flags %#x inconsistent boundary count %d",
+ p->p_pid, p, p->p_flag, p->p_boundary_count));
PROC_SUNLOCK(p);
wakeup(&p->p_flag);
}
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ab47b6ad29a3..a65c3ca128d9 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -57,7 +57,7 @@
* b : public affirmation by word or example of usually
* religious faith or conviction <the heroic witness to divine
* life -- Pilot>
- * 6 capitalized : a member of the Jehovah's Witnesses
+ * 6 capitalized : a member of the Jehovah's Witnesses
*/
/*
@@ -131,7 +131,7 @@
#define LI_SLEEPABLE 0x00040000 /* Lock may be held while sleeping. */
#ifndef WITNESS_COUNT
-#define WITNESS_COUNT 1536
+#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
#define WITNESS_PENDLIST (512 + (MAXCPU * 4))
@@ -158,20 +158,18 @@
* These flags go in the witness relationship matrix and describe the
* relationship between any two struct witness objects.
*/
-#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
-#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
-#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
-#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
-#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
-#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
-#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
-#define WITNESS_RELATED_MASK \
- (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
-#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been
- * observed. */
-#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
-#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
-#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
+#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
+#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
+#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
+#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
+#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
+#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
+#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
+#define WITNESS_RELATED_MASK (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
+#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been observed. */
+#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
+#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
+#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
/* Descendant to ancestor flags */
#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2)
@@ -218,20 +216,18 @@ struct lock_list_entry {
* (for example, "vnode interlock").
*/
struct witness {
- char w_name[MAX_W_NAME];
- uint32_t w_index; /* Index in the relationship matrix */
+ char w_name[MAX_W_NAME];
+ uint32_t w_index; /* Index in the relationship matrix */
struct lock_class *w_class;
- STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
- STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
- struct witness *w_hash_next; /* Linked list in hash buckets. */
- const char *w_file; /* File where last acquired */
- uint32_t w_line; /* Line where last acquired */
- uint32_t w_refcount;
- uint16_t w_num_ancestors; /* direct/indirect
- * ancestor count */
- uint16_t w_num_descendants; /* direct/indirect
- * descendant count */
- int16_t w_ddb_level;
+ STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
+ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
+ struct witness *w_hash_next; /* Linked list in hash buckets. */
+ const char *w_file; /* File where last acquired */
+ uint32_t w_line; /* Line where last acquired */
+ uint32_t w_refcount;
+ uint16_t w_num_ancestors; /* direct/indirect ancestor count */
+ uint16_t w_num_descendants; /* direct/indirect descendant count */
+ int16_t w_ddb_level;
unsigned w_displayed:1;
unsigned w_reversed:1;
};
@@ -265,7 +261,7 @@ struct witness_lock_order_data {
/*
* The witness lock order data hash table. Keys are witness index tuples
* (struct witness_lock_order_key), elements are lock order data objects
- * (struct witness_lock_order_data).
+ * (struct witness_lock_order_data).
*/
struct witness_lock_order_hash {
struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE];
@@ -295,7 +291,6 @@ struct witness_order_list_entry {
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{
-
return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}
@@ -304,7 +299,6 @@ static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
const struct witness_lock_order_key *b)
{
-
return (a->from == b->from && a->to == b->to);
}
@@ -415,7 +409,7 @@ SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
int badstack_sbuf_size;
int witness_count = WITNESS_COUNT;
-SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
+SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
&witness_count, 0, "");
/*
@@ -760,7 +754,6 @@ static int witness_spin_warn = 0;
static const char *
fixup_filename(const char *file)
{
-
if (file == NULL)
return (NULL);
while (strncmp(file, "../", 3) == 0)
@@ -835,7 +828,7 @@ witness_startup(void *mem)
w_free_cnt--;
for (i = 0; i < witness_count; i++) {
- memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
+ memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
(witness_count + 1));
}
@@ -989,16 +982,16 @@ witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
{
int i;
- for (i = 0; i < indent; i++)
- prnt(" ");
+ for (i = 0; i < indent; i++)
+ prnt(" ");
prnt("%s (type: %s, depth: %d, active refs: %d)",
w->w_name, w->w_class->lc_name,
w->w_ddb_level, w->w_refcount);
- if (w->w_displayed) {
- prnt(" -- (already displayed)\n");
- return;
- }
- w->w_displayed = 1;
+ if (w->w_displayed) {
+ prnt(" -- (already displayed)\n");
+ return;
+ }
+ w->w_displayed = 1;
if (w->w_file != NULL && w->w_line != 0)
prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file),
w->w_line);
@@ -1079,7 +1072,6 @@ witness_ddb_display(int(*prnt)(const char *fmt, ...))
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{
-
if (witness_watch == -1 || KERNEL_PANICKED())
return (0);
@@ -1257,7 +1249,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
w->w_reversed = 1;
mtx_unlock_spin(&w_mtx);
witness_output(
- "acquiring duplicate lock of same type: \"%s\"\n",
+ "acquiring duplicate lock of same type: \"%s\"\n",
w->w_name);
witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name,
fixup_filename(plock->li_file), plock->li_line);
@@ -1743,7 +1735,7 @@ found:
/*
* In order to reduce contention on w_mtx, we want to keep always an
- * head object into lists so that frequent allocation from the
+ * head object into lists so that frequent allocation from the
* free witness pool (and subsequent locking) is avoided.
* In order to maintain the current code simple, when the head
* object is totally unloaded it means also that we do not have
@@ -1781,7 +1773,7 @@ witness_thread_exit(struct thread *td)
n++;
witness_list_lock(&lle->ll_children[i],
witness_output);
-
+
}
kassert_panic(
"Thread %p cannot exit while holding sleeplocks\n", td);
@@ -1948,7 +1940,6 @@ found:
static void
depart(struct witness *w)
{
-
MPASS(w->w_refcount == 0);
if (w->w_class->lc_flags & LC_SLEEPLOCK) {
w_sleep_cnt--;
@@ -1999,18 +1990,18 @@ adopt(struct witness *parent, struct witness *child)
child->w_num_ancestors++;
}
- /*
- * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
+ /*
+ * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
* an ancestor of 'pi' during this loop.
*/
for (i = 1; i <= w_max_used_index; i++) {
- if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
(i != pi))
continue;
/* Find each descendant of 'i' and mark it as a descendant. */
for (j = 1; j <= w_max_used_index; j++) {
- /*
+ /*
* Skip children that are already marked as
* descendants of 'i'.
*/
@@ -2021,7 +2012,7 @@ adopt(struct witness *parent, struct witness *child)
* We are only interested in descendants of 'ci'. Note
* that 'ci' itself is counted as a descendant of 'ci'.
*/
- if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
(j != ci))
continue;
w_rmatrix[i][j] |= WITNESS_ANCESTOR;
@@ -2029,16 +2020,16 @@ adopt(struct witness *parent, struct witness *child)
w_data[i].w_num_descendants++;
w_data[j].w_num_ancestors++;
- /*
+ /*
* Make sure we aren't marking a node as both an
- * ancestor and descendant. We should have caught
+ * ancestor and descendant. We should have caught
* this as a lock order reversal earlier.
*/
if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
(w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- i, j, w_rmatrix[i][j]);
+ i, j, w_rmatrix[i][j]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2047,7 +2038,7 @@ adopt(struct witness *parent, struct witness *child)
(w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- j, i, w_rmatrix[j][i]);
+ j, i, w_rmatrix[j][i]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2124,7 +2115,6 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
static int
isitmychild(struct witness *parent, struct witness *child)
{
-
return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
}
@@ -2134,7 +2124,6 @@ isitmychild(struct witness *parent, struct witness *child)
static int
isitmydescendant(struct witness *ancestor, struct witness *descendant)
{
-
return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
__func__));
}
@@ -2182,7 +2171,7 @@ witness_get(void)
STAILQ_REMOVE_HEAD(&w_free, w_list);
w_free_cnt--;
index = w->w_index;
- MPASS(index > 0 && index == w_max_used_index+1 &&
+ MPASS(index > 0 && index == w_max_used_index + 1 &&
index < witness_count);
bzero(w, sizeof(*w));
w->w_index = index;
@@ -2194,7 +2183,6 @@ witness_get(void)
static void
witness_free(struct witness *w)
{
-
STAILQ_INSERT_HEAD(&w_free, w, w_list);
w_free_cnt++;
}
@@ -2219,11 +2207,10 @@ witness_lock_list_get(void)
bzero(lle, sizeof(*lle));
return (lle);
}
-
+
static void
witness_lock_list_free(struct lock_list_entry *lle)
{
-
mtx_lock_spin(&w_mtx);
lle->ll_next = w_lock_list_free;
w_lock_list_free = lle;
@@ -2297,7 +2284,6 @@ witness_voutput(const char *fmt, va_list ap)
static int
witness_thread_has_locks(struct thread *td)
{
-
if (td->td_sleeplocks == NULL)
return (0);
return (td->td_sleeplocks->ll_count != 0);
@@ -2573,14 +2559,12 @@ witness_setflag(struct lock_object *lock, int flag, int set)
void
witness_norelease(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 1);
}
void
witness_releaseok(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 0);
}
@@ -2588,7 +2572,6 @@ witness_releaseok(struct lock_object *lock)
static void
witness_ddb_list(struct thread *td)
{
-
KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
KASSERT(kdb_active, ("%s: not in the debugger", __func__));
@@ -2653,7 +2636,6 @@ DB_SHOW_ALIAS_FLAGS(alllocks, db_witness_list_all, DB_CMD_MEMSAFE);
DB_SHOW_COMMAND_FLAGS(witness, db_witness_display, DB_CMD_MEMSAFE)
{
-
witness_ddb_display(db_printf);
}
#endif
@@ -2673,9 +2655,9 @@ sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx)
/* Allocate and init temporary storage space. */
tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
- tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
- tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
stack_zero(&tmp_data1->wlod_stack);
stack_zero(&tmp_data2->wlod_stack);
@@ -2750,12 +2732,12 @@ restart:
sbuf_printf(sb,
"\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
if (data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data1->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2763,7 +2745,7 @@ restart:
if (data2 && data2 != data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w2->w_name, tmp_w2->w_class->lc_name,
+ tmp_w2->w_name, tmp_w2->w_class->lc_name,
tmp_w1->w_name, tmp_w1->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data2->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2823,7 +2805,6 @@ sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
static int
sbuf_db_printf_drain(void *arg __unused, const char *data, int len)
{
-
return (db_printf("%.*s", len, data));
}
@@ -3068,7 +3049,7 @@ witness_lock_order_get(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN) == 0)
goto out;
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
data = w_lohash.wloh_array[hash];
while (data != NULL) {
@@ -3089,7 +3070,6 @@ out:
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{
-
if (parent != child &&
w_rmatrix[parent->w_index][child->w_index]
& WITNESS_LOCK_ORDER_KNOWN &&
@@ -3115,7 +3095,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN)
return (1);
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
data = w_lofree;
@@ -3134,7 +3114,6 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
static void
witness_increment_graph_generation(void)
{
-
if (witness_cold == 0)
mtx_assert(&w_mtx, MA_OWNED);
w_generation++;
@@ -3143,7 +3122,6 @@ witness_increment_graph_generation(void)
static int
witness_output_drain(void *arg __unused, const char *data, int len)
{
-
witness_output("%.*s", len, data);
return (len);
}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 09bf4d519927..4122f9261871 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -84,8 +84,8 @@ const char *syscallnames[] = {
"obs_vhangup", /* 76 = obsolete vhangup */
"obs_vlimit", /* 77 = obsolete vlimit */
"mincore", /* 78 = mincore */
- "getgroups", /* 79 = getgroups */
- "setgroups", /* 80 = setgroups */
+ "compat14.getgroups", /* 79 = freebsd14 getgroups */
+ "compat14.setgroups", /* 80 = freebsd14 setgroups */
"getpgrp", /* 81 = getpgrp */
"setpgid", /* 82 = setpgid */
"setitimer", /* 83 = setitimer */
@@ -600,4 +600,6 @@ const char *syscallnames[] = {
"exterrctl", /* 592 = exterrctl */
"inotify_add_watch_at", /* 593 = inotify_add_watch_at */
"inotify_rm_watch", /* 594 = inotify_rm_watch */
+ "getgroups", /* 595 = getgroups */
+ "setgroups", /* 596 = setgroups */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 53b5d3cbbba9..fa64597d14a5 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -552,13 +552,13 @@
_Out_writes_bytes_(len/PAGE_SIZE) char *vec
);
}
-79 AUE_GETGROUPS STD|CAPENABLED {
+79 AUE_GETGROUPS STD|CAPENABLED|COMPAT14 {
int getgroups(
int gidsetsize,
_Out_writes_opt_(gidsetsize) gid_t *gidset
);
}
-80 AUE_SETGROUPS STD {
+80 AUE_SETGROUPS STD|COMPAT14 {
int setgroups(
int gidsetsize,
_In_reads_(gidsetsize) const gid_t *gidset
@@ -3371,5 +3371,17 @@
int wd
);
}
+595 AUE_GETGROUPS STD|CAPENABLED {
+ int getgroups(
+ int gidsetsize,
+ _Out_writes_opt_(gidsetsize) gid_t *gidset
+ );
+ }
+596 AUE_SETGROUPS STD {
+ int setgroups(
+ int gidsetsize,
+ _In_reads_(gidsetsize) const gid_t *gidset
+ );
+ }
; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 4dfc63924da9..2b1ea9eed8d4 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -454,22 +454,6 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 3;
break;
}
- /* getgroups */
- case 79: {
- struct getgroups_args *p = params;
- iarg[a++] = p->gidsetsize; /* int */
- uarg[a++] = (intptr_t)p->gidset; /* gid_t * */
- *n_args = 2;
- break;
- }
- /* setgroups */
- case 80: {
- struct setgroups_args *p = params;
- iarg[a++] = p->gidsetsize; /* int */
- uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */
- *n_args = 2;
- break;
- }
/* getpgrp */
case 81: {
*n_args = 0;
@@ -3500,6 +3484,22 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* getgroups */
+ case 595: {
+ struct getgroups_args *p = params;
+ iarg[a++] = p->gidsetsize; /* int */
+ uarg[a++] = (intptr_t)p->gidset; /* gid_t * */
+ *n_args = 2;
+ break;
+ }
+ /* setgroups */
+ case 596: {
+ struct setgroups_args *p = params;
+ iarg[a++] = p->gidsetsize; /* int */
+ uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */
+ *n_args = 2;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -4199,32 +4199,6 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
- /* getgroups */
- case 79:
- switch (ndx) {
- case 0:
- p = "int";
- break;
- case 1:
- p = "userland gid_t *";
- break;
- default:
- break;
- };
- break;
- /* setgroups */
- case 80:
- switch (ndx) {
- case 0:
- p = "int";
- break;
- case 1:
- p = "userland const gid_t *";
- break;
- default:
- break;
- };
- break;
/* getpgrp */
case 81:
break;
@@ -9367,6 +9341,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* getgroups */
+ case 595:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "userland gid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setgroups */
+ case 596:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "userland const gid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -9633,16 +9633,6 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
- /* getgroups */
- case 79:
- if (ndx == 0 || ndx == 1)
- p = "int";
- break;
- /* setgroups */
- case 80:
- if (ndx == 0 || ndx == 1)
- p = "int";
- break;
/* getpgrp */
case 81:
/* setpgid */
@@ -11365,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* getgroups */
+ case 595:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setgroups */
+ case 596:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 85fe48ddd466..eb1327f7f2de 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -1160,7 +1160,8 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR)
return (EINVAL);
- if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0)
+ if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC |
+ O_CLOFORK)) != 0)
return (EINVAL);
largepage = (shmflags & SHM_LARGEPAGE) != 0;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 0056dac65c7d..19870e989437 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -154,15 +154,12 @@ static struct task unp_defer_task;
* and don't really want to reserve the sendspace. Their recvspace should be
* large enough for at least one max-size datagram plus address.
*/
-#ifndef PIPSIZ
-#define PIPSIZ 8192
-#endif
-static u_long unpst_sendspace = PIPSIZ;
-static u_long unpst_recvspace = PIPSIZ;
+static u_long unpst_sendspace = 64*1024;
+static u_long unpst_recvspace = 64*1024;
static u_long unpdg_maxdgram = 8*1024; /* support 8KB syslog msgs */
static u_long unpdg_recvspace = 16*1024;
-static u_long unpsp_sendspace = PIPSIZ;
-static u_long unpsp_recvspace = PIPSIZ;
+static u_long unpsp_sendspace = 64*1024;
+static u_long unpsp_recvspace = 64*1024;
static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Local domain");
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index fa655c43d155..19c39e42bafa 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -5170,7 +5170,7 @@ bufstrategy(struct bufobj *bo, struct buf *bp)
vp = bp->b_vp;
KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy"));
- KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
+ KASSERT(!VN_ISDEV(vp),
("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp));
i = VOP_STRATEGY(vp, bp);
KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp));
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index cd30d5cfae47..93ac001af8ad 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -103,6 +103,16 @@ struct vattr va_null;
* Routines having to do with the management of the vnode table.
*/
+void
+vfs_unref_vfsconf(struct vfsconf *vfsp)
+{
+ vfsconf_lock();
+ KASSERT(vfsp->vfc_refcount > 0,
+ ("vfs %p refcount underflow %d", vfsp, vfsp->vfc_refcount));
+ vfsp->vfc_refcount--;
+ vfsconf_unlock();
+}
+
static struct vfsconf *
vfs_byname_locked(const char *name)
{
@@ -123,9 +133,11 @@ vfs_byname(const char *name)
{
struct vfsconf *vfsp;
- vfsconf_slock();
+ vfsconf_lock();
vfsp = vfs_byname_locked(name);
- vfsconf_sunlock();
+ if (vfsp != NULL)
+ vfsp->vfc_refcount++;
+ vfsconf_unlock();
return (vfsp);
}
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
index 746a5a39208e..b265a5ff3a62 100644
--- a/sys/kern/vfs_inotify.c
+++ b/sys/kern/vfs_inotify.c
@@ -801,6 +801,7 @@ vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
vn_lock(vp, LK_SHARED | LK_RETRY);
if (error != 0)
break;
+ NDFREE_PNBUF(&nd);
vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
vrele(nd.ni_vp);
}
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 8e64a7fe966b..13403acacc08 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -683,7 +683,6 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
MPASSERT(mp->mnt_vfs_ops == 1, mp,
("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
(void) vfs_busy(mp, MBF_NOWAIT);
- atomic_add_acq_int(&vfsp->vfc_refcount, 1);
mp->mnt_op = vfsp->vfc_vfsops;
mp->mnt_vfc = vfsp;
mp->mnt_stat.f_type = vfsp->vfc_typenum;
@@ -731,7 +730,6 @@ vfs_mount_destroy(struct mount *mp)
__FILE__, __LINE__));
MPPASS(mp->mnt_writeopcount == 0, mp);
MPPASS(mp->mnt_secondary_writes == 0, mp);
- atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
struct vnode *vp;
@@ -769,6 +767,9 @@ vfs_mount_destroy(struct mount *mp)
vfs_free_addrlist(mp->mnt_export);
free(mp->mnt_export, M_MOUNT);
}
+ vfsconf_lock();
+ mp->mnt_vfc->vfc_refcount--;
+ vfsconf_unlock();
crfree(mp->mnt_cred);
uma_zfree(mount_zone, mp);
}
@@ -1133,6 +1134,7 @@ vfs_domount_first(
if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (EPERM);
}
@@ -1169,6 +1171,7 @@ vfs_domount_first(
}
if (error != 0) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (error);
}
vn_seqc_write_begin(vp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index a6e38be89291..57732ddab7d9 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2186,6 +2186,8 @@ freevnode(struct vnode *vp)
{
struct bufobj *bo;
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+
/*
* The vnode has been marked for destruction, so free it.
*
@@ -2222,12 +2224,16 @@ freevnode(struct vnode *vp)
mac_vnode_destroy(vp);
#endif
if (vp->v_pollinfo != NULL) {
+ int error __diagused;
+
/*
* Use LK_NOWAIT to shut up witness about the lock. We may get
* here while having another vnode locked when trying to
* satisfy a lookup and needing to recycle.
*/
- VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ VNASSERT(error == 0, vp,
+ ("freevnode: cannot lock vp %p for pollinfo destroy", vp));
destroy_vpollinfo(vp->v_pollinfo);
VOP_UNLOCK(vp);
vp->v_pollinfo = NULL;
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index b805e147bd62..bf3ed9d515dc 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2839,7 +2839,7 @@ setfflags(struct thread *td, struct vnode *vp, u_long flags)
* if they are allowed to set flags and programs assume that
* chown can't fail when done as root.
*/
- if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ if (VN_ISDEV(vp)) {
error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
if (error != 0)
return (error);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 93f87ddae4de..a4f41192f684 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -3444,7 +3444,7 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
dat = NULL;
if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
- error = ENOSYS;
+ error = EOPNOTSUPP;
goto out;
}