diff options
Diffstat (limited to 'lib/libsys')
51 files changed, 1651 insertions, 321 deletions
diff --git a/lib/libsys/Makefile.sys b/lib/libsys/Makefile.sys index 491c765e9416..bd65b58083c2 100644 --- a/lib/libsys/Makefile.sys +++ b/lib/libsys/Makefile.sys @@ -52,7 +52,6 @@ STATICOBJS+= interposing_table.o PSEUDO= \ __realpathat \ clock_gettime \ - exit \ getlogin \ gettimeofday \ sched_getcpu @@ -135,15 +134,28 @@ FEATURE_NOTE='\#include <sys/elf_common.h>\nGNU_PROPERTY_AARCH64_FEATURE_1_NOTE( FEATURE_NOTE='' .endif -${SASM}: - printf '/* %sgenerated by libc/sys/Makefile.inc */\n' @ > ${.TARGET} +# Add this file as a dependency of the generated assembly along with +# the two included files compat.h and SYS.h. Depending on this Makefile +# will cause some needless regenerations, but handles both changes in +# generated assembly and movement between MIASM and PSEUDO/INTERPOSED. +# The dependency on compat.h and SYS.h should properly be on the +# <foo>.S-><foo>.o rules, but there are too many .o variants for it to +# be easy and touching the generated source files has the same effect in +# practice. 
+__makefile_sys:= ${.PARSEDIR}/${.PARSEFILE} +__asm_deps= ${__makefile_sys} \ + ${LIBC_SRCTOP}/include/compat.h \ + ${LIBSYS_SRCTOP}/${LIBC_ARCH}/SYS.h + +${SASM}: ${__asm_deps} + printf '/* %sgenerated by libsys/Makefile.sys */\n' @ > ${.TARGET} printf '#include "compat.h"\n' >> ${.TARGET} printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} printf ${FEATURE_NOTE} >> ${.TARGET} -${SPSEUDO}: - printf '/* %sgenerated by libc/sys/Makefile.inc */\n' @ > ${.TARGET} +${SPSEUDO}: ${__asm_deps} + printf '/* %sgenerated by libsys/Makefile.sys */\n' @ > ${.TARGET} printf '#include "compat.h"\n' >> ${.TARGET} printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \ >> ${.TARGET} @@ -224,6 +236,7 @@ MAN+= abort2.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ + inotify.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -448,6 +461,11 @@ MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 +MLINKS+=inotify.2 inotify_init.2 \ + inotify.2 inotify_init1.2 \ + inotify.2 inotify_add_watch.2 \ + inotify.2 inotify_add_watch_at.2 \ + inotify.2 inotify_rm_watch.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ diff --git a/lib/libsys/Symbol.map b/lib/libsys/Symbol.map index eb71c813ae86..ae12124ca210 100644 --- a/lib/libsys/Symbol.map +++ b/lib/libsys/Symbol.map @@ -4,10 +4,4 @@ FBSDprivate_1.0 { __getosreldate; __libsys_interposing_slot; _elf_aux_info; - freebsd11_fstat; - freebsd11_fstatat; - freebsd11_getfsstat; - freebsd11_lstat; - freebsd11_stat; - freebsd11_statfs; }; diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map index 225eca2fc0de..e3fd8ac10621 100644 --- a/lib/libsys/Symbol.sys.map +++ b/lib/libsys/Symbol.sys.map @@ -89,7 +89,6 @@ FBSD_1.0 { geteuid; getfh; getgid; - getgroups; getitimer; getpagesize; getpeername; @@ -204,7 +203,6 @@ FBSD_1.0 { setegid; seteuid; setgid; - setgroups; setitimer; setlogin; setpgid; @@ 
-378,10 +376,17 @@ FBSD_1.7 { }; FBSD_1.8 { + exterrctl; fchroot; + getgroups; getrlimitusage; + inotify_add_watch_at; + inotify_rm_watch; + jail_attach_jd; + jail_remove_jd; kcmp; setcred; + setgroups; }; FBSDprivate_1.0 { diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h index d06017edf6d9..6bd768708a78 100644 --- a/lib/libsys/_libsys.h +++ b/lib/libsys/_libsys.h @@ -65,7 +65,7 @@ struct uuid; union semun; __BEGIN_DECLS -typedef void (__sys_exit_t)(int); +typedef void (__sys__exit_t)(int); typedef int (__sys_fork_t)(void); typedef ssize_t (__sys_read_t)(int, void *, size_t); typedef ssize_t (__sys_write_t)(int, const void *, size_t); @@ -121,8 +121,6 @@ typedef int (__sys_munmap_t)(void *, size_t); typedef int (__sys_mprotect_t)(void *, size_t, int); typedef int (__sys_madvise_t)(void *, size_t, int); typedef int (__sys_mincore_t)(const void *, size_t, char *); -typedef int (__sys_getgroups_t)(int, gid_t *); -typedef int (__sys_setgroups_t)(int, const gid_t *); typedef int (__sys_getpgrp_t)(void); typedef int (__sys_setpgid_t)(int, int); typedef int (__sys_setitimer_t)(int, const struct itimerval *, struct itimerval *); @@ -180,7 +178,7 @@ typedef int (__sys_pathconf_t)(const char *, int); typedef int (__sys_fpathconf_t)(int, int); typedef int (__sys_getrlimit_t)(u_int, struct rlimit *); typedef int (__sys_setrlimit_t)(u_int, struct rlimit *); -typedef int (__sys___sysctl_t)(int *, u_int, void *, size_t *, const void *, size_t); +typedef int (__sys___sysctl_t)(const int *, u_int, void *, size_t *, const void *, size_t); typedef int (__sys_mlock_t)(const void *, size_t); typedef int (__sys_munlock_t)(const void *, size_t); typedef int (__sys_undelete_t)(const char *); @@ -465,8 +463,15 @@ typedef int (__sys_kcmp_t)(pid_t, pid_t, int, uintptr_t, uintptr_t); typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *); typedef int (__sys_fchroot_t)(int); typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t); +typedef int 
(__sys_exterrctl_t)(u_int, u_int, void *); +typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t); +typedef int (__sys_inotify_rm_watch_t)(int, int); +typedef int (__sys_getgroups_t)(int, gid_t *); +typedef int (__sys_setgroups_t)(int, const gid_t *); +typedef int (__sys_jail_attach_jd_t)(int); +typedef int (__sys_jail_remove_jd_t)(int); -void __sys_exit(int rval); +_Noreturn void __sys__exit(int rval); int __sys_fork(void); ssize_t __sys_read(int fd, void * buf, size_t nbyte); ssize_t __sys_write(int fd, const void * buf, size_t nbyte); @@ -522,8 +527,6 @@ int __sys_munmap(void * addr, size_t len); int __sys_mprotect(void * addr, size_t len, int prot); int __sys_madvise(void * addr, size_t len, int behav); int __sys_mincore(const void * addr, size_t len, char * vec); -int __sys_getgroups(int gidsetsize, gid_t * gidset); -int __sys_setgroups(int gidsetsize, const gid_t * gidset); int __sys_getpgrp(void); int __sys_setpgid(int pid, int pgid); int __sys_setitimer(int which, const struct itimerval * itv, struct itimerval * oitv); @@ -581,7 +584,7 @@ int __sys_pathconf(const char * path, int name); int __sys_fpathconf(int fd, int name); int __sys_getrlimit(u_int which, struct rlimit * rlp); int __sys_setrlimit(u_int which, struct rlimit * rlp); -int __sys___sysctl(int * name, u_int namelen, void * old, size_t * oldlenp, const void * new, size_t newlen); +int __sys___sysctl(const int * name, u_int namelen, void * old, size_t * oldlenp, const void * new, size_t newlen); int __sys_mlock(const void * addr, size_t len); int __sys_munlock(const void * addr, size_t len); int __sys_undelete(const char * path); @@ -866,6 +869,13 @@ int __sys_kcmp(pid_t pid1, pid_t pid2, int type, uintptr_t idx1, uintptr_t idx2) int __sys_getrlimitusage(u_int which, int flags, rlim_t * res); int __sys_fchroot(int fd); int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size); +int __sys_exterrctl(u_int op, u_int flags, void * ptr); +int 
__sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask); +int __sys_inotify_rm_watch(int fd, int wd); +int __sys_getgroups(int gidsetsize, gid_t * gidset); +int __sys_setgroups(int gidsetsize, const gid_t * gidset); +int __sys_jail_attach_jd(int fd); +int __sys_jail_remove_jd(int fd); __END_DECLS #endif /* __LIBSYS_H_ */ diff --git a/lib/libsys/_umtx_op.2 b/lib/libsys/_umtx_op.2 index 974850fb8425..c590f8e8e0c8 100644 --- a/lib/libsys/_umtx_op.2 +++ b/lib/libsys/_umtx_op.2 @@ -210,6 +210,8 @@ Valid clock identifiers are a subset of those for .It .Dv CLOCK_SECOND .It +.Dv CLOCK_TAI +.It .Dv CLOCK_UPTIME .It .Dv CLOCK_UPTIME_FAST diff --git a/lib/libsys/accept.2 b/lib/libsys/accept.2 index 53926b3153d2..2da2af066a5b 100644 --- a/lib/libsys/accept.2 +++ b/lib/libsys/accept.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd October 9, 2014 +.Dd May 17, 2025 .Dt ACCEPT 2 .Os .Sh NAME @@ -85,6 +85,13 @@ and the close-on-exec flag on the new file descriptor can be set via the flag in the .Fa flags argument. +Similarly, the +.Dv O_CLOFORK +property can be set via the +.Dv SOCK_CLOFORK +flag in the +.Fa flags +argument. .Pp If no pending connections are present on the queue, and the original socket @@ -234,3 +241,8 @@ The .Fn accept4 system call appeared in .Fx 10.0 . +.Pp +The +.Dv SOCK_CLOFORK +flag appeared in +.Fx 15.0 . 
diff --git a/lib/libsys/amd64/Makefile.sys b/lib/libsys/amd64/Makefile.sys index 09947d998480..8134bdc422a6 100644 --- a/lib/libsys/amd64/Makefile.sys +++ b/lib/libsys/amd64/Makefile.sys @@ -3,6 +3,7 @@ SRCS+= \ amd64_get_gsbase.c \ amd64_set_fsbase.c \ amd64_set_gsbase.c \ + amd64_set_tlsbase.c \ rfork_thread.S MDASM= vfork.S cerror.S getcontext.S diff --git a/lib/libsys/amd64/Symbol.sys.map b/lib/libsys/amd64/Symbol.sys.map index 5f463c85f872..11e0507b6613 100644 --- a/lib/libsys/amd64/Symbol.sys.map +++ b/lib/libsys/amd64/Symbol.sys.map @@ -13,6 +13,10 @@ FBSD_1.6 { x86_pkru_unprotect_range; }; +FBSD_1.8 { + amd64_set_tlsbase; +}; + FBSDprivate_1.0 { _vfork; }; diff --git a/lib/libsys/amd64/amd64_get_fsbase.c b/lib/libsys/amd64/amd64_get_fsbase.c index 00f16a5e404f..b5e87f8a3ce5 100644 --- a/lib/libsys/amd64/amd64_get_fsbase.c +++ b/lib/libsys/amd64/amd64_get_fsbase.c @@ -30,7 +30,6 @@ * SUCH DAMAGE. */ -#define _WANT_P_OSREL #include <sys/param.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> @@ -41,7 +40,6 @@ static int amd64_get_fsbase_cpu(void **addr) { - *addr = (void *)rdfsbase(); return (0); } @@ -49,15 +47,12 @@ amd64_get_fsbase_cpu(void **addr) static int amd64_get_fsbase_syscall(void **addr) { - return (sysarch(AMD64_GET_FSBASE, addr)); } DEFINE_UIFUNC(, int, amd64_get_fsbase, (void **)) { - - if (__getosreldate() >= P_OSREL_WRFSBASE && - (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) + if ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) return (amd64_get_fsbase_cpu); return (amd64_get_fsbase_syscall); } diff --git a/lib/libsys/amd64/amd64_get_gsbase.c b/lib/libsys/amd64/amd64_get_gsbase.c index ef135b1eed7f..51be412ddd7a 100644 --- a/lib/libsys/amd64/amd64_get_gsbase.c +++ b/lib/libsys/amd64/amd64_get_gsbase.c @@ -30,7 +30,6 @@ * SUCH DAMAGE. 
*/ -#define _WANT_P_OSREL #include <sys/param.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> @@ -41,7 +40,6 @@ static int amd64_get_gsbase_cpu(void **addr) { - *addr = (void *)rdgsbase(); return (0); } @@ -49,15 +47,12 @@ amd64_get_gsbase_cpu(void **addr) static int amd64_get_gsbase_syscall(void **addr) { - return (sysarch(AMD64_GET_GSBASE, addr)); } DEFINE_UIFUNC(, int, amd64_get_gsbase, (void **)) { - - if (__getosreldate() >= P_OSREL_WRFSBASE && - (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) + if ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) return (amd64_get_gsbase_cpu); return (amd64_get_gsbase_syscall); } diff --git a/lib/libsys/amd64/amd64_set_fsbase.c b/lib/libsys/amd64/amd64_set_fsbase.c index f1690fde6e17..5265bd712f17 100644 --- a/lib/libsys/amd64/amd64_set_fsbase.c +++ b/lib/libsys/amd64/amd64_set_fsbase.c @@ -30,7 +30,6 @@ * SUCH DAMAGE. */ -#define _WANT_P_OSREL #include <sys/param.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> @@ -41,7 +40,6 @@ static int amd64_set_fsbase_cpu(void *addr) { - wrfsbase((uintptr_t)addr); return (0); } @@ -49,15 +47,12 @@ amd64_set_fsbase_cpu(void *addr) static int amd64_set_fsbase_syscall(void *addr) { - return (sysarch(AMD64_SET_FSBASE, &addr)); } DEFINE_UIFUNC(, int, amd64_set_fsbase, (void *)) { - - if (__getosreldate() >= P_OSREL_WRFSBASE && - (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) + if ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) return (amd64_set_fsbase_cpu); return (amd64_set_fsbase_syscall); } diff --git a/lib/libsys/amd64/amd64_set_gsbase.c b/lib/libsys/amd64/amd64_set_gsbase.c index 756bbae18844..94f5736ed1ab 100644 --- a/lib/libsys/amd64/amd64_set_gsbase.c +++ b/lib/libsys/amd64/amd64_set_gsbase.c @@ -30,7 +30,6 @@ * SUCH DAMAGE. 
*/ -#define _WANT_P_OSREL #include <sys/param.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> @@ -41,7 +40,6 @@ static int amd64_set_gsbase_cpu(void *addr) { - wrgsbase((uintptr_t)addr); return (0); } @@ -49,15 +47,12 @@ amd64_set_gsbase_cpu(void *addr) static int amd64_set_gsbase_syscall(void *addr) { - return (sysarch(AMD64_SET_GSBASE, &addr)); } DEFINE_UIFUNC(, int, amd64_set_gsbase, (void *)) { - - if (__getosreldate() >= P_OSREL_WRFSBASE && - (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) + if ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) return (amd64_set_gsbase_cpu); return (amd64_set_gsbase_syscall); } diff --git a/lib/libsys/amd64/amd64_set_tlsbase.c b/lib/libsys/amd64/amd64_set_tlsbase.c new file mode 100644 index 000000000000..a28441fbfc5d --- /dev/null +++ b/lib/libsys/amd64/amd64_set_tlsbase.c @@ -0,0 +1,51 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define _WANT_P_OSREL +#include <sys/param.h> +#include <machine/cpufunc.h> +#include <machine/specialreg.h> +#include <machine/sysarch.h> +#include <x86/ifunc.h> +#include "libc_private.h" + +static int +amd64_set_tlsbase_syscall(void *addr) +{ + return (sysarch(AMD64_SET_TLSBASE, &addr)); +} + +DEFINE_UIFUNC(, int, amd64_set_tlsbase, (void *)) +{ + if (__getosreldate() >= P_OSREL_TLSBASE) + return (amd64_set_tlsbase_syscall); + return (amd64_set_fsbase); +} diff --git a/lib/libsys/auxv.3 b/lib/libsys/auxv.3 index 10ec10e8755c..b3b741937ac7 100644 --- a/lib/libsys/auxv.3 +++ b/lib/libsys/auxv.3 @@ -22,7 +22,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd September 16, 2022 +.Dd June 24, 2025 .Dt ELF_AUX_INFO 3 .Os .Sh NAME @@ -59,6 +59,12 @@ CPU / hardware feature flags .It AT_HWCAP2 CPU / hardware feature flags .Dv (sizeof(u_long)). +.It AT_HWCAP3 +CPU / hardware feature flags +.Dv (sizeof(u_long)). +.It AT_HWCAP4 +CPU / hardware feature flags +.Dv (sizeof(u_long)). .It AT_NCPUS Number of CPUs .Dv (sizeof(int)). 
diff --git a/lib/libsys/auxv.c b/lib/libsys/auxv.c index 1a4fd352950e..80702c66ba22 100644 --- a/lib/libsys/auxv.c +++ b/lib/libsys/auxv.c @@ -69,10 +69,10 @@ __init_elf_aux_vector(void) static int aux_once; static int pagesize, osreldate, canary_len, ncpus, pagesizes_len, bsdflags; -static int hwcap_present, hwcap2_present; +static int hwcap_present, hwcap2_present, hwcap3_present, hwcap4_present; static char *canary, *pagesizes, *execpath; static void *ps_strings, *timekeep; -static u_long hwcap, hwcap2; +static u_long hwcap, hwcap2, hwcap3, hwcap4; static void *fxrng_seed_version; static u_long usrstackbase, usrstacklim; @@ -123,6 +123,16 @@ init_aux(void) hwcap2 = (u_long)(aux->a_un.a_val); break; + case AT_HWCAP3: + hwcap3_present = 1; + hwcap3 = (u_long)(aux->a_un.a_val); + break; + + case AT_HWCAP4: + hwcap4_present = 1; + hwcap4 = (u_long)(aux->a_un.a_val); + break; + case AT_PAGESIZES: pagesizes = (char *)(aux->a_un.a_ptr); break; @@ -318,6 +328,20 @@ _elf_aux_info(int aux, void *buf, int buflen) } else res = ENOENT; break; + case AT_HWCAP3: + if (hwcap3_present && buflen == sizeof(u_long)) { + *(u_long *)buf = hwcap3; + res = 0; + } else + res = ENOENT; + break; + case AT_HWCAP4: + if (hwcap4_present && buflen == sizeof(u_long)) { + *(u_long *)buf = hwcap4; + res = 0; + } else + res = ENOENT; + break; case AT_PAGESIZES: if (pagesizes != NULL && pagesizes_len >= buflen) { memcpy(buf, pagesizes, buflen); diff --git a/lib/libsys/chroot.2 b/lib/libsys/chroot.2 index 4c06e3673e03..3347df5cceee 100644 --- a/lib/libsys/chroot.2 +++ b/lib/libsys/chroot.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 15, 2024 +.Dd August 2, 2025 .Dt CHROOT 2 .Os .Sh NAME @@ -61,7 +61,12 @@ It should be noted that .Fn chroot has no effect on the process's current directory. .Pp -This call is restricted to the super-user. 
+This call is restricted to the super-user, unless the +.Ql security.bsd.unprivileged_chroot +sysctl variable is set to 1 +and the process has enabled the +.Dv PROC_NO_NEW_PRIVS_CTL +.Xr procctl 2 . .Pp Depending on the setting of the .Ql kern.chroot_allow_open_directories @@ -106,14 +111,37 @@ except it takes a file descriptor instead of path. .Sh ERRORS The .Fn chroot +and +.Fn fchroot +system calls +will fail and the root directory will be unchanged if: +.Bl -tag -width Er +.It Bq Er EPERM +The effective user ID is not the super-user and the +.Ql security.bsd.unprivileged_chroot +sysctl is 0. +.It Bq Er EPERM +The effective user ID is not the super-user and the +process has not enabled the +.Dv PROC_NO_NEW_PRIVS_CTL +.Xr procctl 2 . +.It Bq Er EPERM +One or more filedescriptors are open directories and the +.Ql kern.chroot_allow_open_directories +sysctl is not set to permit this. +.It Bq Er EIO +An I/O error occurred while reading from or writing to the file system. +.It Bq Er EINTEGRITY +Corrupted data was detected while reading from the file system. +.El +.Pp +The +.Fn chroot system call will fail and the root directory will be unchanged if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path name is not a directory. -.It Bq Er EPERM -The effective user ID is not the super-user, or one or more -filedescriptors are open directories. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. @@ -128,10 +156,6 @@ The .Fa dirname argument points outside the process's allocated address space. -.It Bq Er EIO -An I/O error occurred while reading from or writing to the file system. -.It Bq Er EINTEGRITY -Corrupted data was detected while reading from the file system. .El .Pp The @@ -146,15 +170,8 @@ file descriptor. The argument .Fa fd is not a valid file descriptor. -.It Bq Er EIO -An I/O error occurred while reading from or writing to the file system. 
-.It Bq Er EINTEGRITY -Corrupted data was detected while reading from the file system. .It Bq Er ENOTDIR The file descriptor does not reference a directory. -.It Bq Er EPERM -The effective user ID is not the super-user, or one or more -filedescriptors are open directories. .El .Sh SEE ALSO .Xr chdir 2 , diff --git a/lib/libsys/clock_gettime.2 b/lib/libsys/clock_gettime.2 index fcdc5be498f2..89551d0f720b 100644 --- a/lib/libsys/clock_gettime.2 +++ b/lib/libsys/clock_gettime.2 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 28, 2024 +.Dd August 10, 2024 .Dt CLOCK_GETTIME 2 .Os .Sh NAME @@ -99,11 +99,24 @@ query, using an in-kernel cached value of the current second. Returns the execution time of the calling process. .It Dv CLOCK_THREAD_CPUTIME_ID Returns the execution time of the calling thread. +.It Dv CLOCK_TAI +Increments in SI seconds like a wall clock. +It uses a 1970 epoch and implements the TAI timescale. +Similar to +.Dv CLOCK_REALTIME , +but without leap seconds. +It will increase monotonically during a leap second. +Will return +.Er EINVAL +if the current offset between TAI and UTC is not known, +which may be the case early in boot before NTP or other time daemon has +synchronized. .El .Pp The clock IDs .Dv CLOCK_BOOTTIME , .Dv CLOCK_REALTIME , +.Dv CLOCK_TAI , .Dv CLOCK_MONOTONIC , and .Dv CLOCK_UPTIME @@ -202,7 +215,8 @@ The clock IDs .Dv CLOCK_MONOTONIC_PRECISE , .Dv CLOCK_REALTIME_FAST , .Dv CLOCK_REALTIME_PRECISE , -.Dv CLOCK_SECOND +.Dv CLOCK_SECOND , +.Dv CLOCK_TAI , .Dv CLOCK_UPTIME , .Dv CLOCK_UPTIME_FAST , and diff --git a/lib/libsys/closefrom.2 b/lib/libsys/closefrom.2 index aaa4c55607ac..1885a6fdeaa8 100644 --- a/lib/libsys/closefrom.2 +++ b/lib/libsys/closefrom.2 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd March 3, 2022 +.Dd May 17, 2025 .Dt CLOSEFROM 2 .Os .Sh NAME @@ -59,6 +59,8 @@ Supported .Bl -tag -width ".Dv CLOSE_RANGE_CLOEXEC" .It Dv CLOSE_RANGE_CLOEXEC Set the close-on-exec flag on descriptors in the range instead of closing them. +.It Dv CLOSE_RANGE_CLOFORK +Set the close-on-fork flag on descriptors in the range instead of closing them. .El .Sh RETURN VALUES Upon successful completion, @@ -90,3 +92,8 @@ The .Fn closefrom function first appeared in .Fx 8.0 . +.Pp +The +.Dv CLOSE_RANGE_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/copy_file_range.2 b/lib/libsys/copy_file_range.2 index bcd9170842d5..829a5a5d3c13 100644 --- a/lib/libsys/copy_file_range.2 +++ b/lib/libsys/copy_file_range.2 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 28, 2023 +.Dd August 16, 2025 .Dt COPY_FILE_RANGE 2 .Os .Sh NAME @@ -74,6 +74,7 @@ argument must be opened for reading and the .Fa outfd argument must be opened for writing, but not .Dv O_APPEND . +.Pp If .Fa inoffp or @@ -101,9 +102,29 @@ respectively will be used/updated and the file offset for or .Fa outfd respectively will not be affected. -The +.Pp +The only +.Fa flags +argument currently defined is +.Dv COPY_FILE_RANGE_CLONE . +When this flag is set, +.Fn copy_file_range +will return +.Er EOPNOTSUPP +if the copy cannot be done via +block cloning. +When .Fa flags -argument must be 0. +is 0, a file system may do the copy via block cloning +or by data copying. +Block cloning is only possible when the offsets (plus +.Fa len +if not to EOF on the input file) are block +aligned. +The correct block alignment can normally be acquired via the +.Dv _PC_CLONE_BLKSIZE +query for +.Xr pathconf 2 . .Pp This system call attempts to maintain holes in the output file for the byte range being copied. @@ -203,9 +224,15 @@ refers to a directory. File system that stores .Fa outfd is full. 
+.It Bq Er EOPNOTSUPP +Cannot do the copy via block cloning and the +.Dv COPY_FILE_RANGE_CLONE +.Fa flags +argument is specified. .El .Sh SEE ALSO -.Xr lseek 2 +.Xr lseek 2 , +.Xr pathconf 2 .Sh STANDARDS The .Fn copy_file_range diff --git a/lib/libsys/execve.2 b/lib/libsys/execve.2 index 5a35980e9555..dc85b9321e48 100644 --- a/lib/libsys/execve.2 +++ b/lib/libsys/execve.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 26, 2022 +.Dd July 02, 2025 .Dt EXECVE 2 .Os .Sh NAME @@ -127,7 +127,10 @@ flag is set (see and .Xr fcntl 2 ) . Descriptors that remain open are unaffected by -.Fn execve . +.Fn execve , +except those with the close-on-fork flag +.Dv FD_CLOFORK +which is cleared from all file descriptors. If any of the standard descriptors (0, 1, and/or 2) are closed at the time .Fn execve diff --git a/lib/libsys/fcntl.2 b/lib/libsys/fcntl.2 index b5d4abe35aeb..d67c38cfbc6c 100644 --- a/lib/libsys/fcntl.2 +++ b/lib/libsys/fcntl.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 7, 2021 +.Dd June 24, 2025 .Dt FCNTL 2 .Os .Sh NAME @@ -80,6 +80,18 @@ associated with the new file descriptor is cleared, so the file descriptor is to remain open across .Xr execve 2 system calls. +.It +The close-on-fork flag +.Dv FD_CLOFORK +associated with the new file descriptor is cleared, so the file descriptor is +to remain open across +.Xr fork 2 +system calls. +.It +The +.Dv FD_RESOLVE_BENEATH +flag, described below, will be set if it was set on the original +descriptor. .El .It Dv F_DUPFD_CLOEXEC Like @@ -90,6 +102,15 @@ flag associated with the new file descriptor is set, so the file descriptor is closed when .Xr execve 2 system call executes. 
+.It Dv F_DUPFD_CLOFORK +Like +.Dv F_DUPFD , +but the +.Dv FD_CLOFORK +flag associated with the new file descriptor is set, so the file descriptor +is closed when +.Xr fork 2 +system call executes. .It Dv F_DUP2FD It is functionally equivalent to .Bd -literal -offset indent @@ -112,30 +133,58 @@ Use .Fn dup2 instead of .Dv F_DUP2FD . +.It Dv F_DUP3FD +Used to implement the +.Fn dup3 +call. +Do not use it. .It Dv F_GETFD -Get the close-on-exec flag associated with the file descriptor -.Fa fd -as -.Dv FD_CLOEXEC . -If the returned value ANDed with -.Dv FD_CLOEXEC -is 0, -the file will remain open across -.Fn exec , -otherwise the file will be closed upon execution of +Get the flags associated with the file descriptor +.Fa fd . +The following flags are defined: +.Bl -tag -width FD_RESOLVE_BENEATH +.It Dv FD_CLOEXEC +The file will be closed upon execution of .Fn exec .Fa ( arg is ignored). +Otherwise, the file descriptor will remain open. +.It Dv FD_CLOFORK +The file will be closed upon execution of the +.Fn fork +family of system calls. +.It Dv FD_RESOLVE_BENEATH +All path name lookups relative to that file descriptor +will behave as if the lookup had +.Dv O_RESOLVE_BENEATH +or +.Dv AT_RESOLVE_BENEATH +semantics. +It is not permitted to call +.Xr fchdir 2 +or +.Xr fchroot 2 +on such a file descriptor. +The +.Dv FD_RESOLVE_BENEATH +flag is sticky, meaning that it is preserved by +.Xr dup 2 +and similar operations, and opening a directory with +.Xr openat 2 +where the directory descriptor has the flag set causes the new directory +descriptor to also have the flag set. +.El .It Dv F_SETFD -Set the close-on-exec flag associated with -.Fa fd -to -.Fa arg , -where -.Fa arg -is either 0 or +Set flags associated with +.Fa fd . +The available flags are .Dv FD_CLOEXEC , -as described above. +.Dv FD_CLOFORK +and +.Dv FD_RESOLVE_BENEATH . +The +.Dv FD_RESOLVE_BENEATH +flag cannot be cleared once set. 
.It Dv F_GETFL Get descriptor status flags, as described below .Fa ( arg @@ -528,7 +577,7 @@ A new file descriptor. A file descriptor equal to .Fa arg . .It Dv F_GETFD -Value of flag (only the low-order bit is defined). +Value of flags. .It Dv F_GETFL Value of flags. .It Dv F_GETOWN @@ -762,8 +811,10 @@ for the reasons as stated in .Sh STANDARDS The .Dv F_DUP2FD -constant is non portable. -It is provided for compatibility with AIX and Solaris. +and +.Dv F_DUP3FD +constants are not portable. +They are provided for compatibility with AIX and Solaris. .Pp Per .St -susv4 , @@ -788,3 +839,10 @@ The .Dv F_DUP2FD constant first appeared in .Fx 7.1 . +.Pp +The +.Dv F_DUPFD_CLOFORK +and +.Dv F_DUP3FD +flags appeared in +.Fx 15.0 . diff --git a/lib/libsys/fhopen.2 b/lib/libsys/fhopen.2 index 5bd1e9f8d90b..b281ac3d8949 100644 --- a/lib/libsys/fhopen.2 +++ b/lib/libsys/fhopen.2 @@ -31,7 +31,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 29, 1999 +.Dd July 20, 2025 .Dt FHOPEN 2 .Os .Sh NAME @@ -75,6 +75,10 @@ together the flags used for the system call. All said flags are valid except for .Dv O_CREAT . +If the file handle refers to a named attribute or named attribute +directory, the +.Dv O_NAMEDATTR +flag must be specified. .Pp The .Fn fhstat @@ -116,6 +120,16 @@ Calling with .Dv O_CREAT set. +.It Bq Er ENOATTR +The file handle does not refer to a named attribute or named attribute +directory although the +.Dv O_NAMEDATTR +flag was specified. +.It Bq Er ENOATTR +The file handle refers to a named attribute or named attribute directory +although the +.Dv O_NAMEDATTR +flag was not specified. .It Bq Er ESTALE The file handle .Fa fhp @@ -125,7 +139,8 @@ is no longer valid. 
.Xr fstat 2 , .Xr fstatfs 2 , .Xr getfh 2 , -.Xr open 2 +.Xr open 2 , +.Xr named_attribute 7 .Sh HISTORY The .Fn fhopen , diff --git a/lib/libsys/fork.2 b/lib/libsys/fork.2 index 7d548a42890d..e59b208a9ff5 100644 --- a/lib/libsys/fork.2 +++ b/lib/libsys/fork.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 5, 2021 +.Dd May 17, 2024 .Dt FORK 2 .Os .Sh NAME @@ -68,6 +68,16 @@ by the parent. This descriptor copying is also used by the shell to establish standard input and output for newly created processes as well as to set up pipes. +Any file descriptors that were marked with the close-on-fork flag, +.Dv FD_CLOFORK +.Po see +.Xr fcntl 2 +and +.Dv O_CLOFORK +in +.Xr open 2 +.Pc , +will not be present in the child process, but remain open in the parent. .It The child process' resource utilizations are set to 0; see diff --git a/lib/libsys/getdirentries.2 b/lib/libsys/getdirentries.2 index 0e5840ce25cd..202ae133f548 100644 --- a/lib/libsys/getdirentries.2 +++ b/lib/libsys/getdirentries.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd September 5, 2023 +.Dd July 8, 2025 .Dt GETDIRENTRIES 2 .Os .Sh NAME @@ -178,9 +178,7 @@ or non-NULL .Fa basep point outside the allocated address space. .It Bq Er EINVAL -The file referenced by -.Fa fd -is not a directory, or +The value of .Fa nbytes is too small for returning a directory entry or block of entries, or the current position pointer is invalid. @@ -192,6 +190,10 @@ error occurred while reading from or writing to the file system. Corrupted data was detected while reading from the file system. .It Bq Er ENOENT Directory unlinked but still open. +.It Bq Er ENOTDIR +The file referenced by +.Fa fd +is not a directory. 
.El .Sh SEE ALSO .Xr lseek 2 , diff --git a/lib/libsys/getgroups.2 b/lib/libsys/getgroups.2 index 91cca2748ec2..4e94b32d4e7b 100644 --- a/lib/libsys/getgroups.2 +++ b/lib/libsys/getgroups.2 @@ -1,5 +1,13 @@ +.\"- +.\" SPDX-License-Identifier: BSD-3-Clause +.\" .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. +.\" Copyright (c) 2025 The FreeBSD Foundation +.\" +.\" Portions of this documentation were written by Olivier Certner +.\" <olce@FreeBSD.org> at Kumacom SARL under sponsorship from the FreeBSD +.\" Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -25,12 +33,12 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 21, 2011 +.Dd October 10, 2025 .Dt GETGROUPS 2 .Os .Sh NAME .Nm getgroups -.Nd get group access list +.Nd get the calling process' supplementary groups .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -40,36 +48,39 @@ .Sh DESCRIPTION The .Fn getgroups -system call -gets the current group access list of the user process -and stores it in the array -.Fa gidset . -The +system call gets the calling process' supplementary groups and stores them in +the +.Fa gidset +array in strictly ascending order. +The value of .Fa gidsetlen -argument -indicates the number of entries that may be placed in +indicates the maximum number of entries that may be placed in .Fa gidset . -The -.Fn getgroups -system call -returns the actual number of groups returned in -.Fa gidset . -At least one and as many as {NGROUPS_MAX}+1 values may be returned. +.Pp If .Fa gidsetlen is zero, .Fn getgroups -returns the number of supplementary group IDs associated with -the calling process without modifying the array pointed to by +returns the cardinal of the calling process' supplementary groups set and +ignores argument .Fa gidset . 
.Pp +No more than +.Dv {NGROUPS_MAX} +values may ever be returned. The value of .Dv {NGROUPS_MAX} should be obtained using .Xr sysconf 3 to avoid hard-coding it into the executable. .Sh RETURN VALUES -A successful call returns the number of groups in the group set. +On success, the +.Fn getgroups +system call returns the cardinal of the supplementary groups set. +It always succeeds if argument +.Fa gidsetlen +is zero. +.Pp A value of -1 indicates that an error occurred, and the error code is stored in the global variable .Va errno . @@ -81,12 +92,12 @@ are: .It Bq Er EINVAL The argument .Fa gidsetlen -is smaller than the number of groups in the group set. +is smaller than the number of supplementary groups +.Pq but not zero . .It Bq Er EFAULT -The argument +An invalid address was encountered while writing to the .Fa gidset -specifies -an invalid address. +array. .El .Sh SEE ALSO .Xr setgroups 2 , @@ -96,9 +107,48 @@ an invalid address. The .Fn getgroups system call conforms to -.St -p1003.1-2008 . +.St -p1003.1-2008 , +not reporting the effective group ID. .Sh HISTORY The .Fn getgroups system call appeared in .Bx 4.2 . +.Pp +Since +.Fx 14.3 , +the +.Fn getgroups +system call has been reporting the supplementary groups in strictly ascending +order. +.Pp +Before +.Fx 15.0 , +the +.Fn getgroups +system call would additionally return the effective group ID as the first +element of the array, before the supplementary groups. +.Sh SECURITY CONSIDERATIONS +The +.Fn getgroups +system call gets the supplementary groups set in the +.Fa gidset +array. +In particular, as mentioned in +.Sx HISTORY , +it no longer retrieves the effective group ID in the first slot of +.Fa gidset . +Programs that process this slot in a specific way must be modified to obtain the +effective group ID through other means, such as a call to +.Xr getegid 2 . +.Pp +The effective group ID is present in the supplementary groups set if and only if +it was explicitly set as a supplementary group.
+The function +.Fn initgroups +enforces that, while the +.Fn setgroups +system call does not. +Please consult the +.Xr initgroups 3 +manual page for the rationale. diff --git a/lib/libsys/getsockopt.2 b/lib/libsys/getsockopt.2 index 619540b53fae..3867824681d7 100644 --- a/lib/libsys/getsockopt.2 +++ b/lib/libsys/getsockopt.2 @@ -593,6 +593,15 @@ specified amount of time has elapsed since the initial call to If .Fa sp_fd is -1, the socket will be unspliced immediately. +A successful +.Xr select 2 , +.Xr poll 2 , +or +.Xr kqueue 2 +operation testing the ability to read from the source socket indicates +that the splicing has terminated and at least one byte is available for +reading. +When one of the sockets gets closed, splicing ends. .Pp When passed to .Fn getsockopt , @@ -679,7 +688,7 @@ The option originated in .Ox 4.9 and first appeared in -.Fx 15.0 . +.Fx 14.3 . The .Fx implementation aims to be source-compatible. diff --git a/lib/libsys/inotify.2 b/lib/libsys/inotify.2 new file mode 100644 index 000000000000..f94509d6f59e --- /dev/null +++ b/lib/libsys/inotify.2 @@ -0,0 +1,379 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. 
+.\" +.Dd May 19, 2025 +.Dt INOTIFY 2 +.Os +.Sh NAME +.Nm inotify_init , +.Nm inotify_init1 , +.Nm inotify_add_watch , +.Nm inotify_add_watch_at , +.Nm inotify_rm_watch +.Nd monitor file system events +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/inotify.h +.Ft int +.Fo inotify_init +.Fc +.Ft int +.Fo inotify_init1 +.Fa "int flags" +.Fc +.Ft int +.Fo inotify_add_watch +.Fa "int fd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_add_watch_at +.Fa "int fd" +.Fa "int dfd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_rm_watch +.Fa "int fd" +.Fa "uint32_t wd" +.Fc +.Bd -literal +struct inotify_event { + int wd; /* Watch descriptor */ + uint32_t mask; /* Event and flags */ + uint32_t cookie; /* Unique ID which links rename events */ + uint32_t len; /* Name field size, including nul bytes */ + char name[0]; /* Filename (nul-terminated) */ +}; +.Ed +.Sh DESCRIPTION +The inotify system calls provide an interface to monitor file system events. +They aim to be compatible with the Linux inotify interface. +The provided functionality is similar to the +.Dv EVFILT_VNODE +filter of the +.Xr kevent 2 +system call, but further allows monitoring of a directory without needing to +open each object in that directory. +This avoids races and reduces the number of file descriptors needed to monitor +a large file hierarchy. +.Pp +inotify allows one or more file system objects, generally files or directories, +to be watched for events, such as file open or close. +Watched objects are associated with a file descriptor returned +by +.Fn inotify_init +or +.Fn inotify_init1 . +When an event occurs, a record describing the event becomes available for +reading from the inotify file descriptor. +Each inotify descriptor thus refers to a queue of events waiting to be read. +inotify descriptors are inherited across +.Xr fork 2 +calls and may be passed to other processes via +.Xr unix 4 +sockets. 
+.Pp +The +.Fn inotify_init1 +system call accepts two flags. +The +.Dv IN_NONBLOCK +flag causes the inotify descriptor to be opened in non-blocking mode, such that +.Xr read 2 +calls will not block if no records are available to consume, and will instead +return +.Er EWOULDBLOCK . +The +.Dv IN_CLOEXEC +flag causes the inotify descriptor to be closed automatically when +.Xr execve 2 +is called. +.Pp +To watch a file or directory, the +.Fn inotify_add_watch +or +.Fn inotify_add_watch_at +system calls must be used. +They take a path and a mask of events to watch for, and return a +.Dq watch descriptor , +a non-negative integer which uniquely identifies the watched object within the +inotify descriptor. +.Pp +The +.Fn inotify_rm_watch +system call removes a watch from an inotify descriptor. +.Pp +When watching a directory, objects within the directory are monitored for events +as well as the directory itself. +A record describing an inotify event consists of a +.Dq struct inotify_event +followed by the name of the object in the directory being watched. +If the watched object itself generates an event, no name is present. +Extra nul bytes may follow the file name in order to provide alignment for a +subsequent record. +.Pp +The following events are defined: +.Bl -tag -width IN_CLOSE_NOWRITE +.It Dv IN_ACCESS +A file's contents were accessed, e.g., by +.Xr read 2 , +.Xr copy_file_range 2 , +.Xr sendfile 2 , +or +.Xr getdirentries 2 . +.It Dv IN_ATTRIB +A file's metadata was changed, e.g., by +.Xr chmod 2 +or +.Xr unlink 2 . +.It Dv IN_CLOSE_WRITE +A file that was previously opened for writing was closed. +.It Dv IN_CLOSE_NOWRITE +A file that was previously opened read-only was closed. +.It Dv IN_CREATE +A file within a watched directory was created, e.g., by +.Xr open 2 , +.Xr mkdir 2 , +.Xr symlink 2 , +.Xr mknod 2 , +or +.Xr bind 2 . +.It Dv IN_DELETE +A file or directory within a watched directory was removed.
+.It Dv IN_DELETE_SELF +The watched file or directory itself was deleted. +This event is generated only when the link count of the file drops +to zero. +.It Dv IN_MODIFY +A file's contents were modified, e.g., by +.Xr write 2 +or +.Xr copy_file_range 2 . +.It Dv IN_MOVE_SELF +The watched file or directory itself was renamed. +.It Dv IN_MOVED_FROM +A file or directory was moved from a watched directory. +.It Dv IN_MOVED_TO +A file or directory was moved into a watched directory. +A +.Xr rename 2 +call thus may generate two events, one for the old name and one for the new +name. +These are linked together by the +.Ar cookie +field in the inotify record, which can be compared to link the two records +to the same event. +.It Dv IN_OPEN +A file was opened. +.El +.Pp +Some additional flags may be set in inotify event records: +.Bl -tag -width IN_Q_OVERFLOW +.It Dv IN_IGNORED +When a watch is removed from a file, for example because it was created with the +.Dv IN_ONESHOT +flag, the file was deleted, or the watch was explicitly removed with +.Xr inotify_rm_watch 2 , +an event with this mask is generated to indicate that the watch will not +generate any more events. +Once this event is generated, the watch is automatically removed, and in +particular should not be removed manually with +.Xr inotify_rm_watch 2 . +.It Dv IN_ISDIR +When the subject of an event is a directory, this flag is set in the +.Ar mask . +.It Dv IN_Q_OVERFLOW +One or more events were dropped, for example because of a kernel memory allocation +failure or because the event queue size hit a limit. +.It Dv IN_UNMOUNT +The filesystem containing the watched object was unmounted. +.El +.Pp +A number of flags may also be specified in the +.Ar mask +given to +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at : +.Bl -tag -width IN_DONT_FOLLOW +.It Dv IN_DONT_FOLLOW +If +.Ar pathname +is a symbolic link, do not follow it. +.It Dv IN_EXCL_UNLINK +This currently has no effect, see the +.Sx BUGS +section.
+.It Dv IN_MASK_ADD +When adding a watch to an object, and that object is already watched by the +same inotify descriptor, by default the mask of the existing watch is +overwritten. +When +.Dv IN_MASK_ADD +is specified, the mask of the existing watch is instead logically ORed with +the new mask. +.It Dv IN_MASK_CREATE +When +.Fn inotify_add_watch +is used to add a watch to an object, +.Dv IN_MASK_CREATE +is specified, and that object is already watched by the same inotify descriptor, +return an error instead of updating the existing watch. +.It Dv IN_ONESHOT +Monitor the object for a single event, after which the watch is automatically +removed. +As part of removal, a +.Dv IN_IGNORED +event is generated. +.It Dv IN_ONLYDIR +When creating a watch, fail with +.Er ENOTDIR +if the path does not refer to a directory. +.El +.Sh SYSCTL VARIABLES +The following variables are available as both +.Xr sysctl 8 +variables and +.Xr loader 8 +tunables: +.Bl -tag -width 15 +.It Va vfs.inotify.max_events +The maximum number of inotify records that can be queued for a single +inotify descriptor. +Records in excess of this limit are discarded, and a single event with +mask equal to +.Dv IN_Q_OVERFLOW +will be present in the queue. +.It Va vfs.inotify.max_user_instances +The maximum number of inotify descriptors that can be created by a single +user. +.It Va vfs.inotify.max_user_watches +The maximum number of inotify watches per user. +.El +.Sh EXAMPLES +See the example program in +.Pa /usr/share/examples/inotify/inotify.c . +.Sh ERRORS +The +.Fn inotify_init +and +.Fn inotify_init1 +functions will fail if: +.Bl -tag -width Er +.It Bq Er ENFILE +The system limit on the total number of open files has been reached. +.It Bq Er EMFILE +A per-process limit on the number of open files has been reached. +.It Bq Er EMFILE +The system limit on the number of inotify descriptors has been reached. +.It Bq Er EINVAL +An unrecognized flag was passed to +.Fn inotify_init1 .
+.El +.Pp +The +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at +system calls will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar mask +parameter does not specify an event, or +the +.Dv IN_MASK_CREATE +and +.Dv IN_MASK_ADD +flags are both set, or an unrecognized flag was passed. +.It Bq Er ENOTDIR +The +.Ar pathname +parameter refers to a file that is not a directory, and the +.Dv IN_ONLYDIR +flag was specified. +.It Bq Er ENOSPC +The per-user limit on the total number of inotify watches has been reached. +.It Bq Er ECAPMODE +The process is in capability mode and +.Fn inotify_add_watch +was called, or +.Fn inotify_add_watch_at +was called with +.Dv AT_FDCWD +as the directory file descriptor +.Ar dfd . +.It Bq Er ENOTCAPABLE +The process is in capability mode and +.Ar pathname +contains a +.Dq .. +component leading to a directory outside the directory hierarchy specified +by +.Ar dfd . +.El +.Pp +The +.Fn inotify_rm_watch +system call will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar wd +parameter is not a valid watch descriptor. +.El +.Sh SEE ALSO +.Xr kevent 2 , +.Xr capsicum 4 +.Sh STANDARDS +The +.Nm +interface originates from Linux and is non-standard. +This implementation aims to be compatible with that of Linux and is based +on the documentation available at +.Pa https://man7.org/linux/man-pages/man7/inotify.7.html . +.Sh HISTORY +The inotify system calls first appeared in +.Fx 15.0 . +.Sh BUGS +If a file in a watched directory has multiple hard links, +an access via any hard link for that file will generate an event, even +if the accessed link belongs to an unwatched directory. 
+This is not the case for the Linux implementation, where only accesses +via the hard link in the watched directory will generate an event. +.Pp +If a watched directory contains multiple hard links of a file, an event +on one of the hard links will generate an inotify record for each link +in the directory. +.Pp +When a file is unlinked, no more events will be generated for that file, +even if it continues to be accessed. +By default, the Linux implementation will continue to generate events in +this case. +Thus, the +.Fx +implementation behaves as though +.Dv IN_EXCL_UNLINK +is always set. diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2 index 8f8b9925c712..ee4e5b03d38e 100644 --- a/lib/libsys/jail.2 +++ b/lib/libsys/jail.2 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 29, 2023 +.Dd September 15, 2025 .Dt JAIL 2 .Os .Sh NAME @@ -31,7 +31,9 @@ .Nm jail_get , .Nm jail_set , .Nm jail_remove , -.Nm jail_attach +.Nm jail_attach , +.Nm jail_remove_jd , +.Nm jail_attach_jd .Nd create and manage system jails .Sh LIBRARY .Lb libc @@ -44,6 +46,10 @@ .Fn jail_attach "int jid" .Ft int .Fn jail_remove "int jid" +.Ft int +.Fn jail_attach_jd "int fd" +.Ft int +.Fn jail_remove_jd "int fd" .In sys/uio.h .Ft int .Fn jail_get "struct iovec *iov" "u_int niov" "int flags" @@ -188,6 +194,29 @@ system call. This is deprecated in .Fn jail_set and has no effect. +.It Dv JAIL_USE_DESC +Identify the jail by a descriptor in the +.Va desc +parameter. +.It Dv JAIL_AT_DESC +Operate in the context of the jail described by the +.Va desc +parameter, instead of the current jail. +Only one of +.Dv JAIL_USE_DESC +or +.Dv JAIL_AT_DESC +may be specified. +.It Dv JAIL_GET_DESC +Return a new jail descriptor for the jail in the +.Va desc +parameter. +.It Dv JAIL_OWN_DESC +Return an +.Dq owning +jail descriptor in the +.Va desc +parameter. 
.El .Pp The @@ -221,6 +250,9 @@ arguments consists of one or more following flags: .Bl -tag -width indent .It Dv JAIL_DYING Allow getting a jail that is in the process of being removed. +.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC +These have the same meaning as they do in +.Fn jail_set . .El .Pp The @@ -238,6 +270,76 @@ system call removes the jail identified by .Fa jid . It will kill all processes belonging to the jail, and remove any children of that jail. +.Pp +The +.Fn jail_attach_jd +and +.Fn jail_remove_jd +system calls work the same as +.Fn jail_attach +and +.Fn jail_remove , +except that they operate on the jail identified by jail descriptor +.Fa fd . +.Ss Jail Descriptors +In addition to the jail ID, +jails can be referred to using a jail descriptor, +a type of file descriptor tied to a particular jail. +Jail descriptors are created by calling +.Fn jail_set +or +.Fn jail_get +with the special parameter +.Va desc , +and either the +.Dv JAIL_GET_DESC +or +.Dv JAIL_OWN_DESC +flags set. +The difference between the two flags is that descriptors created with +.Dv JAIL_OWN_DESC +.Po +called +.Dq owning +descriptors +.Pc +will automatically remove the jail when the descriptor is closed. +.Pp +Jail descriptors can be passed back to +.Fn jail_set +or +.Fn jail_get +with the +.Va desc +parameter, +and either the +.Dv JAIL_USE_DESC +or +.Dv JAIL_AT_DESC +flags set. +With +.Dv JAIL_USE_DESC , +the descriptor identifies the jail to operate on, +instead of the +.Va jid +or +.Va name +parameter. +With +.Dv JAIL_AT_DESC , +the descriptor is used in place of the current jail, +allowing accessing or creating jails that are children of the +descriptor jail. +.Pp +The system calls +.Fn jail_attach_jd +and +.Fn jail_remove_jd +work the same as +.Fn jail_attach +and +.Fn jail_remove , +except that they operate on the jail referred to by the passed descriptor.
.Sh RETURN VALUES If successful, .Fn jail , @@ -249,7 +351,7 @@ They return \-1 on failure, and set .Va errno to indicate the error. .Pp -.Rv -std jail_attach jail_remove +.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd .Sh ERRORS The .Fn jail @@ -281,6 +383,13 @@ the super-user, or because it would exceed the jail's .Va children.max limit. .It Bq Er EPERM +The jail descriptor in the +.Va desc +parameter was created by a user other than the super-user, +and the +.Dv JAIL_USE_DESC +flag was set. +.It Bq Er EPERM A jail parameter was set to a less restrictive value then the current environment. .It Bq Er EFAULT @@ -298,8 +407,12 @@ flag is not set. .It Bq Er ENOENT The jail referred to by a .Va jid -is not accessible by the process, because the process is in a different -jail. +parameter is not accessible by the process, because the process is in a +different jail. +.It Bq Er ENOENT +The jail referred to by a +.Va desc +parameter has been removed. .It Bq Er EEXIST The jail referred to by a .Va jid @@ -326,6 +439,24 @@ flags is not set. A supplied string parameter is longer than allowed. .It Bq Er EAGAIN There are no jail IDs left. +.It Bq Er EMFILE +A jail descriptor could not be created for the +.Va desc +parameter with either the +.Dv JAIL_GET_DESC +or +.Dv JAIL_OWN_DESC +flag set, +because the process has already reached its limit for open file descriptors. +.It Bq Er ENFILE +A jail descriptor could not be created for the +.Va desc +parameter with either the +.Dv JAIL_GET_DESC +or +.Dv JAIL_OWN_DESC +flag set, +because the system file table is full. .El .Pp The @@ -333,10 +464,6 @@ The system call will fail if: .Bl -tag -width Er -.It Bq Er EFAULT -.Fa Iov , -or one of the addresses contained within it, -points to an address outside the allocated address space of the process. .It Bq Er ENOENT The jail referred to by a .Va jid @@ -352,10 +479,37 @@ jail. The .Va lastjid parameter is greater than the highest current jail ID. 
+.It Bq Er ENOENT +The jail referred to by a +.Va desc +parameter has been removed +.Pq even if the Dv JAIL_CREATE flag has been set . .It Bq Er EINVAL A supplied parameter is the wrong size. .It Bq Er EINVAL +A supplied parameter is out of range. +.It Bq Er EINVAL +A supplied string parameter is not null-terminated. +.It Bq Er EINVAL A supplied parameter name does not match any known parameters. +.It Bq Er EMFILE +A jail descriptor could not be created for the +.Va desc +parameter with either the +.Dv JAIL_GET_DESC +or +.Dv JAIL_OWN_DESC +flag set, +because the process has already reached its limit for open file descriptors. +.It Bq Er ENFILE +A jail descriptor could not be created for the +.Va desc +parameter with either the +.Dv JAIL_GET_DESC +or +.Dv JAIL_OWN_DESC +flag set, +because the system file table is full. .El .Pp The @@ -373,14 +527,37 @@ The jail specified by does not exist. .El .Pp +The +.Fn jail_attach_jd +and +.Fn jail_remove_jd +system calls +will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa fd +argument is not a valid jail descriptor. +.It Bq Er EPERM +The jail descriptor was created by a user other than the super-user. +.It Bq Er EINVAL +The jail specified by +.Fa fd +has been removed. +.El +.Pp Further .Fn jail , .Fn jail_set , +.Fn jail_attach , and -.Fn jail_attach +.Fn jail_attach_jd call .Xr chroot 2 internally, so they can fail for all the same reasons. +In particular, they return the +.Bq Er EPERM +error when the process attempting to join a jail has open directories. Please consult the .Xr chroot 2 manual page for details. diff --git a/lib/libsys/kqueue.2 b/lib/libsys/kqueue.2 index d6e949baa24c..a8ebabf02cf7 100644 --- a/lib/libsys/kqueue.2 +++ b/lib/libsys/kqueue.2 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE.
.\" -.Dd March 26, 2023 +.Dd September 12, 2025 .Dt KQUEUE 2 .Os .Sh NAME @@ -97,10 +97,37 @@ system call also creates a new kernel event queue, and additionally takes a .Fa flags argument, which is a bitwise-inclusive OR of the following flags: -.Bl -tag -width "KQUEUE_CLOEXEC" +.Bl -tag -width "KQUEUE_CPONFORK" .It Dv KQUEUE_CLOEXEC The returned file descriptor is automatically closed on .Xr execve 2 +.It Dv KQUEUE_CPONFORK +When this flag is set, the created kqueue is copied into +the child process on +.Xr fork 2 +calls. +The kqueue descriptor index of the new kqueue will be inherited by the child, +that is, the numeric value of the descriptor will remain the same. +.Pp +Copying is deep, that is, each registered event in the original kqueue is +copied (and not shared) into the new kqueue. +This is contrary to how other descriptor types are handled upon +.Xr fork 2 , +where the copied file descriptor references the same file object +as the source descriptor (shallow copy). +.Pp +By default, in other words, when the flag is not set, kqueues from +the parent are not copied on fork to the child process. +The corresponding file descriptor indices are unused in the child. +.Pp +Registered events that reference file descriptors which are not +duplicated on fork, are not copied into the new kqueue. +For instance, if the event references a file descriptor opened with the +.Dv O_CLOEXEC +flag set, it is not copied. +Similarly, if the event references a kqueue opened without the +.Dv KQUEUE_CPONFORK +flag, the event is not copied. .El .Pp The @@ -593,6 +620,64 @@ returns the number of times the signal has occurred since the last call to This filter automatically sets the .Dv EV_CLEAR flag internally. +.It Dv EVFILT_JAIL +Takes the jail ID to monitor as the identifier and the events to watch for +in +.Va fflags , +and returns when the jail performs one or more of the requested events. +If a process can normally see a jail, it can attach an event to it.
+An identifier of zero will watch the process's own jail. +The events to monitor are: +.Bl -tag -width "Dv NOTE_JAIL_ATTACH" +.It Dv NOTE_JAIL_SET +The jail has been changed via +.Xr jail_set 2 . +.It Dv NOTE_JAIL_ATTACH +A process has attached to the jail via +.Xr jail_attach 2 +or a similar call. +The process ID will be stored in +.Va data . +If more than one process has attached since the last call to +.Fn kevent , +.Va data +will be zero. +.It Dv NOTE_JAIL_REMOVE +The jail has been removed. +.It Dv NOTE_JAIL_CHILD +A child of the watched jail has been created. +Its jail ID will be stored in +.Va data . +If more than one jail has been created since the last call to +.Fn kevent , +.Va data +will be zero. +.El +.Pp +On return, +.Va fflags +contains the events which triggered the filter. +It will also contain +.Dv NOTE_JAIL_MULTI +if more than one +.Dv NOTE_JAIL_ATTACH +or +.Dv NOTE_JAIL_CHILD +event has been received since the last call to +.Fn kevent . +.It Dv EVFILT_JAILDESC +Takes a jail descriptor returned by +.Xr jail_set 2 +or +.Xr jail_get 2 +as the identifier and the events to watch for in +.Va fflags , +and returns when the jail performs one or more of the requested events. +The events to monitor and the resulting +.Va fflags +are the same as those listed in +.Dv EVFILT_JAIL , +above. .It Dv EVFILT_TIMER Establishes an arbitrary timer identified by .Va ident . diff --git a/lib/libsys/mkdir.2 b/lib/libsys/mkdir.2 index e1f1624cebc4..100f44d1dcf9 100644 --- a/lib/libsys/mkdir.2 +++ b/lib/libsys/mkdir.2 @@ -176,4 +176,4 @@ system call appeared in The .Fn mkdir system call appeared in -.At v1 . +.Bx 4.2 . diff --git a/lib/libsys/nanosleep.2 b/lib/libsys/nanosleep.2 index 8a4931e51413..290565dbd6e1 100644 --- a/lib/libsys/nanosleep.2 +++ b/lib/libsys/nanosleep.2 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd April 3, 2022 +.Dd May 3, 2025 .Dt NANOSLEEP 2 .Os .Sh NAME @@ -87,14 +87,6 @@ If, at the time of the call, the time value specified by is less than or equal to the time value of the specified clock, then .Fn clock_nanosleep returns immediately and the calling thread is not suspended. -.Pp -The suspension time may be longer than requested due to the -scheduling of other activity by the system. -It is also subject to the allowed time interval deviation -specified by the -.Va kern.timecounter.alloweddeviation -.Xr sysctl 8 -variable. An unmasked signal will terminate the sleep early, regardless of the .Dv SA_RESTART value on the interrupting signal. @@ -124,6 +116,8 @@ CLOCK_REALTIME_PRECISE .It CLOCK_SECOND .It +CLOCK_TAI +.It CLOCK_UPTIME .It CLOCK_UPTIME_FAST @@ -131,6 +125,32 @@ CLOCK_UPTIME_FAST CLOCK_UPTIME_PRECISE .El .Pp +The suspension time may be longer than requested due to the +scheduling of other activity by the system. +The clocks with the +.Dv _FAST +suffix and the +.Dv CLOCK_SECOND +are subject to the allowed time interval deviation specified by the +.Va kern.timecounter.alloweddeviation +.Xr sysctl 8 +variable. +The clocks with the +.Dv _PRECISE +suffix are always as precise as possible. +The +.Dv CLOCK_MONOTONIC , +.Dv CLOCK_REALTIME +and +.Dv CLOCK_UPTIME +are precise by default. +Setting the +.Va kern.timecounter.nanosleep_precise +.Xr sysctl 8 +to a false value makes those clocks behave like the +.Dv _FAST +clocks. +.Pp The .Fn nanosleep function behaves like @@ -217,3 +237,19 @@ and was ported to .Ox 2.1 and .Fx 3.0 . +The +.Fn clock_nanosleep +system call has been available since +.Fx 11.1 . +.Pp +In +.Fx 15.0 +the default behavior of +.Fn clock_nanosleep +with +.Dv CLOCK_MONOTONIC , +.Dv CLOCK_REALTIME , +.Dv CLOCK_UPTIME +clocks and +.Fn nanosleep +has been switched to use a precise clock.
diff --git a/lib/libsys/open.2 b/lib/libsys/open.2 index d8540637b690..a0e905a8f375 100644 --- a/lib/libsys/open.2 +++ b/lib/libsys/open.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd February 28, 2025 +.Dd May 17, 2025 .Dt OPEN 2 .Os .Sh NAME @@ -195,6 +195,9 @@ error if file is not a directory .It Dv O_CLOEXEC automatically close file on .Xr execve 2 +.It Dv O_CLOFORK +automatically close file in any child process created with +.Xr fork 2 .It Dv O_VERIFY verify the contents of the file with .Xr mac_veriexec 4 @@ -210,6 +213,8 @@ record only the target path in the opened descriptor open file referenced by .Fa fd if path is empty +.It Dv O_NAMEDATTR +open a named attribute or named attribute directory .El .Pp Exactly one of the flags @@ -358,6 +363,27 @@ may be used to set .Dv FD_CLOEXEC flag for the newly returned file descriptor. .Pp +.Dv O_CLOFORK +may be used to set +.Dv FD_CLOFORK +flag for the newly returned file descriptor. +The file will be closed in any child process created with +.Xr fork 2 , +.Xr vfork 2 +or +.Xr rfork 2 +with the +.Dv RFFDG +flag, remaining open in the parent. +Both the +.Dv O_CLOEXEC +and +.Dv O_CLOFORK +flags can be modified with the +.Dv F_SETFD +.Xr fcntl 2 +command. +.Pp .Dv O_VERIFY may be used to indicate to the kernel that the contents of the file should be verified before allowing the open to proceed. @@ -464,6 +490,13 @@ flag for .Xr fstatat 2 and related syscalls. .Pp +Conversely, a file descriptor +.Dv fd +referencing a filesystem file can be converted to the +.Dv O_PATH +type of descriptor by using the following call +.Dl opath_fd = openat(fd, \[dq]\[dq], O_EMPTY_PATH | O_PATH); +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. @@ -499,6 +532,42 @@ and the description of the .Dv O_CLOEXEC flag.
.Pp +When the +.Dv O_NAMEDATTR +flag is specified for an +.Fn openat +where the +.Fa fd +argument is for a file object, +a named attribute for the file object +is opened and not the file object itself. +If the +.Dv O_CREAT +flag has been specified as well, the named attribute will be +created if it does not exist. +When the +.Dv O_NAMEDATTR +flag is specified for a +.Fn open , +a named attribute for the current working directory is opened and +not the current working directory. +The +.Fa path +argument for this +.Fn openat +or +.Fn open +must be a single component name with no embedded +.Ql / . +If the +.Fa path +argument is +.Ql .\& +then the named attribute directory for the file object is opened. +(See +.Xr named_attribute 7 +for more information.) +.Pp The system imposes a limit on the number of file descriptors open simultaneously by one process. The @@ -730,6 +799,10 @@ contains a ".." component, the .Dv vfs.lookup_cap_dotdot .Xr sysctl 3 is set, and the process is in capability mode. +.It Bq Er ENOATTR +.Dv O_NAMEDATTR +has been specified and the file object is not a named attribute +directory or named attribute. .El .Sh SEE ALSO .Xr chmod 2 , @@ -745,7 +818,8 @@ is set, and the process is in capability mode. .Xr umask 2 , .Xr write 2 , .Xr fopen 3 , -.Xr capsicum 4 +.Xr capsicum 4 , +.Xr named_attribute 7 .Sh STANDARDS These functions are specified by .St -p1003.1-2008 . @@ -794,6 +868,11 @@ function was introduced in .Fx 8.0 . .Dv O_DSYNC appeared in 13.0. +.Dv O_NAMEDATTR +appeared in 15.0. +.Dv O_CLOFORK +appeared in +.Fx 15.0 . .Sh BUGS The .Fa mode diff --git a/lib/libsys/pathconf.2 b/lib/libsys/pathconf.2 index 786753f2d02a..5a983a3a13e2 100644 --- a/lib/libsys/pathconf.2 +++ b/lib/libsys/pathconf.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 6, 2021 +.Dd August 6, 2025 .Dt PATHCONF 2 .Os .Sh NAME @@ -175,6 +175,23 @@ hole size but still reports holes. 
Return 1 if a file system supports hole-punching (see .Xr fspacectl 2 ) , otherwise 0. +.It Li _PC_NAMEDATTR_ENABLED +Return 1 if named attributes are enabled for the file system, otherwise 0. +.It Li _PC_HAS_NAMEDATTR +Return 1 if one or more named attributes exist for the file, otherwise 0. +.It Li _PC_HAS_HIDDENSYSTEM +Return 1 if both +.Dv UF_HIDDEN +and +.Dv UF_SYSTEM +flags can be set by +.Xr chflags 2 , +otherwise 0. +.It Li _PC_CLONE_BLKSIZE +Returns the block size required for block cloning via +.Xr copy_file_range 2 +for a file system if block cloning is supported, +otherwise 0. .El .Sh RETURN VALUES If the call to @@ -251,6 +268,8 @@ An I/O error occurred while reading from or writing to the file system. Corrupted data was detected while reading from the file system. .El .Sh SEE ALSO +.Xr chflags 2 , +.Xr copy_file_range 2 , .Xr lseek 2 , .Xr sysctl 3 .Sh HISTORY diff --git a/lib/libsys/pipe.2 b/lib/libsys/pipe.2 index 9531c9717395..37d6eba420de 100644 --- a/lib/libsys/pipe.2 +++ b/lib/libsys/pipe.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 1, 2017 +.Dd May 17, 2025 .Dt PIPE 2 .Os .Sh NAME @@ -64,6 +64,8 @@ list, defined in .Bl -tag -width ".Dv O_NONBLOCK" .It Dv O_CLOEXEC Set the close-on-exec flag for the new file descriptors. +.It Dv O_CLOFORK +Set the close-on-fork flag for the new file descriptors. .It Dv O_NONBLOCK Set the non-blocking flag for the ends of the pipe. .El @@ -173,3 +175,8 @@ function became a wrapper around .Fn pipe2 in .Fx 11.0 . +.Pp +The +.Dv O_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/procctl.2 b/lib/libsys/procctl.2 index 75804ba243f1..dfb7931de265 100644 --- a/lib/libsys/procctl.2 +++ b/lib/libsys/procctl.2 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 14, 2024 +.Dd April 21, 2025 .Dt PROCCTL 2 .Os .Sh NAME @@ -133,17 +133,17 @@ dump. 
The .Va arg parameter must point to an integer variable holding one of the following values: -.Bl -tag -width PROC_LOGSIGEXIT_FORCE_DISABLE -.It Dv PROC_LOGSIGEXIT_FORCE_ENABLE +.Bl -tag -width PROC_LOGSIGEXIT_CTL_FORCE_DISABLE +.It Dv PROC_LOGSIGEXIT_CTL_FORCE_ENABLE Enables logging of exits due to signals that would normally cause a core dump. Logging is done via .Xr log 9 with a log level of .Dv LOG_INFO . -.It Dv PROC_LOGSIGEXIT_FORCE_DISABLE +.It Dv PROC_LOGSIGEXIT_CTL_FORCE_DISABLE Disables the logging of exits due to signals that would normally cause a core dump. -.It Dv PROC_LOGSIGEXIT_NOFORCE +.It Dv PROC_LOGSIGEXIT_CTL_NOFORCE The logging behavior is delegated to the .Xr sysctl 3 MIB variable @@ -155,10 +155,10 @@ The .Va arg parameter must point to an integer variable, where one of the following values is written: -.Bl -tag -width PROC_LOGSIGEXIT_FORCE_DISABLE -.It Dv PROC_LOGSIGEXIT_FORCE_ENABLE -.It Dv PROC_LOGSIGEXIT_FORCE_DISABLE -.It Dv PROC_LOGSIGEXIT_NOFORCE +.Bl -tag -width PROC_LOGSIGEXIT_CTL_FORCE_DISABLE +.It Dv PROC_LOGSIGEXIT_CTL_FORCE_ENABLE +.It Dv PROC_LOGSIGEXIT_CTL_FORCE_DISABLE +.It Dv PROC_LOGSIGEXIT_CTL_NOFORCE .El .It Dv PROC_PROTMAX_CTL Controls the maximum protection used for diff --git a/lib/libsys/ptrace.2 b/lib/libsys/ptrace.2 index 9b789a0e45b3..7aa24a3f820b 100644 --- a/lib/libsys/ptrace.2 +++ b/lib/libsys/ptrace.2 @@ -1,7 +1,7 @@ .\" $NetBSD: ptrace.2,v 1.2 1995/02/27 12:35:37 cgd Exp $ .\" .\" This file is in the public domain. -.Dd August 18, 2023 +.Dd June 19, 2025 .Dt PTRACE 2 .Os .Sh NAME @@ -473,6 +473,16 @@ This request is like PT_CONTINUE, except that it does not allow specifying an alternate place to continue execution, and after it succeeds, the traced process is no longer traced and continues execution normally. 
+.Pp +The parent of the traced process will be sent a +.Dv SIGCHLD +to indicate that the process has continued from a stopped state regardless of +whether the process was in a stopped state prior to the corresponding +.Dv PT_ATTACH +request. +A +.Xr wait 2 +for the traced process would indicate that it had been continued. .It Dv PT_GETREGS This request reads the traced process's machine registers into the .Do diff --git a/lib/libsys/recv.2 b/lib/libsys/recv.2 index f3ee60b75663..b78cd70b8a1d 100644 --- a/lib/libsys/recv.2 +++ b/lib/libsys/recv.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 30, 2022 +.Dd May 17, 2025 .Dt RECV 2 .Os .Sh NAME @@ -164,6 +164,7 @@ one or more of the values: .It Dv MSG_WAITALL Ta wait for full request or error .It Dv MSG_DONTWAIT Ta do not block .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec +.It Dv MSG_CMSG_CLOFORK Ta set received fds close-on-fork .It Dv MSG_WAITFORONE Ta do not block after receiving the first message (only for .Fn recvmmsg diff --git a/lib/libsys/sendfile.2 b/lib/libsys/sendfile.2 index 07a563d5ef82..6000e3e9828f 100644 --- a/lib/libsys/sendfile.2 +++ b/lib/libsys/sendfile.2 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 30, 2020 +.Dd June 24, 2025 .Dt SENDFILE 2 .Os .Sh NAME @@ -147,12 +147,6 @@ in a different context. .It Dv SF_NOCACHE The data sent to socket will not be cached by the virtual memory system, and will be freed directly to the pool of free pages. -.It Dv SF_SYNC -.Nm -sleeps until the network stack no longer references the VM pages -of the file, making subsequent modifications to it safe. -Please note that this is not a guarantee that the data has actually -been sent. .It Dv SF_USER_READAHEAD .Nm has some internal heuristics to do readahead when sending data. 
diff --git a/lib/libsys/setcred.2 b/lib/libsys/setcred.2 index a1b819d24c52..f5d1f15b631b 100644 --- a/lib/libsys/setcred.2 +++ b/lib/libsys/setcred.2 @@ -6,7 +6,7 @@ .\" This documentation was written by Olivier Certner <olce.freebsd@certner.fr> .\" at Kumacom SARL under sponsorship from the FreeBSD Foundation. .\" -.Dd December 19, 2024 +.Dd August 29, 2025 .Dt SETCRED 2 .Os .Sh NAME @@ -119,11 +119,6 @@ It must be less than or equal to An array of IDs to set the supplementary groups to, if flag .Dv SETCREDF_SUPP_GROUPS is specified. -Note that all groups in this array will be set as supplementary groups only, in -contrast to -.Xr setgroups 2 -which treats the first element specially as the new effective group, not adding -it to supplementary groups. .It Fa sc_label A pointer to a valid MAC label structure, e.g., built with the .Xr mac_from_text 3 @@ -248,7 +243,7 @@ does not. The .Fn setcred system call appeared in -.Fx 15.0 . +.Fx 14.3 . .Pp Traditionally in UNIX, all credential changes beyond shuffles of effective, real and saved IDs have been done by setuid binaries that successively call multiple diff --git a/lib/libsys/setgroups.2 b/lib/libsys/setgroups.2 index a226aeafea96..0ec99507cfb0 100644 --- a/lib/libsys/setgroups.2 +++ b/lib/libsys/setgroups.2 @@ -1,5 +1,13 @@ +.\"- +.\" SPDX-License-Identifier: BSD-3-Clause +.\" .\" Copyright (c) 1983, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. +.\" Copyright (c) 2025 The FreeBSD Foundation +.\" +.\" Portions of this documentation were written by Olivier Certner +.\" <olce@FreeBSD.org> at Kumacom SARL under sponsorship from the FreeBSD +.\" Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -25,12 +33,12 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd January 19, 2018 +.Dd September 17, 2025 .Dt SETGROUPS 2 .Os .Sh NAME .Nm setgroups -.Nd set group access list +.Nd set the calling process' supplementary groups .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -41,35 +49,21 @@ .Sh DESCRIPTION The .Fn setgroups -system call -sets the group access list of the current user process -according to the array -.Fa gidset . +system call sets the calling process' supplementary groups according to the +.Fa gidset +array. The .Fa ngroups -argument -indicates the number of entries in the array and must be no -more than -.Dv {NGROUPS_MAX}+1 . +argument indicates the number of entries in the array and must be no more than +.Dv {NGROUPS_MAX} . .Pp -Only the super-user may set a new group list. +The +.Fa ngroups +argument may be set to zero to clear all supplementary groups, in which case +.Fa gidset +is ignored. .Pp -The first entry of the group array -.Pq Va gidset[0] -is used as the effective group-ID for the process. -This entry is over-written when a setgid program is run. -To avoid losing access to the privileges of the -.Va gidset[0] -entry, it should be duplicated later in the group array. -By convention, -this happens because the group value indicated -in the password file also appears in -.Pa /etc/group . -The group value in the password file is placed in -.Va gidset[0] -and that value then gets added a second time when the -.Pa /etc/group -file is scanned to create the group set. +Only the super-user may install a new supplementary groups set. .Sh RETURN VALUES .Rv -std setgroups .Sh ERRORS @@ -83,19 +77,86 @@ The caller is not the super-user. The number specified in the .Fa ngroups argument is larger than the -.Dv {NGROUPS_MAX}+1 +.Dv {NGROUPS_MAX} limit. .It Bq Er EFAULT -The address specified for +Part of the groups array starting at .Fa gidset -is outside the process -address space. +is outside the process address space. 
.El .Sh SEE ALSO .Xr getgroups 2 , +.Xr setcred 2 , .Xr initgroups 3 .Sh HISTORY The .Fn setgroups system call appeared in .Bx 4.2 . +.Pp +Before +.Fx 15.0 , +the +.Fn setgroups +system call would set the effective group ID for the process to the first +element of +.Fa gidset , +and only the other elements as supplementary groups. +Despite treating the first element as the effective group ID to set, it accepted +an empty +.Fa gidset +.Po +.Fa ngroups +being zero +.Pc +as a request to drop all supplementary groups, leaving the effective +group ID unchanged. +.Sh SECURITY CONSIDERATIONS +The +.Fn setgroups +system call sets the process' supplementary groups to those contained in the +.Fa gidset +array. +In particular, as mentioned in +.Sx HISTORY , +it no longer treats the first element of +.Fa gidset +separately. +Formerly, it would set it as the effective group ID while only the others were +used as supplementary groups. +.Pp +Programs solely relying on +.Fn setgroups +to change the effective group ID must be modified, e.g., to also call +.Xr setegid 2 +or to instead use +.Xr setcred 2 , +else they will unintentionally keep their effective group ID. +.Pp +Programs using +.Fn setgroups +with the effective group ID as the first element of array +.Fa gidset +and not duplicating it in the rest of the array, which includes those using +.Fn initgroups , +now insert this group ID in the supplementary groups set. +This is in general desirable, as explained in the +.Xr initgroups 3 +manual page, and has the consequence that subsequent changes to the process' +effective group ID do not remove membership of the original effective group ID, since +these changes do not affect the supplementary groups. +Applications that expressly do not want that must be modified to stop passing +the effective group ID as the first element to +.Fn setgroups .
.Pp +To clear all the calling process' supplementary groups, always use the statement +.Bd -literal -offset indent +setgroups(0, NULL); +.Ed +.Pp +which works also on older FreeBSD versions +.Po +see the +.Sx HISTORY +section +.Pc . diff --git a/lib/libsys/shm_open.2 b/lib/libsys/shm_open.2 index 8bea939690ba..c3196d966e6b 100644 --- a/lib/libsys/shm_open.2 +++ b/lib/libsys/shm_open.2 @@ -26,7 +26,7 @@ .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 30, 2023 +.Dd August 4, 2025 .Dt SHM_OPEN 2 .Os .Sh NAME @@ -74,8 +74,9 @@ must be included in The optional flags .Dv O_CREAT , .Dv O_EXCL , +.Dv O_TRUNC , and -.Dv O_TRUNC +.Dv O_CLOFORK may also be specified. .Pp If diff --git a/lib/libsys/socket.2 b/lib/libsys/socket.2 index a383cbcc4d80..48b8f4e87489 100644 --- a/lib/libsys/socket.2 +++ b/lib/libsys/socket.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 15, 2023 +.Dd September 28, 2025 .Dt SOCKET 2 .Os .Sh NAME @@ -64,7 +64,7 @@ PF_NETGRAPH Netgraph sockets, PF_NETLINK Netlink protocols, PF_BLUETOOTH Bluetooth protocols, PF_INET_SDP OFED socket direct protocol (IPv4), -AF_HYPERV HyperV sockets +PF_HYPERV HyperV sockets .Ed .Pp Each protocol family is connected to an address family, which has the @@ -89,38 +89,13 @@ SOCK_RAW Raw-protocol interface, SOCK_SEQPACKET Sequenced packet stream .Ed .Pp -A -.Dv SOCK_STREAM -type provides sequenced, reliable, -two-way connection based byte streams. -An out-of-band data transmission mechanism may be supported. -A -.Dv SOCK_DGRAM -socket supports -datagrams (connectionless, unreliable messages of -a fixed (typically small) maximum length). -A -.Dv SOCK_SEQPACKET -socket may provide a sequenced, reliable, -two-way connection-based data transmission path for datagrams -of fixed maximum length; a consumer may be required to read -an entire packet with each read system call.
-This facility may have protocol-specific properties. -.Dv SOCK_RAW -sockets provide access to internal network protocols and interfaces. -The -.Dv SOCK_RAW -type is available only to the super-user and is described in -.Xr ip 4 -and -.Xr ip6 4 . -.Pp Additionally, the following flags are allowed in the .Fa type argument: .Pp .Bd -literal -offset indent -compact SOCK_CLOEXEC Set close-on-exec on the new descriptor, +SOCK_CLOFORK Set close-on-fork on the new descriptor, SOCK_NONBLOCK Set non-blocking mode on the new socket .Ed .Pp @@ -139,32 +114,23 @@ particular to the in which communication is to take place; see .Xr protocols 5 . -.Pp The .Fa protocol argument may be set to zero (0) to request the default implementation of a socket type for the protocol, if any. -.Pp -Sockets of type +.Sh STREAM SOCKET TYPE +The +.Dv SOCK_STREAM +socket type provides reliable, sequenced, full-duplex octet streams between +the socket and a peer to which the socket is connected. +A socket of type .Dv SOCK_STREAM -are full-duplex byte streams, similar -to pipes. -A stream socket must be in a +needs to be in a .Em connected -state before any data may be sent or received -on it. +state before any data can be sent or received. A connection to another socket is created with a .Xr connect 2 system call. -Once connected, data may be transferred using -.Xr read 2 -and -.Xr write 2 -calls or some variant of the -.Xr send 2 -and -.Xr recv 2 -functions. (Some protocol families, such as the Internet family, support the notion of an .Dq implied connect , @@ -172,62 +138,210 @@ which permits data to be sent piggybacked onto a connect operation by using the .Xr sendto 2 system call.) -When a session has been completed a -.Xr close 2 -may be performed. -Out-of-band data may also be transmitted as described in +Once connected, data may be sent using +.Xr send 2 , +.Xr sendto 2 , +.Xr sendmsg 2 +and +.Xr write 2 +system calls. 
+Data may be received using +.Xr recv 2 , +.Xr recvfrom 2 , +.Xr recvmsg 2 , +and +.Xr read 2 +system calls. +Record boundaries are not maintained; data sent on a stream socket using output +operations of one size can be received using input operations of smaller or +larger sizes without loss of data. +Data may be buffered; successful return from an output function does not imply +that the data has been delivered to the peer or even transmitted from the local +system. +For certain protocols out-of-band data may also be transmitted as described in .Xr send 2 and received as described in .Xr recv 2 . .Pp -The communications protocols used to implement a -.Dv SOCK_STREAM -ensure that data -is not lost or duplicated. -If a piece of data for which the -peer protocol has buffer space cannot be successfully transmitted -within a reasonable length of time, then -the connection is considered broken and calls -will indicate an error with --1 returns and with -.Er ETIMEDOUT -as the specific code -in the global variable -.Va errno . -The protocols optionally keep sockets -.Dq warm -by forcing transmissions -roughly every minute in the absence of other activity. -An error is then indicated if no response can be -elicited on an otherwise -idle connection for an extended period (e.g.\& 5 minutes). -By default, a +If data cannot be successfully transmitted within a given time then the +connection is considered broken, and subsequent operations shall fail with +a protocol specific error code. +A .Dv SIGPIPE -signal is raised if a process sends -on a broken stream, but this behavior may be inhibited via +signal is raised if a thread attempts to send data on a broken stream (one that +is no longer connected). +The signal can be suppressed by the +.Dv MSG_NOSIGNAL +flag with distinct +.Xr send 2 , +.Xr sendto 2 , +and +.Xr sendmsg 2 +system calls or by the +.Dv SO_NOSIGPIPE +socket option set on the socket with .Xr setsockopt 2 . 
.Pp -.Dv SOCK_SEQPACKET -sockets employ the same system calls -as +The .Dv SOCK_STREAM -sockets. -The only difference -is that -.Xr read 2 -calls will return only the amount of data requested, -and any remaining in the arriving packet will be discarded. +socket is supported by the following protocol families: +.Dv PF_INET , +.Dv PF_INET6 , +.Dv PF_UNIX , +.Dv PF_BLUETOOTH , +.Dv PF_HYPERV , +and +.Dv PF_INET_SDP . +Out-of-band data transmission mechanism is supported for stream sockets of +.Dv PF_INET +and +.Dv PF_INET6 +protocol families. +.Sh DATAGRAM SOCKET TYPE +The +.Dv SOCK_DGRAM +socket type supports connectionless data transfer which is not necessarily +acknowledged or reliable. +Datagrams can be sent to the address specified (possibly multicast or +broadcast) in each output operation, and incoming datagrams can be received +from multiple sources. +The source address of each datagram is available when receiving the datagram +with +.Xr recvfrom 2 +or +.Xr recvmsg 2 . +An application can also pre-specify a peer address with +.Xr sendto 2 +or +.Xr sendmsg 2 , +in which case calls to output functions that do not specify a peer address +shall send to the pre-specified peer. +If a peer has been specified, only datagrams from that peer shall be received. +A datagram shall be sent in a single output operation, and needs to be received +in a single input operation. +The maximum size of a datagram is protocol-specific. +Output datagrams may be buffered within the system; thus, a successful return +from an output function does not guarantee that a datagram is actually sent or +received. .Pp +The .Dv SOCK_DGRAM +socket is supported by the following protocol families: +.Dv PF_INET , +.Dv PF_INET6 , +.Dv PF_UNIX , +.Dv PF_NETGRAPH , and -.Dv SOCK_RAW -sockets allow sending of datagrams to correspondents -named in +.Dv PF_NETLINK . 
+.Sh SEQUENCED PACKET SOCKET TYPE +The +.Dv SOCK_SEQPACKET +socket type is similar to the +.Dv SOCK_STREAM +type, and is also connection-oriented. +The only difference between these types is that record boundaries are +maintained using the +.Dv SOCK_SEQPACKET +type. +A record can be sent using one or more output operations and received using one +or more input operations, but a single operation never transfers parts of more +than one record. +Record boundaries are set by the sender with the +.Dv MSG_EOR +flag of .Xr send 2 -calls. -Datagrams are generally received with +or +.Xr sendmsg 2 +functions. +There is no possibility to set a record boundary with +.Xr write 2 . +Record boundaries are visible to the receiver via the +.Dv MSG_EOR +flag in the received message flags returned by the +.Xr recvmsg 2 +function. +It is protocol-specific whether a maximum record size is imposed. +.Pp +The +.Dv SOCK_SEQPACKET +socket is supported by the following protocol families: +.Dv PF_INET , +.Dv PF_INET6 , +and +.Dv PF_UNIX . +.Pp +.Sh RAW SOCKET TYPE +The +.Dv SOCK_RAW +socket type provides access to internal network protocols and interfaces. +It is a datagram socket in its nature, thus has the same semantics of +read and write operations. +The +.Dv SOCK_RAW +type is available only to the super-user and is described in +.Xr ip 4 +and +.Xr ip6 4 . +.Sh NON-BLOCKING MODE +A socket can be created in +.Em non-blocking mode +with the help of +.Dv SOCK_NONBLOCK +flag. +Alternatively, the non-blocking mode on a socket can be turned on and off with +the help of the +.Dv O_NONBLOCK +flag of the +.Xr fcntl 2 +system call. +.Pp +When a non-blocking socket has not enough data in its receive buffer to fulfill +the application supplied buffer, then data receiving system calls like +.Xr recv 2 , .Xr recvfrom 2 , -which returns the next datagram with its return address. +.Xr recvmsg 2 +and +.Xr read 2 +will not block waiting for the data but immediately return. 
+Return value will indicate amount of bytes read into the supplied buffer. +The +.Va errno +will be set to +.Dv EAGAIN +.Po +has the same value as +.Dv EWOULDBLOCK +.Pc . +.Pp +If an application tries to send more data on a non-blocking socket than the socket +send buffer can accommodate with +.Xr send 2 , +.Xr sendto 2 , +.Xr sendmsg 2 +or +.Xr write 2 +system calls partial data will be sent. +Return value will indicate amount of bytes sent. +The +.Va errno +will be set to +.Dv EAGAIN . +Note that sockets of +.Dv SOCK_DGRAM +type are unreliable, thus for these sockets sending operations will never fail +with +.Dv EAGAIN +in non-blocking mode, nor will they block in blocking mode. +.Sh OTHER OPERATIONS ON SOCKETS +Since socket descriptors are file descriptors, many generic file operations +performed by +.Xr fcntl 2 , +apply. +Socket descriptors can be used with all event engines, such as +.Xr kevent 2 , +.Xr select 2 +and +.Xr poll 2 . .Pp An .Xr fcntl 2 @@ -249,6 +363,12 @@ The and .Xr getsockopt 2 system calls are used to set and get options, respectively. +.Pp +Connection associated with a socket can be terminated by +.Xr close 2 +system call. +One direction of communication can be disabled with +.Xr shutdown 2 . .Sh RETURN VALUES A -1 is returned if an error occurs, otherwise the return value is a descriptor referencing the socket. @@ -281,16 +401,23 @@ The socket type is not supported by the protocol. .Sh SEE ALSO .Xr accept 2 , .Xr bind 2 , +.Xr close 2 , .Xr connect 2 , +.Xr fcntl 2 , .Xr getpeername 2 , .Xr getsockname 2 , .Xr getsockopt 2 , .Xr ioctl 2 , +.Xr kevent 2 , .Xr listen 2 , +.Xr poll 2 , .Xr read 2 , .Xr recv 2 , .Xr select 2 , .Xr send 2 , +.Xr sendmsg 2 , +.Xr sendto 2 , +.Xr signal 3 , .Xr shutdown 2 , .Xr socketpair 2 , .Xr write 2 , @@ -331,7 +458,10 @@ argument of .Fn socket . The .Dv SOCK_CLOEXEC -flag is expected to conform to the next revision of the +and +.Dv SOCK_CLOFORK +flags are expected to conform to +.St -p1003.1-2024 . .Tn POSIX standard.
The @@ -347,3 +477,8 @@ The .Fn socket system call appeared in .Bx 4.2 . +.Pp +The +.Dv SOCK_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/socketpair.2 b/lib/libsys/socketpair.2 index 5874a0791f4d..60dec74f9cc2 100644 --- a/lib/libsys/socketpair.2 +++ b/lib/libsys/socketpair.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd February 10, 2018 +.Dd May 17, 2025 .Dt SOCKETPAIR 2 .Os .Sh NAME @@ -56,7 +56,8 @@ and The two sockets are indistinguishable. .Pp The -.Dv SOCK_CLOEXEC +.Dv SOCK_CLOEXEC , +.Dv SOCK_CLOFORK and .Dv SOCK_NONBLOCK flags in the diff --git a/lib/libsys/stat.2 b/lib/libsys/stat.2 index bd9005710147..8107740bd901 100644 --- a/lib/libsys/stat.2 +++ b/lib/libsys/stat.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 30, 2021 +.Dd August 17, 2025 .Dt STAT 2 .Os .Sh NAME @@ -169,6 +169,9 @@ Flags enabled for the file. See .Xr chflags 2 for the list of flags and their description. +.It Va st_rdev +Numeric ID of the device referenced by the file, if the file is a +character or block special; otherwise unspecified. .El .Pp The diff --git a/lib/libsys/statfs.2 b/lib/libsys/statfs.2 index b411b3b6ff46..ab65def11ebb 100644 --- a/lib/libsys/statfs.2 +++ b/lib/libsys/statfs.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 29, 2023 +.Dd July 20, 2025 .Dt STATFS 2 .Os .Sh NAME @@ -125,6 +125,9 @@ The file system resides locally. Mandatory Access Control (MAC) support for individual objects (see .Xr mac 4 ) . +.It Dv MNT_NAMEDATTR +The file system supports named attributes as described in +.Xr named_attribute 7 . .It Dv MNT_NFS4ACLS ACLs in NFSv4 variant are supported. .It Dv MNT_NOATIME @@ -260,7 +263,8 @@ each file or directory name or disk label .Pc . 
.Sh SEE ALSO .Xr fhstatfs 2 , -.Xr getfsstat 2 +.Xr getfsstat 2 , +.Xr named_attribute 7 .Sh HISTORY The .Fn statfs diff --git a/lib/libsys/symlink.2 b/lib/libsys/symlink.2 index 6892586f69f2..b5d878aaae50 100644 --- a/lib/libsys/symlink.2 +++ b/lib/libsys/symlink.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 30, 2020 +.Dd April 15, 2025 .Dt SYMLINK 2 .Os .Sh NAME @@ -92,6 +92,10 @@ or the entire length of either path name exceeded 1023 characters. A component of the .Fa name2 path prefix does not exist. +.It Bq Er EOPNOTSUPP +The file system containing the file named by +.Fa name2 +does not support symbolic links. .It Bq Er EACCES A component of the .Fa name2 diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map index 474df2cd8b1c..b5400b9849b3 100644 --- a/lib/libsys/syscalls.map +++ b/lib/libsys/syscalls.map @@ -7,7 +7,7 @@ FBSDprivate_1.0 { _syscall; __sys_syscall; - __sys_exit; + __sys__exit; _fork; __sys_fork; _read; @@ -117,10 +117,6 @@ FBSDprivate_1.0 { __sys_madvise; _mincore; __sys_mincore; - _getgroups; - __sys_getgroups; - _setgroups; - __sys_setgroups; _getpgrp; __sys_getpgrp; _setpgid; @@ -807,4 +803,18 @@ FBSDprivate_1.0 { __sys_fchroot; _setcred; __sys_setcred; + _exterrctl; + __sys_exterrctl; + _inotify_add_watch_at; + __sys_inotify_add_watch_at; + _inotify_rm_watch; + __sys_inotify_rm_watch; + _getgroups; + __sys_getgroups; + _setgroups; + __sys_setgroups; + _jail_attach_jd; + __sys_jail_attach_jd; + _jail_remove_jd; + __sys_jail_remove_jd; }; diff --git a/lib/libsys/thr_new.2 b/lib/libsys/thr_new.2 index c0bcc8bbc7c2..a04327723c34 100644 --- a/lib/libsys/thr_new.2 +++ b/lib/libsys/thr_new.2 @@ -133,6 +133,15 @@ The flag is not currently implemented. .It Dv THR_SYSTEM_SCOPE Create the system scope thread. The flag is not currently implemented. +.It Dv THR_C_RUNTIME +Indicate that the new thread is created by the C language runtime. 
+It has an architecture-specific meaning. +.Pp +On amd64, the flag requests that the specified +.Fa tls_base +is loaded into the +.Va %fsbase +register before calling a signal handler. .El .It Va rtp Real-time scheduling priority for the new thread. diff --git a/lib/libsys/timer_create.2 b/lib/libsys/timer_create.2 index e8489b390845..8f6ff2e27c51 100644 --- a/lib/libsys/timer_create.2 +++ b/lib/libsys/timer_create.2 @@ -126,7 +126,8 @@ the value of the timer ID. This implementation supports a .Fa clock_id of -.Dv CLOCK_REALTIME +.Dv CLOCK_REALTIME , +.Dv CLOCK_TAI , or .Dv CLOCK_MONOTONIC . .Pp diff --git a/lib/libsys/wait.2 b/lib/libsys/wait.2 index 3c649f3dfa77..eeddf77aeac7 100644 --- a/lib/libsys/wait.2 +++ b/lib/libsys/wait.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 27, 2024 +.Dd June 19, 2025 .Dt WAIT 2 .Os .Sh NAME @@ -273,6 +273,10 @@ Report the status of selected processes that have continued from a job control stop by receiving a .Dv SIGCONT signal. +.Xr ptrace 2 +can also cause a process to be continued, when a +.Dv PT_DETACH +request is issued to detach the debugger. .It Dv WNOHANG Do not block when there are no processes wishing to report status. @@ -450,7 +454,7 @@ value: .Bl -tag -width Ds .It Fn WIFCONTINUED status True if the process has not terminated, and -has continued after a job control stop. +has continued after a job control stop or detach of a debugger. This macro can be true only if the wait call specified the .Dv WCONTINUED option. diff --git a/lib/libsys/write.2 b/lib/libsys/write.2 index 5fea75150e3b..d2ff41ceead9 100644 --- a/lib/libsys/write.2 +++ b/lib/libsys/write.2 @@ -185,13 +185,6 @@ A signal interrupted the write before it could be completed. .It Bq Er EAGAIN The file was marked for non-blocking I/O, and no data could be written immediately. -.It Bq Er EROFS -An attempt was made to write over a disk label area at the beginning -of a slice.
-Use -.Xr disklabel 8 -.Fl W -to enable writing on the disk label area. .It Bq Er EINVAL The value .Fa nbytes @@ -202,6 +195,9 @@ is greater than if the sysctl .Va debug.iosize_max_clamp is non-zero). +.It Bq Er EINVAL +The file descriptor refers to a raw device, and the write +offset or size is not a multiple of the device's block size. .It Bq Er EINTEGRITY The backing store for .Fa fd |