diff options
Diffstat (limited to 'sys/compat/linux/linux_misc.c')
-rw-r--r-- | sys/compat/linux/linux_misc.c | 1259 |
1 files changed, 671 insertions, 588 deletions
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index a7502465efbf..9b8ab193f2bd 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -29,57 +29,38 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_compat.h" - #include <sys/param.h> -#include <sys/blist.h> #include <sys/fcntl.h> -#if defined(__i386__) -#include <sys/imgact_aout.h> -#endif #include <sys/jail.h> -#include <sys/kernel.h> +#include <sys/imgact.h> #include <sys/limits.h> #include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mman.h> -#include <sys/mount.h> #include <sys/msgbuf.h> #include <sys/mutex.h> -#include <sys/namei.h> #include <sys/poll.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/procctl.h> #include <sys/reboot.h> -#include <sys/racct.h> #include <sys/random.h> #include <sys/resourcevar.h> +#include <sys/rtprio.h> #include <sys/sched.h> -#include <sys/sdt.h> -#include <sys/signalvar.h> +#include <sys/smp.h> #include <sys/stat.h> #include <sys/syscallsubr.h> #include <sys/sysctl.h> +#include <sys/sysent.h> #include <sys/sysproto.h> -#include <sys/systm.h> #include <sys/time.h> #include <sys/vmmeter.h> #include <sys/vnode.h> -#include <sys/wait.h> -#include <sys/cpuset.h> -#include <sys/uio.h> +#include <security/audit/audit.h> #include <security/mac/mac_framework.h> -#include <vm/vm.h> #include <vm/pmap.h> -#include <vm/vm_kern.h> #include <vm/vm_map.h> -#include <vm/vm_extern.h> #include <vm/swap_pager.h> #ifdef COMPAT_LINUX32 @@ -94,10 +75,10 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_dtrace.h> #include <compat/linux/linux_file.h> #include <compat/linux/linux_mib.h> +#include <compat/linux/linux_mmap.h> #include <compat/linux/linux_signal.h> -#include <compat/linux/linux_timer.h> +#include <compat/linux/linux_time.h> #include <compat/linux/linux_util.h> -#include <compat/linux/linux_sysproto.h> #include <compat/linux/linux_emul.h> #include <compat/linux/linux_misc.h> @@ -205,7 +186,7 @@ linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; - int error; + int error __diagused; secs = args->secs; /* @@ -248,261 +229,6 @@ linux_brk(struct thread *td, struct linux_brk_args *args) return (0); } -#if defined(__i386__) -/* XXX: what about amd64/linux32? */ - -int -linux_uselib(struct thread *td, struct linux_uselib_args *args) -{ - struct nameidata ni; - struct vnode *vp; - struct exec *a_out; - vm_map_t map; - vm_map_entry_t entry; - struct vattr attr; - vm_offset_t vmaddr; - unsigned long file_offset; - unsigned long bss_size; - char *library; - ssize_t aresid; - int error; - bool locked, opened, textset; - - a_out = NULL; - vp = NULL; - locked = false; - textset = false; - opened = false; - - if (!LUSECONVPATH(td)) { - NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, - UIO_USERSPACE, args->library); - error = namei(&ni); - } else { - LCONVPATHEXIST(args->library, &library); - NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, - UIO_SYSSPACE, library); - error = namei(&ni); - LFREEPATH(library); - } - if (error) - goto cleanup; - - vp = ni.ni_vp; - NDFREE(&ni, NDF_ONLY_PNBUF); - - /* - * From here on down, we have a locked vnode that must be unlocked. - * XXX: The code below largely duplicates exec_check_permissions(). - */ - locked = true; - - /* Executable? */ - error = VOP_GETATTR(vp, &attr, td->td_ucred); - if (error) - goto cleanup; - - if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || - ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { - /* EACCESS is what exec(2) returns. */ - error = ENOEXEC; - goto cleanup; - } - - /* Sensible size? */ - if (attr.va_size == 0) { - error = ENOEXEC; - goto cleanup; - } - - /* Can we access it? */ - error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); - if (error) - goto cleanup; - - /* - * XXX: This should use vn_open() so that it is properly authorized, - * and to reduce code redundancy all over the place here. - * XXX: Not really, it duplicates far more of exec_check_permissions() - * than vn_open(). - */ -#ifdef MAC - error = mac_vnode_check_open(td->td_ucred, vp, VREAD); - if (error) - goto cleanup; -#endif - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); - if (error) - goto cleanup; - opened = true; - - /* Pull in executable header into exec_map */ - error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, - VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); - if (error) - goto cleanup; - - /* Is it a Linux binary ? */ - if (((a_out->a_magic >> 16) & 0xff) != 0x64) { - error = ENOEXEC; - goto cleanup; - } - - /* - * While we are here, we should REALLY do some more checks - */ - - /* Set file/virtual offset based on a.out variant. */ - switch ((int)(a_out->a_magic & 0xffff)) { - case 0413: /* ZMAGIC */ - file_offset = 1024; - break; - case 0314: /* QMAGIC */ - file_offset = 0; - break; - default: - error = ENOEXEC; - goto cleanup; - } - - bss_size = round_page(a_out->a_bss); - - /* Check various fields in header for validity/bounds. */ - if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { - error = ENOEXEC; - goto cleanup; - } - - /* text + data can't exceed file size */ - if (a_out->a_data + a_out->a_text > attr.va_size) { - error = EFAULT; - goto cleanup; - } - - /* - * text/data/bss must not exceed limits - * XXX - this is not complete. it should check current usage PLUS - * the resources needed by this library. - */ - PROC_LOCK(td->td_proc); - if (a_out->a_text > maxtsiz || - a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || - racct_set(td->td_proc, RACCT_DATA, a_out->a_data + - bss_size) != 0) { - PROC_UNLOCK(td->td_proc); - error = ENOMEM; - goto cleanup; - } - PROC_UNLOCK(td->td_proc); - - /* - * Prevent more writers. - */ - error = VOP_SET_TEXT(vp); - if (error != 0) - goto cleanup; - textset = true; - - /* - * Lock no longer needed - */ - locked = false; - VOP_UNLOCK(vp); - - /* - * Check if file_offset page aligned. Currently we cannot handle - * misalinged file offsets, and so we read in the entire image - * (what a waste). - */ - if (file_offset & PAGE_MASK) { - /* Map text+data read/write/execute */ - - /* a_entry is the load address and is page aligned */ - vmaddr = trunc_page(a_out->a_entry); - - /* get anon user mapping, read+write+execute */ - error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, - &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, - VM_PROT_ALL, VM_PROT_ALL, 0); - if (error) - goto cleanup; - - error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, - a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, - td->td_ucred, NOCRED, &aresid, td); - if (error != 0) - goto cleanup; - if (aresid != 0) { - error = ENOEXEC; - goto cleanup; - } - } else { - /* - * for QMAGIC, a_entry is 20 bytes beyond the load address - * to skip the executable header - */ - vmaddr = trunc_page(a_out->a_entry); - - /* - * Map it all into the process's space as a single - * copy-on-write "data" segment. - */ - map = &td->td_proc->p_vmspace->vm_map; - error = vm_mmap(map, &vmaddr, - a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, - MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); - if (error) - goto cleanup; - vm_map_lock(map); - if (!vm_map_lookup_entry(map, vmaddr, &entry)) { - vm_map_unlock(map); - error = EDOOFUS; - goto cleanup; - } - entry->eflags |= MAP_ENTRY_VN_EXEC; - vm_map_unlock(map); - textset = false; - } - - if (bss_size != 0) { - /* Calculate BSS start address */ - vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + - a_out->a_data; - - /* allocate some 'anon' space */ - error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, - &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, - VM_PROT_ALL, 0); - if (error) - goto cleanup; - } - -cleanup: - if (opened) { - if (locked) - VOP_UNLOCK(vp); - locked = false; - VOP_CLOSE(vp, FREAD, td->td_ucred, td); - } - if (textset) { - if (!locked) { - locked = true; - VOP_LOCK(vp, LK_SHARED | LK_RETRY); - } - VOP_UNSET_TEXT_CHECKED(vp); - } - if (locked) - VOP_UNLOCK(vp); - - /* Release the temporary mapping. */ - if (a_out) - kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); - - return (error); -} - -#endif /* __i386__ */ - #ifdef LINUX_LEGACY_SYSCALLS int linux_select(struct thread *td, struct linux_select_args *args) @@ -622,6 +348,39 @@ linux_msync(struct thread *td, struct linux_msync_args *args) args->fl & ~LINUX_MS_SYNC)); } +int +linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) +{ + + return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, + uap->prot)); +} + +int +linux_madvise(struct thread *td, struct linux_madvise_args *uap) +{ + + return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, + uap->behav)); +} + +int +linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) +{ +#if defined(LINUX_ARCHWANT_MMAP2PGOFF) + /* + * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is + * implemented with mmap2 syscall and the offset is represented in + * multiples of page size. + */ + return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, + uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); +#else + return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, + uap->flags, uap->fd, uap->pgoff)); +#endif +} + #ifdef LINUX_LEGACY_SYSCALLS int linux_time(struct thread *td, struct linux_time_args *args) @@ -656,7 +415,7 @@ struct l_times_argv { #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) -#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ +#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ CONVNTCK(r) : CONVOTCK(r)) int @@ -720,6 +479,11 @@ linux_newuname(struct thread *td, struct linux_newuname_args *args) * the string returned by getauxval(AT_PLATFORM) needs * to remain "i686", though. */ +#if defined(COMPAT_LINUX32) + if (linux32_emulate_i386) + strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); + else +#endif strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #elif defined(__aarch64__) strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); @@ -741,7 +505,6 @@ linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; - char *fname; int error; if (args->times) { @@ -755,16 +518,8 @@ linux_utime(struct thread *td, struct linux_utime_args *args) } else tvp = NULL; - if (!LUSECONVPATH(td)) { - error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, - tvp, UIO_SYSSPACE); - } else { - LCONVPATHEXIST(args->fname, &fname); - error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, - UIO_SYSSPACE); - LFREEPATH(fname); - } - return (error); + return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, + tvp, UIO_SYSSPACE)); } #endif @@ -774,7 +529,6 @@ linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; - char *fname; int error; if (args->tptr != NULL) { @@ -787,16 +541,8 @@ linux_utimes(struct thread *td, struct linux_utimes_args *args) tvp = tv; } - if (!LUSECONVPATH(td)) { - error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, - tvp, UIO_SYSSPACE); - } else { - LCONVPATHEXIST(args->fname, &fname); - error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, - tvp, UIO_SYSSPACE); - LFREEPATH(fname); - } - return (error); + return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, + tvp, UIO_SYSSPACE)); } #endif @@ -829,8 +575,7 @@ static int linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, struct timespec *timesp, int lflags) { - char *path = NULL; - int error, dfd, flags = 0; + int dfd, flags = 0; dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; @@ -851,27 +596,14 @@ linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, if (lflags & LINUX_AT_EMPTY_PATH) flags |= AT_EMPTY_PATH; - if (!LUSECONVPATH(td)) { - if (pathname != NULL) { - return (kern_utimensat(td, dfd, pathname, - UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); - } - } - if (pathname != NULL) - LCONVPATHEXIST_AT(pathname, &path, dfd); - else if (lflags != 0) - return (EINVAL); + return (kern_utimensat(td, dfd, pathname, + UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); - if (path == NULL) - error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); - else { - error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, - UIO_SYSSPACE, flags); - LFREEPATH(path); - } + if (lflags != 0) + return (EINVAL); - return (error); + return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); } int @@ -905,6 +637,9 @@ static int linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) { + /* Zero out the padding in compat mode. */ + l_times->tv_nsec &= 0xFFFFFFFFUL; + if (l_times->tv_nsec != LINUX_UTIME_OMIT && l_times->tv_nsec != LINUX_UTIME_NOW && (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) @@ -959,7 +694,6 @@ linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; - char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; @@ -974,49 +708,24 @@ linux_futimesat(struct thread *td, struct linux_futimesat_args *args) tvp = tv; } - if (!LUSECONVPATH(td)) { - error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, - tvp, UIO_SYSSPACE); - } else { - LCONVPATHEXIST_AT(args->filename, &fname, dfd); - error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, - tvp, UIO_SYSSPACE); - LFREEPATH(fname); - } - return (error); + return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, + tvp, UIO_SYSSPACE)); } #endif static int -linux_common_wait(struct thread *td, int pid, int *statusp, - int options, struct __wrusage *wrup) +linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, + int options, void *rup, l_siginfo_t *infop) { + l_siginfo_t lsi; siginfo_t siginfo; - idtype_t idtype; - id_t id; - int error, status, tmpstat; - - if (pid == WAIT_ANY) { - idtype = P_ALL; - id = 0; - } else if (pid < 0) { - idtype = P_PGID; - id = (id_t)-pid; - } else { - idtype = P_PID; - id = (id_t)pid; - } + struct __wrusage wru; + int error, status, tmpstat, sig; - /* - * For backward compatibility we implicitly add flags WEXITED - * and WTRAPPED here. - */ - options |= WEXITED | WTRAPPED; - error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); - if (error) - return (error); + error = kern_wait6(td, idtype, id, &status, options, + rup != NULL ? &wru : NULL, &siginfo); - if (statusp) { + if (error == 0 && statusp) { tmpstat = status & 0xffff; if (WIFSIGNALED(tmpstat)) { tmpstat = (tmpstat & 0xffffff80) | @@ -1035,6 +744,13 @@ linux_common_wait(struct thread *td, int pid, int *statusp, } error = copyout(&tmpstat, statusp, sizeof(int)); } + if (error == 0 && rup != NULL) + error = linux_copyout_rusage(&wru.wru_self, rup); + if (error == 0 && infop != NULL && td->td_retval[0] != 0) { + sig = bsd_to_linux_signal(siginfo.si_signo); + siginfo_to_lsiginfo(&siginfo, &lsi, sig); + error = copyout(&lsi, infop, sizeof(lsi)); + } return (error); } @@ -1043,12 +759,12 @@ linux_common_wait(struct thread *td, int pid, int *statusp, int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { - struct linux_wait4_args wait4_args; - - wait4_args.pid = args->pid; - wait4_args.status = args->status; - wait4_args.options = args->options; - wait4_args.rusage = NULL; + struct linux_wait4_args wait4_args = { + .pid = args->pid, + .status = args->status, + .options = args->options, + .rusage = NULL, + }; return (linux_wait4(td, &wait4_args)); } @@ -1057,46 +773,63 @@ linux_waitpid(struct thread *td, struct linux_waitpid_args *args) int linux_wait4(struct thread *td, struct linux_wait4_args *args) { - int error, options; - struct __wrusage wru, *wrup; + struct proc *p; + int options, id, idtype; if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); - options = WEXITED; + /* -INT_MIN is not defined. */ + if (args->pid == INT_MIN) + return (ESRCH); + + options = 0; linux_to_bsd_waitopts(args->options, &options); - if (args->rusage != NULL) - wrup = &wru; - else - wrup = NULL; - error = linux_common_wait(td, args->pid, args->status, options, wrup); - if (error != 0) - return (error); - if (args->rusage != NULL) - error = linux_copyout_rusage(&wru.wru_self, args->rusage); - return (error); + /* + * For backward compatibility we implicitly add flags WEXITED + * and WTRAPPED here. + */ + options |= WEXITED | WTRAPPED; + + if (args->pid == WAIT_ANY) { + idtype = P_ALL; + id = 0; + } else if (args->pid < 0) { + idtype = P_PGID; + id = (id_t)-args->pid; + } else if (args->pid == 0) { + idtype = P_PGID; + p = td->td_proc; + PROC_LOCK(p); + id = p->p_pgid; + PROC_UNLOCK(p); + } else { + idtype = P_PID; + id = (id_t)args->pid; + } + + return (linux_common_wait(td, idtype, id, args->status, options, + args->rusage, NULL)); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { - int status, options, sig; - struct __wrusage wru; - siginfo_t siginfo; - l_siginfo_t lsi; idtype_t idtype; - int error; + int error, options; + struct proc *p; + pid_t id; + + if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | + LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) + return (EINVAL); options = 0; linux_to_bsd_waitopts(args->options, &options); - if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) - return (EINVAL); - if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) - return (EINVAL); - + id = args->id; switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; @@ -1107,32 +840,24 @@ linux_waitid(struct thread *td, struct linux_waitid_args *args) idtype = P_PID; break; case LINUX_P_PGID: - if (args->id <= 0) + if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { + p = td->td_proc; + PROC_LOCK(p); + id = p->p_pgid; + PROC_UNLOCK(p); + } else if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; + case LINUX_P_PIDFD: + LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); + return (ENOSYS); default: return (EINVAL); } - error = kern_wait6(td, idtype, args->id, &status, options, - &wru, &siginfo); - if (error != 0) - return (error); - if (args->rusage != NULL) { - error = linux_copyout_rusage(&wru.wru_children, - args->rusage); - if (error != 0) - return (error); - } - if (args->info != NULL) { - bzero(&lsi, sizeof(lsi)); - if (td->td_retval[0] != 0) { - sig = bsd_to_linux_signal(siginfo.si_signo); - siginfo_to_lsiginfo(&siginfo, &lsi, sig); - } - error = copyout(&lsi, args->info, sizeof(lsi)); - } + error = linux_common_wait(td, idtype, id, NULL, options, + args->rusage, args->info); td->td_retval[0] = 0; return (error); @@ -1142,31 +867,19 @@ linux_waitid(struct thread *td, struct linux_waitid_args *args) int linux_mknod(struct thread *td, struct linux_mknod_args *args) { - char *path; int error; - enum uio_seg seg; - bool convpath; - - convpath = LUSECONVPATH(td); - if (!convpath) { - path = args->path; - seg = UIO_USERSPACE; - } else { - LCONVPATHCREAT(args->path, &path); - seg = UIO_SYSSPACE; - } switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: - error = kern_mkfifoat(td, AT_FDCWD, path, seg, + error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: - error = kern_mknodat(td, AT_FDCWD, path, seg, - args->mode, args->dev); + error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, + args->mode, linux_decode_dev(args->dev)); break; case S_IFDIR: @@ -1177,7 +890,7 @@ linux_mknod(struct thread *td, struct linux_mknod_args *args) args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: - error = kern_openat(td, AT_FDCWD, path, seg, + error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); @@ -1187,8 +900,6 @@ linux_mknod(struct thread *td, struct linux_mknod_args *args) error = EINVAL; break; } - if (convpath) - LFREEPATH(path); return (error); } #endif @@ -1196,32 +907,21 @@ linux_mknod(struct thread *td, struct linux_mknod_args *args) int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { - char *path; int error, dfd; - enum uio_seg seg; - bool convpath; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; - convpath = LUSECONVPATH(td); - if (!convpath) { - path = __DECONST(char *, args->filename); - seg = UIO_USERSPACE; - } else { - LCONVPATHCREAT_AT(args->filename, &path, dfd); - seg = UIO_SYSSPACE; - } - switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: - error = kern_mkfifoat(td, dfd, path, seg, args->mode); + error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, + args->mode); break; case S_IFCHR: case S_IFBLK: - error = kern_mknodat(td, dfd, path, seg, args->mode, - args->dev); + error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, + args->mode, linux_decode_dev(args->dev)); break; case S_IFDIR: @@ -1232,7 +932,7 @@ linux_mknodat(struct thread *td, struct linux_mknodat_args *args) args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: - error = kern_openat(td, dfd, path, seg, + error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); @@ -1242,8 +942,6 @@ linux_mknodat(struct thread *td, struct linux_mknodat_args *args) error = EINVAL; break; } - if (convpath) - LFREEPATH(path); return (error); } @@ -1427,16 +1125,16 @@ linux_getgroups(struct thread *td, struct linux_getgroups_args *args) } static bool -linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) +linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) { + ssize_t size; + int res, error; if (linux_dummy_rlimits == 0) return (false); switch (resource) { case LINUX_RLIMIT_LOCKS: - case LINUX_RLIMIT_SIGPENDING: - case LINUX_RLIMIT_MSGQUEUE: case LINUX_RLIMIT_RTTIME: rlim->rlim_cur = LINUX_RLIM_INFINITY; rlim->rlim_max = LINUX_RLIM_INFINITY; @@ -1446,6 +1144,23 @@ linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) rlim->rlim_cur = 0; rlim->rlim_max = 0; return (true); + case LINUX_RLIMIT_SIGPENDING: + error = kernel_sysctlbyname(td, + "kern.sigqueue.max_pending_per_proc", + &res, &size, 0, 0, 0, 0); + if (error != 0) + return (false); + rlim->rlim_cur = res; + rlim->rlim_max = res; + return (true); + case LINUX_RLIMIT_MSGQUEUE: + error = kernel_sysctlbyname(td, + "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); + if (error != 0) + return (false); + rlim->rlim_cur = res; + rlim->rlim_max = res; + return (true); default: return (false); } @@ -1483,7 +1198,7 @@ linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) struct rlimit bsd_rlim; u_int which; - if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { + if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); @@ -1524,7 +1239,7 @@ linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) struct rlimit bsd_rlim; u_int which; - if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { + if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); @@ -1808,13 +1523,6 @@ linux_getsid(struct thread *td, struct linux_getsid_args *args) } int -linux_nosys(struct thread *td, struct nosys_args *ignore) -{ - - return (ENOSYS); -} - -int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { int error; @@ -2238,23 +1946,29 @@ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { - int error; struct thread *tdt; - - if (args->len < sizeof(cpuset_t)) - return (EINVAL); + cpuset_t *mask; + size_t size; + int error; + id_t tid; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); - + tid = tdt->td_tid; PROC_UNLOCK(tdt->td_proc); + mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); + size = min(args->len, sizeof(cpuset_t)); error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, - tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); + tid, size, mask); + if (error == ERANGE) + error = EINVAL; + if (error == 0) + error = copyout(mask, args->user_mask_ptr, size); if (error == 0) - td->td_retval[0] = sizeof(cpuset_t); - + td->td_retval[0] = size; + free(mask, M_LINUX); return (error); } @@ -2266,18 +1980,34 @@ linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct thread *tdt; - - if (args->len < sizeof(cpuset_t)) - return (EINVAL); + cpuset_t *mask; + int cpu, error; + size_t len; + id_t tid; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); - + tid = tdt->td_tid; PROC_UNLOCK(tdt->td_proc); - return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, - tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); + len = min(args->len, sizeof(cpuset_t)); + mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); + error = copyin(args->user_mask_ptr, mask, len); + if (error != 0) + goto out; + /* Linux ignore high bits */ + CPU_FOREACH_ISSET(cpu, mask) + if (cpu > mp_maxid) + CPU_CLR(cpu, mask); + + error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, + tid, mask); + if (error == EDEADLK) + error = EINVAL; +out: + free(mask, M_TEMP); + return (error); } struct linux_rlimit64 { @@ -2296,7 +2026,7 @@ linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) int error; if (args->new == NULL && args->old != NULL) { - if (linux_get_dummy_limit(args->resource, &rlim)) { + if (linux_get_dummy_limit(td, args->resource, &rlim)) { lrlim.rlim_cur = rlim.rlim_cur; lrlim.rlim_max = rlim.rlim_max; return (copyout(&lrlim, args->old, sizeof(lrlim))); @@ -2362,15 +2092,11 @@ linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { - struct l_timespec lts; struct timespec ts, *tsp; int error; if (args->tsp != NULL) { - error = copyin(args->tsp, <s, sizeof(lts)); - if (error != 0) - return (error); - error = linux_to_native_timespec(&ts, <s); + error = linux_get_timespec(&ts, args->tsp); if (error != 0) return (error); tsp = &ts; @@ -2379,14 +2105,9 @@ linux_pselect6(struct thread *td, struct linux_pselect6_args *args) error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); - if (error != 0) - return (error); - if (args->tsp != NULL) { - error = native_to_linux_timespec(<s, tsp); - if (error == 0) - error = copyout(<s, args->tsp, sizeof(lts)); - } + if (args->tsp != NULL) + linux_put_timespec(&ts, args->tsp); return (error); } @@ -2397,7 +2118,6 @@ linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; - l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; @@ -2407,16 +2127,10 @@ linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, error = copyin(sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); - if (lpse6.ss_len != sizeof(l_ss)) - return (EINVAL); - if (lpse6.ss != 0) { - error = copyin(PTRIN(lpse6.ss), &l_ss, - sizeof(l_ss)); - if (error != 0) - return (error); - linux_to_bsd_sigset(&l_ss, &ss); - ssp = &ss; - } + error = linux_copyin_sigset(td, PTRIN(lpse6.ss), + lpse6.ss_len, &ss, &ssp); + if (error != 0) + return (error); } else ssp = NULL; @@ -2437,21 +2151,17 @@ linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, error = kern_pselect(td, nfds, readfds, writefds, exceptfds, tvp, ssp, LINUX_NFDBITS); - if (error == 0 && tsp != NULL) { - if (td->td_retval[0] != 0) { - /* - * Compute how much time was left of the timeout, - * by subtracting the current time and the time - * before we started the call, and subtracting - * that result from the user-supplied value. - */ - - microtime(&tv1); - timevalsub(&tv1, &tv0); - timevalsub(&utv, &tv1); - if (utv.tv_sec < 0) - timevalclear(&utv); - } else + if (tsp != NULL) { + /* + * Compute how much time was left of the timeout, + * by subtracting the current time and the time + * before we started the call, and subtracting + * that result from the user-supplied value. + */ + microtime(&tv1); + timevalsub(&tv1, &tv0); + timevalsub(&utv, &tv1); + if (utv.tv_sec < 0) timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, tsp); } @@ -2463,15 +2173,11 @@ int linux_pselect6_time64(struct thread *td, struct linux_pselect6_time64_args *args) { - struct l_timespec64 lts; struct timespec ts, *tsp; int error; if (args->tsp != NULL) { - error = copyin(args->tsp, <s, sizeof(lts)); - if (error != 0) - return (error); - error = linux_to_native_timespec64(&ts, <s); + error = linux_get_timespec64(&ts, args->tsp); if (error != 0) return (error); tsp = &ts; @@ -2480,14 +2186,9 @@ linux_pselect6_time64(struct thread *td, error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); - if (error != 0) - return (error); - if (args->tsp != NULL) { - error = native_to_linux_timespec64(<s, tsp); - if (error == 0) - error = copyout(<s, args->tsp, sizeof(lts)); - } + if (args->tsp != NULL) + linux_put_timespec64(&ts, args->tsp); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ @@ -2496,14 +2197,10 @@ int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec uts, *tsp; - struct l_timespec lts; int error; if (args->tsp != NULL) { - error = copyin(args->tsp, <s, sizeof(lts)); - if (error) - return (error); - error = linux_to_native_timespec(&uts, <s); + error = linux_get_timespec(&uts, args->tsp); if (error != 0) return (error); tsp = &uts; @@ -2512,13 +2209,8 @@ linux_ppoll(struct thread *td, struct linux_ppoll_args *args) error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); - if (error != 0) - return (error); - if (tsp != NULL) { - error = native_to_linux_timespec(<s, tsp); - if (error == 0) - error = copyout(<s, args->tsp, sizeof(lts)); - } + if (error == 0 && args->tsp != NULL) + error = linux_put_timespec(&uts, args->tsp); return (error); } @@ -2529,7 +2221,6 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, struct timespec ts0, ts1; struct pollfd stackfds[32]; struct pollfd *kfds; - l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; @@ -2537,13 +2228,9 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, if (kern_poll_maxfds(nfds)) return (EINVAL); if (sset != NULL) { - if (ssize != sizeof(l_ss)) - return (EINVAL); - error = copyin(sset, &l_ss, sizeof(l_ss)); - if (error) - return (error); - linux_to_bsd_sigset(&l_ss, &ss); - ssp = &ss; + error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); + if (error != 0) + return (error); } else ssp = NULL; if (tsp != NULL) @@ -2583,14 +2270,10 @@ int linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) { struct timespec uts, *tsp; - struct l_timespec64 lts; int error; if (args->tsp != NULL) { - error = copyin(args->tsp, <s, sizeof(lts)); - if (error != 0) - return (error); - error = linux_to_native_timespec64(&uts, <s); + error = linux_get_timespec64(&uts, args->tsp); if (error != 0) return (error); tsp = &uts; @@ -2598,13 +2281,8 @@ linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) tsp = NULL; error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); - if (error != 0) - return (error); - if (tsp != NULL) { - error = native_to_linux_timespec64(<s, tsp); - if (error == 0) - error = copyout(<s, args->tsp, sizeof(lts)); - } + if (error == 0 && args->tsp != NULL) + error = linux_put_timespec64(&uts, args->tsp); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ @@ -2651,12 +2329,10 @@ linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int return (0); } -int -linux_sched_rr_get_interval(struct thread *td, - struct linux_sched_rr_get_interval_args *uap) +static int +linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, + struct timespec *ts) { - struct timespec ts; - struct l_timespec lts; struct thread *tdt; int error; @@ -2664,22 +2340,45 @@ linux_sched_rr_get_interval(struct thread *td, * According to man in case the invalid pid specified * EINVAL should be returned. */ - if (uap->pid < 0) + if (pid < 0) return (EINVAL); - tdt = linux_tdfind(td, uap->pid, -1); + tdt = linux_tdfind(td, pid, -1); if (tdt == NULL) return (ESRCH); - error = kern_sched_rr_get_interval_td(td, tdt, &ts); + error = kern_sched_rr_get_interval_td(td, tdt, ts); PROC_UNLOCK(tdt->td_proc); + return (error); +} + +int +linux_sched_rr_get_interval(struct thread *td, + struct linux_sched_rr_get_interval_args *uap) +{ + struct timespec ts; + int error; + + error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); if (error != 0) return (error); - error = native_to_linux_timespec(<s, &ts); + return (linux_put_timespec(&ts, uap->interval)); +} + +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) +int +linux_sched_rr_get_interval_time64(struct thread *td, + struct linux_sched_rr_get_interval_time64_args *uap) +{ + struct timespec ts; + int error; + + error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); if (error != 0) return (error); - return (copyout(<s, uap->interval, sizeof(lts))); + return (linux_put_timespec64(&ts, uap->interval)); } +#endif /* * In case when the Linux thread is the initial thread in @@ -2695,34 +2394,34 @@ linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) tdt = NULL; if (tid == 0 || tid == td->td_tid) { - tdt = td; - PROC_LOCK(tdt->td_proc); + if (pid != -1 && td->td_proc->p_pid != pid) + return (NULL); + PROC_LOCK(td->td_proc); + return (td); } else if (tid > PID_MAX) - tdt = tdfind(tid, pid); - else { - /* - * Initial thread where the tid equal to the pid. - */ - p = pfind(tid); - if (p != NULL) { - if (SV_PROC_ABI(p) != SV_ABI_LINUX) { - /* - * p is not a Linuxulator process. - */ - PROC_UNLOCK(p); - return (NULL); - } - FOREACH_THREAD_IN_PROC(p, tdt) { - em = em_find(tdt); - if (tid == em->em_tid) - return (tdt); - } + return (tdfind(tid, pid)); + + /* + * Initial thread where the tid equal to the pid. + */ + p = pfind(tid); + if (p != NULL) { + if (SV_PROC_ABI(p) != SV_ABI_LINUX || + (pid != -1 && tid != pid)) { + /* + * p is not a Linuxulator process. + */ PROC_UNLOCK(p); + return (NULL); + } + FOREACH_THREAD_IN_PROC(p, tdt) { + em = em_find(tdt); + if (tid == em->em_tid) + return (tdt); } - return (NULL); + PROC_UNLOCK(p); } - - return (tdt); + return (NULL); } void @@ -2880,3 +2579,387 @@ linux_poll(struct thread *td, struct linux_poll_args *args) tsp, NULL, 0)); } #endif /* __i386__ || __amd64__ */ + +int +linux_seccomp(struct thread *td, struct linux_seccomp_args *args) +{ + + switch (args->op) { + case LINUX_SECCOMP_GET_ACTION_AVAIL: + return (EOPNOTSUPP); + default: + /* + * Ignore unknown operations, just like Linux kernel built + * without CONFIG_SECCOMP. + */ + return (EINVAL); + } +} + +/* + * Custom version of exec_copyin_args(), to copy out argument and environment + * strings from the old process address space into the temporary string buffer. + * Based on freebsd32_exec_copyin_args. + */ +static int +linux_exec_copyin_args(struct image_args *args, const char *fname, + enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv) +{ + char *argp, *envp; + l_uintptr_t *ptr, arg; + int error; + + bzero(args, sizeof(*args)); + if (argv == NULL) + return (EFAULT); + + /* + * Allocate demand-paged memory for the file name, argument, and + * environment strings. + */ + error = exec_alloc_args(args); + if (error != 0) + return (error); + + /* + * Copy the file name. + */ + error = exec_args_add_fname(args, fname, segflg); + if (error != 0) + goto err_exit; + + /* + * extract arguments first + */ + ptr = argv; + for (;;) { + error = copyin(ptr++, &arg, sizeof(arg)); + if (error) + goto err_exit; + if (arg == 0) + break; + argp = PTRIN(arg); + error = exec_args_add_arg(args, argp, UIO_USERSPACE); + if (error != 0) + goto err_exit; + } + + /* + * This comment is from Linux do_execveat_common: + * When argv is empty, add an empty string ("") as argv[0] to + * ensure confused userspace programs that start processing + * from argv[1] won't end up walking envp. + */ + if (args->argc == 0 && + (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) + goto err_exit; + + /* + * extract environment strings + */ + if (envv) { + ptr = envv; + for (;;) { + error = copyin(ptr++, &arg, sizeof(arg)); + if (error) + goto err_exit; + if (arg == 0) + break; + envp = PTRIN(arg); + error = exec_args_add_env(args, envp, UIO_USERSPACE); + if (error != 0) + goto err_exit; + } + } + + return (0); + +err_exit: + exec_free_args(args); + return (error); +} + +int +linux_execve(struct thread *td, struct linux_execve_args *args) +{ + struct image_args eargs; + int error; + + LINUX_CTR(execve); + + error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE, + args->argp, args->envp); + if (error == 0) + error = linux_common_execve(td, &eargs); + AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); + return (error); +} + +static void +linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) +{ + struct rtprio rtp2; + + pri_to_rtp(td1, &rtp2); + if (rtp2.type < rtp->type || + (rtp2.type == rtp->type && + rtp2.prio < rtp->prio)) { + rtp->type = rtp2.type; + rtp->prio = rtp2.prio; + } +} + +#define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX + +static int +linux_rtprio2ioprio(struct rtprio *rtp) +{ + int ioprio, prio; + + switch (rtp->type) { + case RTP_PRIO_IDLE: + prio = RTP_PRIO_MIN; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); + break; + case RTP_PRIO_NORMAL: + prio = rtp->prio / LINUX_PRIO_DIVIDER; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); + break; + case RTP_PRIO_REALTIME: + prio = rtp->prio / LINUX_PRIO_DIVIDER; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); + break; + default: + prio = RTP_PRIO_MIN; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); + break; + } + return (ioprio); +} + +static int +linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) +{ + + switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { + case LINUX_IOPRIO_CLASS_IDLE: + rtp->prio = RTP_PRIO_MIN; + rtp->type = RTP_PRIO_IDLE; + break; + case LINUX_IOPRIO_CLASS_BE: + rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; + rtp->type = RTP_PRIO_NORMAL; + break; + case LINUX_IOPRIO_CLASS_RT: + rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; + rtp->type = RTP_PRIO_REALTIME; + break; + default: + return (EINVAL); + } + return (0); +} +#undef LINUX_PRIO_DIVIDER + +int +linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) +{ + struct thread *td1; + struct rtprio rtp; + struct pgrp *pg; + struct proc *p; + int error, found; + + p = NULL; + td1 = NULL; + error = 0; + found = 0; + rtp.type = RTP_PRIO_IDLE; + rtp.prio = RTP_PRIO_MAX; + switch (args->which) { + case LINUX_IOPRIO_WHO_PROCESS: + if (args->who == 0) { + td1 = td; + p = td1->td_proc; + PROC_LOCK(p); + } else if (args->who > PID_MAX) { + td1 = linux_tdfind(td, args->who, -1); + if (td1 != NULL) + p = td1->td_proc; + } else + p = pfind(args->who); + if (p == NULL) + return (ESRCH); + if ((error = p_cansee(td, p))) { + PROC_UNLOCK(p); + break; + } + if (td1 != NULL) { + pri_to_rtp(td1, &rtp); + } else { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + } + } + found++; + PROC_UNLOCK(p); + break; + case LINUX_IOPRIO_WHO_PGRP: + sx_slock(&proctree_lock); + if (args->who == 0) { + pg = td->td_proc->p_pgrp; + PGRP_LOCK(pg); + } else { + pg = pgfind(args->who); + if (pg == NULL) { + sx_sunlock(&proctree_lock); + error = ESRCH; + break; + } + } + sx_sunlock(&proctree_lock); + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p_cansee(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + found++; + } + } + PROC_UNLOCK(p); + } + PGRP_UNLOCK(pg); + break; + case LINUX_IOPRIO_WHO_USER: + if (args->who == 0) + args->who = td->td_ucred->cr_uid; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p->p_ucred->cr_uid == args->who && + p_cansee(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + found++; + } + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + break; + default: + error = EINVAL; + break; + } + if (error == 0) { + if (found != 0) + td->td_retval[0] = linux_rtprio2ioprio(&rtp); + else + error = ESRCH; + } + return (error); +} + +int +linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) +{ + struct thread *td1; + struct rtprio rtp; + struct pgrp *pg; + struct proc *p; + int error; + + if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) + return (error); + /* Attempts to set high priorities (REALTIME) require su privileges. */ + if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && + (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) + return (error); + + p = NULL; + td1 = NULL; + switch (args->which) { + case LINUX_IOPRIO_WHO_PROCESS: + if (args->who == 0) { + td1 = td; + p = td1->td_proc; + PROC_LOCK(p); + } else if (args->who > PID_MAX) { + td1 = linux_tdfind(td, args->who, -1); + if (td1 != NULL) + p = td1->td_proc; + } else + p = pfind(args->who); + if (p == NULL) + return (ESRCH); + if ((error = p_cansched(td, p))) { + PROC_UNLOCK(p); + break; + } + if (td1 != NULL) { + error = rtp_to_pri(&rtp, td1); + } else { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + break; + case LINUX_IOPRIO_WHO_PGRP: + sx_slock(&proctree_lock); + if (args->who == 0) { + pg = td->td_proc->p_pgrp; + PGRP_LOCK(pg); + } else { + pg = pgfind(args->who); + if (pg == NULL) { + sx_sunlock(&proctree_lock); + error = ESRCH; + break; + } + } + sx_sunlock(&proctree_lock); + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p_cansched(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + if (error != 0) + break; + } + PGRP_UNLOCK(pg); + break; + case LINUX_IOPRIO_WHO_USER: + if (args->who == 0) + args->who = td->td_ucred->cr_uid; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p->p_ucred->cr_uid == args->who && + p_cansched(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + if (error != 0) + break; + } + sx_sunlock(&allproc_lock); + break; + default: + error = EINVAL; + break; + } + return (error); +} |