diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/linux32/linux.h | 105 | ||||
-rw-r--r-- | sys/amd64/linux32/linux32_dummy.c | 6 | ||||
-rw-r--r-- | sys/amd64/linux32/linux32_machdep.c | 18 | ||||
-rw-r--r-- | sys/amd64/linux32/linux32_sysvec.c | 2 | ||||
-rw-r--r-- | sys/compat/linux/linux_misc.c | 98 | ||||
-rw-r--r-- | sys/compat/linux/linux_signal.c | 58 | ||||
-rw-r--r-- | sys/compat/linux/linux_util.c | 9 | ||||
-rw-r--r-- | sys/conf/files.amd64 | 3 | ||||
-rw-r--r-- | sys/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/linux/linux.h | 99 | ||||
-rw-r--r-- | sys/i386/linux/linux_dummy.c | 19 | ||||
-rw-r--r-- | sys/i386/linux/linux_machdep.c | 381 | ||||
-rw-r--r-- | sys/i386/linux/linux_sysvec.c | 33 |
13 files changed, 768 insertions, 66 deletions
diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h index c5715974a01d..138871f35889 100644 --- a/sys/amd64/linux32/linux.h +++ b/sys/amd64/linux32/linux.h @@ -34,6 +34,10 @@ #define _AMD64_LINUX_LINUX_H_ #include <sys/signal.h> /* for sigval union */ +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> #include <amd64/linux32/linux32_syscall.h> @@ -495,6 +499,7 @@ struct l_rt_sigframe { extern int bsd_to_linux_signal[]; extern int linux_to_bsd_signal[]; +extern struct sysentvec elf_linux_sysvec; /* * Pluggable ioctl handlers @@ -527,6 +532,11 @@ int linux_ioctl_unregister_handler(struct linux_ioctl_handler *h); #define LINUX_O_NDELAY LINUX_O_NONBLOCK #define LINUX_O_SYNC 010000 #define LINUX_FASYNC 020000 +#define LINUX_O_DIRECT 040000 /* direct disk access hint */ +#define LINUX_O_LARGEFILE 0100000 +#define LINUX_O_DIRECTORY 0200000 /* must be a directory */ +#define LINUX_O_NOFOLLOW 0400000 /* don't follow links */ +#define LINUX_O_NOATIME 01000000 #define LINUX_F_DUPFD 0 #define LINUX_F_GETFD 1 @@ -737,4 +747,99 @@ struct l_pollfd { l_short revents; } __packed; +struct l_user_desc { + l_uint entry_number; + l_uint base_addr; + l_uint limit; + l_uint seg_32bit:1; + l_uint contents:2; + l_uint read_exec_only:1; + l_uint limit_in_pages:1; + l_uint seg_not_present:1; + l_uint useable:1; +}; + +struct l_desc_struct { + unsigned long a,b; +}; + + +#define LINUX_LOWERWORD 0x0000ffff + +/* macros which does the same thing as those in linux include/asm-um/ldt-i386.h + * these convert linux user-space descriptor to machine one + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & LINUX_LOWERWORD) << 16) | ((info)->limit & LINUX_LOWERWORD)) + +#define ENTRY_B_READ_EXEC_ONLY 9 +#define ENTRY_B_CONTENTS 10 +#define ENTRY_B_SEG_NOT_PRESENT 15 +#define ENTRY_B_BASE_ADDR 16 +#define ENTRY_B_USEABLE 20 +#define ENTRY_B_SEG32BIT 22 +#define ENTRY_B_LIMIT 23 + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + ((info)->limit & 0xf0000) | \ + ((info)->contents << ENTRY_B_CONTENTS) | \ + (((info)->seg_not_present == 0) << ENTRY_B_SEG_NOT_PRESENT) | \ + (((info)->base_addr & 0x00ff0000) >> ENTRY_B_BASE_ADDR) | \ + (((info)->read_exec_only == 0) << ENTRY_B_READ_EXEC_ONLY) | \ + ((info)->seg_32bit << ENTRY_B_SEG32BIT) | \ + ((info)->useable << ENTRY_B_USEABLE) | \ + ((info)->limit_in_pages << ENTRY_B_LIMIT) | 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->useable == 0 ) + +/* macros for converting segments, they do the same as those in arch/i386/kernel/process.c */ +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & LINUX_LOWERWORD) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & LINUX_LOWERWORD) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> ENTRY_B_SEG32BIT) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> ENTRY_B_CONTENTS) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> ENTRY_B_READ_EXEC_ONLY) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> ENTRY_B_LIMIT) & 1) +#define GET_PRESENT(desc) (((desc)->b >> ENTRY_B_SEG_NOT_PRESENT) & 1) +#define GET_USEABLE(desc) (((desc)->b >> ENTRY_B_USEABLE) & 1) + +#define LINUX_CLOCK_REALTIME 0 +#define LINUX_CLOCK_MONOTONIC 1 +#define LINUX_CLOCK_PROCESS_CPUTIME_ID 2 +#define LINUX_CLOCK_THREAD_CPUTIME_ID 3 +#define LINUX_CLOCK_REALTIME_HR 4 +#define LINUX_CLOCK_MONOTONIC_HR 5 + +typedef int l_timer_t; +typedef int l_mqd_t; + +#define CLONE_VM 0x100 +#define CLONE_FS 0x200 +#define CLONE_FILES 0x400 +#define CLONE_SIGHAND 0x800 +#define CLONE_PID 0x1000 /* this flag does not exist in linux anymore */ +#define CLONE_PARENT 0x00008000 +#define CLONE_THREAD 0x10000 +#define CLONE_SETTLS 0x80000 +#define CLONE_CHILD_CLEARTID 0x00200000 +#define CLONE_CHILD_SETTID 0x01000000 +#define CLONE_PARENT_SETTID 0x00100000 + +#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND) + #endif /* !_AMD64_LINUX_LINUX_H_ */ diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c index 33d81990af55..81be23fc6042 100644 --- a/sys/amd64/linux32/linux32_dummy.c +++ b/sys/amd64/linux32/linux32_dummy.c @@ -72,19 +72,13 @@ DUMMY(epoll_create); DUMMY(epoll_ctl); DUMMY(epoll_wait); DUMMY(remap_file_pages); -DUMMY(set_tid_address); DUMMY(timer_create); DUMMY(timer_settime); DUMMY(timer_gettime); DUMMY(timer_getoverrun); DUMMY(timer_delete); -DUMMY(clock_settime); -DUMMY(clock_gettime); -DUMMY(clock_getres); -DUMMY(clock_nanosleep); DUMMY(statfs64); DUMMY(fstatfs64); -DUMMY(tgkill); DUMMY(utimes); DUMMY(fadvise64_64); DUMMY(mbind); diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index 83ebf53aafad..dd795a4acacb 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/systm.h> #include <sys/imgact.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mman.h> @@ -472,12 +473,6 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) return (0); } -#define CLONE_VM 0x100 -#define CLONE_FS 0x200 -#define CLONE_FILES 0x400 -#define CLONE_SIGHAND 0x800 -#define CLONE_PID 0x1000 - int linux_clone(struct thread *td, struct linux_clone_args *args) { @@ -491,14 +486,9 @@ linux_clone(struct thread *td, struct linux_clone_args *args) printf(ARGS(clone, "flags %x, stack %x"), (unsigned int)(uintptr_t)args->flags, (unsigned int)(uintptr_t)args->stack); - if (args->flags & CLONE_PID) - printf(LMSG("CLONE_PID not yet supported")); } #endif - if (!args->stack) - return (EINVAL); - exit_signal = args->flags & 0x000000ff; if (exit_signal >= LINUX_NSIG) return (EINVAL); @@ -522,7 +512,11 @@ linux_clone(struct thread *td, struct linux_clone_args *args) p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); td2 = FIRST_THREAD_IN_PROC(p2); - td2->td_frame->tf_rsp = PTROUT(args->stack); + /* in a case of stack = NULL we are supposed to COW calling process stack + * this is what normal fork() does so we just keep the tf_rsp arg intact + */ + if (args->stack) + td2->td_frame->tf_rsp = PTROUT(args->stack); #ifdef DEBUG if (ldebug(clone)) diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index 123c3f6522ff..4e5da2eacb81 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -993,7 +993,7 @@ linux32_fixlimits(struct proc *p) struct sysentvec elf_linux_sysvec = { LINUX_SYS_MAXSYSCALL, linux_sysent, - 0xff, + 0, LINUX_SIGTBLSZ, bsd_to_linux_signal, ELAST + 1, diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index e85fec31ab35..848c11493046 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$"); #include <posix4/sched.h> #include <compat/linux/linux_sysproto.h> +#include <compat/linux/linux_emul.h> #ifdef COMPAT_LINUX32 #include <machine/../linux32/linux.h> @@ -93,6 +94,9 @@ __FBSDID("$FreeBSD$"); #define BSD_TO_LINUX_SIGNAL(sig) \ (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) +extern struct sx emul_shared_lock; +extern struct sx emul_lock; + static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, @@ -1330,11 +1334,69 @@ linux_reboot(struct thread *td, struct linux_reboot_args *args) int linux_getpid(struct thread *td, struct linux_getpid_args *args) { + struct linux_emuldata *em; + + em = em_find(td->td_proc, EMUL_UNLOCKED); + + KASSERT(em != NULL, ("getpid: emuldata not found.\n")); + + td->td_retval[0] = em->shared->group_pid; + EMUL_UNLOCK(&emul_lock); + return (0); +} + +int +linux_gettid(struct thread *td, struct linux_gettid_args *args) +{ +#ifdef DEBUG + if (ldebug(gettid)) + printf(ARGS(gettid, "")); +#endif td->td_retval[0] = td->td_proc->p_pid; return (0); } + +int +linux_getppid(struct thread *td, struct linux_getppid_args *args) +{ + struct linux_emuldata *em; + struct proc *p, *pp; + + em = em_find(td->td_proc, EMUL_UNLOCKED); + + KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); + + /* find the group leader */ + p = pfind(em->shared->group_pid); + + if (p == NULL) { +#ifdef DEBUG + printf(LMSG("parent process not found.\n")); +#endif + return (0); + } + + pp = p->p_pptr; /* switch to parent */ + PROC_LOCK(pp); + PROC_UNLOCK(p); + + /* if its also linux process */ + if (pp->p_sysent == &elf_linux_sysvec) { + em = em_find(pp, EMUL_LOCKED); + KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); + + td->td_retval[0] = em->shared->group_pid; + } else + td->td_retval[0] = pp->p_pid; + + EMUL_UNLOCK(&emul_lock); + PROC_UNLOCK(pp); + + return (0); +} + int linux_getgid(struct thread *td, struct linux_getgid_args *args) { @@ -1394,3 +1456,39 @@ linux_sethostname(struct thread *td, struct linux_sethostname_args *args) args->len, 0, 0)); } +int +linux_exit_group(struct thread *td, struct linux_exit_group_args *args) +{ + struct linux_emuldata *em, *td_em, *tmp_em; + struct proc *sp; + +#ifdef DEBUG + if (ldebug(exit_group)) + printf(ARGS(exit_group, "%i"), args->error_code); +#endif + + td_em = em_find(td->td_proc, EMUL_UNLOCKED); + + KASSERT(td_em != NULL, ("exit_group: emuldata not found.\n")); + + EMUL_SHARED_RLOCK(&emul_shared_lock); + LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { + if (em->pid == td_em->pid) + continue; + + sp = pfind(em->pid); + psignal(sp, SIGKILL); + PROC_UNLOCK(sp); +#ifdef DEBUG + printf(LMSG("linux_sys_exit_group: kill PID %d\n"), em->pid); +#endif + } + + EMUL_SHARED_RUNLOCK(&emul_shared_lock); + EMUL_UNLOCK(&emul_lock); + + exit1(td, W_EXITCODE(args->error_code,0)); + + return (0); +} + diff --git a/sys/compat/linux/linux_signal.c b/sys/compat/linux/linux_signal.c index 081b7c0b8035..50a05fa434e1 100644 --- a/sys/compat/linux/linux_signal.c +++ b/sys/compat/linux/linux_signal.c @@ -49,6 +49,10 @@ __FBSDID("$FreeBSD$"); #endif #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> +#include <compat/linux/linux_emul.h> + +extern struct sx emul_shared_lock; +extern struct sx emul_lock; void linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) @@ -447,3 +451,57 @@ linux_kill(struct thread *td, struct linux_kill_args *args) tmp.pid = args->pid; return (kill(td, &tmp)); } + +int +linux_tgkill(struct thread *td, struct linux_tgkill_args *args) +{ + struct linux_emuldata *em; + struct linux_kill_args ka; + struct proc *p; + +#ifdef DEBUG + if (ldebug(tgkill)) + printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig); +#endif + + ka.pid = args->pid; + ka.signum = args->sig; + + if (args->tgid == -1) + return linux_kill(td, &ka); + + if ((p = pfind(args->pid)) == NULL) + return ESRCH; + + if (p->p_sysent != &elf_linux_sysvec) + return ESRCH; + + PROC_UNLOCK(p); + + em = em_find(p, EMUL_UNLOCKED); + + if (em == NULL) { +#ifdef DEBUG + printf("emuldata not found in tgkill.\n"); +#endif + return ESRCH; + } + + if (em->shared->group_pid != args->tgid) + return ESRCH; + + EMUL_UNLOCK(&emul_lock); + + return linux_kill(td, &ka); +} + +int +linux_tkill(struct thread *td, struct linux_tkill_args *args) +{ +#ifdef DEBUG + if (ldebug(tkill)) + printf(ARGS(tkill, "%i, %i"), args->tid, args->sig); +#endif + + return (linux_kill(td, (struct linux_kill_args *) args)); +} diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c index 09c51311dc7c..8103c3a90838 100644 --- a/sys/compat/linux/linux_util.c +++ b/sys/compat/linux/linux_util.c @@ -32,6 +32,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_compat.h" + #include <sys/param.h> #include <sys/bus.h> #include <sys/lock.h> @@ -47,6 +49,11 @@ __FBSDID("$FreeBSD$"); #include <machine/stdarg.h> #include <compat/linux/linux_util.h> +#ifdef COMPAT_LINUX32 +#include <machine/../linux32/linux.h> +#else +#include <machine/../linux/linux.h> +#endif const char linux_emul_path[] = "/compat/linux"; @@ -85,8 +92,6 @@ linux_msg(const struct thread *td, const char *fmt, ...) printf("\n"); } -MALLOC_DECLARE(M_LINUX); - struct device_element { TAILQ_ENTRY(device_element) list; diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index d1366bf15197..742483233b4b 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -233,7 +233,9 @@ amd64/linux32/linux32_locore.s optional compat_linux32 \ amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 +compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 +compat/linux/linux_futex.c optional compat_linux32 compat/linux/linux_getcwd.c optional compat_linux32 compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 @@ -243,6 +245,7 @@ compat/linux/linux_signal.c optional compat_linux32 compat/linux/linux_socket.c optional compat_linux32 compat/linux/linux_stats.c optional compat_linux32 compat/linux/linux_sysctl.c optional compat_linux32 +compat/linux/linux_time.c optional compat_linux32 compat/linux/linux_uid16.c optional compat_linux32 compat/linux/linux_util.c optional compat_linux32 dev/amr/amr_linux.c optional compat_linux32 amr diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index eba426ee603d..bd569bdeee4c 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -85,7 +85,9 @@ rr232x_lib.o optional rr232x \ # compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs +compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux +compat/linux/linux_futex.c optional compat_linux compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux @@ -95,6 +97,7 @@ compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux +compat/linux/linux_time.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h index 08a76f320e86..4bbf3039f535 100644 --- a/sys/i386/linux/linux.h +++ b/sys/i386/linux/linux.h @@ -32,6 +32,10 @@ #define _I386_LINUX_LINUX_H_ #include <sys/signal.h> /* for sigval union */ +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> #include <i386/linux/linux_syscall.h> @@ -706,4 +710,99 @@ struct l_pollfd { l_short revents; }; +struct l_user_desc { + l_uint entry_number; + l_uint base_addr; + l_uint limit; + l_uint seg_32bit:1; + l_uint contents:2; + l_uint read_exec_only:1; + l_uint limit_in_pages:1; + l_uint seg_not_present:1; + l_uint useable:1; +}; + +struct l_desc_struct { + unsigned long a,b; +}; + + +#define LINUX_LOWERWORD 0x0000ffff + +/* macros which does the same thing as those in linux include/asm-um/ldt-i386.h + * these convert linux user-space descriptor to machine one + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & LINUX_LOWERWORD) << 16) | ((info)->limit & LINUX_LOWERWORD)) + +#define ENTRY_B_READ_EXEC_ONLY 9 +#define ENTRY_B_CONTENTS 10 +#define ENTRY_B_SEG_NOT_PRESENT 15 +#define ENTRY_B_BASE_ADDR 16 +#define ENTRY_B_USEABLE 20 +#define ENTRY_B_SEG32BIT 22 +#define ENTRY_B_LIMIT 23 + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + ((info)->limit & 0xf0000) | \ + ((info)->contents << ENTRY_B_CONTENTS) | \ + (((info)->seg_not_present == 0) << ENTRY_B_SEG_NOT_PRESENT) | \ + (((info)->base_addr & 0x00ff0000) >> ENTRY_B_BASE_ADDR) | \ + (((info)->read_exec_only == 0) << ENTRY_B_READ_EXEC_ONLY) | \ + ((info)->seg_32bit << ENTRY_B_SEG32BIT) | \ + ((info)->useable << ENTRY_B_USEABLE) | \ + ((info)->limit_in_pages << ENTRY_B_LIMIT) | 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->useable == 0 ) + +/* macros for converting segments, they do the same as those in arch/i386/kernel/process.c */ +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & LINUX_LOWERWORD) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & LINUX_LOWERWORD) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> ENTRY_B_SEG32BIT) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> ENTRY_B_CONTENTS) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> ENTRY_B_READ_EXEC_ONLY) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> ENTRY_B_LIMIT) & 1) +#define GET_PRESENT(desc) (((desc)->b >> ENTRY_B_SEG_NOT_PRESENT) & 1) +#define GET_USEABLE(desc) (((desc)->b >> ENTRY_B_USEABLE) & 1) + +#define LINUX_CLOCK_REALTIME 0 +#define LINUX_CLOCK_MONOTONIC 1 +#define LINUX_CLOCK_PROCESS_CPUTIME_ID 2 +#define LINUX_CLOCK_THREAD_CPUTIME_ID 3 +#define LINUX_CLOCK_REALTIME_HR 4 +#define LINUX_CLOCK_MONOTONIC_HR 5 + +typedef int l_timer_t; +typedef int l_mqd_t; + +#define CLONE_VM 0x100 +#define CLONE_FS 0x200 +#define CLONE_FILES 0x400 +#define CLONE_SIGHAND 0x800 +#define CLONE_PID 0x1000 /* this flag does not exist in linux anymore */ +#define CLONE_PARENT 0x00008000 +#define CLONE_THREAD 0x10000 +#define CLONE_SETTLS 0x80000 +#define CLONE_CHILD_CLEARTID 0x00200000 +#define CLONE_CHILD_SETTID 0x01000000 +#define CLONE_PARENT_SETTID 0x00100000 + +#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND) + #endif /* !_I386_LINUX_LINUX_H_ */ diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c index 939866ae882c..322b9b2ccece 100644 --- a/sys/i386/linux/linux_dummy.c +++ b/sys/i386/linux/linux_dummy.c @@ -61,7 +61,7 @@ DUMMY(rt_sigtimedwait); DUMMY(rt_sigqueueinfo); DUMMY(capget); DUMMY(capset); -DUMMY(sendfile); +DUMMY(sendfile); /* different semantics */ DUMMY(truncate64); DUMMY(setfsuid); DUMMY(setfsgid); @@ -73,30 +73,13 @@ DUMMY(epoll_create); DUMMY(epoll_ctl); DUMMY(epoll_wait); DUMMY(remap_file_pages); -DUMMY(set_tid_address); -DUMMY(timer_create); -DUMMY(timer_settime); -DUMMY(timer_gettime); -DUMMY(timer_getoverrun); -DUMMY(timer_delete); -DUMMY(clock_settime); -DUMMY(clock_gettime); -DUMMY(clock_getres); -DUMMY(clock_nanosleep); DUMMY(statfs64); DUMMY(fstatfs64); -DUMMY(tgkill); DUMMY(utimes); DUMMY(fadvise64_64); DUMMY(mbind); DUMMY(get_mempolicy); DUMMY(set_mempolicy); -DUMMY(mq_open); -DUMMY(mq_unlink); -DUMMY(mq_timedsend); -DUMMY(mq_timedreceive); -DUMMY(mq_notify); -DUMMY(mq_getsetattr); DUMMY(kexec_load); DUMMY(waitid); DUMMY(add_key); diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 68e7039f3529..1b73e7e97326 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -36,13 +36,16 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mman.h> #include <sys/mutex.h> +#include <sys/sx.h> #include <sys/proc.h> +#include <sys/queue.h> #include <sys/resource.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/syscallsubr.h> #include <sys/sysproto.h> #include <sys/unistd.h> +#include <sys/wait.h> #include <machine/frame.h> #include <machine/psl.h> @@ -58,6 +61,16 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_ipc.h> #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> +#include <compat/linux/linux_emul.h> + +#include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */ + +#include "opt_posix.h" + +extern struct sx emul_shared_lock; +extern struct sx emul_lock; + +extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */ struct l_descriptor { l_uint entry_number; @@ -122,6 +135,14 @@ linux_execve(struct thread *td, struct linux_execve_args *args) free(newpath, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + if (error == 0) + /* linux process can exec fbsd one, dont attempt + * to create emuldata for such process using + * linux_proc_init, this leads to a panic on KASSERT + * because such process has p->p_emuldata == NULL + */ + if (td->td_proc->p_sysent == &elf_linux_sysvec) + error = linux_proc_init(td, 0, 0); return (error); } @@ -287,6 +308,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args) if (td->td_retval[1] == 1) td->td_retval[0] = 0; + error = linux_proc_init(td, td->td_retval[0], 0); + if (error) + return (error); + return (0); } @@ -305,18 +330,12 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) /* Are we the child? */ if (td->td_retval[1] == 1) td->td_retval[0] = 0; + error = linux_proc_init(td, td->td_retval[0], 0); + if (error) + return (error); return (0); } -#define CLONE_VM 0x100 -#define CLONE_FS 0x200 -#define CLONE_FILES 0x400 -#define CLONE_SIGHAND 0x800 -#define CLONE_PID 0x1000 -#define CLONE_THREAD 0x10000 - -#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND) - int linux_clone(struct thread *td, struct linux_clone_args *args) { @@ -324,19 +343,16 @@ linux_clone(struct thread *td, struct linux_clone_args *args) struct proc *p2; struct thread *td2; int exit_signal; + struct linux_emuldata *em; #ifdef DEBUG if (ldebug(clone)) { - printf(ARGS(clone, "flags %x, stack %x"), - (unsigned int)args->flags, (unsigned int)args->stack); - if (args->flags & CLONE_PID) - printf(LMSG("CLONE_PID not yet supported")); + printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"), + (unsigned int)args->flags, (unsigned int)args->stack, + (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr); } #endif - if (!args->stack) - return (EINVAL); - exit_signal = args->flags & 0x000000ff; if (exit_signal >= LINUX_NSIG) return (EINVAL); @@ -371,12 +387,118 @@ linux_clone(struct thread *td, struct linux_clone_args *args) if (error) return (error); + /* create the emuldata */ + error = linux_proc_init(td, p2->p_pid, args->flags); + /* reference it - no need to check this */ + em = em_find(p2, EMUL_UNLOCKED); + KASSERT(em != NULL, ("clone: emuldata not found.\n")); + /* and adjust it */ + if (args->flags & CLONE_PARENT_SETTID) { + if (args->parent_tidptr == NULL) { + EMUL_UNLOCK(&emul_lock); + return (EINVAL); + } + error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); + if (error) { + EMUL_UNLOCK(&emul_lock); + return (error); + } + } + + if (args->flags & CLONE_PARENT) { +#ifdef DEBUG + printf("linux_clone: CLONE_PARENT\n"); +#endif + } + + if (args->flags & CLONE_THREAD) { + /* XXX: linux mangles pgrp and pptr somehow + * I think it might be this but I am not sure. + */ +#ifdef notyet + p2->p_pgrp = td->td_proc->p_pgrp; + p2->p_pptr = td->td_proc->p_pptr; +#endif + exit_signal = 0; +#ifdef DEBUG + printf("linux_clone: CLONE_THREADS\n"); +#endif + } + + if (args->flags & CLONE_CHILD_SETTID) + em->child_set_tid = args->child_tidptr; + else + em->child_set_tid = NULL; + + if (args->flags & CLONE_CHILD_CLEARTID) + em->child_clear_tid = args->child_tidptr; + else + em->child_clear_tid = NULL; + EMUL_UNLOCK(&emul_lock); PROC_LOCK(p2); p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); td2 = FIRST_THREAD_IN_PROC(p2); - td2->td_frame->tf_esp = (unsigned int)args->stack; + /* in a case of stack = NULL we are supposed to COW calling process stack + * this is what normal fork() does so we just keep the tf_esp arg intact + */ + if (args->stack) + td2->td_frame->tf_esp = (unsigned int)args->stack; + + if (args->flags & CLONE_SETTLS) { + struct l_user_desc info; + int idx; + int a[2]; + struct segment_descriptor sd; + + error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc)); + if (error) + return (error); + + idx = info.entry_number; + + /* looks like we're getting the idx we returned + * in the set_thread_area() syscall + */ + if (idx != 6 && idx != 3) + return (EINVAL); + + /* this doesnt happen in practice */ + if (idx == 6) { + /* we might copy out the entry_number as 3 */ + info.entry_number = 3; + error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc)); + if (error) + return (error); + } + + a[0] = LDT_entry_a(&info); + a[1] = LDT_entry_b(&info); + + memcpy(&sd, &a, sizeof(a)); +#ifdef DEBUG + if (ldebug(clone)) + printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, + sd.sd_hibase, + sd.sd_lolimit, + sd.sd_hilimit, + sd.sd_type, + sd.sd_dpl, + sd.sd_p, + sd.sd_xx, + sd.sd_def32, + sd.sd_gran); +#endif + + /* this is taken from i386 version of cpu_set_user_tls() */ + critical_enter(); + /* set %gs */ + td2->td_pcb->pcb_gsd = sd; + PCPU_GET(fsgs_gdt)[1] = sd; + load_gs(GSEL(GUGS_SEL, SEL_UPL)); + critical_exit(); + } #ifdef DEBUG if (ldebug(clone)) @@ -847,25 +969,234 @@ linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { - /* - * Return an error code instead of raising a SIGSYS so that - * the caller will fall back to simpler LDT methods. + struct l_user_desc info; + int error; + int idx; + int a[2]; + struct segment_descriptor sd; + + error = copyin(args->desc, &info, sizeof(struct l_user_desc)); + if (error) + return (error); + +#ifdef DEBUG + if (ldebug(set_thread_area)) + printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"), + info.entry_number, + info.base_addr, + info.limit, + info.seg_32bit, + info.contents, + info.read_exec_only, + info.limit_in_pages, + info.seg_not_present, + info.useable); +#endif + + idx = info.entry_number; + /* Semantics of linux version: every thread in the system has array + * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This + * syscall loads one of the selected tls decriptors with a value + * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread + * descriptors. + * + * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread + * descriptors and use just the 1st one. The tls_array[] is used only in + * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd + * we use just one GDT descriptor for TLS so we will load just one. + * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment + * comment in the linux sources says wine might do that. */ - return (ENOSYS); + + /* we support just GLIBC TLS now + * we should let 3 proceed as well because we use this segment so + * if code does two subsequent calls it should succeed + */ + if (idx != 6 && idx != -1 && idx != 3) + return (EINVAL); + + /* we have to copy out the GDT entry we use + * FreeBSD uses GDT entry #3 for storing %gs so load that + * XXX: what if userspace program doesnt check this value and tries + * to use 6, 7 or 8? + */ + idx = info.entry_number = 3; + error = copyout(&info, args->desc, sizeof(struct l_user_desc)); + if (error) + return (error); + + if (LDT_empty(&info)) { + a[0] = 0; + a[1] = 0; + } else { + a[0] = LDT_entry_a(&info); + a[1] = LDT_entry_b(&info); + } + + memcpy(&sd, &a, sizeof(a)); +#ifdef DEBUG + if (ldebug(set_thread_area)) + printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, + sd.sd_hibase, + sd.sd_lolimit, + sd.sd_hilimit, + sd.sd_type, + sd.sd_dpl, + sd.sd_p, + sd.sd_xx, + sd.sd_def32, + sd.sd_gran); +#endif + + /* this is taken from i386 version of cpu_set_user_tls() */ + critical_enter(); + /* set %gs */ + td->td_pcb->pcb_gsd = sd; + PCPU_GET(fsgs_gdt)[1] = sd; + load_gs(GSEL(GUGS_SEL, SEL_UPL)); + critical_exit(); + + return (0); } int -linux_gettid(struct thread *td, struct linux_gettid_args *args) +linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) { + + struct l_user_desc info; + int error; + int idx; + struct l_desc_struct desc; + struct segment_descriptor sd; + +#ifdef DEBUG + if (ldebug(get_thread_area)) + printf(ARGS(get_thread_area, "%p"), args->desc); +#endif + + error = copyin(args->desc, &info, sizeof(struct l_user_desc)); + if (error) + return (error); + + idx = info.entry_number; + /* XXX: I am not sure if we want 3 to be allowed too. */ + if (idx != 6 && idx != 3) + return (EINVAL); + + idx = 3; + + memset(&info, 0, sizeof(info)); + + sd = PCPU_GET(fsgs_gdt)[1]; + + memcpy(&desc, &sd, sizeof(desc)); + + info.entry_number = idx; + info.base_addr = GET_BASE(&desc); + info.limit = GET_LIMIT(&desc); + info.seg_32bit = GET_32BIT(&desc); + info.contents = GET_CONTENTS(&desc); + info.read_exec_only = !GET_WRITABLE(&desc); + info.limit_in_pages = GET_LIMIT_PAGES(&desc); + info.seg_not_present = !GET_PRESENT(&desc); + info.useable = GET_USEABLE(&desc); + + error = copyout(&info, args->desc, sizeof(struct l_user_desc)); + if (error) + return (EFAULT); - td->td_retval[0] = td->td_proc->p_pid; return (0); } +/* copied from kern/kern_time.c */ +int +linux_timer_create(struct thread *td, struct linux_timer_create_args *args) +{ + return ktimer_create(td, (struct ktimer_create_args *) args); +} + +int +linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args) +{ + return ktimer_settime(td, (struct ktimer_settime_args *) args); +} + +int +linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args) +{ + return ktimer_gettime(td, (struct ktimer_gettime_args *) args); +} + +int +linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args) +{ + return ktimer_getoverrun(td, (struct ktimer_getoverrun_args *) args); +} + int -linux_tkill(struct thread *td, struct linux_tkill_args *args) +linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args) { + return ktimer_delete(td, (struct ktimer_delete_args *) args); +} + +/* XXX: this wont work with module - convert it */ +int +linux_mq_open(struct thread *td, struct linux_mq_open_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_open(td, (struct kmq_open_args *) args); +#else + return (ENOSYS); +#endif +} + +int +linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_unlink(td, (struct kmq_unlink_args *) args); +#else + return (ENOSYS); +#endif +} - return (linux_kill(td, (struct linux_kill_args *) args)); +int +linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_timedsend(td, (struct kmq_timedsend_args *) args); +#else + return (ENOSYS); +#endif +} + +int +linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args); +#else + return (ENOSYS); +#endif +} + +int +linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_notify(td, (struct kmq_notify_args *) args); +#else + return (ENOSYS); +#endif +} + +int +linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) +{ +#ifdef P1003_1B_MQUEUE + return kmq_setattr(td, (struct kmq_setattr_args *) args); +#else + return (ENOSYS); +#endif } diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 70ebfc123e2c..e3e65e9bfc53 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/vnode.h> +#include <sys/eventhandler.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -105,6 +106,18 @@ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings); +extern void linux_proc_exit(void *, struct proc *, struct image_params *); +extern void linux_proc_exec(void *, struct proc *, struct image_params *); +extern void linux_schedtail(void *, struct proc *); +extern LIST_HEAD(futex_list, futex) futex_list; +extern struct sx emul_shared_lock; +extern struct sx emul_lock; +extern struct mtx futex_mtx; + +static eventhandler_tag linux_exit_tag; +static eventhandler_tag linux_schedtail_tag; +static eventhandler_tag linux_exec_tag; + /* * Linux syscalls return negative errno's, we do positive and map them * Reference: @@ -804,7 +817,7 @@ exec_linux_setregs(struct thread *td, u_long entry, struct sysentvec linux_sysvec = { LINUX_SYS_MAXSYSCALL, linux_sysent, - 0xff, + 0, LINUX_SIGTBLSZ, bsd_to_linux_signal, ELAST + 1, @@ -833,7 +846,7 @@ struct sysentvec linux_sysvec = { struct sysentvec elf_linux_sysvec = { LINUX_SYS_MAXSYSCALL, linux_sysent, - 0xff, + 0, LINUX_SIGTBLSZ, bsd_to_linux_signal, ELAST + 1, @@ -908,6 +921,16 @@ linux_elf_modevent(module_t mod, int type, void *data) linux_ioctl_register_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_register_handler(*ldhp); + sx_init(&emul_lock, "emuldata lock"); + sx_init(&emul_shared_lock, "emuldata->shared lock"); + LIST_INIT(&futex_list); + mtx_init(&futex_mtx, "futex protection lock", NULL, MTX_DEF); + linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, + NULL, 1000); + linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, + NULL, 1000); + linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, + NULL, 1000); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else @@ -929,6 +952,12 @@ linux_elf_modevent(module_t mod, int type, void *data) linux_ioctl_unregister_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_unregister_handler(*ldhp); + sx_destroy(&emul_lock); + sx_destroy(&emul_shared_lock); + mtx_destroy(&futex_mtx); + EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); + EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); + EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else |