aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/linux/linux_dummy.c4
-rw-r--r--sys/amd64/linux/syscalls.master13
-rw-r--r--sys/amd64/linux32/linux32_dummy.c4
-rw-r--r--sys/amd64/linux32/syscalls.master13
-rw-r--r--sys/compat/linux/linux_emul.c35
-rw-r--r--sys/compat/linux/linux_emul.h3
-rw-r--r--sys/compat/linux/linux_event.c500
-rw-r--r--sys/compat/linux/linux_event.h58
-rw-r--r--sys/compat/linux/linux_util.c1
-rw-r--r--sys/compat/linux/linux_util.h1
-rw-r--r--sys/conf/files.amd641
-rw-r--r--sys/conf/files.i3861
-rw-r--r--sys/conf/files.pc981
-rw-r--r--sys/i386/linux/linux_dummy.c4
-rw-r--r--sys/i386/linux/syscalls.master13
-rw-r--r--sys/modules/linux/Makefile2
-rw-r--r--sys/modules/linux64/Makefile2
17 files changed, 625 insertions, 31 deletions
diff --git a/sys/amd64/linux/linux_dummy.c b/sys/amd64/linux/linux_dummy.c
index 4e85bec3cab9..b2c175b09033 100644
--- a/sys/amd64/linux/linux_dummy.c
+++ b/sys/amd64/linux/linux_dummy.c
@@ -69,13 +69,10 @@ DUMMY(tuxcall);
DUMMY(security);
DUMMY(set_thread_area);
DUMMY(lookup_dcookie);
-DUMMY(epoll_create);
DUMMY(epoll_ctl_old);
DUMMY(epoll_wait_old);
DUMMY(remap_file_pages);
DUMMY(semtimedop);
-DUMMY(epoll_ctl);
-DUMMY(epoll_wait);
DUMMY(mbind);
DUMMY(get_mempolicy);
DUMMY(set_mempolicy);
@@ -112,7 +109,6 @@ DUMMY(timerfd_settime);
DUMMY(timerfd_gettime);
DUMMY(signalfd4);
DUMMY(eventfd2);
-DUMMY(epoll_create1);
DUMMY(inotify_init1);
DUMMY(preadv);
DUMMY(pwritev);
diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master
index 4bdf287cc076..b5d0bdcc6557 100644
--- a/sys/amd64/linux/syscalls.master
+++ b/sys/amd64/linux/syscalls.master
@@ -373,7 +373,7 @@
210 AUE_NULL UNIMPL linux_io_cancel
211 AUE_NULL UNIMPL linux_get_thread_area
212 AUE_NULL STD { int linux_lookup_dcookie(void); }
-213 AUE_NULL STD { int linux_epoll_create(void); }
+213 AUE_NULL STD { int linux_epoll_create(l_int size); }
214 AUE_NULL STD { int linux_epoll_ctl_old(void); }
215 AUE_NULL STD { int linux_epoll_wait_old(void); }
216 AUE_NULL STD { int linux_remap_file_pages(void); }
@@ -397,8 +397,10 @@
230 AUE_NULL STD { int linux_clock_nanosleep(clockid_t which, int flags, \
struct l_timespec *rqtp, struct l_timespec *rmtp); }
231 AUE_EXIT STD { int linux_exit_group(int error_code); }
-232 AUE_NULL STD { int linux_epoll_wait(void); }
-233 AUE_NULL STD { int linux_epoll_ctl(void); }
+232 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout); }
+233 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
+ struct epoll_event *event); }
234 AUE_NULL STD { int linux_tgkill(int tgid, int pid, int sig); }
235 AUE_UTIMES STD { int linux_utimes(char *fname, \
struct l_timeval *tptr); }
@@ -466,7 +468,8 @@
278 AUE_NULL STD { int linux_vmsplice(void); }
279 AUE_NULL STD { int linux_move_pages(void); }
280 AUE_NULL STD { int linux_utimensat(void); }
-281 AUE_NULL STD { int linux_epoll_pwait(void); }
+281 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout, l_sigset_t *mask); }
282 AUE_NULL STD { int linux_signalfd(void); }
283 AUE_NULL STD { int linux_timerfd(void); }
284 AUE_NULL STD { int linux_eventfd(void); }
@@ -477,7 +480,7 @@
l_uintptr_t namelen, int flags); }
289 AUE_NULL STD { int linux_signalfd4(void); }
290 AUE_NULL STD { int linux_eventfd2(void); }
-291 AUE_NULL STD { int linux_epoll_create1(void); }
+291 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
292 AUE_NULL STD { int linux_dup3(l_int oldfd, \
l_int newfd, l_int flags); }
293 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c
index 2461c557819a..0f29f57fb359 100644
--- a/sys/amd64/linux32/linux32_dummy.c
+++ b/sys/amd64/linux32/linux32_dummy.c
@@ -68,9 +68,6 @@ DUMMY(pivot_root);
DUMMY(mincore);
DUMMY(ptrace);
DUMMY(lookup_dcookie);
-DUMMY(epoll_create);
-DUMMY(epoll_ctl);
-DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(fstatfs64);
DUMMY(mbind);
@@ -120,7 +117,6 @@ DUMMY(timerfd_gettime);
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(eventfd2);
-DUMMY(epoll_create1);
DUMMY(inotify_init1);
/* linux 2.6.30: */
DUMMY(preadv);
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index f286c69588ee..0c66beb177da 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -430,9 +430,11 @@
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }
253 AUE_NULL STD { int linux_lookup_dcookie(void); }
-254 AUE_NULL STD { int linux_epoll_create(void); }
-255 AUE_NULL STD { int linux_epoll_ctl(void); }
-256 AUE_NULL STD { int linux_epoll_wait(void); }
+254 AUE_NULL STD { int linux_epoll_create(l_int size); }
+255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
+ struct epoll_event *event); }
+256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout); }
257 AUE_NULL STD { int linux_remap_file_pages(void); }
258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
@@ -527,7 +529,8 @@
317 AUE_NULL STD { int linux_move_pages(void); }
; linux 2.6.19:
318 AUE_NULL STD { int linux_getcpu(void); }
-319 AUE_NULL STD { int linux_epoll_pwait(void); }
+319 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout, l_osigset_t *mask); }
; linux 2.6.22:
320 AUE_NULL STD { int linux_utimensat(void); }
321 AUE_NULL STD { int linux_signalfd(void); }
@@ -541,7 +544,7 @@
; linux 2.6.27:
327 AUE_NULL STD { int linux_signalfd4(void); }
328 AUE_NULL STD { int linux_eventfd2(void); }
-329 AUE_NULL STD { int linux_epoll_create1(void); }
+329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
330 AUE_NULL STD { int linux_dup3(l_int oldfd, \
l_int newfd, l_int flags); }
331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c
index 20eb8d131e07..78a11766a4fd 100644
--- a/sys/compat/linux/linux_emul.c
+++ b/sys/compat/linux/linux_emul.c
@@ -42,8 +42,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
-#include <sys/sysproto.h>
-#include <sys/unistd.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>
@@ -86,6 +84,7 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
{
struct linux_emuldata *em;
struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
if (newtd != NULL) {
/* non-exec call */
@@ -93,8 +92,13 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
em->pdeath_signal = 0;
em->robust_futexes = NULL;
if (flags & LINUX_CLONE_THREAD) {
+ LINUX_CTR1(proc_init, "thread newtd(%d)",
+ newtd->td_tid);
+
em->em_tid = newtd->td_tid;
} else {
+ LINUX_CTR1(proc_init, "fork newtd(%d)",
+ newtd->td_proc->p_pid);
em->em_tid = newtd->td_proc->p_pid;
@@ -105,12 +109,24 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
newtd->td_emuldata = em;
} else {
/* exec */
+ LINUX_CTR1(proc_init, "exec newtd(%d)",
+ td->td_proc->p_pid);
/* lookup the old one */
em = em_find(td);
KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
em->em_tid = td->td_proc->p_pid;
+
+ /* epoll should be destroyed in a case of exec. */
+ pem = pem_find(td->td_proc);
+ KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
+
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
+ }
}
em->child_clear_tid = NULL;
@@ -121,6 +137,7 @@ void
linux_proc_exit(void *arg __unused, struct proc *p)
{
struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
struct thread *td = curthread;
if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX))
@@ -133,6 +150,12 @@ linux_proc_exit(void *arg __unused, struct proc *p)
p->p_emuldata = NULL;
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
+ }
+
sx_destroy(&pem->pem_sx);
free(pem, M_LINUX);
}
@@ -141,6 +164,7 @@ int
linux_common_execve(struct thread *td, struct image_args *eargs)
{
struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
struct linux_emuldata *em;
struct proc *p;
int error;
@@ -180,6 +204,12 @@ linux_common_execve(struct thread *td, struct image_args *eargs)
p->p_emuldata = NULL;
PROC_UNLOCK(p);
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
+ }
+
free(em, M_TEMP);
free(pem, M_LINUX);
}
@@ -197,6 +227,7 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
*/
if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) ==
SV_ABI_LINUX)) {
+
if (SV_PROC_ABI(p) == SV_ABI_LINUX)
linux_proc_init(td, NULL, 0);
else
diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h
index 21787010bfbf..3558b0a71583 100644
--- a/sys/compat/linux/linux_emul.h
+++ b/sys/compat/linux/linux_emul.h
@@ -60,9 +60,12 @@ int linux_common_execve(struct thread *, struct image_args *);
/* process emuldata flags */
#define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated
futex REQUEUE op*/
+#define LINUX_XUNSUP_EPOLL 0x00000002 /* unsupported epoll events */
+
struct linux_pemuldata {
uint32_t flags; /* process emuldata flags */
struct sx pem_sx; /* lock for this struct */
+ void *epoll; /* epoll data */
};
#define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx)
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
new file mode 100644
index 000000000000..2456c41bb554
--- /dev/null
+++ b/sys/compat/linux/linux_event.c
@@ -0,0 +1,500 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/imgact.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/event.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/syscallsubr.h>
+#include <sys/timespec.h>
+
+#ifdef COMPAT_LINUX32
+#include <machine/../linux32/linux.h>
+#include <machine/../linux32/linux32_proto.h>
+#else
+#include <machine/../linux/linux.h>
+#include <machine/../linux/linux_proto.h>
+#endif
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_event.h>
+#include <compat/linux/linux_file.h>
+#include <compat/linux/linux_util.h>
+
+/*
+ * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
+ * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
+ * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied
+ * data verbatim. Therefore we allocate a 64-bit memory block to pass
+ * user supplied data for every file descriptor.
+ */
+
+/* 64-bit user data value attached to an epoll event. */
+typedef uint64_t epoll_udata_t;
+
+/*
+ * User data vector hung off linux_pemuldata->epoll.  epoll_fd_install()
+ * stores into udata[fd] and epoll_kev_copyout() reads the value back.
+ */
+struct epoll_emuldata {
+ uint32_t fdc; /* epoll udata max index */
+ epoll_udata_t udata[1]; /* epoll user data vector */
+};
+
+/* Index that epoll_create_common() passes to epoll_fd_install() up front. */
+#define EPOLL_DEF_SZ 16
+/*
+ * Allocation size for a vector whose highest index is fdn: the structure
+ * itself already holds udata[0], so fdn extra slots cover indices 0..fdn.
+ */
+#define EPOLL_SIZE(fdn) \
+ (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))
+
+/*
+ * Event record exchanged with the Linux process through copyin()/copyout().
+ * NOTE(review): packed on amd64 presumably to match the Linux ABI layout
+ * of this structure -- confirm against the Linux headers.
+ */
+struct epoll_event {
+ uint32_t events;
+ epoll_udata_t data;
+}
+#if defined(__amd64__)
+__attribute__((packed))
+#endif
+;
+
+/* Largest maxevents value accepted by linux_epoll_wait(). */
+#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
+/* Forward declarations of the file-local helpers defined below. */
+static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
+static int epoll_to_kevent(struct thread *td, struct file *epfp,
+ int fd, struct epoll_event *l_event, int *kev_flags,
+ struct kevent *kevent, int *nkevents);
+static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
+static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
+static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
+static int epoll_delete_event(struct thread *td, struct file *epfp,
+ int fd, int filter);
+static int epoll_delete_all_events(struct thread *td, struct file *epfp,
+ int fd);
+
+/*
+ * Cookie for epoll_kev_copyin(): points at the next kernel-resident
+ * kevent that has not been handed over yet.
+ */
+struct epoll_copyin_args {
+ struct kevent *changelist;
+};
+
+/*
+ * Cookie for epoll_kev_copyout(): leventlist is the next user-space slot
+ * to fill, p the process whose user data vector is consulted, count the
+ * number of events copied out so far and error the first copyout failure.
+ */
+struct epoll_copyout_args {
+ struct epoll_event *leventlist;
+ struct proc *p;
+ uint32_t count;
+ int error;
+};
+
+
+/*
+ * Remember the epoll user data for descriptor fd in the per-process
+ * vector, creating the vector or growing it so that index fd exists.
+ * The process emuldata lock is held exclusively for the whole update.
+ */
+static void
+epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
+{
+	struct linux_pemuldata *pem;
+	struct epoll_emuldata *vec;
+
+	pem = pem_find(td->td_proc);
+	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+
+	LINUX_PEM_XLOCK(pem);
+	vec = pem->epoll;
+	if (vec == NULL || fd > vec->fdc) {
+		/* No vector yet, or fd lies beyond its last index. */
+		vec = (vec == NULL) ?
+		    malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK) :
+		    realloc(vec, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
+		vec->fdc = fd;
+		pem->epoll = vec;
+	}
+	vec->udata[fd] = udata;
+	LINUX_PEM_XUNLOCK(pem);
+}
+
+/*
+ * Shared tail of epoll_create()/epoll_create1(): create the backing
+ * kqueue with the given open flags and, on success, make sure the
+ * user data vector covers index EPOLL_DEF_SZ.
+ * NOTE(review): this also stores 0 into udata[EPOLL_DEF_SZ], which looks
+ * like it can overwrite data installed earlier for that descriptor --
+ * confirm.
+ */
+static int
+epoll_create_common(struct thread *td, int flags)
+{
+	int error;
+
+	error = kern_kqueue(td, flags);
+	if (error == 0)
+		epoll_fd_install(td, EPOLL_DEF_SZ, 0);
+
+	return (error);
+}
+
+/*
+ * epoll_create(2) emulation.  The size argument is only validated
+ * (it must be positive) and otherwise ignored.
+ */
+int
+linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
+{
+
+	if (args->size > 0)
+		return (epoll_create_common(td, 0));
+
+	return (EINVAL);
+}
+
+/*
+ * epoll_create1(2) emulation.  LINUX_O_CLOEXEC is the only accepted
+ * flag and is translated to O_CLOEXEC; any other bit yields EINVAL.
+ */
+int
+linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
+{
+	int unknown, kq_flags;
+
+	unknown = args->flags & ~(LINUX_O_CLOEXEC);
+	if (unknown != 0)
+		return (EINVAL);
+
+	kq_flags = ((args->flags & LINUX_O_CLOEXEC) != 0) ? O_CLOEXEC : 0;
+
+	return (epoll_create_common(td, kq_flags));
+}
+
+/*
+ * Convert one epoll event request into up to two kevent changes.
+ *
+ * EPOLLONESHOT/EPOLLET are folded into *kev_flags, then one kevent is
+ * emitted for the read filter and one for the write filter as requested;
+ * *nkevents is bumped for each.  If the request carries any bit outside
+ * LINUX_EPOLL_EVSUP the function returns EINVAL, logging the offending
+ * mask only the first time this happens in the process.
+ */
+static int
+epoll_to_kevent(struct thread *td, struct file *epfp,
+    int fd, struct epoll_event *l_event, int *kev_flags,
+    struct kevent *kevent, int *nkevents)
+{
+	const uint32_t requested = l_event->events;
+	struct linux_pemuldata *pem;
+	int filled = 0;
+	int first_report;
+
+	/* How the event is to be registered. */
+	if ((requested & LINUX_EPOLLONESHOT) != 0)
+		*kev_flags |= EV_ONESHOT;
+	if ((requested & LINUX_EPOLLET) != 0)
+		*kev_flags |= EV_CLEAR;
+
+	/* Which filters are to be registered. */
+	if ((requested & LINUX_EPOLL_EVRD) != 0) {
+		EV_SET(&kevent[filled], fd, EVFILT_READ, *kev_flags, 0, 0, 0);
+		filled++;
+		++(*nkevents);
+	}
+	if ((requested & LINUX_EPOLL_EVWR) != 0) {
+		EV_SET(&kevent[filled], fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
+		filled++;
+		++(*nkevents);
+	}
+
+	if ((requested & ~(LINUX_EPOLL_EVSUP)) == 0)
+		return (0);
+
+	/* Unsupported bits: complain once per process, always fail. */
+	pem = pem_find(td->td_proc);
+	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+	KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));
+
+	LINUX_PEM_XLOCK(pem);
+	first_report = ((pem->flags & LINUX_XUNSUP_EPOLL) == 0);
+	if (first_report)
+		pem->flags |= LINUX_XUNSUP_EPOLL;
+	LINUX_PEM_XUNLOCK(pem);
+
+	if (first_report)
+		linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n",
+		    requested);
+
+	return (EINVAL);
+}
+
+/*
+ * Translate the filter of a triggered kevent into the corresponding
+ * epoll event mask.  Entries flagged EV_ERROR, and filters other than
+ * EVFILT_READ and EVFILT_WRITE, leave *l_event untouched.
+ */
+static void
+kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
+{
+
+	if ((kevent->flags & EV_ERROR) != 0)
+		return;
+
+	if (kevent->filter == EVFILT_READ)
+		l_event->events =
+		    LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
+	else if (kevent->filter == EVFILT_WRITE)
+		l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
+}
+
+/*
+ * Copyout callback handed to kern_kevent_fp().  Converts count kevents
+ * into epoll events, attaches the user data remembered for each
+ * descriptor and copies the result to the user buffer tracked in the
+ * epoll_copyout_args cookie.
+ *
+ * Returns 0 or the copyout() error; the first error is also latched
+ * in args->error and the running total is kept in args->count.
+ *
+ * The user data vector is looked up defensively: it is released on
+ * exec (see linux_proc_init()) and is indexed by descriptor number, so
+ * a missing vector or an out-of-range descriptor reports data 0 rather
+ * than dereferencing NULL or reading past the allocation.
+ */
+static int
+epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyout_args *args;
+	struct linux_pemuldata *pem;
+	struct epoll_emuldata *emd;
+	struct epoll_event *eep;
+	int error, fd, i;
+
+	args = (struct epoll_copyout_args*) arg;
+	/* M_ZERO: entries that are not filled in below read as 0. */
+	eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
+
+	pem = pem_find(args->p);
+	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+	LINUX_PEM_SLOCK(pem);
+	emd = pem->epoll;
+
+	for (i = 0; i < count; i++) {
+		kevent_to_epoll(&kevp[i], &eep[i]);
+
+		fd = kevp[i].ident;
+		if (emd != NULL && fd >= 0 && (uint32_t)fd <= emd->fdc)
+			eep[i].data = emd->udata[fd];
+	}
+	LINUX_PEM_SUNLOCK(pem);
+
+	error = copyout(eep, args->leventlist, count * sizeof(*eep));
+	if (error == 0) {
+		args->leventlist += count;
+		args->count += count;
+	} else if (args->error == 0)
+		args->error = error;
+
+	free(eep, M_EPOLL);
+	return (error);
+}
+
+/*
+ * Copyin callback handed to kern_kevent_fp().  The changes were already
+ * built in kernel memory, so they are moved with memcpy() rather than
+ * copyin(); the cookie's cursor is advanced past the consumed entries.
+ */
+static int
+epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyin_args *cursor = arg;
+
+	memcpy(kevp, cursor->changelist, count * sizeof(*kevp));
+	cursor->changelist += count;
+
+	return (0);
+}
+
+/*
+ * epoll_ctl(2) emulation: add, modify or remove the interest of the
+ * epoll instance args->epfd in descriptor args->fd by translating the
+ * request into kevent changes on the backing kqueue.
+ *
+ * Returns 0 on success or an errno value: the copyin()/fget() error,
+ * EINVAL when epfd is not a kqueue, when fd refers to the epoll
+ * instance itself, when op is unknown or when unsupported event bits
+ * are requested, or whatever kern_kevent_fp() reports.
+ */
+int
+linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
+{
+	struct file *epfp, *fp;
+	struct epoll_copyin_args ciargs;
+	struct kevent kev[2];
+	struct kevent_copyops k_ops = { &ciargs,
+					NULL,
+					epoll_kev_copyin};
+	struct epoll_event le;
+	cap_rights_t rights;
+	int kev_flags;
+	int nchanges = 0;
+	int error;
+
+	/* EPOLL_CTL_DEL does not look at the event argument. */
+	if (args->op != LINUX_EPOLL_CTL_DEL) {
+		error = copyin(args->event, &le, sizeof(le));
+		if (error != 0)
+			return (error);
+	}
+
+	error = fget(td, args->epfd,
+	    cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp);
+	if (error != 0)
+		return (error);
+	if (epfp->f_type != DTYPE_KQUEUE) {
+		/* epfd is a valid descriptor but not an epoll instance. */
+		error = EINVAL;
+		goto leave1;
+	}
+
+	/* Protect user data vector from incorrectly supplied fd. */
+	error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp);
+	if (error != 0)
+		goto leave1;
+
+	/* Linux does not allow an epoll instance to watch itself. */
+	if (epfp == fp) {
+		error = EINVAL;
+		goto leave0;
+	}
+
+	ciargs.changelist = kev;
+
+	switch (args->op) {
+	case LINUX_EPOLL_CTL_MOD:
+		/*
+		 * We don't memorize which events were set for this FD
+		 * on this level, so just delete all we could have set:
+		 * EVFILT_READ and EVFILT_WRITE.  A filter that was never
+		 * registered (ENOENT) is not treated as an error.
+		 */
+		error = epoll_delete_all_events(td, epfp, args->fd);
+		if (error)
+			goto leave0;
+		/* FALLTHROUGH */
+
+	case LINUX_EPOLL_CTL_ADD:
+		kev_flags = EV_ADD | EV_ENABLE;
+		break;
+
+	case LINUX_EPOLL_CTL_DEL:
+		/* CTL_DEL means unregister this fd with this epoll */
+		error = epoll_delete_all_events(td, epfp, args->fd);
+		goto leave0;
+
+	default:
+		error = EINVAL;
+		goto leave0;
+	}
+
+	error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags,
+	    kev, &nchanges);
+	if (error)
+		goto leave0;
+
+	/* Record the user data before the filters can fire. */
+	epoll_fd_install(td, args->fd, le.data);
+
+	error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
+
+leave0:
+	fdrop(fp, td);
+
+leave1:
+	fdrop(epfp, td);
+	return (error);
+}
+
+/*
+ * epoll_wait(2) emulation: wait for events on the kqueue backing the
+ * epoll descriptor args->epfd and copy up to args->maxevents of them
+ * to args->events.
+ *
+ * A timeout of -1 waits indefinitely, any other negative timeout is
+ * rejected with EINVAL, otherwise the timeout is in milliseconds.
+ * On success the number of events copied out is stored in
+ * td->td_retval[0].  EINVAL is also returned when maxevents is out of
+ * range or when epfd is not a kqueue, matching linux_epoll_ctl().
+ */
+int
+linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
+{
+	struct file *epfp;
+	struct timespec ts, *tsp;
+	cap_rights_t rights;
+	struct epoll_copyout_args coargs;
+	struct kevent_copyops k_ops = { &coargs,
+					epoll_kev_copyout,
+					NULL};
+	int error;
+
+	if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS)
+		return (EINVAL);
+
+	error = fget(td, args->epfd,
+	    cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp);
+	if (error != 0)
+		return (error);
+	if (epfp->f_type != DTYPE_KQUEUE) {
+		/* epfd is a valid descriptor but not an epoll instance. */
+		error = EINVAL;
+		goto leave;
+	}
+
+	coargs.leventlist = args->events;
+	coargs.p = td->td_proc;
+	coargs.count = 0;
+	coargs.error = 0;
+
+	if (args->timeout != -1) {
+		if (args->timeout < 0) {
+			error = EINVAL;
+			goto leave;
+		}
+		/* Convert from milliseconds to timespec. */
+		ts.tv_sec = args->timeout / 1000;
+		ts.tv_nsec = (args->timeout % 1000) * 1000000;
+		tsp = &ts;
+	} else {
+		tsp = NULL;
+	}
+
+	error = kern_kevent_fp(td, epfp, 0, args->maxevents, &k_ops, tsp);
+	/* Surface a copyout failure latched by the callback. */
+	if (error == 0 && coargs.error != 0)
+		error = coargs.error;
+
+	/*
+	 * kern_kevent might return ENOMEM which is not expected from epoll_wait.
+	 * Maybe we should translate that but I don't think it matters at all.
+	 */
+	if (error == 0)
+		td->td_retval[0] = coargs.count;
+leave:
+	fdrop(epfp, td);
+	return (error);
+}
+
+/*
+ * Remove one filter (EVFILT_READ or EVFILT_WRITE) for descriptor fd
+ * from the kqueue behind epfp.  ENOENT is mapped to success because
+ * this layer does not keep track of which filters were registered.
+ */
+static int
+epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
+{
+	struct kevent change;
+	struct epoll_copyin_args ciargs = { &change };
+	struct kevent_copyops k_ops = { &ciargs,
+					NULL,
+					epoll_kev_copyin};
+	int error;
+
+	EV_SET(&change, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
+
+	error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL);
+
+	return (error == ENOENT ? 0 : error);
+}
+
+/*
+ * Remove both the read and the write filter for descriptor fd.  Both
+ * deletions are always attempted; the read-side error takes precedence
+ * when both fail.
+ */
+static int
+epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
+{
+	int rd_error, wr_error;
+
+	rd_error = epoll_delete_event(td, epfp, fd, EVFILT_READ);
+	wr_error = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
+
+	if (rd_error != 0)
+		return (rd_error);
+	return (wr_error);
+}
diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h
new file mode 100644
index 000000000000..f4bb803efaae
--- /dev/null
+++ b/sys/compat/linux/linux_event.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_EVENT_H_
+#define _LINUX_EVENT_H_
+
+/*
+ * Linux epoll event bits.  The shifted values are parenthesized so the
+ * macros expand safely next to unary or higher-precedence operators
+ * (e.g. ~LINUX_EPOLLET).
+ */
+#define	LINUX_EPOLLIN		0x001
+#define	LINUX_EPOLLPRI		0x002
+#define	LINUX_EPOLLOUT		0x004
+#define	LINUX_EPOLLRDNORM	0x040
+#define	LINUX_EPOLLRDBAND	0x080
+#define	LINUX_EPOLLWRNORM	0x100
+#define	LINUX_EPOLLWRBAND	0x200
+#define	LINUX_EPOLLMSG		0x400
+#define	LINUX_EPOLLERR		0x008
+#define	LINUX_EPOLLHUP		0x010
+#define	LINUX_EPOLLRDHUP	0x2000
+#define	LINUX_EPOLLWAKEUP	(1u<<29)
+#define	LINUX_EPOLLONESHOT	(1u<<30)
+#define	LINUX_EPOLLET		(1u<<31)
+
+/* Bits that map to EVFILT_READ. */
+#define	LINUX_EPOLL_EVRD	(LINUX_EPOLLIN|LINUX_EPOLLRDNORM	\
+		|LINUX_EPOLLHUP|LINUX_EPOLLPRI)
+/* Bits that map to EVFILT_WRITE. */
+#define	LINUX_EPOLL_EVWR	(LINUX_EPOLLOUT|LINUX_EPOLLWRNORM)
+/* Every bit epoll_ctl accepts; anything else is rejected. */
+#define	LINUX_EPOLL_EVSUP	(LINUX_EPOLLET|LINUX_EPOLLONESHOT	\
+		|LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR)
+
+#define LINUX_EPOLL_CTL_ADD 1
+#define LINUX_EPOLL_CTL_DEL 2
+#define LINUX_EPOLL_CTL_MOD 3
+
+#endif /* !_LINUX_EVENT_H_ */
diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c
index 245717f888e8..466c588157c5 100644
--- a/sys/compat/linux/linux_util.c
+++ b/sys/compat/linux/linux_util.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_util.h>
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
+MALLOC_DEFINE(M_EPOLL, "lepoll", "Linux events structures");
MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futex waiting proc");
diff --git a/sys/compat/linux/linux_util.h b/sys/compat/linux/linux_util.h
index a42335a9017f..55e0eba26ab8 100644
--- a/sys/compat/linux/linux_util.h
+++ b/sys/compat/linux/linux_util.h
@@ -45,6 +45,7 @@
#include <sys/uio.h>
MALLOC_DECLARE(M_LINUX);
+MALLOC_DECLARE(M_EPOLL);
MALLOC_DECLARE(M_FUTEX);
MALLOC_DECLARE(M_FUTEX_WP);
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 9d40d586b0ef..f76c895b14e5 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -509,6 +509,7 @@ compat/linux/linux_uid16.c optional compat_linux32
compat/linux/linux_util.c optional compat_linux32
compat/linux/linux_vdso.c optional compat_linux32
compat/linux/linux_common.c optional compat_linux32
+compat/linux/linux_event.c optional compat_linux32
dev/amr/amr_linux.c optional compat_linux32 amr
dev/mfi/mfi_linux.c optional compat_linux32 mfi
#
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 9dffbdb6239e..360435557b97 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -81,6 +81,7 @@ hptrr_lib.o optional hptrr \
cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
+compat/linux/linux_event.c optional compat_linux
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_fork.c optional compat_linux
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index ae165fcd3b85..6c60546647cb 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -41,6 +41,7 @@ ukbdmap.h optional ukbd_dflt_keymap \
cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
+compat/linux/linux_event.c optional compat_linux
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_fork.c optional compat_linux
diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c
index 874062b919f9..70c55bd14087 100644
--- a/sys/i386/linux/linux_dummy.c
+++ b/sys/i386/linux/linux_dummy.c
@@ -70,9 +70,6 @@ DUMMY(setfsgid);
DUMMY(pivot_root);
DUMMY(mincore);
DUMMY(lookup_dcookie);
-DUMMY(epoll_create);
-DUMMY(epoll_ctl);
-DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(fstatfs64);
DUMMY(mbind);
@@ -116,7 +113,6 @@ DUMMY(timerfd_gettime);
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(eventfd2);
-DUMMY(epoll_create1);
DUMMY(inotify_init1);
/* linux 2.6.30: */
DUMMY(preadv);
diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master
index 947791033603..ff72a48ea4b9 100644
--- a/sys/i386/linux/syscalls.master
+++ b/sys/i386/linux/syscalls.master
@@ -432,9 +432,11 @@
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }
253 AUE_NULL STD { int linux_lookup_dcookie(void); }
-254 AUE_NULL STD { int linux_epoll_create(void); }
-255 AUE_NULL STD { int linux_epoll_ctl(void); }
-256 AUE_NULL STD { int linux_epoll_wait(void); }
+254 AUE_NULL STD { int linux_epoll_create(l_int size); }
+255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
+ struct epoll_event *event); }
+256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout); }
257 AUE_NULL STD { int linux_remap_file_pages(void); }
258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
@@ -535,7 +537,8 @@
317 AUE_NULL STD { int linux_move_pages(void); }
; linux 2.6.19:
318 AUE_NULL STD { int linux_getcpu(void); }
-319 AUE_NULL STD { int linux_epoll_pwait(void); }
+319 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
+ l_int maxevents, l_int timeout, l_osigset_t *mask); }
; linux 2.6.22:
320 AUE_NULL STD { int linux_utimensat(void); }
321 AUE_NULL STD { int linux_signalfd(void); }
@@ -549,7 +552,7 @@
; linux 2.6.27:
327 AUE_NULL STD { int linux_signalfd4(void); }
328 AUE_NULL STD { int linux_eventfd2(void); }
-329 AUE_NULL STD { int linux_epoll_create1(void); }
+329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
330 AUE_NULL STD { int linux_dup3(l_int oldfd, \
l_int newfd, l_int flags); }
331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile
index 96fa94dc47b1..415d1ea01e1f 100644
--- a/sys/modules/linux/Makefile
+++ b/sys/modules/linux/Makefile
@@ -10,7 +10,7 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINUX32
VDSO= linux${SFX}_vdso
KMOD= linux
-SRCS= linux_fork.c linux${SFX}_dummy.c linux_file.c \
+SRCS= linux_fork.c linux${SFX}_dummy.c linux_file.c linux_event.c \
linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
linux${SFX}_machdep.c linux_misc.c linux_signal.c \
linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \
diff --git a/sys/modules/linux64/Makefile b/sys/modules/linux64/Makefile
index 40ae87387446..fe86d5313f37 100644
--- a/sys/modules/linux64/Makefile
+++ b/sys/modules/linux64/Makefile
@@ -5,7 +5,7 @@
VDSO= linux_vdso
KMOD= linux64
-SRCS= linux_fork.c linux_dummy.c linux_file.c \
+SRCS= linux_fork.c linux_dummy.c linux_file.c linux_event.c \
linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
linux_machdep.c linux_misc.c linux_signal.c \
linux_socket.c linux_stats.c linux_sysctl.c linux_sysent.c \