aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Chagin <dchagin@FreeBSD.org>2023-08-04 13:03:57 +0000
committerDmitry Chagin <dchagin@FreeBSD.org>2023-08-04 13:03:57 +0000
commitbbe017e0415a60114036f2315605ff6881ed9c46 (patch)
tree699b3d73dc14524316f4dab8e64c0152c1a52fbd
parentb5c0b9555d1533506e6cb4fe88060134c3ec577f (diff)
downloadsrc-bbe017e0415a.tar.gz
src-bbe017e0415a.zip
linux(4): Add dedicated ioprio system calls
On Linux these system calls have an effect only when used in conjunction with an I/O scheduler that supports I/O priorities. If no I/O scheduler has been set for a thread, then by default the I/O priority will follow the CPU nice value. Due to FreeBSD's lack of I/O scheduler facilities, the default Linux behavior is implemented. Ubuntu 23.04 debootstrap requires Linux ionice, which depends on these syscalls. Differential Revision: https://reviews.freebsd.org/D41153 MFC after: 1 month
-rw-r--r--sys/compat/linux/linux_dummy.c2
-rw-r--r--sys/compat/linux/linux_misc.c270
-rw-r--r--sys/compat/linux/linux_misc.h24
3 files changed, 294 insertions, 2 deletions
diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c
index 861482b67d8f..5556652b31a1 100644
--- a/sys/compat/linux/linux_dummy.c
+++ b/sys/compat/linux/linux_dummy.c
@@ -78,8 +78,6 @@ DUMMY(add_key);
DUMMY(request_key);
DUMMY(keyctl);
/* Linux 2.6.13: */
-DUMMY(ioprio_set);
-DUMMY(ioprio_get);
DUMMY(inotify_add_watch);
DUMMY(inotify_rm_watch);
/* Linux 2.6.16: */
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index 583cc25f1c43..0cea43001a0d 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/reboot.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
+#include <sys/rtprio.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/stat.h>
@@ -2653,3 +2654,272 @@ linux_execve(struct thread *td, struct linux_execve_args *args)
AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
return (error);
}
+
+/*
+ * Fold the scheduling priority of thread td1 into *rtp.
+ *
+ * The priority of td1 is fetched with pri_to_rtp().  *rtp is overwritten
+ * with it only when td1's type is numerically lower than rtp->type, or the
+ * types are equal and td1's prio is numerically lower than rtp->prio.
+ * Otherwise *rtp is left untouched.
+ */
+static void
+linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp)
+{
+	struct rtprio cur;
+
+	pri_to_rtp(td1, &cur);
+
+	/* A numerically higher type never replaces the accumulated one. */
+	if (cur.type > rtp->type)
+		return;
+	/* Same type: only a strictly lower prio value replaces it. */
+	if (cur.type == rtp->type && cur.prio >= rtp->prio)
+		return;
+
+	rtp->type = cur.type;
+	rtp->prio = cur.prio;
+}
+
+/*
+ * Number of rtprio priority steps that make up one Linux ioprio level.
+ *
+ * The expansion is parenthesized so that the macro behaves as a single
+ * operand.  Without the parentheses "x / LINUX_PRIO_DIVIDER" is parsed as
+ * "(x / RTP_PRIO_MAX) / LINUX_IOPRIO_MAX" and "x * LINUX_PRIO_DIVIDER" as
+ * "(x * RTP_PRIO_MAX) / LINUX_IOPRIO_MAX", so the two conversions below
+ * would use different scales and would not round-trip.
+ */
+#define	LINUX_PRIO_DIVIDER	(RTP_PRIO_MAX / LINUX_IOPRIO_MAX)
+
+/*
+ * Convert a struct rtprio into a Linux ioprio value.
+ *
+ * RTP_PRIO_IDLE maps to LINUX_IOPRIO_CLASS_IDLE, RTP_PRIO_NORMAL to
+ * LINUX_IOPRIO_CLASS_BE and RTP_PRIO_REALTIME to LINUX_IOPRIO_CLASS_RT.
+ * For the latter two the rtprio prio is scaled down by LINUX_PRIO_DIVIDER
+ * to form the ioprio data; any other type yields LINUX_IOPRIO_CLASS_NONE.
+ *
+ * Returns the packed ioprio (class and data) built by LINUX_IOPRIO_PRIO().
+ */
+static int
+linux_rtprio2ioprio(struct rtprio *rtp)
+{
+	int ioprio, prio;
+
+	switch (rtp->type) {
+	case RTP_PRIO_IDLE:
+		prio = RTP_PRIO_MIN;
+		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio);
+		break;
+	case RTP_PRIO_NORMAL:
+		prio = rtp->prio / LINUX_PRIO_DIVIDER;
+		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio);
+		break;
+	case RTP_PRIO_REALTIME:
+		prio = rtp->prio / LINUX_PRIO_DIVIDER;
+		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio);
+		break;
+	default:
+		prio = RTP_PRIO_MIN;
+		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio);
+		break;
+	}
+	return (ioprio);
+}
+
+/*
+ * Convert a Linux ioprio value into a struct rtprio.
+ *
+ * This is the inverse of linux_rtprio2ioprio(): the IDLE, BE and RT classes
+ * map to RTP_PRIO_IDLE, RTP_PRIO_NORMAL and RTP_PRIO_REALTIME, and the
+ * ioprio data of the BE and RT classes is scaled up by LINUX_PRIO_DIVIDER.
+ *
+ * Returns 0 on success with *rtp filled in.  Returns EINVAL, leaving *rtp
+ * unmodified, when the class is not one of IDLE/BE/RT or when the BE/RT
+ * priority level exceeds LINUX_IOPRIO_MAX.
+ */
+static int
+linux_ioprio2rtprio(int ioprio, struct rtprio *rtp)
+{
+	int data;
+
+	/* The data field is masked to the low bits, so it is never negative. */
+	data = LINUX_IOPRIO_PRIO_DATA(ioprio);
+	switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) {
+	case LINUX_IOPRIO_CLASS_IDLE:
+		rtp->prio = RTP_PRIO_MIN;
+		rtp->type = RTP_PRIO_IDLE;
+		break;
+	case LINUX_IOPRIO_CLASS_BE:
+		/* Do not scale an out-of-range, user supplied level. */
+		if (data > LINUX_IOPRIO_MAX)
+			return (EINVAL);
+		rtp->prio = data * LINUX_PRIO_DIVIDER;
+		rtp->type = RTP_PRIO_NORMAL;
+		break;
+	case LINUX_IOPRIO_CLASS_RT:
+		/* Do not scale an out-of-range, user supplied level. */
+		if (data > LINUX_IOPRIO_MAX)
+			return (EINVAL);
+		rtp->prio = data * LINUX_PRIO_DIVIDER;
+		rtp->type = RTP_PRIO_REALTIME;
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+#undef LINUX_PRIO_DIVIDER
+/*
+ * Linux ioprio_get(2) entry point.
+ *
+ * The reported I/O priority is derived from the scheduling priority
+ * (struct rtprio) of the selected thread(s) and converted with
+ * linux_rtprio2ioprio().  args->which selects how args->who is interpreted:
+ *
+ *   LINUX_IOPRIO_WHO_PROCESS  one process, or one thread when who > PID_MAX
+ *   LINUX_IOPRIO_WHO_PGRP     every member of a process group
+ *   LINUX_IOPRIO_WHO_USER     every process whose credential uid equals who
+ *
+ * A who of 0 refers to the calling thread, the caller's process group or
+ * the caller's uid respectively.  When several threads are examined their
+ * priorities are reduced with linux_up_rtprio_if().
+ *
+ * On success the ioprio value is stored in td->td_retval[0] and 0 is
+ * returned.  Returns ESRCH when no target was found, EINVAL for an unknown
+ * args->which, or the p_cansee() error in the WHO_PROCESS case.
+ */
+int
+linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args)
+{
+ struct thread *td1;
+ struct rtprio rtp;
+ struct pgrp *pg;
+ struct proc *p;
+ int error, found;
+
+ p = NULL;
+ td1 = NULL;
+ error = 0;
+ found = 0;
+ rtp.type = RTP_PRIO_IDLE; /* starting value for the reduction; presumably the weakest priority */
+ rtp.prio = RTP_PRIO_MAX;
+ switch (args->which) {
+ case LINUX_IOPRIO_WHO_PROCESS:
+ if (args->who == 0) {
+ td1 = td;
+ p = td1->td_proc;
+ PROC_LOCK(p); /* the lookups below presumably return p already locked - TODO confirm */
+ } else if (args->who > PID_MAX) { /* ids above PID_MAX are looked up as a thread */
+ td1 = linux_tdfind(td, args->who, -1);
+ if (td1 != NULL)
+ p = td1->td_proc;
+ } else
+ p = pfind(args->who);
+ if (p == NULL)
+ return (ESRCH);
+ if ((error = p_cansee(td, p))) {
+ PROC_UNLOCK(p);
+ break;
+ }
+ if (td1 != NULL) { /* a specific thread was selected: report only its priority */
+ pri_to_rtp(td1, &rtp);
+ } else {
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ linux_up_rtprio_if(td1, &rtp);
+ }
+ }
+ found++;
+ PROC_UNLOCK(p);
+ break;
+ case LINUX_IOPRIO_WHO_PGRP:
+ sx_slock(&proctree_lock);
+ if (args->who == 0) {
+ pg = td->td_proc->p_pgrp;
+ PGRP_LOCK(pg);
+ } else {
+ pg = pgfind(args->who); /* presumably returns pg locked, see PGRP_UNLOCK() below */
+ if (pg == NULL) {
+ sx_sunlock(&proctree_lock);
+ error = ESRCH;
+ break;
+ }
+ }
+ sx_sunlock(&proctree_lock);
+ LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NORMAL &&
+ p_cansee(td, p) == 0) { /* members the caller cannot see are skipped silently */
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ linux_up_rtprio_if(td1, &rtp);
+ found++;
+ }
+ }
+ PROC_UNLOCK(p);
+ }
+ PGRP_UNLOCK(pg);
+ break;
+ case LINUX_IOPRIO_WHO_USER:
+ if (args->who == 0)
+ args->who = td->td_ucred->cr_uid; /* 0 selects the caller's own uid */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NORMAL &&
+ p->p_ucred->cr_uid == args->who &&
+ p_cansee(td, p) == 0) {
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ linux_up_rtprio_if(td1, &rtp);
+ found++;
+ }
+ }
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ if (error == 0) {
+ if (found != 0) /* at least one thread contributed to rtp */
+ td->td_retval[0] = linux_rtprio2ioprio(&rtp);
+ else
+ error = ESRCH;
+ }
+ return (error);
+}
+/*
+ * Linux ioprio_set(2) entry point.
+ *
+ * args->ioprio is translated to a struct rtprio with linux_ioprio2rtprio()
+ * and applied with rtp_to_pri() to every selected thread.  args->which and
+ * args->who select the targets:
+ *
+ *   LINUX_IOPRIO_WHO_PROCESS  one process, or one thread when who > PID_MAX
+ *   LINUX_IOPRIO_WHO_PGRP     every member of a process group
+ *   LINUX_IOPRIO_WHO_USER     every process whose credential uid equals who
+ *
+ * A who of 0 refers to the calling thread, the caller's process group or
+ * the caller's uid respectively.
+ *
+ * Returns 0 on success.  Returns EINVAL when the ioprio cannot be converted
+ * or args->which is unknown, the priv_check() error when a realtime
+ * priority is requested without PRIV_SCHED_RTPRIO, ESRCH when the process,
+ * thread or process group does not exist, the p_cansched() error in the
+ * WHO_PROCESS case, or the first error reported by rtp_to_pri().
+ *
+ * In the WHO_PGRP and WHO_USER cases, processes that fail p_cansched() are
+ * skipped, and 0 is returned even when no thread was changed.
+ */
+int
+linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args)
+{
+ struct thread *td1;
+ struct rtprio rtp;
+ struct pgrp *pg;
+ struct proc *p;
+ int error;
+
+ if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0)
+ return (error);
+ /* Attempts to set high priorities (REALTIME) require su privileges. */
+ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
+ (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
+ return (error);
+
+ p = NULL;
+ td1 = NULL;
+ switch (args->which) {
+ case LINUX_IOPRIO_WHO_PROCESS:
+ if (args->who == 0) {
+ td1 = td;
+ p = td1->td_proc;
+ PROC_LOCK(p); /* the lookups below presumably return p already locked - TODO confirm */
+ } else if (args->who > PID_MAX) { /* ids above PID_MAX are looked up as a thread */
+ td1 = linux_tdfind(td, args->who, -1);
+ if (td1 != NULL)
+ p = td1->td_proc;
+ } else
+ p = pfind(args->who);
+ if (p == NULL)
+ return (ESRCH);
+ if ((error = p_cansched(td, p))) {
+ PROC_UNLOCK(p);
+ break;
+ }
+ if (td1 != NULL) { /* a specific thread was selected: change only that thread */
+ error = rtp_to_pri(&rtp, td1);
+ } else {
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ if ((error = rtp_to_pri(&rtp, td1)) != 0)
+ break;
+ }
+ }
+ PROC_UNLOCK(p);
+ break;
+ case LINUX_IOPRIO_WHO_PGRP:
+ sx_slock(&proctree_lock);
+ if (args->who == 0) {
+ pg = td->td_proc->p_pgrp;
+ PGRP_LOCK(pg);
+ } else {
+ pg = pgfind(args->who); /* presumably returns pg locked, see PGRP_UNLOCK() below */
+ if (pg == NULL) {
+ sx_sunlock(&proctree_lock);
+ error = ESRCH;
+ break;
+ }
+ }
+ sx_sunlock(&proctree_lock);
+ LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NORMAL &&
+ p_cansched(td, p) == 0) { /* members the caller may not reschedule are skipped */
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ if ((error = rtp_to_pri(&rtp, td1)) != 0)
+ break;
+ }
+ }
+ PROC_UNLOCK(p);
+ if (error != 0) /* stop at the first failure, after dropping the process lock */
+ break;
+ }
+ PGRP_UNLOCK(pg);
+ break;
+ case LINUX_IOPRIO_WHO_USER:
+ if (args->who == 0)
+ args->who = td->td_ucred->cr_uid; /* 0 selects the caller's own uid */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NORMAL &&
+ p->p_ucred->cr_uid == args->who &&
+ p_cansched(td, p) == 0) {
+ FOREACH_THREAD_IN_PROC(p, td1) {
+ if ((error = rtp_to_pri(&rtp, td1)) != 0)
+ break;
+ }
+ }
+ PROC_UNLOCK(p);
+ if (error != 0) /* stop at the first failure, after dropping the process lock */
+ break;
+ }
+ sx_sunlock(&allproc_lock);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h
index 80f12ef13545..92cc7be636d2 100644
--- a/sys/compat/linux/linux_misc.h
+++ b/sys/compat/linux/linux_misc.h
@@ -191,4 +191,28 @@ struct syscall_info {
};
};
+/*
+ * Linux ioprio set/get syscalls.
+ *
+ * An ioprio value packs two fields: a class, stored in the bits starting
+ * at LINUX_IOPRIO_CLASS_SHIFT and masked with LINUX_IOPRIO_CLASS_MASK, and
+ * class-specific data in the bits below the shift.
+ */
+#define LINUX_IOPRIO_CLASS_SHIFT 13
+#define LINUX_IOPRIO_CLASS_MASK 0x07
+#define LINUX_IOPRIO_PRIO_MASK ((1UL << LINUX_IOPRIO_CLASS_SHIFT) - 1)
+
+#define LINUX_IOPRIO_PRIO_CLASS(ioprio) \
+ (((ioprio) >> LINUX_IOPRIO_CLASS_SHIFT) & LINUX_IOPRIO_CLASS_MASK)
+#define LINUX_IOPRIO_PRIO_DATA(ioprio) ((ioprio) & LINUX_IOPRIO_PRIO_MASK) /* extract the data field */
+#define LINUX_IOPRIO_PRIO(class, data) \
+ ((((class) & LINUX_IOPRIO_CLASS_MASK) << LINUX_IOPRIO_CLASS_SHIFT) | \
+ ((data) & LINUX_IOPRIO_PRIO_MASK))
+
+#define LINUX_IOPRIO_CLASS_NONE 0 /* no class given */
+#define LINUX_IOPRIO_CLASS_RT 1 /* mapped to RTP_PRIO_REALTIME in linux_misc.c */
+#define LINUX_IOPRIO_CLASS_BE 2 /* mapped to RTP_PRIO_NORMAL in linux_misc.c */
+#define LINUX_IOPRIO_CLASS_IDLE 3 /* mapped to RTP_PRIO_IDLE in linux_misc.c */
+
+#define LINUX_IOPRIO_MIN 0 /* presumably the lowest per-class level - TODO confirm */
+#define LINUX_IOPRIO_MAX 7 /* used to scale rtprio values in linux_misc.c */
+
+#define LINUX_IOPRIO_WHO_PROCESS 1 /* 'who' names a process or thread */
+#define LINUX_IOPRIO_WHO_PGRP 2 /* 'who' names a process group */
+#define LINUX_IOPRIO_WHO_USER 3 /* 'who' names a user id */
+
#endif /* _LINUX_MISC_H_ */