author    Julian Elischer <julian@FreeBSD.org>  2002-06-29 17:26:22 +0000
committer Julian Elischer <julian@FreeBSD.org>  2002-06-29 17:26:22 +0000
commit    e602ba25fd1f9a7ea2215c01f470c08f140de809 (patch)
tree      0a0483a267784fa8e2bf86857d8727edb5b122e9 /sys/kern
parent    cc5dcb202cd7616bae9321687ec46a384a061d99 (diff)
Part 1 of KSE-III
The ability to schedule multiple threads per process (on one cpu) by making ALL system calls optionally asynchronous.
To come: ia64 and power-pc patches, patches for gdb, test program (in tools).
Reviewed by: Almost everyone who counts (at various times: peter, jhb, matt, alfred, mini, bernd, and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff. Expect slight instability in signals.
Notes: svn path=/head/; revision=99072
Diffstat (limited to 'sys/kern')
-rw-r--r--   sys/kern/init_main.c        33
-rw-r--r--   sys/kern/init_sysent.c       2
-rw-r--r--   sys/kern/kern_condvar.c     89
-rw-r--r--   sys/kern/kern_exec.c        10
-rw-r--r--   sys/kern/kern_exit.c        97
-rw-r--r--   sys/kern/kern_fork.c        75
-rw-r--r--   sys/kern/kern_idle.c        19
-rw-r--r--   sys/kern/kern_intr.c        27
-rw-r--r--   sys/kern/kern_kthread.c      3
-rw-r--r--   sys/kern/kern_mutex.c       31
-rw-r--r--   sys/kern/kern_poll.c         1
-rw-r--r--   sys/kern/kern_proc.c       217
-rw-r--r--   sys/kern/kern_shutdown.c     1
-rw-r--r--   sys/kern/kern_sig.c        386
-rw-r--r--   sys/kern/kern_subr.c         1
-rw-r--r--   sys/kern/kern_switch.c     662
-rw-r--r--   sys/kern/kern_synch.c      275
-rw-r--r--   sys/kern/ksched.c           27
-rw-r--r--   sys/kern/subr_smp.c          4
-rw-r--r--   sys/kern/subr_trap.c        37
-rw-r--r--   sys/kern/subr_turnstile.c   31
-rw-r--r--   sys/kern/subr_witness.c      1
-rw-r--r--   sys/kern/sys_generic.c       2
-rw-r--r--   sys/kern/sys_process.c       6
-rw-r--r--   sys/kern/syscalls.master     2
-rw-r--r--   sys/kern/tty.c              53
26 files changed, 1601 insertions, 491 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index d5c565680605..06cc8d831ebb 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -289,6 +289,7 @@ proc0_init(void *dummy __unused)
* Initialize thread, process and pgrp structures.
*/
procinit();
+ threadinit();
/*
* Initialize sleep queue hash table
@@ -322,19 +323,34 @@ proc0_init(void *dummy __unused)
p->p_sysent = &aout_sysvec;
#endif
+ /*
+ * proc_linkup was already done in init_i386() or alphainit() etc.
+ * because the earlier code needed to follow td->td_proc. Otherwise
+ * I would have done it here.. maybe this means this should be
+ * done earlier too.
+ */
ke = &proc0.p_kse; /* XXXKSE */
kg = &proc0.p_ksegrp; /* XXXKSE */
p->p_flag = P_SYSTEM;
p->p_sflag = PS_INMEM;
- p->p_stat = SRUN;
- p->p_ksegrp.kg_nice = NZERO;
- kg->kg_pri_class = PRI_TIMESHARE;
- kg->kg_user_pri = PUSER;
- td->td_priority = PVM;
- td->td_base_pri = PUSER;
-
+ p->p_state = PRS_NORMAL;
+ td->td_state = TDS_RUNNING;
+ kg->kg_nice = NZERO;
+ kg->kg_pri_class = PRI_TIMESHARE;
+ kg->kg_user_pri = PUSER;
+ td->td_priority = PVM;
+ td->td_base_pri = PUSER;
+ td->td_kse = ke; /* XXXKSE */
+ ke->ke_oncpu = 0;
+ ke->ke_state = KES_RUNNING;
+ ke->ke_thread = td;
+ /* proc_linkup puts it in the idle queue, that's not what we want. */
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses--;
p->p_peers = 0;
p->p_leader = p;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+
bcopy("swapper", p->p_comm, sizeof ("swapper"));
@@ -662,8 +678,7 @@ kick_init(const void *udata __unused)
td = FIRST_THREAD_IN_PROC(initproc);
mtx_lock_spin(&sched_lock);
- initproc->p_stat = SRUN;
- setrunqueue(FIRST_THREAD_IN_PROC(initproc)); /* XXXKSE */
+ setrunqueue(td); /* XXXKSE */
mtx_unlock_spin(&sched_lock);
}
SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
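The proc0 changes above wire together the arrangement this patch introduces: a process owns KSEGRPs, a KSEGRP owns KSEs and threads, and a running thread is paired with a KSE taken off the ksegrp's idle queue. A rough sketch of that pairing, using only fields that appear in this diff (p_kse, p_ksegrp, ke_thread, td_kse, kg_iq, kg_idle_kses); the helper name is invented for illustration and is not part of the commit:

	#include <sys/param.h>
	#include <sys/queue.h>
	#include <sys/proc.h>

	/*
	 * Sketch: pair the boot thread with the process's embedded KSE, the
	 * way proc0_init() does above.  Assumes proc_linkup() already ran
	 * and left the KSE on the ksegrp's idle queue (kg_iq).
	 */
	static void
	pair_boot_thread(struct proc *p, struct thread *td)
	{
		struct kse *ke = &p->p_kse;		/* embedded KSE (XXXKSE) */
		struct ksegrp *kg = &p->p_ksegrp;	/* embedded KSEGRP (XXXKSE) */

		td->td_kse = ke;			/* thread -> KSE */
		ke->ke_thread = td;			/* KSE -> thread */
		ke->ke_state = KES_RUNNING;
		td->td_state = TDS_RUNNING;

		/* proc_linkup() idled the KSE; a running KSE must not stay there. */
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}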
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 425e3b73fc88..cf8ba8038bcf 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -405,7 +405,7 @@ struct sysent sysent[] = {
{ 0, (sy_call_t *)kse_wakeup }, /* 380 = kse_wakeup */
{ AS(kse_new_args), (sy_call_t *)kse_new }, /* 381 = kse_new */
{ AS(thread_wakeup_args), (sy_call_t *)thread_wakeup }, /* 382 = thread_wakeup */
- { 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */
+ { SYF_MPSAFE | 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */
{ 0, (sy_call_t *)nosys }, /* 384 = __mac_get_proc */
{ 0, (sy_call_t *)nosys }, /* 385 = __mac_set_proc */
{ 0, (sy_call_t *)nosys }, /* 386 = __mac_get_fd */
diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index 9d30d2503e18..78585b28da6a 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -48,7 +48,7 @@
*/
#define CV_ASSERT(cvp, mp, td) do { \
KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \
- KASSERT((td)->td_proc->p_stat == SRUN, ("%s: not SRUN", __func__)); \
+ KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \
KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \
KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \
mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \
@@ -80,6 +80,7 @@
#endif
static void cv_timedwait_end(void *arg);
+static void cv_check_upcall(struct thread *td);
/*
* Initialize a condition variable. Must be called before use.
@@ -109,14 +110,47 @@ cv_destroy(struct cv *cvp)
*/
/*
+ * Decide if we need to queue an upcall.
+ * This is copied from msleep(), perhaps this should be a common function.
+ */
+static void
+cv_check_upcall(struct thread *td)
+{
+
+ /*
+ * If we are capable of async syscalls and there isn't already
+ * another one ready to return, start a new thread
+ * and queue it as ready to run. Note that there is danger here
+ * because we need to make sure that we don't sleep allocating
+ * the thread (recursion here might be bad).
+ * Hence the TDF_INMSLEEP flag.
+ */
+ if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
+ (td->td_flags & TDF_INMSLEEP) == 0) {
+ /*
+ * If we have no queued work to do,
+ * upcall to the UTS to see if it has more work.
+ * We don't need to upcall now, just queue it.
+ */
+ if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
+ /* Don't recurse here! */
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
+ }
+ }
+}
+
+/*
* Switch context.
*/
static __inline void
cv_switch(struct thread *td)
{
- td->td_proc->p_stat = SSLEEP;
+ td->td_state = TDS_SLP;
td->td_proc->p_stats->p_ru.ru_nvcsw++;
+ cv_check_upcall(td);
mi_switch();
CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
td->td_proc->p_pid, td->td_proc->p_comm);
@@ -135,7 +169,7 @@ cv_switch_catch(struct thread *td)
* We put ourselves on the sleep queue and start our timeout before
* calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
* both) could occur while we were stopped. A SIGCONT would cause us to
- * be marked as SSLEEP without resuming us, thus we must be ready for
+ * be marked as TDS_SLP without resuming us, thus we must be ready for
* sleep when cursig is called. If the wakeup happens while we're
* stopped, td->td_wchan will be 0 upon return from cursig.
*/
@@ -143,13 +177,15 @@ cv_switch_catch(struct thread *td)
mtx_unlock_spin(&sched_lock);
p = td->td_proc;
PROC_LOCK(p);
- sig = cursig(p); /* XXXKSE */
+ sig = cursig(td); /* XXXKSE */
+ if (thread_suspend_check(1))
+ sig = SIGSTOP;
mtx_lock_spin(&sched_lock);
PROC_UNLOCK(p);
if (sig != 0) {
if (td->td_wchan != NULL)
cv_waitq_remove(td);
- td->td_proc->p_stat = SRUN;
+ td->td_state = TDS_RUNNING; /* XXXKSE */
} else if (td->td_wchan != NULL) {
cv_switch(td);
}
@@ -175,7 +211,6 @@ cv_waitq_add(struct cv *cvp, struct thread *td)
td->td_flags |= TDF_CVWAITQ;
td->td_wchan = cvp;
td->td_wmesg = cvp->cv_description;
- td->td_kse->ke_slptime = 0; /* XXXKSE */
td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
td->td_base_pri = td->td_priority;
CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
@@ -285,7 +320,7 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp)
PROC_LOCK(p);
if (sig == 0)
- sig = cursig(p); /* XXXKSE */
+ sig = cursig(td); /* XXXKSE */
if (sig != 0) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
@@ -293,6 +328,8 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp)
rval = ERESTART;
}
PROC_UNLOCK(p);
+ if (p->p_flag & P_WEXIT)
+ rval = EINTR;
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
@@ -363,6 +400,8 @@ cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
mi_switch();
}
+ if (td->td_proc->p_flag & P_WEXIT)
+ rval = EWOULDBLOCK;
mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
@@ -436,12 +475,11 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
td->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
}
-
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
if (sig == 0)
- sig = cursig(p);
+ sig = cursig(td);
if (sig != 0) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
@@ -450,6 +488,9 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
}
PROC_UNLOCK(p);
+ if (p->p_flag & P_WEXIT)
+ rval = EINTR;
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
ktrcsw(0, 0);
@@ -477,15 +518,13 @@ cv_wakeup(struct cv *cvp)
TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
td->td_flags &= ~TDF_CVWAITQ;
td->td_wchan = 0;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(td); */
CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
td, td->td_proc->p_pid, td->td_proc->p_comm);
if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
updatepri(td);
- td->td_kse->ke_slptime = 0;
td->td_ksegrp->kg_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (td->td_proc->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
@@ -568,7 +607,7 @@ cv_timedwait_end(void *arg)
td->td_flags &= ~TDF_TIMEOUT;
setrunqueue(td);
} else if (td->td_wchan != NULL) {
- if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
+ if (td->td_state == TDS_SLP) /* XXXKSE */
setrunnable(td);
else
cv_waitq_remove(td);
@@ -577,3 +616,27 @@ cv_timedwait_end(void *arg)
td->td_flags |= TDF_TIMOFAIL;
mtx_unlock_spin(&sched_lock);
}
+
+/*
+ * For now only abort interruptible waits.
+ * The others will have to either complete on their own or have a timeout.
+ */
+void
+cv_abort(struct thread *td)
+{
+
+ CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
+ td->td_proc->p_pid,
+ td->td_proc->p_comm);
+ mtx_lock_spin(&sched_lock);
+ if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
+ if (td->td_wchan != NULL) {
+ if (td->td_state == TDS_SLP)
+ setrunnable(td);
+ else
+ cv_waitq_remove(td);
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+}
+
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index feaa12343f77..0cd7f2794482 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -154,12 +154,14 @@ execve(td, uap)
PROC_LOCK(p);
KASSERT((p->p_flag & P_INEXEC) == 0,
("%s(): process already has P_INEXEC flag", __func__));
+ if ((p->p_flag & P_KSES) && thread_single(SNGLE_EXIT)) {
+ PROC_UNLOCK(p);
+ mtx_unlock(&Giant);
+ return (ERESTART); /* Try again later. */
+ }
+ /* If we get here all other threads are dead. */
p->p_flag |= P_INEXEC;
PROC_UNLOCK(p);
-
-/* XXXKSE */
-/* !!!!!!!! we need abort all the other threads of this process before we */
-/* proceed beyond his point! */
/*
* Initialize part of the common data
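execve() now forces a threaded (P_KSES) process into single-threaded mode before replacing the address space; exit1() and fork1() below use the same helper with SNGLE_EXIT and SNGLE_NO_EXIT respectively. A minimal sketch of the caller-side pattern, assuming the thread_single()/thread_single_end() interface added elsewhere in this commit; the wrapper name is invented for illustration:

	#include <sys/param.h>
	#include <sys/proc.h>
	#include <sys/errno.h>

	/*
	 * Sketch of the calling convention seen in execve() above and in
	 * exit1()/fork1() below: thread_single() is called with the proc
	 * lock held, and callers must be prepared for failure if another
	 * thread won the race to single-thread the process.
	 */
	static int
	quiesce_other_threads(struct proc *p, int mode)	/* SNGLE_EXIT or SNGLE_NO_EXIT */
	{
		int error = 0;

		PROC_LOCK(p);
		if (p->p_flag & P_KSES) {
			if (thread_single(mode))
				error = ERESTART;	/* lost the race; retry later */
		}
		PROC_UNLOCK(p);
		/* A SNGLE_NO_EXIT caller later undoes this with thread_single_end(). */
		return (error);
	}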
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 63a51351fa49..fea5438f3f22 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -145,6 +145,67 @@ exit1(td, rv)
/*
* XXXXKSE: MUST abort all other threads before proceeding past here.
*/
+ PROC_LOCK(p);
+ if (p->p_flag & P_KSES) {
+ /*
+ * First check if some other thread got here before us..
+ * if so, act appropriately (exit or suspend);
+ */
+ thread_suspend_check(0);
+ /*
+ * Here is a trick..
+ * We need to free up our KSE to process other threads
+ * so that we can safely set the UNBOUND flag
+ * (whether or not we have a mailbox) as we are NEVER
+ * going to return to the user.
+ * The flag will not be set yet if we are exiting
+ * because of a signal, pagefault, or similar
+ * (or even an exit(2) from the UTS).
+ */
+ td->td_flags |= TDF_UNBOUND;
+
+ /*
+ * Kill off the other threads. This requires
+ * Some co-operation from other parts of the kernel
+ * so it may not be instant.
+ * With this state set:
+ * Any thread entering the kernel from userspace will
+ * thread_exit() in trap(). Any thread attempting to
+ * sleep will return immediately
+ * with EINTR or EWOULDBLOCK, which will hopefully force them
+ * to back out to userland, freeing resources as they go, and
+ * anything attempting to return to userland will thread_exit()
+ * from userret(). thread_exit() will unsuspend us
+ * when the last other thread exits.
+ */
+ if (thread_single(SNGLE_EXIT)) {
+ panic ("Exit: Single threading fouled up");
+ }
+ /*
+ * All other activity in this process is now stopped.
+ * Remove excess KSEs and KSEGRPS. XXXKSE (when we have them)
+ * ...
+ * Turn off threading support.
+ */
+ p->p_flag &= ~P_KSES;
+ td->td_flags &= ~TDF_UNBOUND;
+ thread_single_end(); /* Don't need this any more. */
+ }
+ /*
+ * With this state set:
+ * Any thread entering the kernel from userspace will thread_exit()
+ * in trap(). Any thread attempting to sleep will return immediately
+ * with EINTR or EWOULDBLOCK, which will hopefully force them
+ * to back out to userland, freeing resources as they go, and
+ * anything attempting to return to userland will thread_exit()
+ * from userret(). thread_exit() will do a wakeup on p->p_numthreads
+ * if it transitions to 1.
+ */
+
+ p->p_flag |= P_WEXIT;
+ PROC_UNLOCK(p);
+ if (td->td_kse->ke_mdstorage)
+ cpu_free_kse_mdstorage(td->td_kse);
/* Are we a task leader? */
PROC_LOCK(p);
@@ -185,7 +246,6 @@ exit1(td, rv)
*/
PROC_LOCK(p);
p->p_flag &= ~(P_TRACED | P_PPWAIT);
- p->p_flag |= P_WEXIT;
SIGEMPTYSET(p->p_siglist);
PROC_UNLOCK(p);
if (timevalisset(&p->p_realtimer.it_value))
@@ -434,22 +494,24 @@ exit1(td, rv)
/*
* We have to wait until after releasing all locks before
- * changing p_stat. If we block on a mutex then we will be
+ * changing p_state. If we block on a mutex then we will be
* back at SRUN when we resume and our parent will never
* harvest us.
*/
- p->p_stat = SZOMB;
+ p->p_state = PRS_ZOMBIE;
wakeup(p->p_pptr);
PROC_UNLOCK(p->p_pptr);
- PROC_UNLOCK(p);
-
cnt.v_swtch++;
binuptime(PCPU_PTR(switchtime));
PCPU_SET(switchticks, ticks);
- cpu_sched_exit(td);
- cpu_throw();
+ cpu_sched_exit(td); /* XXXKSE check if this should be in thread_exit */
+ /*
+ * Make sure this thread is discarded from the zombie.
+ * This will also release this thread's reference to the ucred.
+ */
+ thread_exit();
panic("exit1");
}
@@ -504,6 +566,8 @@ wait1(td, uap, compat)
register int nfound;
register struct proc *p, *q, *t;
int status, error;
+ struct kse *ke;
+ struct ksegrp *kg;
q = td->td_proc;
if (uap->pid == 0) {
@@ -540,7 +604,7 @@ loop:
}
nfound++;
- if (p->p_stat == SZOMB) {
+ if (p->p_state == PRS_ZOMBIE) {
/*
* charge childs scheduling cpu usage to parent
* XXXKSE assume only one thread & kse & ksegrp
@@ -656,6 +720,21 @@ loop:
}
/*
+ * There should only be one KSE/KSEGRP but
+ * do it right anyhow.
+ */
+ FOREACH_KSEGRP_IN_PROC(p, kg) {
+ FOREACH_KSE_IN_GROUP(kg, ke) {
+ /* Free the KSE spare thread. */
+ if (ke->ke_tdspare != NULL) {
+ thread_free(ke->ke_tdspare);
+ p->p_kse.ke_tdspare = NULL;
+ }
+ }
+ }
+ thread_reap(); /* check for zombie threads */
+
+ /*
* Give vm and machine-dependent layer a chance
* to free anything that cpu_exit couldn't
* release while still running in process context.
@@ -669,7 +748,7 @@ loop:
mtx_unlock(&Giant);
return (0);
}
- if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
+ if (P_SHOULDSTOP(p) && ((p->p_flag & P_WAITED) == 0) &&
(p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
p->p_flag |= P_WAITED;
sx_xunlock(&proctree_lock);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 016653bcb471..eac0267ce1bb 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -212,23 +212,6 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
-#if 0
-void
-kse_init(struct kse *kse1, struct kse *kse2)
-{
-}
-
-void
-thread_init(struct thread *thread1, struct thread *thread2)
-{
-}
-
-void
-ksegrp_init(struct ksegrp *ksegrp1, struct ksegrp *ksegrp2)
-{
-}
-#endif
-
int
fork1(td, flags, procp)
struct thread *td; /* parent proc */
@@ -296,6 +279,29 @@ fork1(td, flags, procp)
return (0);
}
+ if (p1->p_flag & P_KSES) {
+ /*
+ * Idle the other threads for a second.
+ * Since the user space is copied, it must remain stable.
+ * In addition, all threads (from the user perspective)
+ * need to either be suspended or in the kernel,
+ * where they will try restart in the parent and will
+ * be aborted in the child.
+ */
+ PROC_LOCK(p1);
+ if (thread_single(SNGLE_NO_EXIT)) {
+ /* Abort.. someone else is single threading before us */
+ PROC_UNLOCK(p1);
+ return (ERESTART);
+ }
+ PROC_UNLOCK(p1);
+ /*
+ * All other activity in this process
+ * is now suspended at the user boundary,
+ * (or other safe places if we think of any).
+ */
+ }
+
/* Allocate new proc. */
newproc = uma_zalloc(proc_zone, M_WAITOK);
@@ -311,6 +317,11 @@ fork1(td, flags, procp)
if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
sx_xunlock(&allproc_lock);
uma_zfree(proc_zone, newproc);
+ if (p1->p_flag & P_KSES) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
tsleep(&forksleep, PUSER, "fork", hz / 2);
return (EAGAIN);
}
@@ -325,6 +336,11 @@ fork1(td, flags, procp)
if (!ok) {
sx_xunlock(&allproc_lock);
uma_zfree(proc_zone, newproc);
+ if (p1->p_flag & P_KSES) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
tsleep(&forksleep, PUSER, "fork", hz / 2);
return (EAGAIN);
}
@@ -411,7 +427,7 @@ again:
lastpid = trypid;
p2 = newproc;
- p2->p_stat = SIDL; /* protect against others */
+ p2->p_state = PRS_NEW; /* protect against others */
p2->p_pid = trypid;
LIST_INSERT_HEAD(&allproc, p2, p_list);
LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
@@ -449,7 +465,7 @@ again:
* Start by zeroing the section of proc that is zero-initialized,
* then copy the section that is copied directly from the parent.
*/
- td2 = thread_get(p2);
+ td2 = thread_alloc();
ke2 = &p2->p_kse;
kg2 = &p2->p_ksegrp;
@@ -459,8 +475,10 @@ again:
(unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
bzero(&ke2->ke_startzero,
(unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
+#if 0 /* bzero'd by the thread allocator */
bzero(&td2->td_startzero,
(unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
+#endif
bzero(&kg2->kg_startzero,
(unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
@@ -482,9 +500,22 @@ again:
* XXXKSE Theoretically only the running thread would get copied
* Others in the kernel would be 'aborted' in the child.
* i.e return E*something*
+ * On SMP we would have to stop them running on
+ * other CPUs! (set a flag in the proc that stops
+ * all returns to userland until completed)
+ * This is wrong but ok for 1:1.
*/
proc_linkup(p2, kg2, ke2, td2);
+ /* Set up the thread as an active thread (as if runnable). */
+ TAILQ_REMOVE(&kg2->kg_iq, ke2, ke_kgrlist);
+ kg2->kg_idle_kses--;
+ ke2->ke_state = KES_UNQUEUED;
+ ke2->ke_thread = td2;
+ td2->td_kse = ke2;
+ td2->td_flags &= ~TDF_UNBOUND; /* For the rest of this syscall. */
+KASSERT((ke2->ke_kgrlist.tqe_next != ke2), ("linked to self!"));
+
/* note.. XXXKSE no pcb or u-area yet */
/*
@@ -699,7 +730,6 @@ again:
p2->p_acflag = AFORK;
if ((flags & RFSTOPPED) == 0) {
mtx_lock_spin(&sched_lock);
- p2->p_stat = SRUN;
setrunqueue(td2);
mtx_unlock_spin(&sched_lock);
}
@@ -803,6 +833,9 @@ fork_exit(callout, arg, frame)
struct proc *p = td->td_proc;
td->td_kse->ke_oncpu = PCPU_GET(cpuid);
+ p->p_state = PRS_NORMAL;
+ td->td_state = TDS_RUNNING; /* Already done in switch() on 386. */
+ td->td_kse->ke_state = KES_RUNNING;
/*
* Finish setting up thread glue. We need to initialize
* the thread into a td_critnest=1 state. Some platforms
@@ -814,7 +847,7 @@ fork_exit(callout, arg, frame)
sched_lock.mtx_lock = (uintptr_t)td;
sched_lock.mtx_recurse = 0;
cpu_critical_fork_exit();
- CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "fork_exit: new thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
index 29194b735f45..306f2a57cdad 100644
--- a/sys/kern/kern_idle.c
+++ b/sys/kern/kern_idle.c
@@ -40,6 +40,7 @@ idle_setup(void *dummy)
struct pcpu *pc;
#endif
struct proc *p;
+ struct thread *td;
int error;
#ifdef SMP
@@ -60,7 +61,10 @@ idle_setup(void *dummy)
panic("idle_setup: kthread_create error %d\n", error);
p->p_flag |= P_NOLOAD;
- p->p_stat = SRUN;
+ td = FIRST_THREAD_IN_PROC(p);
+ td->td_state = TDS_RUNQ;
+ td->td_kse->ke_state = KES_ONRUNQ;
+ td->td_kse->ke_flags |= KEF_IDLEKSE;
#ifdef SMP
}
#endif
@@ -75,16 +79,22 @@ idle_proc(void *dummy)
#ifdef DIAGNOSTIC
int count;
#endif
+ struct thread *td;
+ struct proc *p;
+ td = curthread;
+ p = td->td_proc;
+ td->td_state = TDS_RUNNING;
+ td->td_kse->ke_state = KES_RUNNING;
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
count = 0;
- while (count >= 0 && procrunnable() == 0) {
+ while (count >= 0 && kserunnable() == 0) {
#else
- while (procrunnable() == 0) {
+ while (kserunnable() == 0) {
#endif
/*
* This is a good place to put things to be done in
@@ -103,8 +113,9 @@ idle_proc(void *dummy)
}
mtx_lock_spin(&sched_lock);
- curproc->p_stats->p_ru.ru_nvcsw++;
+ p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
+ td->td_kse->ke_state = KES_RUNNING;
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index d65dc8228c03..fb9c092d4311 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -201,7 +201,7 @@ ithread_create(struct ithd **ithread, int vector, int flags,
td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */
td->td_ksegrp->kg_pri_class = PRI_ITHD;
td->td_priority = PRI_MAX_ITHD;
- p->p_stat = SWAIT;
+ td->td_state = TDS_IWAIT;
ithd->it_td = td;
td->td_ithd = ithd;
if (ithread != NULL)
@@ -229,8 +229,7 @@ ithread_destroy(struct ithd *ithread)
}
ithread->it_flags |= IT_DEAD;
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SWAIT) {
- p->p_stat = SRUN; /* XXXKSE */
+ if (td->td_state == TDS_IWAIT) {
setrunqueue(td);
}
mtx_unlock_spin(&sched_lock);
@@ -327,7 +326,7 @@ ok:
* handler as being dead and let the ithread do the actual removal.
*/
mtx_lock_spin(&sched_lock);
- if (ithread->it_td->td_proc->p_stat != SWAIT) {
+ if (ithread->it_td->td_state != TDS_IWAIT) {
handler->ih_flags |= IH_DEAD;
/*
@@ -374,8 +373,8 @@ ithread_schedule(struct ithd *ithread, int do_switch)
td = ithread->it_td;
p = td->td_proc;
KASSERT(p != NULL, ("ithread %s has no process", ithread->it_name));
- CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d", __func__, p->p_pid, p->p_comm,
- ithread->it_need);
+ CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d",
+ __func__, p->p_pid, p->p_comm, ithread->it_need);
/*
* Set it_need to tell the thread to keep running if it is already
@@ -387,14 +386,16 @@ ithread_schedule(struct ithd *ithread, int do_switch)
*/
ithread->it_need = 1;
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SWAIT) {
+ if (td->td_state == TDS_IWAIT) {
CTR2(KTR_INTR, "%s: setrunqueue %d", __func__, p->p_pid);
- p->p_stat = SRUN;
- setrunqueue(td); /* XXXKSE */
- if (do_switch && curthread->td_critnest == 1 &&
- curthread->td_proc->p_stat == SRUN) {
+ setrunqueue(td);
+ if (do_switch &&
+ (curthread->td_critnest == 1)/* &&
+ (curthread->td_state == TDS_RUNNING) XXXKSE*/) {
+#if 0 /* not needed in KSE */
if (curthread != PCPU_GET(idlethread))
setrunqueue(curthread);
+#endif
curthread->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
} else {
@@ -402,7 +403,7 @@ ithread_schedule(struct ithd *ithread, int do_switch)
}
} else {
CTR4(KTR_INTR, "%s: pid %d: it_need %d, state %d",
- __func__, p->p_pid, ithread->it_need, p->p_stat);
+ __func__, p->p_pid, ithread->it_need, p->p_state);
}
mtx_unlock_spin(&sched_lock);
@@ -550,7 +551,7 @@ restart:
*/
if (ithd->it_enable != NULL)
ithd->it_enable(ithd->it_vector);
- p->p_stat = SWAIT; /* we're idle */
+ td->td_state = TDS_IWAIT; /* we're idle */
p->p_stats->p_ru.ru_nvcsw++;
CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid);
mi_switch();
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index a456a86fa489..e8e2feaa9c0a 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -109,8 +109,7 @@ kthread_create(void (*func)(void *), void *arg,
mtx_lock_spin(&sched_lock);
p2->p_sflag |= PS_INMEM;
if (!(flags & RFSTOPPED)) {
- p2->p_stat = SRUN;
- setrunqueue(FIRST_THREAD_IN_PROC(p2)); /* XXXKSE */
+ setrunqueue(FIRST_THREAD_IN_PROC(p2));
}
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 08bca8d67b2c..c2e79d02d5f2 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -119,23 +119,20 @@ propagate_priority(struct thread *td)
return;
}
+ KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS"));
+ MPASS(td->td_proc != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
+ KASSERT(td->td_state != TDS_SLP,
+ ("sleeping thread owns a mutex"));
if (td->td_priority <= pri) /* lower is higher priority */
return;
- /*
- * Bump this thread's priority.
- */
- td->td_priority = pri;
/*
* If lock holder is actually running, just bump priority.
*/
- if (thread_running(td)) {
- MPASS(td->td_proc->p_stat == SRUN
- || td->td_proc->p_stat == SZOMB
- || td->td_proc->p_stat == SSTOP);
+ if (td->td_state == TDS_RUNNING) {
+ td->td_priority = pri;
return;
}
@@ -151,20 +148,26 @@ propagate_priority(struct thread *td)
* If on run queue move to new run queue, and quit.
* XXXKSE this gets a lot more complicated under threads
* but try anyhow.
+ * We should have a special call to do this more efficiently.
*/
- if (td->td_proc->p_stat == SRUN) {
+ if (td->td_state == TDS_RUNQ) {
MPASS(td->td_blocked == NULL);
remrunqueue(td);
+ td->td_priority = pri;
setrunqueue(td);
return;
}
+ /*
+ * Adjust for any other cases.
+ */
+ td->td_priority = pri;
/*
* If we aren't blocked on a mutex, we should be.
*/
- KASSERT(td->td_proc->p_stat == SMTX, (
+ KASSERT(td->td_state == TDS_MTX, (
"process %d(%s):%d holds %s but isn't blocked on a mutex\n",
- td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
+ td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
m->mtx_object.lo_name));
/*
@@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
td->td_blocked = m;
td->td_mtxname = m->mtx_object.lo_name;
- td->td_proc->p_stat = SMTX;
+ td->td_state = TDS_MTX;
propagate_priority(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
@@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
m, td1);
td1->td_blocked = NULL;
- td1->td_proc->p_stat = SRUN;
setrunqueue(td1);
if (td->td_critnest == 1 && td1->td_priority < pri) {
@@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
}
}
#endif
- setrunqueue(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK,
"_mtx_unlock_sleep: %p switching out lock=%p", m,
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c
index a197bc0e3c11..9dd692463209 100644
--- a/sys/kern/kern_poll.c
+++ b/sys/kern/kern_poll.c
@@ -503,7 +503,6 @@ poll_idle(void)
mtx_unlock(&Giant);
mtx_assert(&Giant, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
- setrunqueue(td);
td->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index a5378d9c3482..8b15fc2c4d46 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -44,6 +44,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
+#include <sys/kse.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
@@ -111,44 +112,28 @@ procinit()
uihashinit();
}
-/*
- * Note that we do not link to the proc's ucred here
- * The thread is linked as if running but no KSE assigned
- */
-static void
-thread_link(struct thread *td, struct ksegrp *kg)
-{
- struct proc *p = kg->kg_proc;
-
- td->td_proc = p;
- td->td_ksegrp = kg;
- td->td_last_kse = &p->p_kse;
-
- TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
- TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
- td->td_critnest = 0;
- td->td_kse = NULL;
- cpu_thread_link(td);
-}
-
/*
* KSE is linked onto the idle queue.
*/
-static void
+void
kse_link(struct kse *ke, struct ksegrp *kg)
{
struct proc *p = kg->kg_proc;
+KASSERT((ke->ke_state != KES_ONRUNQ), ("linking suspect kse on run queue"));
TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
kg->kg_kses++;
+KASSERT((ke->ke_state != KES_IDLE), ("already on idle queue"));
+ ke->ke_state = KES_IDLE;
TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
ke->ke_proc = p;
ke->ke_ksegrp = kg;
ke->ke_thread = NULL;
ke->ke_oncpu = NOCPU;
}
-static void
+void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{
@@ -159,10 +144,13 @@ ksegrp_link(struct ksegrp *kg, struct proc *p)
TAILQ_INIT(&kg->kg_iq); /* all kses in ksegrp */
kg->kg_proc = p;
/* the following counters are in the -zero- section and may not need clearing */
+ kg->kg_numthreads = 0;
kg->kg_runnable = 0;
kg->kg_kses = 0;
+ kg->kg_idle_kses = 0;
kg->kg_runq_kses = 0; /* XXXKSE change name */
/* link it in now that it's consistent */
+ p->p_numksegrps++;
TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}
@@ -177,30 +165,13 @@ proc_linkup(struct proc *p, struct ksegrp *kg,
TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */
TAILQ_INIT(&p->p_threads); /* all threads in proc */
+ TAILQ_INIT(&p->p_suspended); /* Threads suspended */
ksegrp_link(kg, p);
kse_link(ke, kg);
thread_link(td, kg);
- /* link them together for 1:1 */
- td->td_kse = ke;
- ke->ke_thread = td;
}
-/* temporary version is ultra simple while we are in 1:1 mode */
-struct thread *
-thread_get(struct proc *p)
-{
- struct thread *td = &p->p_xxthread;
-
- return (td);
-}
-
-
-/*********************
-* STUB KSE syscalls
-*********************/
-
-/* struct thread_wakeup_args { struct thread_mailbox *tmbx; }; */
int
thread_wakeup(struct thread *td, struct thread_wakeup_args *uap)
{
@@ -219,7 +190,11 @@ int
kse_yield(struct thread *td, struct kse_yield_args *uap)
{
- return(ENOSYS);
+ PROC_LOCK(td->td_proc);
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ return(0);
}
int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
@@ -228,16 +203,80 @@ int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
return(ENOSYS);
}
-
-int
-kse_new(struct thread *td, struct kse_new_args *uap)
+/*
+ * No new KSEGRP: on the first call, use the current KSE and don't schedule an upcall.
+ * In all other situations, allocate a new KSE and schedule an upcall on it.
+ */
/* struct kse_new_args {
struct kse_mailbox *mbx;
int new_grp_flag;
}; */
+int
+kse_new(struct thread *td, struct kse_new_args *uap)
{
+ struct kse *newkse;
+ struct proc *p;
+ struct kse_mailbox mbx;
+ int err;
- return (ENOSYS);
+ p = td->td_proc;
+ if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
+ return (err);
+ PROC_LOCK(p);
+ /*
+ * If we have no KSE mode set, just set it, and skip KSE and KSEGRP
+ * creation. You cannot request a new group with the first one as
+ * you are effectively getting one. Instead, go directly to saving
+ * the upcall info.
+ */
+ if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
+
+ return (EINVAL); /* XXX */
+ /*
+ * If newgroup then create the new group.
+ * Check we have the resources for this.
+ */
+ /* Copy lots of fields from the current KSEGRP. */
+ /* Create the new KSE */
+ /* Copy lots of fields from the current KSE. */
+ } else {
+ /*
+ * We are switching to KSEs so just
+ * use the preallocated ones for this call.
+ * XXXKSE if we have to initialise any fields for KSE
+ * mode operation, do it here.
+ */
+ newkse = td->td_kse;
+ }
+ /*
+ * Fill out the KSE-mode specific fields of the new kse.
+ */
+ PROC_UNLOCK(p);
+ mtx_lock_spin(&sched_lock);
+ mi_switch(); /* Save current registers to PCB. */
+ mtx_unlock_spin(&sched_lock);
+ newkse->ke_upcall = mbx.kmbx_upcall;
+ newkse->ke_stackbase = mbx.kmbx_stackbase;
+ newkse->ke_stacksize = mbx.kmbx_stacksize;
+ newkse->ke_mailbox = uap->mbx;
+ cpu_save_upcall(td, newkse);
+ /* Note that we are the returning syscall */
+ td->td_retval[0] = 0;
+ td->td_retval[1] = 0;
+
+ if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
+ thread_schedule_upcall(td, newkse);
+ } else {
+ /*
+ * Don't set this until we are truly ready, because
+ * things will start acting differently. Return to the
+ * calling code for the first time. Assuming we set up
+ * the mailboxes right, all syscalls after this will be
+ * asynchronous.
+ */
+ td->td_proc->p_flag |= P_KSES;
+ }
+ return (0);
}
/*
@@ -554,7 +593,7 @@ fixjobc(p, pgrp, entering)
LIST_FOREACH(p, &p->p_children, p_sibling) {
if ((hispgrp = p->p_pgrp) != pgrp &&
hispgrp->pg_session == mysession &&
- p->p_stat != SZOMB) {
+ p->p_state != PRS_ZOMBIE) {
PGRP_LOCK(hispgrp);
if (entering)
hispgrp->pg_jobc++;
@@ -583,7 +622,7 @@ orphanpg(pg)
mtx_lock_spin(&sched_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
- if (p->p_stat == SSTOP) {
+ if (P_SHOULDSTOP(p)) {
mtx_unlock_spin(&sched_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
@@ -674,7 +713,9 @@ fill_kinfo_proc(p, kp)
kp->ki_sigcatch = p->p_procsig->ps_sigcatch;
}
mtx_lock_spin(&sched_lock);
- if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) {
+ if (p->p_state != PRS_NEW &&
+ p->p_state != PRS_ZOMBIE &&
+ p->p_vmspace != NULL) {
struct vmspace *vm = p->p_vmspace;
kp->ki_size = vm->vm_map.size;
@@ -697,35 +738,65 @@ fill_kinfo_proc(p, kp)
p->p_stats->p_cru.ru_stime.tv_usec;
}
td = FIRST_THREAD_IN_PROC(p);
- if (td->td_wmesg != NULL)
- strncpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg) - 1);
- if (p->p_stat == SMTX) {
- kp->ki_kiflag |= KI_MTXBLOCK;
- strncpy(kp->ki_mtxname, td->td_mtxname,
- sizeof(kp->ki_mtxname) - 1);
+ if (!(p->p_flag & P_KSES)) {
+ if (td->td_wmesg != NULL) {
+ strncpy(kp->ki_wmesg, td->td_wmesg,
+ sizeof(kp->ki_wmesg) - 1);
+ }
+ if (td->td_state == TDS_MTX) {
+ kp->ki_kiflag |= KI_MTXBLOCK;
+ strncpy(kp->ki_mtxname, td->td_mtxname,
+ sizeof(kp->ki_mtxname) - 1);
+ }
}
- kp->ki_stat = p->p_stat;
+
+ if (p->p_state == PRS_NORMAL) { /* XXXKSE very approximate */
+ if ((td->td_state == TDS_RUNQ) ||
+ (td->td_state == TDS_RUNNING)) {
+ kp->ki_stat = SRUN;
+ } else if (td->td_state == TDS_SLP) {
+ kp->ki_stat = SSLEEP;
+ } else if (P_SHOULDSTOP(p)) {
+ kp->ki_stat = SSTOP;
+ } else if (td->td_state == TDS_MTX) {
+ kp->ki_stat = SMTX;
+ } else {
+ kp->ki_stat = SWAIT;
+ }
+ } else if (p->p_state == PRS_ZOMBIE) {
+ kp->ki_stat = SZOMB;
+ } else {
+ kp->ki_stat = SIDL;
+ }
+
kp->ki_sflag = p->p_sflag;
kp->ki_swtime = p->p_swtime;
kp->ki_pid = p->p_pid;
/* vvv XXXKSE */
- bintime2timeval(&p->p_runtime, &tv);
- kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
- kp->ki_pctcpu = p->p_kse.ke_pctcpu;
- kp->ki_estcpu = td->td_ksegrp->kg_estcpu;
- kp->ki_slptime = td->td_ksegrp->kg_slptime;
- kp->ki_wchan = td->td_wchan;
- kp->ki_pri.pri_level = td->td_priority;
- kp->ki_pri.pri_user = td->td_ksegrp->kg_user_pri;
- kp->ki_pri.pri_class = td->td_ksegrp->kg_pri_class;
- kp->ki_pri.pri_native = td->td_base_pri;
- kp->ki_nice = td->td_ksegrp->kg_nice;
- kp->ki_rqindex = p->p_kse.ke_rqindex;
- kp->ki_oncpu = p->p_kse.ke_oncpu;
- kp->ki_lastcpu = td->td_lastcpu;
- kp->ki_tdflags = td->td_flags;
- kp->ki_pcb = td->td_pcb;
- kp->ki_kstack = (void *)td->td_kstack;
+ if (!(p->p_flag & P_KSES)) {
+ bintime2timeval(&p->p_runtime, &tv);
+ kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
+ kp->ki_pctcpu = p->p_kse.ke_pctcpu;
+ kp->ki_estcpu = p->p_ksegrp.kg_estcpu;
+ kp->ki_slptime = p->p_ksegrp.kg_slptime;
+ kp->ki_wchan = td->td_wchan;
+ kp->ki_pri.pri_level = td->td_priority;
+ kp->ki_pri.pri_user = p->p_ksegrp.kg_user_pri;
+ kp->ki_pri.pri_class = p->p_ksegrp.kg_pri_class;
+ kp->ki_pri.pri_native = td->td_base_pri;
+ kp->ki_nice = p->p_ksegrp.kg_nice;
+ kp->ki_rqindex = p->p_kse.ke_rqindex;
+ kp->ki_oncpu = p->p_kse.ke_oncpu;
+ kp->ki_lastcpu = td->td_lastcpu;
+ kp->ki_tdflags = td->td_flags;
+ kp->ki_pcb = td->td_pcb;
+ kp->ki_kstack = (void *)td->td_kstack;
+ } else {
+ kp->ki_oncpu = -1;
+ kp->ki_lastcpu = -1;
+ kp->ki_tdflags = -1;
+ /* All the rest are 0 */
+ }
/* ^^^ XXXKSE */
mtx_unlock_spin(&sched_lock);
sp = NULL;
@@ -878,7 +949,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
/*
* Skip embryonic processes.
*/
- if (p->p_stat == SIDL) {
+ if (p->p_state == PRS_NEW) {
PROC_UNLOCK(p);
continue;
}
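For the single-KSE case that kse_new() accepts so far, a first call from a userland thread scheduler would look roughly like the hypothetical sketch below. The kse_mailbox field names (kmbx_upcall, kmbx_stackbase, kmbx_stacksize) and the syscall arguments match what kse_new() reads above, but the userland prototype, the field types, and the upcall convention are assumptions, not something this diff shows:

	/* Hypothetical userland sketch: switching a process into KSE mode. */
	#include <sys/types.h>
	#include <sys/kse.h>		/* struct kse_mailbox, per this work */

	extern int kse_new(struct kse_mailbox *mbx, int new_grp_flag); /* assumed prototype */

	static char uts_stack[64 * 1024];
	static struct kse_mailbox kmbx;

	void uts_upcall(void);		/* entry point of the user thread scheduler */

	int
	enter_kse_mode(void)
	{
		kmbx.kmbx_upcall = uts_upcall;		/* where upcalls land (type assumed) */
		kmbx.kmbx_stackbase = uts_stack;	/* stack used for upcalls */
		kmbx.kmbx_stacksize = sizeof(uts_stack);

		/*
		 * First call, no new group: the kernel reuses the current KSE,
		 * sets P_KSES on return, and later syscalls may complete
		 * asynchronously via upcalls to uts_upcall().
		 */
		return (kse_new(&kmbx, 0));
	}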
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index d2cb69d4fe26..0803cff61614 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -281,7 +281,6 @@ boot(int howto)
DROP_GIANT();
for (subiter = 0; subiter < 50 * iter; subiter++) {
mtx_lock_spin(&sched_lock);
- setrunqueue(curthread);
curthread->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch(); /* Allow interrupt threads to run */
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index a561a1967288..e8ded210c749 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -84,7 +84,7 @@ static int killpg1(struct thread *td, int sig, int pgid, int all);
static int sig_ffs(sigset_t *set);
static int sigprop(int sig);
static void stop(struct proc *);
-
+static void tdsignal(struct thread *td, int sig, sig_t action);
static int filt_sigattach(struct knote *kn);
static void filt_sigdetach(struct knote *kn);
static int filt_signal(struct knote *kn, long hint);
@@ -168,16 +168,18 @@ static int sigproptbl[NSIG] = {
* Determine signal that should be delivered to process p, the current
* process, 0 if none. If there is a pending stop signal with default
* action, the process stops in issignal().
+ * XXXKSE the check for a pending stop is not done under KSE
*
* MP SAFE.
*/
int
-cursig(struct proc *p)
+cursig(struct thread *td)
{
+ struct proc *p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_NOTOWNED);
- return (SIGPENDING(p) ? issignal(p) : 0);
+ return (SIGPENDING(p) ? issignal(td) : 0);
}
/*
@@ -1042,7 +1044,7 @@ killpg1(td, sig, pgid, all)
PROC_UNLOCK(p);
continue;
}
- if (p->p_stat == SZOMB) {
+ if (p->p_state == PRS_ZOMBIE) {
PROC_UNLOCK(p);
continue;
}
@@ -1243,12 +1245,10 @@ psignal(p, sig)
register struct proc *p;
register int sig;
{
- register int prop;
register sig_t action;
struct thread *td;
-#ifdef SMP
- struct ksegrp *kg;
-#endif
+ register int prop;
+
KASSERT(_SIG_VALID(sig),
("psignal(): invalid signal %d\n", sig));
@@ -1257,7 +1257,6 @@ psignal(p, sig)
KNOTE(&p->p_klist, NOTE_SIGNAL | sig);
prop = sigprop(sig);
-
/*
* If proc is traced, always give parent a chance;
* if signal event is tracked by procfs, give *that*
@@ -1283,29 +1282,6 @@ psignal(p, sig)
action = SIG_DFL;
}
- /*
- * bring the priority of a process up if we want it to get
- * killed in this lifetime.
- * XXXKSE think if a better way to do this.
- *
- * What we need to do is see if there is a thread that will
- * be able to accept the signal. e.g.
- * FOREACH_THREAD_IN_PROC() {
- * if runnable, we're done
- * else pick one at random.
- * }
- */
- /* XXXKSE
- * For now there is one thread per proc.
- * Effectively select one sucker thread..
- */
- td = FIRST_THREAD_IN_PROC(p);
- mtx_lock_spin(&sched_lock);
- if ((p->p_ksegrp.kg_nice > NZERO) && (action == SIG_DFL) &&
- (prop & SA_KILL) && ((p->p_flag & P_TRACED) == 0))
- p->p_ksegrp.kg_nice = NZERO; /* XXXKSE */
- mtx_unlock_spin(&sched_lock);
-
if (prop & SA_CONT)
SIG_STOPSIGMASK(p->p_siglist);
@@ -1316,48 +1292,125 @@ psignal(p, sig)
* is default; don't stop the process below if sleeping,
* and don't clear any pending SIGCONT.
*/
- if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
- action == SIG_DFL)
+ if ((prop & SA_TTYSTOP) &&
+ (p->p_pgrp->pg_jobc == 0) &&
+ (action == SIG_DFL))
return;
SIG_CONTSIGMASK(p->p_siglist);
}
SIGADDSET(p->p_siglist, sig);
mtx_lock_spin(&sched_lock);
signotify(p);
+ mtx_unlock_spin(&sched_lock);
/*
- * Defer further processing for signals which are held,
- * except that stopped processes must be continued by SIGCONT.
+ * Some signals have a process-wide effect and a per-thread
+ * component. Most processing occurs when the process next
+ * tries to cross the user boundary, however there are some
+ * times when processing needs to be done immediately, such as
+ * waking up threads so that they can cross the user boundary.
+ * We try do the per-process part here.
*/
- if (action == SIG_HOLD && (!(prop & SA_CONT) || p->p_stat != SSTOP)) {
- mtx_unlock_spin(&sched_lock);
- return;
- }
-
- switch (p->p_stat) {
-
- case SSLEEP:
+ if (P_SHOULDSTOP(p)) {
/*
- * If process is sleeping uninterruptibly
- * we can't interrupt the sleep... the signal will
- * be noticed when the process returns through
- * trap() or syscall().
+ * The process is in stopped mode. All the threads should be
+ * either winding down or already on the suspended queue.
*/
- if ((td->td_flags & TDF_SINTR) == 0)
+ if (p->p_flag & P_TRACED) {
+ /*
+ * The traced process is already stopped,
+ * so no further action is necessary.
+ * No signal can restart us.
+ */
goto out;
+ }
+
+ if (sig == SIGKILL) {
+ /*
+ * SIGKILL sets process running.
+ * It will die elsewhere.
+ * All threads must be restarted.
+ */
+ p->p_flag &= ~P_STOPPED;
+ goto runfast;
+ }
+
+ if (prop & SA_CONT) {
+ /*
+ * If SIGCONT is default (or ignored), we continue the
+ * process but don't leave the signal in p_siglist as
+ * it has no further action. If SIGCONT is held, we
+ * continue the process and leave the signal in
+ * p_siglist. If the process catches SIGCONT, let it
+ * handle the signal itself. If it isn't waiting on
+ * an event, it goes back to run state.
+ * Otherwise, process goes back to sleep state.
+ */
+ p->p_flag &= ~P_STOPPED_SGNL;
+ if (action == SIG_DFL) {
+ SIGDELSET(p->p_siglist, sig);
+ } else if (action == SIG_CATCH) {
+ /*
+ * The process wants to catch it so it needs
+ * to run at least one thread, but which one?
+ * It would seem that the answer would be to
+ * run an upcall in the next KSE to run, and
+ * deliver the signal that way. In a NON KSE
+ * process, we need to make sure that the
+ * single thread is runnable asap.
+ * XXXKSE for now however, make them all run.
+ */
+ goto runfast;
+ }
+ /*
+ * The signal is not ignored or caught.
+ */
+ mtx_lock_spin(&sched_lock);
+ thread_unsuspend(p); /* Checks if should do it. */
+ mtx_unlock_spin(&sched_lock);
+ goto out;
+ }
+
+ if (prop & SA_STOP) {
+ /*
+ * Already stopped, don't need to stop again
+ * (If we did the shell could get confused).
+ */
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
+
/*
- * Process is sleeping and traced... make it runnable
- * so it can discover the signal in issignal() and stop
- * for the parent.
+ * All other kinds of signals:
+ * If a thread is sleeping interruptibly, simulate a
+ * wakeup so that when it is continued it will be made
+ * runnable and can look at the signal. However, don't make
+ * the process runnable, leave it stopped.
+ * It may run a bit until it hits a thread_suspend_check().
+ *
+ * XXXKSE I don't understand this at all.
*/
- if (p->p_flag & P_TRACED)
- goto run;
+ mtx_lock_spin(&sched_lock);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_wchan && (td->td_flags & TDF_SINTR)) {
+ if (td->td_flags & TDF_CVWAITQ)
+ cv_waitq_remove(td);
+ else
+ unsleep(td);
+ setrunnable(td);
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+ goto out;
/*
- * If SIGCONT is default (or ignored) and process is
- * asleep, we are finished; the process should not
- * be awakened.
+ * XXXKSE What about threads that are waiting on mutexes?
+ * Shouldn't they abort too?
*/
- if ((prop & SA_CONT) && action == SIG_DFL) {
+ } else if (p->p_state == PRS_NORMAL) {
+ if (prop & SA_CONT) {
+ /*
+ * Already active, don't need to start again.
+ */
SIGDELSET(p->p_siglist, sig);
goto out;
}
@@ -1370,133 +1423,128 @@ psignal(p, sig)
if (prop & SA_STOP) {
if (action != SIG_DFL)
goto runfast;
+
/*
* If a child holding parent blocked,
* stopping could cause deadlock.
*/
if (p->p_flag & P_PPWAIT)
goto out;
- mtx_unlock_spin(&sched_lock);
SIGDELSET(p->p_siglist, sig);
p->p_xstat = sig;
PROC_LOCK(p->p_pptr);
- if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0)
+ if (!(p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP))
psignal(p->p_pptr, SIGCHLD);
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
+ mtx_unlock_spin(&sched_lock);
goto out;
} else
goto runfast;
/* NOTREACHED */
+ } else {
+ /* Not in "NORMAL" state. discard the signal. */
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
- case SSTOP:
- /*
- * If traced process is already stopped,
- * then no further action is necessary.
- */
- if (p->p_flag & P_TRACED)
- goto out;
+ /*
+ * The process is not stopped so we need to apply the signal to all the
+ * running threads.
+ */
- /*
- * Kill signal always sets processes running.
- */
- if (sig == SIGKILL)
- goto runfast;
+runfast:
+ FOREACH_THREAD_IN_PROC(p, td)
+ tdsignal(td, sig, action);
+ mtx_lock_spin(&sched_lock);
+ thread_unsuspend(p);
+ mtx_unlock_spin(&sched_lock);
+out:
+ /* If we jump here, sched_lock should not be owned. */
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+}
- if (prop & SA_CONT) {
- /*
- * If SIGCONT is default (or ignored), we continue the
- * process but don't leave the signal in p_siglist, as
- * it has no further action. If SIGCONT is held, we
- * continue the process and leave the signal in
- * p_siglist. If the process catches SIGCONT, let it
- * handle the signal itself. If it isn't waiting on
- * an event, then it goes back to run state.
- * Otherwise, process goes back to sleep state.
- */
- if (action == SIG_DFL)
- SIGDELSET(p->p_siglist, sig);
- if (action == SIG_CATCH)
- goto runfast;
- /*
- * XXXKSE
- * do this for each thread.
- */
- if (p->p_flag & P_KSES) {
- mtx_assert(&sched_lock,
- MA_OWNED | MA_NOTRECURSED);
- FOREACH_THREAD_IN_PROC(p, td) {
- if (td->td_wchan == NULL) {
- setrunnable(td); /* XXXKSE */
- } else {
- /* mark it as sleeping */
- }
- }
- } else {
- p->p_flag |= P_CONTINUED;
- wakeup(p->p_pptr);
- if (td->td_wchan == NULL)
- goto run;
- p->p_stat = SSLEEP;
- }
- goto out;
+/*
+ * The force of a signal has been directed against a single
+ * thread. We need to see what we can do about knocking it
+ * out of any sleep it may be in etc.
+ */
+static void
+tdsignal(struct thread *td, int sig, sig_t action)
+{
+ struct proc *p = td->td_proc;
+ register int prop;
+
+ prop = sigprop(sig);
+
+ /*
+ * Bring the priority of a process up if we want it to get
+ * killed in this lifetime.
+ * XXXKSE we should shift the priority to the thread.
+ */
+ mtx_lock_spin(&sched_lock);
+ if ((action == SIG_DFL) && (prop & SA_KILL)) {
+ if (td->td_priority > PUSER) {
+ td->td_priority = PUSER;
}
+ }
+ mtx_unlock_spin(&sched_lock);
- if (prop & SA_STOP) {
- /*
- * Already stopped, don't need to stop again.
- * (If we did the shell could get confused.)
- */
- SIGDELSET(p->p_siglist, sig);
+ /*
+ * Defer further processing for signals which are held,
+ * except that stopped processes must be continued by SIGCONT.
+ */
+ if (action == SIG_HOLD) {
+ goto out;
+ }
+ mtx_lock_spin(&sched_lock);
+ if (td->td_state == TDS_SLP) {
+ /*
+ * If thread is sleeping uninterruptibly
+ * we can't interrupt the sleep... the signal will
+ * be noticed when the process returns through
+ * trap() or syscall().
+ */
+ if ((td->td_flags & TDF_SINTR) == 0) {
+ mtx_unlock_spin(&sched_lock);
goto out;
}
-
/*
- * If process is sleeping interruptibly, then simulate a
- * wakeup so that when it is continued, it will be made
- * runnable and can look at the signal. But don't make
- * the process runnable, leave it stopped.
- * XXXKSE should we wake ALL blocked threads?
+ * Process is sleeping and traced. Make it runnable
+ * so it can discover the signal in issignal() and stop
+ * for its parent.
*/
- if (p->p_flag & P_KSES) {
- FOREACH_THREAD_IN_PROC(p, td) {
- if (td->td_wchan && (td->td_flags & TDF_SINTR)){
- if (td->td_flags & TDF_CVWAITQ)
- cv_waitq_remove(td);
- else
- unsleep(td); /* XXXKSE */
- }
- }
- } else {
- if (td->td_wchan && td->td_flags & TDF_SINTR) {
- if (td->td_flags & TDF_CVWAITQ)
- cv_waitq_remove(td);
- else
- unsleep(td); /* XXXKSE */
- }
+ if (p->p_flag & P_TRACED) {
+ p->p_flag &= ~P_STOPPED_TRACE;
+ goto run;
}
- goto out;
+ mtx_unlock_spin(&sched_lock);
+ /*
+ * If SIGCONT is default (or ignored) and process is
+ * asleep, we are finished; the process should not
+ * be awakened.
+ */
+ if ((prop & SA_CONT) && action == SIG_DFL) {
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
+ goto runfast;
+ /* NOTREACHED */
- default:
+ } else {
/*
- * SRUN, SIDL, SZOMB do nothing with the signal,
+ * Other states do nothing with the signal immediately,
* other than kicking ourselves if we are running.
* It will either never be noticed, or noticed very soon.
*/
- if (p->p_stat == SRUN) {
+ mtx_unlock_spin(&sched_lock);
+ if (td->td_state == TDS_RUNQ ||
+ td->td_state == TDS_RUNNING) {
+ signotify(td->td_proc);
#ifdef SMP
- struct kse *ke;
- struct thread *td = curthread;
-/* we should only deliver to one thread.. but which one? */
- FOREACH_KSEGRP_IN_PROC(p, kg) {
- FOREACH_KSE_IN_GROUP(kg, ke) {
- if (ke->ke_thread == td) {
- continue;
- }
- forward_signal(ke->ke_thread);
- }
- }
+ if (td->td_state == TDS_RUNNING && td != curthread)
+ forward_signal(td);
#endif
}
goto out;
@@ -1506,21 +1554,17 @@ psignal(p, sig)
runfast:
/*
* Raise priority to at least PUSER.
- * XXXKSE Should we make them all run fast?
- * Maybe just one would be enough?
*/
-
- if (FIRST_THREAD_IN_PROC(p)->td_priority > PUSER) {
- FIRST_THREAD_IN_PROC(p)->td_priority = PUSER;
+ mtx_lock_spin(&sched_lock);
+ if (td->td_priority > PUSER) {
+ td->td_priority = PUSER;
}
run:
- /* If we jump here, sched_lock has to be owned. */
mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
- setrunnable(td); /* XXXKSE */
-out:
+ setrunnable(td);
mtx_unlock_spin(&sched_lock);
- /* Once we get here, sched_lock should not be owned. */
+out:
mtx_assert(&sched_lock, MA_NOTOWNED);
}
@@ -1533,16 +1577,18 @@ out:
* by checking the pending signal masks in cursig.) The normal call
* sequence is
*
- * while (sig = cursig(curproc))
+ * while (sig = cursig(curthread))
* postsig(sig);
*/
int
-issignal(p)
- register struct proc *p;
+issignal(td)
+ struct thread *td;
{
+ struct proc *p;
sigset_t mask;
register int sig, prop;
+ p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
for (;;) {
int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
@@ -1576,6 +1622,7 @@ issignal(p)
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
+ td->td_state = TDS_UNQUEUED;
PROC_UNLOCK(p);
DROP_GIANT();
p->p_stats->p_ru.ru_nivcsw++;
@@ -1633,6 +1680,7 @@ issignal(p)
#endif
break; /* == ignore */
}
+#if 0
/*
* If there is a pending stop signal to process
* with default action, stop here,
@@ -1647,8 +1695,10 @@ issignal(p)
break; /* == ignore */
p->p_xstat = sig;
PROC_LOCK(p->p_pptr);
- if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0)
+ if ((p->p_pptr->p_procsig->ps_flag &
+ PS_NOCLDSTOP) == 0) {
psignal(p->p_pptr, SIGCHLD);
+ }
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
@@ -1660,7 +1710,9 @@ issignal(p)
PICKUP_GIANT();
PROC_LOCK(p);
break;
- } else if (prop & SA_IGNORE) {
+ } else
+#endif
+ if (prop & SA_IGNORE) {
/*
* Except for SIGCONT, shouldn't get here.
* Default action is to ignore; drop it.
@@ -1706,7 +1758,7 @@ stop(p)
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_OWNED);
- p->p_stat = SSTOP;
+ p->p_flag |= P_STOPPED_SGNL;
p->p_flag &= ~P_WAITED;
wakeup(p->p_pptr);
}
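Signal delivery is now split in two: psignal() handles the process-wide part (stopped/continued handling, p_siglist, the SA_STOP and SA_CONT cases), and the new tdsignal() does the per-thread work of knocking an individual thread out of an interruptible sleep and adjusting its priority. The shape of the fan-out, condensed from the runfast path above as it would sit inside kern_sig.c; the wrapper name is invented and the locking assertions and stopped-process cases are omitted:

	/*
	 * Condensed from psignal()'s runfast path above: once the
	 * process-wide bookkeeping is done, the signal is applied to
	 * every thread, then suspended threads are released.
	 */
	static void
	signal_fanout(struct proc *p, int sig, sig_t action)
	{
		struct thread *td;

		FOREACH_THREAD_IN_PROC(p, td)
			tdsignal(td, sig, action);	/* per-thread wakeup/boost */
		mtx_lock_spin(&sched_lock);
		thread_unsuspend(p);			/* let suspended threads see it */
		mtx_unlock_spin(&sched_lock);
	}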
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index 5e32eeeb817e..c63091c008f8 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -538,7 +538,6 @@ uio_yield()
mtx_lock_spin(&sched_lock);
DROP_GIANT();
td->td_priority = td->td_ksegrp->kg_user_pri; /* XXXKSE */
- setrunqueue(td);
td->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 2b531c0dae3d..40d3ef87bd33 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -26,6 +26,69 @@
* $FreeBSD$
*/
+/***
+
+Here is the logic..
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M (N-X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned as all the available KSEs are actively running, or because there
+are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued that pointer must be
+removed from it. Since we know there were no more KSEs available,
+(M was 1 and is now 0) and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available, we can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not.. i.e. is it 'earlier'
+on the list than that thread or later.. If it's earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads)
+ \ \____
+ \ \
+ KSEGROUP---thread--thread--thread (queued in priority order)
+ \ /
+ \_______________/
+ (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+
+*/
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
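The block comment above boils down to one invariant: runnable threads hang off the KSEGRP in priority order, the first M of them (those up to and including kg_last_assigned) each own a KSE that sits on the system run queue, and everything after kg_last_assigned is waiting for a KSE. A hedged sketch of a checker for that invariant, in the spirit of the thread_sanity_check() calls used below; the helper name is hypothetical and not part of this commit, while kg_runq, td_runq, td_kse, and kg_last_assigned are the fields used by the code that follows:

	/*
	 * Hypothetical debug check of the invariant described above: every
	 * thread up to and including kg_last_assigned has a KSE, and every
	 * thread after it does not.  Call with sched_lock held.
	 */
	static void
	ksegrp_assign_check(struct ksegrp *kg)
	{
		struct thread *td;
		int assigned = (kg->kg_last_assigned != NULL);

		TAILQ_FOREACH(td, &kg->kg_runq, td_runq) {
			if (assigned)
				KASSERT(td->td_kse != NULL,
				    ("queued thread before last_assigned has no KSE"));
			else
				KASSERT(td->td_kse == NULL,
				    ("thread past last_assigned still has a KSE"));
			if (td == kg->kg_last_assigned)
				assigned = 0;	/* everything after this is bare */
		}
	}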
@@ -44,34 +107,442 @@ CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
+static void runq_readjust(struct runq *rq, struct kse *ke);
+/************************************************************************
+ * Functions that manipulate runnability from a thread perspective. *
+ ************************************************************************/
+
/*
- * Wrappers which implement old interface; act on global run queue.
+ * Select the KSE that will be run next. From that, find the thread and
+ * remove it from the KSEGRP's run queue. If there is thread clustering,
+ * this will be what does it.
*/
-
struct thread *
choosethread(void)
{
- return (runq_choose(&runq)->ke_thread);
+ struct kse *ke;
+ struct thread *td;
+ struct ksegrp *kg;
+
+ if ((ke = runq_choose(&runq))) {
+ td = ke->ke_thread;
+ KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+ kg = ke->ke_ksegrp;
+ if (td->td_flags & TDF_UNBOUND) {
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ if (kg->kg_last_assigned == td)
+ if (TAILQ_PREV(td, threadqueue, td_runq)
+ != NULL)
+ printf("Yo MAMA!\n");
+ kg->kg_last_assigned = TAILQ_PREV(td,
+ threadqueue, td_runq);
+ /*
+ * If we have started running an upcall,
+ * Then TDF_UNBOUND WAS set because the thread was
+ * created without a KSE. Now that we have one,
+ * and it is our time to run, we make sure
+ * that BOUND semantics apply for the rest of
+ * the journey to userland, and into the UTS.
+ */
+#ifdef NOTYET
+ if (td->td_flags & TDF_UPCALLING)
+ td->td_flags &= ~TDF_UNBOUND;
+#endif
+ }
+ kg->kg_runnable--;
+ CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
+ td, td->td_priority);
+ } else {
+ /* Pretend the idle thread was on the run queue. */
+ td = PCPU_GET(idlethread);
+ /* Simulate that it was on the run queue */
+ td->td_state = TDS_RUNQ;
+ td->td_kse->ke_state = KES_UNQUEUED;
+ CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+ }
+ thread_sanity_check(td);
+ return (td);
+}
+
+/*
+ * Given a KSE (now surplus), either assign a new runnable thread to it
+ * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
+ * Assumes the KSE is no longer linked to any threads (it has been cleaned).
+ */
+void
+kse_reassign(struct kse *ke)
+{
+ struct ksegrp *kg;
+ struct thread *td;
+
+ kg = ke->ke_ksegrp;
+
+KASSERT((ke->ke_state != KES_ONRUNQ), ("kse_reassigning non-free kse"));
+ /*
+ * Find the first unassigned thread.
+ * If there is a 'last assigned', see what's next;
+ * otherwise look at what is first.
+ */
+ if ((td = kg->kg_last_assigned)) {
+ td = TAILQ_NEXT(td, td_runq);
+ } else {
+ td = TAILQ_FIRST(&kg->kg_runq);
+ }
+
+ /*
+ * If we found one, assign it the KSE; otherwise idle the KSE.
+ */
+ if (td) {
+ thread_sanity_check(td);
+ kg->kg_last_assigned = td;
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ runq_add(&runq, ke);
+ CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
+ } else {
+ KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+ TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+ CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
+ }
}
int
-procrunnable(void)
+kserunnable(void)
{
return runq_check(&runq);
}
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may dissociate it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to that KSE
+ * and the KSE to be given a new priority (unless it's a BOUND thread/KSE pair).
+ */
void
remrunqueue(struct thread *td)
{
- runq_remove(&runq, td->td_kse);
+ struct thread *td2, *td3;
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ thread_sanity_check(td);
+ KASSERT ((td->td_state == TDS_RUNQ),
+ ("remrunqueue: Bad state on run queue"));
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+ /*
+ * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
+ * threads are BOUND.
+ */
+ CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+ td->td_state = TDS_UNQUEUED;
+ kg->kg_runnable--;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ /* Bring its kse with it, leave the thread attached */
+ runq_remove(&runq, ke);
+ ke->ke_state = KES_UNQUEUED;
+ return;
+ }
+ if (ke) {
+ /*
+ * This thread has been assigned to a KSE.
+ * We need to dissociate it and try assign the
+ * KSE to the next available thread. Then, we should
+ * see if we need to move the KSE in the run queues.
+ */
+ td2 = kg->kg_last_assigned;
+ KASSERT((td2 != NULL), ("last assigned has wrong value "));
+ td->td_kse = NULL;
+ if ((td3 = TAILQ_NEXT(td2, td_runq))) {
+ KASSERT(td3 != td, ("td3 somehow matched td"));
+ /*
+ * Give the next unassigned thread to the KSE
+ * so the number of runnable KSEs remains
+ * constant.
+ */
+ td3->td_kse = ke;
+ ke->ke_thread = td3;
+ kg->kg_last_assigned = td3;
+ runq_readjust(&runq, ke);
+ } else {
+ /*
+ * There is no unassigned thread.
+ * If we were the last assigned one,
+ * adjust the last assigned pointer back
+ * one, which may result in NULL.
+ */
+ if (td == td2) {
+ kg->kg_last_assigned =
+ TAILQ_PREV(td, threadqueue, td_runq);
+ }
+ runq_remove(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ KASSERT((ke->ke_state != KES_IDLE),
+ ("kse already idle"));
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+KASSERT((TAILQ_FIRST(&kg->kg_iq) != ke), ("really bad screwup"));
+ TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
+ }
+ }
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ thread_sanity_check(td);
}
+#if 1 /* use the first version */
+
void
setrunqueue(struct thread *td)
{
- runq_add(&runq, td->td_kse);
+ struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+ struct thread *tda;
+
+ CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
+ mtx_assert(&sched_lock, MA_OWNED);
+ thread_sanity_check(td);
+ KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
+ td->td_state = TDS_RUNQ;
+ kg = td->td_ksegrp;
+ kg->kg_runnable++;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ KASSERT((td->td_kse != NULL),
+ ("queueing BAD thread to run queue"));
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ runq_add(&runq, td->td_kse);
+ return;
+ }
+ /*
+ * Ok, so we are threading with this thread.
+ * We don't have a KSE, see if we can get one..
+ */
+ tda = kg->kg_last_assigned;
+ if ((ke = td->td_kse) == NULL) {
+ /*
+ * We will need a KSE, see if there is one..
+ * First look for a free one, before getting desperate.
+ * If we can't get one, our priority is not high enough..
+ * that's ok..
+ */
+ if (kg->kg_idle_kses) {
+ /*
+ * There is a free one so it's ours for the asking..
+ */
+ ke = TAILQ_FIRST(&kg->kg_iq);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self3!"));
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ ke->ke_state = KES_UNQUEUED;
+ kg->kg_idle_kses--;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self4!"));
+ } else if (tda && (tda->td_priority > td->td_priority)) {
+ /*
+ * None free, but there is one we can commandeer.
+ */
+ ke = tda->td_kse;
+ tda->td_kse = NULL;
+ ke->ke_thread = NULL;
+ tda = kg->kg_last_assigned =
+ TAILQ_PREV(tda, threadqueue, td_runq);
+ runq_remove(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self5!"));
+ }
+ } else {
+ KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
+ KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
+ ke->ke_thread = NULL;
+ td->td_kse = NULL;
+ }
+
+ /*
+ * Add the thread to the ksegrp's run queue at
+ * the appropriate place.
+ */
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority > td->td_priority) {
+ TAILQ_INSERT_BEFORE(td2, td, td_runq);
+ break;
+ }
+ }
+ if (td2 == NULL) {
+ /* We ran off the end of the TAILQ or it was empty. */
+ TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+ }
+
+ /*
+ * If we have a ke to use, then put it on the run queue and,
+ * if needed, readjust the last_assigned pointer.
+ */
+ if (ke) {
+ if (tda == NULL) {
+ /*
+ * No pre-existing last assigned, so whoever is first
+ * gets the KSE we brought in (may be us).
+ */
+ td2 = TAILQ_FIRST(&kg->kg_runq);
+ KASSERT((td2->td_kse == NULL),
+ ("unexpected ke present"));
+ td2->td_kse = ke;
+ ke->ke_thread = td2;
+ kg->kg_last_assigned = td2;
+ } else if (tda->td_priority > td->td_priority) {
+ /*
+ * It's ours, grab it, but last_assigned is past us
+ * so don't change it.
+ */
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ } else {
+ /*
+ * We are past last_assigned, so
+ * put the new kse on whatever is next,
+ * which may or may not be us.
+ */
+ td2 = TAILQ_NEXT(tda, td_runq);
+ kg->kg_last_assigned = td2;
+ td2->td_kse = ke;
+ ke->ke_thread = td2;
+ }
+ runq_add(&runq, ke);
+ }
+ thread_sanity_check(td);
}
+#else
+
+void
+setrunqueue(struct thread *td)
+{
+ struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+
+ CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
+ KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
+ td->td_state = TDS_RUNQ;
+ kg = td->td_ksegrp;
+ kg->kg_runnable++;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ runq_add(&runq, td->td_kse);
+ return;
+ }
+ /*
+ * First add the thread to the ksegrp's run queue at
+ * the appropriate place.
+ */
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority > td->td_priority) {
+ TAILQ_INSERT_BEFORE(td2, td, td_runq);
+ break;
+ }
+ }
+ if (td2 == NULL) {
+ /* We ran off the end of the TAILQ or it was empty. */
+ TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+ }
+
+ /*
+ * The following could be achieved by simply doing:
+ * td->td_kse = NULL; kse_reassign(ke);
+ * but I felt that I'd try to do it inline here.
+ * All this work may not be worth it.
+ */
+ if ((ke = td->td_kse)) { /* XXXKSE */
+ /*
+ * We have a KSE already. See whether we can keep it
+ * or if we need to give it to someone else.
+ * Either way it will need to be inserted into
+ * the runq. kse_reassign() will do this as will runq_add().
+ */
+ if ((kg->kg_last_assigned) &&
+ (kg->kg_last_assigned->td_priority > td->td_priority)) {
+ /*
+ * We can definitely keep the KSE
+ * as the "last assigned" thread has
+ * a lower priority than we do.
+ * The "last assigned" pointer stays the same.
+ */
+ runq_add(&runq, ke);
+ return;
+
+ }
+ /*
+ * Give it to the correct thread,
+ * which may be (often is) us, but may not be.
+ */
+ td->td_kse = NULL;
+ kse_reassign(ke);
+ return;
+ }
+ /*
+ * There are two cases where KSE adjustment is needed.
+ * Usurpation of an already assigned KSE, and assignment
+ * of a previously IDLE KSE.
+ */
+ if (kg->kg_idle_kses) {
+ /*
+ * If there are unassigned KSEs then we definitely
+ * will be assigned one from the idle KSE list.
+ * If we are the last, we should get the "last
+ * assigned" pointer set to us as well.
+ */
+ ke = TAILQ_FIRST(&kg->kg_iq);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ ke->ke_state = KES_UNQUEUED;
+ kg->kg_idle_kses--;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ ke->ke_thread = td;
+ td->td_kse = ke;
+ runq_add(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ if (TAILQ_NEXT(td, td_runq) == NULL) {
+ kg->kg_last_assigned = td;
+ }
+ } else if (kg->kg_last_assigned &&
+ (kg->kg_last_assigned->td_priority > td->td_priority)) {
+ /*
+ * If there were none last-assigned, all KSEs
+ * are actually out running as we speak.
+ * If there was a last assigned, but we didn't see it,
+ * we must be inserting before it, so take the KSE from
+ * the last assigned, and back it up one entry. Then,
+ * assign the KSE to the new thread and adjust its priority.
+ */
+ td2 = kg->kg_last_assigned;
+ ke = td2->td_kse;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ kg->kg_last_assigned =
+ TAILQ_PREV(td2, threadqueue, td_runq);
+ td2->td_kse = NULL;
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ runq_readjust(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ }
+}
+#endif
+
+/************************************************************************
+ * Critical section marker functions *
+ ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
@@ -98,6 +569,23 @@ critical_exit(void)
}
}
+
+/************************************************************************
+ * SYSTEM RUN QUEUE manipulations and tests *
+ ************************************************************************/
+/*
+ * Initialize a run structure.
+ */
+void
+runq_init(struct runq *rq)
+{
+ int i;
+
+ bzero(rq, sizeof *rq);
+ for (i = 0; i < RQ_NQS; i++)
+ TAILQ_INIT(&rq->rq_queues[i]);
+}
+
/*
* Clear the status bit of the queue corresponding to priority level pri,
* indicating that it is empty.
@@ -156,7 +644,7 @@ runq_setbit(struct runq *rq, int pri)
}
/*
- * Add the process to the queue specified by its priority, and set the
+ * Add the KSE to the queue specified by its priority, and set the
* corresponding status bit.
*/
void
@@ -165,14 +653,16 @@ runq_add(struct runq *rq, struct kse *ke)
struct rqhead *rqh;
int pri;
-#ifdef INVARIANTS
- struct proc *p = ke->ke_proc;
-#endif
- if (ke->ke_flags & KEF_ONRUNQ)
- return;
mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(p->p_stat == SRUN, ("runq_add: proc %p (%s) not SRUN",
- p, p->p_comm));
+ KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
+ KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
+ if (ke->ke_state == KES_ONRUNQ)
+ return;
+#if defined(INVARIANTS) && defined(DIAGNOSTIC)
+ KASSERT(ke->ke_state != KES_ONRUNQ,
+ ("runq_add: kse %p (%s) already in run queue", ke,
+ ke->ke_proc->p_comm));
+#endif
pri = ke->ke_thread->td_priority / RQ_PPQ;
ke->ke_rqindex = pri;
runq_setbit(rq, pri);
@@ -180,7 +670,8 @@ runq_add(struct runq *rq, struct kse *ke)
CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
- ke->ke_flags |= KEF_ONRUNQ;
+ ke->ke_ksegrp->kg_runq_kses++;
+ ke->ke_state = KES_ONRUNQ;
}
/*
@@ -219,43 +710,38 @@ runq_choose(struct runq *rq)
int pri;
mtx_assert(&sched_lock, MA_OWNED);
- if ((pri = runq_findbit(rq)) != -1) {
+ while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
ke = TAILQ_FIRST(rqh);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
- KASSERT(ke->ke_proc->p_stat == SRUN,
- ("runq_choose: process %d(%s) in state %d", ke->ke_proc->p_pid,
- ke->ke_proc->p_comm, ke->ke_proc->p_stat));
- CTR3(KTR_RUNQ, "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
+ CTR3(KTR_RUNQ,
+ "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
+KASSERT(ke->ke_procq.tqe_prev != NULL, ("no prev"));
+if (ke->ke_procq.tqe_next)
+ KASSERT(ke->ke_procq.tqe_next->ke_procq.tqe_prev != NULL, ("no next"));
TAILQ_REMOVE(rqh, ke, ke_procq);
+ ke->ke_ksegrp->kg_runq_kses--;
if (TAILQ_EMPTY(rqh)) {
CTR0(KTR_RUNQ, "runq_choose: empty");
runq_clrbit(rq, pri);
}
- ke->ke_flags &= ~KEF_ONRUNQ;
+
+ ke->ke_state = KES_RUNNING;
+ KASSERT((ke->ke_thread != NULL),
+ ("runq_choose: No thread on KSE"));
+ KASSERT((ke->ke_thread->td_kse != NULL),
+ ("runq_choose: No KSE on thread"));
return (ke);
}
CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
- return (PCPU_GET(idlethread)->td_kse);
+ return (NULL);
}
/*
- * Initialize a run structure.
- */
-void
-runq_init(struct runq *rq)
-{
- int i;
-
- bzero(rq, sizeof *rq);
- for (i = 0; i < RQ_NQS; i++)
- TAILQ_INIT(&rq->rq_queues[i]);
-}
-
-/*
- * Remove the process from the queue specified by its priority, and clear the
+ * Remove the KSE from the queue specified by its priority, and clear the
* corresponding status bit if the queue becomes empty.
+ * Caller must set ke->ke_state afterwards.
*/
void
runq_remove(struct runq *rq, struct kse *ke)
@@ -263,8 +749,7 @@ runq_remove(struct runq *rq, struct kse *ke)
struct rqhead *rqh;
int pri;
- if (!(ke->ke_flags & KEF_ONRUNQ))
- return;
+ KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
mtx_assert(&sched_lock, MA_OWNED);
pri = ke->ke_rqindex;
rqh = &rq->rq_queues[pri];
@@ -276,5 +761,104 @@ runq_remove(struct runq *rq, struct kse *ke)
CTR0(KTR_RUNQ, "runq_remove: empty");
runq_clrbit(rq, pri);
}
- ke->ke_flags &= ~KEF_ONRUNQ;
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_ksegrp->kg_runq_kses--;
+}
+
+static void
+runq_readjust(struct runq *rq, struct kse *ke)
+{
+
+ if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
+ runq_remove(rq, ke);
+ runq_add(rq, ke);
+ }
+}
+
+void
+thread_sanity_check(struct thread *td)
+{
+ struct proc *p;
+ struct ksegrp *kg;
+ struct kse *ke;
+ struct thread *td2;
+ unsigned int prevpri;
+ int saw_lastassigned;
+ int unassigned;
+ int assigned;
+
+ p = td->td_proc;
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+
+ if (kg != &p->p_ksegrp) {
+ panic ("wrong ksegrp");
+ }
+
+ if (ke) {
+ if (ke != &p->p_kse) {
+ panic("wrong kse");
+ }
+ if (ke->ke_thread != td) {
+ panic("wrong thread");
+ }
+ }
+
+ if ((p->p_flag & P_KSES) == 0) {
+ if (ke == NULL) {
+ panic("non KSE thread lost kse");
+ }
+ } else {
+ prevpri = 0;
+ saw_lastassigned = 0;
+ unassigned = 0;
+ assigned = 0;
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority < prevpri) {
+ panic("thread runqueue unosorted");
+ }
+ prevpri = td2->td_priority;
+ if (td2->td_kse) {
+ assigned++;
+ if (unassigned) {
+ panic("unassigned before assigned");
+ }
+ if (kg->kg_last_assigned == NULL) {
+ panic("lastassigned corrupt");
+ }
+ if (saw_lastassigned) {
+ panic("last assigned not last");
+ }
+ if (td2->td_kse->ke_thread != td2) {
+ panic("mismatched kse/thread");
+ }
+ } else {
+ unassigned++;
+ }
+ if (td2 == kg->kg_last_assigned) {
+ saw_lastassigned = 1;
+ if (td2->td_kse == NULL) {
+ panic("last assigned not assigned");
+ }
+ }
+ }
+ if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
+ panic("where on earth does lastassigned point?");
+ }
+ FOREACH_THREAD_IN_GROUP(kg, td2) {
+ if (((td2->td_flags & TDF_UNBOUND) == 0) &&
+ (td2->td_state == TDS_RUNQ)) {
+ assigned++;
+ if (td2->td_kse == NULL) {
+ panic ("BOUND thread with no KSE");
+ }
+ }
+ }
+#if 0
+ if ((unassigned + assigned) != kg->kg_runnable) {
+ panic("wrong number in runnable");
+ }
+#endif
+ }
}
+
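The runq_add() and runq_setbit()/runq_clrbit() comments above describe the underlying structure: one queue per RQ_PPQ priority levels, plus a status bitmap with one bit per queue packed into words. Below is a minimal, self-contained sketch of that bookkeeping, using toy sk_* names and constants rather than the kernel's own runq types and RQB_* macros, which may differ in detail.

    /* Toy model of the run-queue bitmap bookkeeping (names are hypothetical). */
    #define SK_RQ_PPQ  4                            /* priorities folded per queue */
    #define SK_RQ_NQS  64                           /* number of queues            */
    #define SK_BPW     (sizeof(unsigned long) * 8)  /* status bits per word        */

    struct sk_runq {
            unsigned long sk_bits[(SK_RQ_NQS + SK_BPW - 1) / SK_BPW];
    };

    /* As in runq_add(): a thread priority selects a queue index. */
    static int
    sk_index(int priority)
    {
            return (priority / SK_RQ_PPQ);
    }

    /* As in runq_setbit(): mark queue 'pri' non-empty. */
    static void
    sk_setbit(struct sk_runq *rq, int pri)
    {
            rq->sk_bits[pri / SK_BPW] |= 1UL << (pri % SK_BPW);
    }

    /* As in runq_clrbit(): mark queue 'pri' empty again. */
    static void
    sk_clrbit(struct sk_runq *rq, int pri)
    {
            rq->sk_bits[pri / SK_BPW] &= ~(1UL << (pri % SK_BPW));
    }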
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index bd1a625e8757..a2a44ff8e143 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -277,9 +277,13 @@ schedcpu(arg)
* with 16-bit int's (remember them?)
* overflow takes 45 days.
*/
- /* XXXKSE */
- /* if ((ke->ke_flags & KEF_ONRUNQ) == 0) */
- if (p->p_stat == SSLEEP || p->p_stat == SSTOP) {
+ /* XXXKSE **WRONG***/
+ /*
+ * the kse slptimes are not touched in wakeup
+ * because the thread may not HAVE a KSE
+ */
+ if (ke->ke_state != KES_ONRUNQ &&
+ ke->ke_state != KES_RUNNING) {
ke->ke_slptime++;
} else {
ke->ke_slptime = 0;
@@ -321,20 +325,31 @@ schedcpu(arg)
}
kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
resetpriority(kg);
- td = FIRST_THREAD_IN_PROC(p);
- if (td->td_priority >= PUSER &&
- (p->p_sflag & PS_INMEM)) {
- int changedqueue =
- ((td->td_priority / RQ_PPQ) !=
- (kg->kg_user_pri / RQ_PPQ));
-
- td->td_priority = kg->kg_user_pri;
- FOREACH_KSE_IN_GROUP(kg, ke) {
- if ((ke->ke_oncpu == NOCPU) &&
- (p->p_stat == SRUN) && /* XXXKSE */
- changedqueue) {
- remrunqueue(ke->ke_thread);
- setrunqueue(ke->ke_thread);
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ int changedqueue;
+ if (td->td_priority >= PUSER) {
+ /*
+ * Only change the priority
+ * of threads that are still at their
+ * user priority.
+ * XXXKSE This is problematic
+ * as we may need to re-order
+ * the threads on the KSEG list.
+ */
+ changedqueue =
+ ((td->td_priority / RQ_PPQ) !=
+ (kg->kg_user_pri / RQ_PPQ));
+
+ td->td_priority = kg->kg_user_pri;
+ if (changedqueue &&
+ td->td_state == TDS_RUNQ) {
+ /* this could be optimised */
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ } else {
+ td->td_priority = kg->kg_user_pri;
}
}
}
@@ -409,6 +424,7 @@ sleepinit(void)
* entered before msleep returns. If priority includes the PDROP
* flag the mutex is not entered before returning.
*/
+
int
msleep(ident, mtx, priority, wmesg, timo)
void *ident;
@@ -426,9 +442,48 @@ msleep(ident, mtx, priority, wmesg, timo)
if (KTRPOINT(td, KTR_CSW))
ktrcsw(1, 0);
#endif
+ KASSERT((td->td_kse != NULL), ("msleep: NULL KSE?"));
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse state?"));
WITNESS_SLEEP(0, &mtx->mtx_object);
KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL,
("sleeping without a mutex"));
+ /*
+ * If we are capable of async syscalls and there isn't already
+ * another one ready to return, start a new thread
+ * and queue it as ready to run. Note that there is danger here
+ * because we need to make sure that we don't sleep allocating
+ * the thread (recursion here might be bad).
+ * Hence the TDF_INMSLEEP flag.
+ */
+ if (p->p_flag & P_KSES) {
+ /* Just don't bother if the process is exiting
+ and we are not the exiting thread. */
+ if ((p->p_flag & P_WEXIT) && catch && p->p_singlethread != td)
+ return (EINTR);
+ if (td->td_mailbox && (!(td->td_flags & TDF_INMSLEEP))) {
+ /*
+ * If we have no queued work to do, then
+ * upcall to the UTS to see if it has more to do.
+ * We don't need to upcall now, just make it and
+ * queue it.
+ */
+ mtx_lock_spin(&sched_lock);
+ if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
+ /* Don't recurse here! */
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateX?"));
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateY?"));
+ }
+ mtx_unlock_spin(&sched_lock);
+ }
+ KASSERT((td->td_kse != NULL), ("msleep: NULL KSE2?"));
+ KASSERT((td->td_kse->ke_state == KES_RUNNING),
+ ("msleep: kse state2?"));
+ KASSERT((td->td_kse->ke_thread == td),
+ ("msleep: kse/thread mismatch?"));
+ }
mtx_lock_spin(&sched_lock);
if (cold || panicstr) {
/*
@@ -454,7 +509,7 @@ msleep(ident, mtx, priority, wmesg, timo)
}
KASSERT(p != NULL, ("msleep1"));
- KASSERT(ident != NULL && td->td_proc->p_stat == SRUN, ("msleep"));
+ KASSERT(ident != NULL && td->td_state == TDS_RUNNING, ("msleep"));
td->td_wchan = ident;
td->td_wmesg = wmesg;
@@ -468,20 +523,23 @@ msleep(ident, mtx, priority, wmesg, timo)
callout_reset(&td->td_slpcallout, timo, endtsleep, td);
/*
* We put ourselves on the sleep queue and start our timeout
- * before calling cursig, as we could stop there, and a wakeup
- * or a SIGCONT (or both) could occur while we were stopped.
- * A SIGCONT would cause us to be marked as SSLEEP
+ * before calling thread_suspend_check, as we could stop there, and
+ * a wakeup or a SIGCONT (or both) could occur while we were stopped
* without resuming us, thus we must be ready for sleep
* when cursig is called. If the wakeup happens while we're
* stopped, td->td_wchan will be 0 upon return from cursig.
*/
if (catch) {
- CTR3(KTR_PROC, "msleep caught: proc %p (pid %d, %s)", p,
+ CTR3(KTR_PROC, "msleep caught: thread %p (pid %d, %s)", td,
p->p_pid, p->p_comm);
td->td_flags |= TDF_SINTR;
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
- sig = cursig(p);
+ sig = cursig(td);
+ if (thread_suspend_check(1)) {
+ sig = EINTR;
+ rval = EINTR;
+ }
mtx_lock_spin(&sched_lock);
PROC_UNLOCK(p);
if (sig != 0) {
@@ -492,13 +550,13 @@ msleep(ident, mtx, priority, wmesg, timo)
} else
sig = 0;
if (td->td_wchan != NULL) {
- td->td_proc->p_stat = SSLEEP;
p->p_stats->p_ru.ru_nvcsw++;
+ td->td_state = TDS_SLP;
mi_switch();
}
- CTR3(KTR_PROC, "msleep resume: proc %p (pid %d, %s)", td, p->p_pid,
+ CTR3(KTR_PROC, "msleep resume: thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
- KASSERT(td->td_proc->p_stat == SRUN, ("running but not SRUN"));
+ KASSERT(td->td_state == TDS_RUNNING, ("running but not TDS_RUNNING"));
td->td_flags &= ~TDF_SINTR;
if (td->td_flags & TDF_TIMEOUT) {
td->td_flags &= ~TDF_TIMEOUT;
@@ -524,8 +582,8 @@ msleep(ident, mtx, priority, wmesg, timo)
if (rval == 0 && catch) {
PROC_LOCK(p);
- /* XXX: shouldn't we always be calling cursig() */
- if (sig != 0 || (sig = cursig(p))) {
+ /* XXX: shouldn't we always be calling cursig() */
+ if (sig != 0 || (sig = cursig(td))) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
else
@@ -571,7 +629,7 @@ endtsleep(arg)
td->td_flags &= ~TDF_TIMEOUT;
setrunqueue(td);
} else if (td->td_wchan != NULL) {
- if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
+ if (td->td_state == TDS_SLP) /* XXXKSE */
setrunnable(td);
else
unsleep(td);
@@ -583,6 +641,38 @@ endtsleep(arg)
}
/*
+ * Abort a thread, as if an interrupt had occurred. Only abort
+ * interruptible waits (unfortunately it isn't safe to abort others).
+ * This is almost identical to cv_abort().
+ * Think about merging them?
+ * Also, whatever the signal code does...
+ */
+void
+abortsleep(struct thread *td)
+{
+
+ mtx_lock_spin(&sched_lock);
+ /*
+ * If the TDF_TIMEOUT flag is set, just leave. A
+ * timeout is scheduled anyhow.
+ */
+ if ((td->td_flags & (TDF_TIMEOUT | TDF_SINTR)) == TDF_SINTR) {
+ if (td->td_wchan != NULL) {
+ if (td->td_state == TDS_SLP) { /* XXXKSE */
+ setrunnable(td);
+ } else {
+ /*
+ * Probably in a suspended state..
+ * um.. dunno XXXKSE
+ */
+ unsleep(td);
+ }
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+}
+
+/*
* Remove a process from its wait queue
*/
void
@@ -618,25 +708,24 @@ restart:
if (td->td_wchan == ident) {
TAILQ_REMOVE(qp, td, td_slpq);
td->td_wchan = NULL;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
CTR3(KTR_PROC, "wakeup: thread %p (pid %d, %s)",
td, p->p_pid, p->p_comm);
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (p->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
} else {
+/* XXXKSE Wrong! */ td->td_state = TDS_RUNQ;
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
}
/* END INLINE EXPANSION */
- goto restart;
}
+ goto restart;
}
}
mtx_unlock_spin(&sched_lock);
@@ -665,20 +754,19 @@ restart:
if (td->td_wchan == ident) {
TAILQ_REMOVE(qp, td, td_slpq);
td->td_wchan = NULL;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
- CTR3(KTR_PROC, "wakeup1: proc %p (pid %d, %s)",
- p, p->p_pid, p->p_comm);
+ CTR3(KTR_PROC,"wakeup1: thread %p (pid %d, %s)",
+ td, p->p_pid, p->p_comm);
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (p->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
break;
} else {
+/* XXXKSE Wrong */ td->td_state = TDS_RUNQ;
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
}
@@ -698,15 +786,19 @@ mi_switch()
{
struct bintime new_switchtime;
struct thread *td = curthread; /* XXX */
- register struct proc *p = td->td_proc; /* XXX */
+ struct proc *p = td->td_proc; /* XXX */
+ struct kse *ke = td->td_kse;
#if 0
register struct rlimit *rlim;
#endif
u_int sched_nest;
mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+ KASSERT((ke->ke_state == KES_RUNNING), ("mi_switch: kse state?"));
#ifdef INVARIANTS
- if (p->p_stat != SMTX && p->p_stat != SRUN)
+ if (td->td_state != TDS_MTX &&
+ td->td_state != TDS_RUNQ &&
+ td->td_state != TDS_RUNNING)
mtx_assert(&Giant, MA_NOTOWNED);
#endif
@@ -735,7 +827,8 @@ mi_switch()
*
* XXX drop sched_lock, pickup Giant
*/
- if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
+ if (p->p_state != PRS_ZOMBIE &&
+ p->p_limit->p_cpulimit != RLIM_INFINITY &&
p->p_runtime > p->p_limit->p_cpulimit) {
rlim = &p->p_rlimit[RLIMIT_CPU];
if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
@@ -763,17 +856,35 @@ mi_switch()
*/
cnt.v_swtch++;
PCPU_SET(switchtime, new_switchtime);
- CTR3(KTR_PROC, "mi_switch: old proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "mi_switch: old thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
sched_nest = sched_lock.mtx_recurse;
- td->td_lastcpu = td->td_kse->ke_oncpu;
- td->td_kse->ke_oncpu = NOCPU;
- td->td_kse->ke_flags &= ~KEF_NEEDRESCHED;
+ td->td_lastcpu = ke->ke_oncpu;
+ ke->ke_oncpu = NOCPU;
+ ke->ke_flags &= ~KEF_NEEDRESCHED;
+ /*
+ * At the last moment: if this KSE is not on the run queue,
+ * it needs to be freed correctly and the thread treated accordingly.
+ */
+ if ((td->td_state == TDS_RUNNING) &&
+ ((ke->ke_flags & KEF_IDLEKSE) == 0)) {
+ /* Put us back on the run queue (kse and all). */
+ setrunqueue(td);
+ } else if ((td->td_flags & TDF_UNBOUND) &&
+ (td->td_state != TDS_RUNQ)) { /* in case of old code */
+ /*
+ * We will not be on the run queue.
+ * Someone else can use the KSE if they need it.
+ */
+ td->td_kse = NULL;
+ kse_reassign(ke);
+ }
cpu_switch();
td->td_kse->ke_oncpu = PCPU_GET(cpuid);
+ td->td_kse->ke_state = KES_RUNNING;
sched_lock.mtx_recurse = sched_nest;
sched_lock.mtx_lock = (uintptr_t)td;
- CTR3(KTR_PROC, "mi_switch: new proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "mi_switch: new thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
@@ -791,37 +902,42 @@ setrunnable(struct thread *td)
struct proc *p = td->td_proc;
mtx_lock_spin(&sched_lock);
- switch (p->p_stat) {
- case SZOMB: /* not a thread flag XXXKSE */
+ switch (p->p_state) {
+ case PRS_ZOMBIE:
panic("setrunnable(1)");
+ default:
+ break;
}
- switch (td->td_proc->p_stat) {
+ switch (td->td_state) {
case 0:
- case SRUN:
- case SWAIT:
+ case TDS_RUNNING:
+ case TDS_IWAIT:
default:
+ printf("state is %d", td->td_state);
panic("setrunnable(2)");
- case SSTOP:
- case SSLEEP: /* e.g. when sending signals */
+ case TDS_SUSPENDED:
+ thread_unsuspend(p);
+ break;
+ case TDS_SLP: /* e.g. when sending signals */
if (td->td_flags & TDF_CVWAITQ)
cv_waitq_remove(td);
else
unsleep(td);
- break;
-
- case SIDL:
+ case TDS_UNQUEUED: /* being put back onto the queue */
+ case TDS_NEW: /* not yet had time to suspend */
+ case TDS_RUNQ: /* not yet had time to suspend */
break;
}
- td->td_proc->p_stat = SRUN;
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
if ((p->p_sflag & PS_INMEM) == 0) {
+ td->td_state = TDS_RUNQ; /* XXXKSE not a good idea */
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
} else {
- setrunqueue(td);
+ if (td->td_state != TDS_RUNQ)
+ setrunqueue(td); /* XXXKSE */
maybe_resched(td);
}
mtx_unlock_spin(&sched_lock);
@@ -848,7 +964,7 @@ resetpriority(kg)
kg->kg_user_pri = newpriority;
}
FOREACH_THREAD_IN_GROUP(kg, td) {
- maybe_resched(td);
+ maybe_resched(td); /* XXXKSE silly */
}
mtx_unlock_spin(&sched_lock);
}
@@ -865,20 +981,21 @@ loadav(void *arg)
int i, nrun;
struct loadavg *avg;
struct proc *p;
- struct ksegrp *kg;
+ struct thread *td;
avg = &averunnable;
sx_slock(&allproc_lock);
nrun = 0;
FOREACH_PROC_IN_SYSTEM(p) {
- FOREACH_KSEGRP_IN_PROC(p, kg) {
- switch (p->p_stat) {
- case SRUN:
+ FOREACH_THREAD_IN_PROC(p, td) {
+ switch (td->td_state) {
+ case TDS_RUNQ:
+ case TDS_RUNNING:
if ((p->p_flag & P_NOLOAD) != 0)
goto nextproc;
- /* FALLTHROUGH */
- case SIDL:
- nrun++;
+ nrun++; /* XXXKSE */
+ default:
+ break;
}
nextproc:
continue;
@@ -932,19 +1049,18 @@ void
schedclock(td)
struct thread *td;
{
- struct kse *ke = td->td_kse;
- struct ksegrp *kg = td->td_ksegrp;
+ struct kse *ke;
+ struct ksegrp *kg;
- if (td) {
- ke->ke_cpticks++;
- kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
- if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
- resetpriority(td->td_ksegrp);
- if (td->td_priority >= PUSER)
- td->td_priority = kg->kg_user_pri;
- }
- } else {
- panic("schedclock");
+ KASSERT((td != NULL), ("schedclock: null thread pointer"));
+ ke = td->td_kse;
+ kg = td->td_ksegrp;
+ ke->ke_cpticks++;
+ kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
+ if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
+ resetpriority(kg);
+ if (td->td_priority >= PUSER)
+ td->td_priority = kg->kg_user_pri;
}
}
@@ -959,7 +1075,6 @@ yield(struct thread *td, struct yield_args *uap)
mtx_assert(&Giant, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
td->td_priority = PRI_MAX_TIMESHARE;
- setrunqueue(td);
kg->kg_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index c9081c314c75..bbe36bea6874 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -181,7 +181,18 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
mtx_lock_spin(&sched_lock);
rtp_to_pri(&rtp, kg);
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+ }
mtx_unlock_spin(&sched_lock);
}
else
@@ -203,7 +214,19 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
* on the scheduling code: You must leave the
* scheduling info alone.
*/
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+
+ }
mtx_unlock_spin(&sched_lock);
}
break;
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 9dad93bb2dd5..afd4c5d0c069 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -124,8 +124,8 @@ forward_signal(struct thread *td)
* executing so that it executes ast().
*/
mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(td->td_proc->p_stat == SRUN,
- ("forward_signal: process is not SRUN"));
+ KASSERT(td->td_state == TDS_RUNNING,
+ ("forward_signal: thread is not TDS_RUNNING"));
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 3b415de5c401..027aa9c7f34b 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -48,6 +48,8 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/kse.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/systm.h>
@@ -71,13 +73,15 @@ userret(td, frame, oticks)
struct kse *ke = td->td_kse;
struct ksegrp *kg = td->td_ksegrp;
+ CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
+ p->p_comm);
#ifdef INVARIANTS
/* Check that we called signotify() enough. */
mtx_lock(&Giant);
PROC_LOCK(p);
mtx_lock_spin(&sched_lock);
if (SIGPENDING(p) && ((p->p_sflag & PS_NEEDSIGCHK) == 0 ||
- (p->p_kse.ke_flags & KEF_ASTPENDING) == 0))
+ (ke->ke_flags & KEF_ASTPENDING) == 0))
printf("failed to set signal flags proprly for ast()\n");
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
@@ -100,6 +104,22 @@ userret(td, frame, oticks)
}
/*
+ * We need to check to see if we have to exit or wait due to a
+ * single threading requirement or some other STOP condition.
+ */
+ PROC_LOCK(p);
+ thread_suspend_check(0); /* Can suspend or kill */
+ PROC_UNLOCK(p);
+
+ /*
+ * DO special thread processing, e.g. upcall tweaking and such
+ */
+ if (p->p_flag & P_KSES) {
+ thread_userret(p, kg, ke, td, frame);
+ /* printf("KSE thread returned"); */
+ }
+
+ /*
* Charge system time if profiling.
*
* XXX should move PS_PROFIL to a place that can obviously be
@@ -121,8 +141,7 @@ userret(td, frame, oticks)
* This function will return with preemption disabled.
*/
void
-ast(framep)
- struct trapframe *framep;
+ast(struct trapframe *framep)
{
struct thread *td = curthread;
struct proc *p = td->td_proc;
@@ -136,6 +155,8 @@ ast(framep)
int ucode;
#endif
+ CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
+ p->p_comm);
KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
#ifdef WITNESS
if (witness_list(td))
@@ -164,6 +185,13 @@ ast(framep)
p->p_stats->p_prof.pr_ticks = 0;
}
mtx_unlock_spin(&sched_lock);
+ /*
+ * XXXKSE While the fact that we owe a user profiling
+ * tick is stored per KSE in this code, the statistics
+ * themselves are still stored per process.
+ * This should probably change, by which I mean that
+ * possibly the location of both might change.
+ */
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
@@ -192,14 +220,13 @@ ast(framep)
if (flags & KEF_NEEDRESCHED) {
mtx_lock_spin(&sched_lock);
td->td_priority = kg->kg_user_pri;
- setrunqueue(td);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
}
if (sflag & PS_NEEDSIGCHK) {
PROC_LOCK(p);
- while ((sig = cursig(p)) != 0)
+ while ((sig = cursig(td)) != 0)
postsig(sig);
PROC_UNLOCK(p);
}
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 08bca8d67b2c..c2e79d02d5f2 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -119,23 +119,20 @@ propagate_priority(struct thread *td)
return;
}
+ KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS"));
+ MPASS(td->td_proc != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
+ KASSERT(td->td_state != TDS_SLP,
+ ("sleeping thread owns a mutex"));
if (td->td_priority <= pri) /* lower is higher priority */
return;
- /*
- * Bump this thread's priority.
- */
- td->td_priority = pri;
/*
* If lock holder is actually running, just bump priority.
*/
- if (thread_running(td)) {
- MPASS(td->td_proc->p_stat == SRUN
- || td->td_proc->p_stat == SZOMB
- || td->td_proc->p_stat == SSTOP);
+ if (td->td_state == TDS_RUNNING) {
+ td->td_priority = pri;
return;
}
@@ -151,20 +148,26 @@ propagate_priority(struct thread *td)
* If on run queue move to new run queue, and quit.
* XXXKSE this gets a lot more complicated under threads
* but try anyhow.
+ * We should have a special call to do this more efficiently.
*/
- if (td->td_proc->p_stat == SRUN) {
+ if (td->td_state == TDS_RUNQ) {
MPASS(td->td_blocked == NULL);
remrunqueue(td);
+ td->td_priority = pri;
setrunqueue(td);
return;
}
+ /*
+ * Adjust for any other cases.
+ */
+ td->td_priority = pri;
/*
* If we aren't blocked on a mutex, we should be.
*/
- KASSERT(td->td_proc->p_stat == SMTX, (
+ KASSERT(td->td_state == TDS_MTX, (
"process %d(%s):%d holds %s but isn't blocked on a mutex\n",
- td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
+ td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
m->mtx_object.lo_name));
/*
@@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
td->td_blocked = m;
td->td_mtxname = m->mtx_object.lo_name;
- td->td_proc->p_stat = SMTX;
+ td->td_state = TDS_MTX;
propagate_priority(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
@@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
m, td1);
td1->td_blocked = NULL;
- td1->td_proc->p_stat = SRUN;
setrunqueue(td1);
if (td->td_critnest == 1 && td1->td_priority < pri) {
@@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
}
}
#endif
- setrunqueue(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK,
"_mtx_unlock_sleep: %p switching out lock=%p", m,
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index 182221d2124f..02b3a0dac965 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -225,6 +225,7 @@ static struct witness_order_list_entry order_lists[] = {
#endif
{ "clk", &lock_class_mtx_spin },
{ "mutex profiling lock", &lock_class_mtx_spin },
+ { "zombie_thread_lock", &lock_class_mtx_spin },
{ NULL, NULL },
{ NULL, NULL }
};
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 1bdd913ea2e9..d8fba59f28e9 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1187,7 +1187,7 @@ selwakeup(sip)
sip->si_thread = NULL;
mtx_lock_spin(&sched_lock);
if (td->td_wchan == (caddr_t)&selwait) {
- if (td->td_proc->p_stat == SSLEEP)
+ if (td->td_state == TDS_SLP)
setrunnable(td);
else
cv_waitq_remove(td);
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index dacb9d9384a4..ab6f1e88326c 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -467,7 +467,7 @@ ptrace(struct thread *td, struct ptrace_args *uap)
}
/* not currently stopped */
- if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0) {
+ if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
error = EBUSY;
goto fail;
}
@@ -566,10 +566,12 @@ ptrace(struct thread *td, struct ptrace_args *uap)
if (proctree_locked)
sx_xunlock(&proctree_lock);
/* deliver or queue signal */
- if (p->p_stat == SSTOP) {
+ if (P_SHOULDSTOP(p)) {
p->p_xstat = uap->data;
mtx_lock_spin(&sched_lock);
+ p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SGNL);
setrunnable(td2); /* XXXKSE */
+ /* Need foreach kse in proc, ... make_kse_queued(). */
mtx_unlock_spin(&sched_lock);
} else if (uap->data)
psignal(p, uap->data);
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index d8115fb2e428..15a5d7cdda7d 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -552,7 +552,7 @@
381 STD BSD { int kse_new(struct kse_mailbox * mbx, \
int new_grp_flag); }
382 STD BSD { int thread_wakeup(struct thread_mailbox *tmbx); }
-383 STD BSD { int kse_yield(void); }
+383 MSTD BSD { int kse_yield(void); }
384 UNIMPL BSD __mac_get_proc
385 UNIMPL BSD __mac_set_proc
386 UNIMPL BSD __mac_get_fd
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index b9c57432699b..6c915e1b39ca 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -2392,17 +2392,35 @@ ttyinfo(struct tty *tp)
PGRP_UNLOCK(tp->t_pgrp);
td = FIRST_THREAD_IN_PROC(pick);
- stmp = pick->p_stat == SRUN ? "running" : /* XXXKSE */
- pick->p_stat == SMTX ? td->td_mtxname :
- td->td_wmesg ? td->td_wmesg : "iowait";
+ if (pick->p_flag & P_KSES) {
+ stmp = "KSE" ; /* XXXKSE */
+ } else {
+ if (td) {
+ if (td->td_state == TDS_RUNQ) {
+ stmp = "running";
+ } else if (td->td_state == TDS_MTX) {
+ stmp = td->td_mtxname;
+ } else if (td->td_wmesg) {
+ stmp = td->td_wmesg;
+ } else {
+ stmp = "iowait";
+ }
+ } else {
+ stmp = "threadless";
+ panic("ttyinfo: no thread!?");
+ }
+ }
calcru(pick, &utime, &stime, NULL);
- ltmp = pick->p_stat == SIDL || pick->p_stat == SWAIT ||
- pick->p_stat == SZOMB ? 0 :
- pgtok(vmspace_resident_count(pick->p_vmspace));
+ ltmp = ((pick->p_state == PRS_NEW)
+ || (td && (td->td_state == TDS_IWAIT))
+ || (pick->p_state == PRS_ZOMBIE)) ? 0 :
+ pgtok(vmspace_resident_count(pick->p_vmspace));
mtx_unlock_spin(&sched_lock);
ttyprintf(tp, " cmd: %s %d [%s%s] ", pick->p_comm,
- pick->p_pid, pick->p_stat == SMTX ? "*" : "", stmp);
+ pick->p_pid,
+ td->td_state == TDS_MTX ? "*" : "",
+ stmp);
/* Print user time. */
ttyprintf(tp, "%ld.%02ldu ",
@@ -2433,7 +2451,19 @@ ttyinfo(struct tty *tp)
* we pick out just "short-term" sleepers (P_SINTR == 0).
* 4) Further ties are broken by picking the highest pid.
*/
-#define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
+#define ISRUN(p, val) \
+do { \
+ struct thread *td; \
+ val = 0; \
+ FOREACH_THREAD_IN_PROC(p, td) { \
+ if (td->td_state == TDS_RUNQ || \
+ td->td_state == TDS_RUNNING) { \
+ val = 1; \
+ break; \
+ } \
+ } \
+} while (0)
+
#define TESTAB(a, b) ((a)<<1 | (b))
#define ONLYA 2
#define ONLYB 1
@@ -2449,10 +2479,13 @@ proc_compare(struct proc *p1, struct proc *p2)
if (p1 == NULL)
return (1);
+ ISRUN(p1, esta);
+ ISRUN(p2, estb);
+
/*
* see if at least one of them is runnable
*/
- switch (TESTAB(ISRUN(p1), ISRUN(p2))) {
+ switch (TESTAB(esta, estb)) {
case ONLYA:
return (0);
case ONLYB:
@@ -2477,7 +2510,7 @@ proc_compare(struct proc *p1, struct proc *p2)
/*
* weed out zombies
*/
- switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) {
+ switch (TESTAB(p1->p_state == PRS_ZOMBIE, p2->p_state == PRS_ZOMBIE)) {
case ONLYA:
return (1);
case ONLYB: