* Alfred Perlstein <bright@mu.org> [020310 16:12] wrote:
> Chad David and I have been working on fixing select(2) and poll(2)'s
> locking. I've been working on pushing pipe read/write ops down
> out from under Giant.
Ok, after further investigation revision 1.79 of sys_generic.c seems
to have been wrong. I had to pretty much lay the 4.x version of
sys_generic.c next to the 5.x one to get this right.
If you review the patch after it's applied the code is a LOT cleaner
and makes a hell of a lot more sense now. The XXX near the TDF_SELECT
check is gone because it's obvious what it's for.
So do we brave this delta for the developer preview? :)
Index: dev/bktr/bktr_core.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/bktr/bktr_core.c,v
retrieving revision 1.117
diff -u -r1.117 bktr_core.c
--- dev/bktr/bktr_core.c 12 Sep 2001 08:37:02 -0000 1.117
+++ dev/bktr/bktr_core.c 11 Mar 2002 02:44:14 -0000
@@ -809,7 +809,7 @@
}
/* If someone has a select() on /dev/vbi, inform them */
- if (bktr->vbi_select.si_pid) {
+ if (SEL_WAITING(&bktr->vbi_select)) {
selwakeup(&bktr->vbi_select);
}
Index: dev/kbd/kbd.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/kbd/kbd.c,v
retrieving revision 1.27
diff -u -r1.27 kbd.c
--- dev/kbd/kbd.c 12 Sep 2001 08:37:06 -0000 1.27
+++ dev/kbd/kbd.c 11 Mar 2002 22:08:08 -0000
@@ -523,8 +523,6 @@
bzero(&sc->gkb_q, sizeof(sc->gkb_q));
#endif
clist_alloc_cblocks(&sc->gkb_q, KB_QSIZE, KB_QSIZE/2); /* XXX */
- sc->gkb_rsel.si_flags = 0;
- sc->gkb_rsel.si_pid = 0;
splx(s);
return 0;
Index: dev/snp/snp.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/snp/snp.c,v
retrieving revision 1.69
diff -u -r1.69 snp.c
--- dev/snp/snp.c 24 Nov 2001 15:59:46 -0000 1.69
+++ dev/snp/snp.c 11 Mar 2002 02:44:14 -0000
@@ -373,7 +373,6 @@
wakeup((caddr_t)snp);
}
selwakeup(&snp->snp_sel);
- snp->snp_sel.si_pid = 0;
return (n);
}
@@ -447,7 +446,6 @@
detach_notty:
selwakeup(&snp->snp_sel);
- snp->snp_sel.si_pid = 0;
if ((snp->snp_flags & SNOOP_OPEN) == 0)
free(snp, M_SNP);
Index: dev/sound/pcm/channel.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/sound/pcm/channel.c,v
retrieving revision 1.81
diff -u -r1.81 channel.c
--- dev/sound/pcm/channel.c 24 Feb 2002 00:49:43 -0000 1.81
+++ dev/sound/pcm/channel.c 11 Mar 2002 02:44:14 -0000
@@ -116,7 +116,7 @@
struct snd_dbuf *bs = c->bufsoft;
CHN_LOCKASSERT(c);
- if (sndbuf_getsel(bs)->si_pid && chn_polltrigger(c))
+ if (SEL_WAITING(sndbuf_getsel(bs)) && chn_polltrigger(c))
selwakeup(sndbuf_getsel(bs));
wakeup(bs);
}
Index: dev/usb/ums.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/usb/ums.c,v
retrieving revision 1.48
diff -u -r1.48 ums.c
--- dev/usb/ums.c 15 Feb 2002 22:54:10 -0000 1.48
+++ dev/usb/ums.c 11 Mar 2002 22:38:21 -0000
@@ -344,8 +344,10 @@
sc->status.button = sc->status.obutton = 0;
sc->status.dx = sc->status.dy = sc->status.dz = 0;
+#ifndef __FreeBSD__
sc->rsel.si_flags = 0;
sc->rsel.si_pid = 0;
+#endif
sc->dev = make_dev(&ums_cdevsw, device_get_unit(self),
UID_ROOT, GID_OPERATOR,
Index: isa/psm.c
===================================================================
RCS file: /home/ncvs/src/sys/isa/psm.c,v
retrieving revision 1.43
diff -u -r1.43 psm.c
--- isa/psm.c 19 Dec 2001 13:32:21 -0000 1.43
+++ isa/psm.c 11 Mar 2002 22:08:51 -0000
@@ -1314,8 +1314,6 @@
device_busy(devclass_get_device(psm_devclass, unit));
/* Initialize state */
- sc->rsel.si_flags = 0;
- sc->rsel.si_pid = 0;
sc->mode.level = sc->dflt_mode.level;
sc->mode.protocol = sc->dflt_mode.protocol;
sc->watchdog = FALSE;
Index: kern/sys_generic.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.92
diff -u -r1.92 sys_generic.c
--- kern/sys_generic.c 9 Mar 2002 22:44:37 -0000 1.92
+++ kern/sys_generic.c 12 Mar 2002 05:46:13 -0000
@@ -80,6 +80,7 @@
size_t, off_t, int);
static int dofilewrite(struct thread *, struct file *, int,
const void *, size_t, off_t, int);
+static void clear_selinfo_list(struct thread *);
/*
* Read system call.
@@ -696,11 +697,36 @@
return (error);
}
+/*
+ * These are initialized in selectinit() via SYSINIT
+ */
+static struct mtx sellock;
+static struct cv selwait;
static int nselcoll; /* Select collisions since boot */
-struct cv selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
/*
+ * Remove the references to the thread from all of the objects
+ * we were polling.
+ *
+ * This code assumes that the underlying owner of the selinfo
+ * structure will hold sellock before it changes it, and that
+ * it will unlink itself from our list if it goes away.
+ */
+static void
+clear_selinfo_list(td)
+ struct thread *td;
+{
+ struct selinfo *si;
+
+ mtx_assert(&sellock, MA_OWNED);
+
+ TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
+ si->si_thread = NULL;
+ TAILQ_INIT(&td->td_selq);
+}
+
+/*
* Select system call.
*/
#ifndef _SYS_SYSPROTO_H_
@@ -775,7 +801,7 @@
sbp += ncpbytes / sizeof *sbp; \
error = copyin(uap->name, ibits[x], ncpbytes); \
if (error != 0) \
- goto done_noproclock; \
+ goto done_nosellock; \
} \
} while (0)
getbits(in, 0);
@@ -789,10 +815,10 @@
error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv));
if (error)
- goto done_noproclock;
+ goto done_nosellock;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_noproclock;
+ goto done_nosellock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -801,61 +827,62 @@
atv.tv_usec = 0;
}
timo = 0;
- PROC_LOCK(td->td_proc);
+ mtx_lock(&sellock);
retry:
+
ncoll = nselcoll;
mtx_lock_spin(&sched_lock);
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
+ mtx_unlock(&sellock);
+
+ /* XXX Is there a better place for this? */
+ TAILQ_INIT(&td->td_selq);
error = selscan(td, ibits, obits, uap->nd);
- PROC_LOCK(td->td_proc);
+ mtx_lock(&sellock);
if (error || td->td_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- /*
- * An event of our interest may occur during locking a process.
- * In order to avoid missing the event that occured during locking
- * the process, test TDF_SELECT and rescan file descriptors if
- * necessary.
- */
- mtx_lock_spin(&sched_lock);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- ncoll = nselcoll;
- td->td_flags |= TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
- error = selscan(td, ibits, obits, uap->nd);
- PROC_LOCK(td->td_proc);
- } else
- mtx_unlock_spin(&sched_lock);
+ if (timevalcmp(&rtv, &atv, >=))
goto done;
- }
ttv = atv;
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
+
+ /*
+ * An event of interest may occur while we do not hold
+ * sellock, so check TDF_SELECT and the number of
+ * collisions and rescan the file descriptors if
+ * necessary.
+ */
mtx_lock_spin(&sched_lock);
+ if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
+ mtx_unlock_spin(&sched_lock);
+ goto retry;
+ }
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
if (timo > 0)
- error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
+ error = cv_timedwait_sig(&selwait, &sellock, timo);
else
- error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
+ error = cv_wait_sig(&selwait, &sellock);
if (error == 0)
goto retry;
done:
+ clear_selinfo_list(td);
+
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
-done_noproclock:
+ mtx_unlock(&sellock);
+
+done_nosellock:
/* select is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -967,13 +994,13 @@
bits = smallbits;
error = copyin(SCARG(uap, fds), bits, ni);
if (error)
- goto done_noproclock;
+ goto done_nosellock;
if (SCARG(uap, timeout) != INFTIM) {
atv.tv_sec = SCARG(uap, timeout) / 1000;
atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_noproclock;
+ goto done_nosellock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -982,59 +1009,59 @@
atv.tv_usec = 0;
}
timo = 0;
- PROC_LOCK(td->td_proc);
+ mtx_lock(&sellock);
retry:
ncoll = nselcoll;
mtx_lock_spin(&sched_lock);
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
+ mtx_unlock(&sellock);
+
+ /* XXX Is there a better place for this? */
+ TAILQ_INIT(&td->td_selq);
error = pollscan(td, (struct pollfd *)bits, nfds);
- PROC_LOCK(td->td_proc);
+ mtx_lock(&sellock);
if (error || td->td_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- /*
- * An event of our interest may occur during locking a process.
- * In order to avoid missing the event that occured during locking
- * the process, test TDF_SELECT and rescan file descriptors if
- * necessary.
- */
- mtx_lock_spin(&sched_lock);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- ncoll = nselcoll;
- td->td_flags |= TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
- error = pollscan(td, (struct pollfd *)bits, nfds);
- PROC_LOCK(td->td_proc);
- } else
- mtx_unlock_spin(&sched_lock);
+ if (timevalcmp(&rtv, &atv, >=))
goto done;
- }
ttv = atv;
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
+ /*
+ * An event of interest may occur while we do not hold
+ * sellock, so check TDF_SELECT and the number of collisions
+ * and rescan the file descriptors if necessary.
+ */
mtx_lock_spin(&sched_lock);
+ if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
+ mtx_unlock_spin(&sched_lock);
+ goto retry;
+ }
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
+
if (timo > 0)
- error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
+ error = cv_timedwait_sig(&selwait, &sellock, timo);
else
- error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
+ error = cv_wait_sig(&selwait, &sellock);
+
if (error == 0)
goto retry;
done:
+ clear_selinfo_list(td);
+
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(td->td_proc);
-done_noproclock:
+ mtx_unlock(&sellock);
+
+done_nosellock:
/* poll is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -1126,18 +1153,6 @@
return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
-static int
-find_thread_in_proc(struct proc *p, struct thread *td)
-{
- struct thread *td2;
- FOREACH_THREAD_IN_PROC(p, td2) {
- if (td2 == td) {
- return (1);
- }
- }
- return (0);
-}
-
/*
* Record a select request.
*/
@@ -1146,29 +1161,21 @@
struct thread *selector;
struct selinfo *sip;
{
- struct proc *p;
- pid_t mypid;
- mypid = selector->td_proc->p_pid;
- if ((sip->si_pid == mypid) &&
- (sip->si_thread == selector)) { /* XXXKSE should be an ID? */
- return;
- }
- if (sip->si_pid &&
- (p = pfind(sip->si_pid)) &&
- (find_thread_in_proc(p, sip->si_thread))) {
- mtx_lock_spin(&sched_lock);
- if (sip->si_thread->td_wchan == (caddr_t)&selwait) {
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(p);
- sip->si_flags |= SI_COLL;
- return;
- }
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(p);
+ mtx_lock(&sellock);
+ /*
+ * If the thread is not NULL there is another thread
+ * interested in this object, and we need to flag the
+ * collision so selwakeup() can broadcast.
+ */
+ if (sip->si_thread != NULL) {
+ sip->si_flags |= SI_COLL;
+ } else {
+ sip->si_thread = selector;
+ TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
}
- sip->si_pid = mypid;
- sip->si_thread = selector;
+
+ mtx_unlock(&sellock);
}
/*
@@ -1176,37 +1183,37 @@
*/
void
selwakeup(sip)
- register struct selinfo *sip;
+ struct selinfo *sip;
{
struct thread *td;
- register struct proc *p;
- if (sip->si_pid == 0)
- return;
- if (sip->si_flags & SI_COLL) {
+ mtx_lock(&sellock);
+ td = sip->si_thread;
+
+ if ((sip->si_flags & SI_COLL) != 0) {
nselcoll++;
sip->si_flags &= ~SI_COLL;
cv_broadcast(&selwait);
}
- p = pfind(sip->si_pid);
- sip->si_pid = 0;
- td = sip->si_thread;
- if (p != NULL) {
- if (!find_thread_in_proc(p, td)) {
- PROC_UNLOCK(p); /* lock is in pfind() */;
- return;
- }
- mtx_lock_spin(&sched_lock);
- if (td->td_wchan == (caddr_t)&selwait) {
- if (td->td_proc->p_stat == SSLEEP)
- setrunnable(td);
- else
- cv_waitq_remove(td);
- } else
- td->td_flags &= ~TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(p); /* Lock is in pfind() */
+
+ if (td == NULL) {
+ mtx_unlock(&sellock);
+ return;
}
+
+ TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
+ mtx_lock_spin(&sched_lock);
+ if (td->td_wchan == (caddr_t)&selwait) {
+ if (td->td_proc->p_stat == SSLEEP)
+ setrunnable(td);
+ else
+ cv_waitq_remove(td);
+ } else
+ td->td_flags &= ~TDF_SELECT;
+ mtx_unlock_spin(&sched_lock);
+
+ sip->si_thread = NULL;
+ mtx_unlock(&sellock);
}
static void selectinit __P((void *));
@@ -1218,4 +1225,5 @@
void *dummy;
{
cv_init(&selwait, "select");
+ mtx_init(&sellock, "sellck", MTX_DEF);
}
Index: kern/tty.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/tty.c,v
retrieving revision 1.165
diff -u -r1.165 tty.c
--- kern/tty.c 2 Mar 2002 12:42:23 -0000 1.165
+++ kern/tty.c 11 Mar 2002 02:44:14 -0000
@@ -2273,7 +2273,7 @@
register struct tty *tp;
{
- if (tp->t_rsel.si_pid != 0)
+ if (SEL_WAITING(&tp->t_rsel))
selwakeup(&tp->t_rsel);
if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));
@@ -2289,7 +2289,7 @@
register struct tty *tp;
{
- if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat)
+ if (SEL_WAITING(&tp->t_wsel) && tp->t_outq.c_cc <= tp->t_olowat)
selwakeup(&tp->t_wsel);
if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));
Index: net/bpf.c
===================================================================
RCS file: /home/ncvs/src/sys/net/bpf.c,v
retrieving revision 1.86
diff -u -r1.86 bpf.c
--- net/bpf.c 14 Dec 2001 22:17:54 -0000 1.86
+++ net/bpf.c 11 Mar 2002 02:44:14 -0000
@@ -514,8 +514,6 @@
pgsigio(d->bd_sigio, d->bd_sig, 0);
selwakeup(&d->bd_sel);
- /* XXX */
- d->bd_sel.si_pid = 0;
}
static void
Index: sys/proc.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.206
diff -u -r1.206 proc.h
--- sys/proc.h 23 Feb 2002 11:12:57 -0000 1.206
+++ sys/proc.h 11 Mar 2002 22:20:23 -0000
@@ -147,6 +147,7 @@
* m - Giant
* n - not locked, lazy
* o - locked by pgrpsess_lock sx
+ * p - select lock (sellock)
*
* If the locking key specifies two identifiers (for example, p_pptr) then
* either lock is sufficient for read access, but both locks must be held
@@ -259,6 +260,8 @@
TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. XXXKSE */
TAILQ_ENTRY(thread) td_blkq; /* (j) Mutex queue. XXXKSE */
TAILQ_ENTRY(thread) td_runq; /* (j) Run queue(s). XXXKSE */
+
+ TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos */
#define td_startzero td_flags
int td_flags; /* (j) TDF_* flags. */
Index: sys/selinfo.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/selinfo.h,v
retrieving revision 1.12
diff -u -r1.12 selinfo.h
--- sys/selinfo.h 27 Sep 2001 20:33:15 -0000 1.12
+++ sys/selinfo.h 11 Mar 2002 22:27:05 -0000
@@ -45,12 +45,21 @@
* notified when I/O becomes possible.
*/
struct selinfo {
- pid_t si_pid; /* process to be notified */
- struct thread *si_thread; /* thread in that process XXXKSE */
+ TAILQ_ENTRY(selinfo) si_thrlist; /* list hung off of thread */
+ struct thread *si_thread; /* thread waiting */
struct klist si_note; /* kernel note list */
short si_flags; /* see below */
};
#define SI_COLL 0x0001 /* collision occurred */
+
+#define SEL_WAITING(si) \
+ ((si)->si_thread != NULL || ((si)->si_flags & SI_COLL) != 0)
+
+#define SEL_INIT(si) \
+ do { \
+ (si)->si_thread = NULL; \
+ (si)->si_flags = 0; \
+ } while (0)
#ifdef _KERNEL
struct thread;
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-smp" in the body of the message