Hi guys,
Some work done here at my employer was approved for sharing with the
community, so I'm posting it here in the hope of a review.
We run some fairly heavy stress testing on our code, so I think it's
pretty solid.
My only concern is compatibility: I've done my best to preserve the
kernel source API through a few simple #defines, but not binary
compatibility. I can provide binary compat, albeit in a somewhat
confusing manner, but that would require some rototilling and awkward
renaming of the calls into the sleepq and turnstile code. In short,
I'd rather not, but I will if you think it's something that should be
done.
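To make the #define bit concrete, the source-compat shims I have in
mind look roughly like this (illustrative only; the real ones belong
in the sys/sleepqueue.h and sys/turnstile.h changes, which aren't in
the hunks quoted below, and picking the exclusive/index-0 queue as
the default is my assumption here):

#define sleepq_add(wc, lock, wmesg, flags) \
        sleepq_add_queue((wc), (lock), (wmesg), (flags), 0)
#define sleepq_signal(wc, flags, pri) \
        sleepq_signal_queue((wc), (flags), (pri), 0)
#define sleepq_broadcast(wc, flags, pri) \
        sleepq_broadcast_queue((wc), (flags), (pri), 0)
#define turnstile_wait(lock, owner) \
        turnstile_wait_queue((lock), (owner), TS_EXCLUSIVE_QUEUE)
#define turnstile_signal(ts) \
        turnstile_signal_queue((ts), TS_EXCLUSIVE_QUEUE)
#define turnstile_broadcast(ts) \
        turnstile_broadcast_queue((ts), TS_EXCLUSIVE_QUEUE)

That way legacy single-queue callers keep compiling unchanged; they
just get pointed at the exclusive queue.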
There are also a few placeholders for lock profiling, which I will
very likely be backporting shortly as well.
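For anyone who hasn't bumped into the newer sx(9) interface this
brings in, consumer code ends up looking roughly like the sketch
below. It's only an illustration (the foo_* names are made up), and
the wrapper macros themselves come from the sys/sx.h side of the
patch, which isn't in the hunks shown here:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_lock;

static void
foo_setup(void)
{
        /* Opt this lock in to adaptive spinning and allow recursion. */
        sx_init_flags(&foo_lock, "foo lock", SX_ADAPTIVESPIN | SX_RECURSE);
}

static void
foo_read(void)
{
        sx_slock(&foo_lock);
        /* ... read shared state ... */
        sx_sunlock(&foo_lock);
}

static void
foo_modify(void)
{
        sx_xlock(&foo_lock);
        /* ... modify shared state ... */
        sx_xunlock(&foo_lock);
}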
Patch is attached.
Comments/questions?
--
- Alfred Perlstein
[Attachment: netsmp_rwlock_freebsd6_08312007.diff (text/x-diff)]
Index: conf/NOTES
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/NOTES,v
retrieving revision 1.1325.2.36
diff -u -r1.1325.2.36 NOTES
--- conf/NOTES 8 Jul 2007 15:30:28 -0000 1.1325.2.36
+++ conf/NOTES 31 Aug 2007 00:39:59 -0000
@@ -189,12 +189,26 @@
# to disable it.
options NO_ADAPTIVE_MUTEXES
+# ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin
+# if the thread that currently owns the rwlock is executing on another
+# CPU. This behaviour is enabled by default, so this option can be used
+# to disable it.
+options NO_ADAPTIVE_RWLOCKS
+
+
# ADAPTIVE_GIANT causes the Giant lock to also be made adaptive when
# running without NO_ADAPTIVE_MUTEXES. Normally, because Giant is assumed
# to be held for extended periods, contention on Giant will cause a thread
# to sleep rather than spinning.
options ADAPTIVE_GIANT
+
+# ADAPTIVE_SX changes the behavior of sx locks to spin if the thread
+# that currently owns the lock is executing on another CPU. Note that
+# in addition to enabling this option, individual sx locks must be
+# initialized with the SX_ADAPTIVESPIN flag.
+options ADAPTIVE_SX
+
# MUTEX_NOINLINE forces mutex operations to call functions to perform each
# operation rather than inlining the simple cases. This can be used to
# shrink the size of the kernel text segment. Note that this behavior is
@@ -207,6 +221,20 @@
# priority waiter.
options MUTEX_WAKE_ALL
+# RWLOCK_NOINLINE forces rwlock operations to call functions to perform each
+# operation rather than inlining the simple cases. This can be used to
+# shrink the size of the kernel text segment. Note that this behavior is
+# already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
+# and WITNESS options.
+options RWLOCK_NOINLINE
+
+# SX_NOINLINE forces sx lock operations to call functions to perform each
+# operation rather than inlining the simple cases. This can be used to
+# shrink the size of the kernel text segment. Note that this behavior is
+# already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
+# and WITNESS options.
+options SX_NOINLINE
+
# SMP Debugging Options:
#
# PREEMPTION allows the threads that are in the kernel to be preempted
Index: conf/files
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/files,v
retrieving revision 1.1031.2.67
diff -u -r1.1031.2.67 files
--- conf/files 23 Aug 2007 22:30:14 -0000 1.1031.2.67
+++ conf/files 31 Aug 2007 00:39:59 -0000
@@ -1312,6 +1312,7 @@
kern/kern_proc.c standard
kern/kern_prot.c standard
kern/kern_resource.c standard
+kern/kern_rwlock.c standard
kern/kern_sema.c standard
kern/kern_shutdown.c standard
kern/kern_sig.c standard
Index: conf/options
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/options,v
retrieving revision 1.510.2.21
diff -u -r1.510.2.21 options
--- conf/options 8 Jul 2007 15:30:28 -0000 1.510.2.21
+++ conf/options 31 Aug 2007 00:39:59 -0000
@@ -60,7 +60,9 @@
# Miscellaneous options.
ADAPTIVE_GIANT opt_adaptive_mutexes.h
+ADAPTIVE_SX
NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h
+NO_ADAPTIVE_RWLOCKS
ALQ
AUDIT opt_global.h
CODA_COMPAT_5 opt_coda.h
@@ -517,6 +519,8 @@
MSIZE opt_global.h
REGRESSION opt_global.h
RESTARTABLE_PANICS opt_global.h
+RWLOCK_NOINLINE opt_global.h
+SX_NOINLINE opt_global.h
VFS_BIO_DEBUG opt_global.h
# These are VM related options
Index: dev/acpica/acpi_ec.c
===================================================================
RCS file: /cvs/ncvs/src/sys/dev/acpica/acpi_ec.c,v
retrieving revision 1.65.2.2
diff -u -r1.65.2.2 acpi_ec.c
--- dev/acpica/acpi_ec.c 11 May 2006 17:41:00 -0000 1.65.2.2
+++ dev/acpica/acpi_ec.c 31 Aug 2007 01:20:08 -0000
@@ -144,6 +144,7 @@
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/lock.h>
#include <sys/sx.h>
#include <machine/bus.h>
Index: kern/kern_ktrace.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.101.2.5
diff -u -r1.101.2.5 kern_ktrace.c
--- kern/kern_ktrace.c 6 Sep 2006 21:43:59 -0000 1.101.2.5
+++ kern/kern_ktrace.c 31 Aug 2007 00:39:59 -0000
@@ -53,6 +53,7 @@
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
+#include <sys/condvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
cvs diff: kern/kern_rwlock.c is a new entry, no comparison available
Index: kern/kern_sx.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/kern_sx.c,v
retrieving revision 1.25.2.4
diff -u -r1.25.2.4 kern_sx.c
--- kern/kern_sx.c 17 Aug 2006 19:53:06 -0000 1.25.2.4
+++ kern/kern_sx.c 31 Aug 2007 01:48:11 -0000
@@ -1,12 +1,14 @@
/*-
- * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>. All rights reserved.
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
- * the first lines of this file unmodified other than the possible
+ * the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in the
@@ -26,39 +28,90 @@
*/
/*
- * Shared/exclusive locks. This implementation assures deterministic lock
- * granting behavior, so that slocks and xlocks are interleaved.
+ * Shared/exclusive locks. This implementation attempts to ensure
+ * deterministic lock granting behavior, so that slocks and xlocks are
+ * interleaved.
*
* Priority propagation will not generally raise the priority of lock holders,
* so should not be relied upon in combination with sx locks.
*/
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_sx.c,v 1.25.2.4 2006/08/17 19:53:06 jhb Exp $");
-
+#include "opt_adaptive_sx.h"
#include "opt_ddb.h"
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/kern/kern_sx.c,v 1.54 2007/07/06 13:20:44 attilio Exp $");
+
#include <sys/param.h>
-#include <sys/systm.h>
#include <sys/ktr.h>
-#include <sys/linker_set.h>
-#include <sys/condvar.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sleepqueue.h>
#include <sys/sx.h>
+#include <sys/systm.h>
+
+#ifdef ADAPTIVE_SX
+#include <machine/cpu.h>
+#endif
#ifdef DDB
#include <ddb/ddb.h>
+#endif
+
+#if !defined(SMP) && defined(ADAPTIVE_SX)
+#error "You must have SMP to enable the ADAPTIVE_SX option"
+#endif
+
+CTASSERT(((SX_ADAPTIVESPIN | SX_RECURSE) & LO_CLASSFLAGS) ==
+ (SX_ADAPTIVESPIN | SX_RECURSE));
+
+/* Handy macros for sleep queues. */
+#define SQ_EXCLUSIVE_QUEUE 0
+#define SQ_SHARED_QUEUE 1
+/*
+ * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We
+ * drop Giant anytime we have to sleep or if we adaptively spin.
+ */
+#define GIANT_DECLARE \
+ int _giantcnt = 0; \
+ WITNESS_SAVE_DECL(Giant) \
+
+#define GIANT_SAVE() do { \
+ if (mtx_owned(&Giant)) { \
+ WITNESS_SAVE(&Giant.mtx_object, Giant); \
+ while (mtx_owned(&Giant)) { \
+ _giantcnt++; \
+ mtx_unlock(&Giant); \
+ } \
+ } \
+} while (0)
+
+#define GIANT_RESTORE() do { \
+ if (_giantcnt > 0) { \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_lock(&Giant); \
+ WITNESS_RESTORE(&Giant.mtx_object, Giant); \
+ } \
+} while (0)
+
+/*
+ * Returns true if an exclusive lock is recursed. It assumes
+ * curthread currently has an exclusive lock.
+ */
+#define sx_recursed(sx) ((sx)->sx_recurse != 0)
+
+#ifdef DDB
static void db_show_sx(struct lock_object *lock);
#endif
struct lock_class lock_class_sx = {
- "sx",
- LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
+ .lc_name = "sx",
+ .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
#ifdef DDB
- db_show_sx
+ .lc_ddb_show = db_show_sx,
#endif
};
@@ -75,243 +128,724 @@
}
void
-sx_init(struct sx *sx, const char *description)
+sx_init_flags(struct sx *sx, const char *description, int opts)
{
+ struct lock_object *lock;
+ int flags;
- sx->sx_lock = mtx_pool_find(mtxpool_lockbuilder, sx);
- sx->sx_cnt = 0;
- cv_init(&sx->sx_shrd_cv, description);
- sx->sx_shrd_wcnt = 0;
- cv_init(&sx->sx_excl_cv, description);
- sx->sx_excl_wcnt = 0;
- sx->sx_xholder = NULL;
- lock_init(&sx->sx_object, &lock_class_sx, description, NULL,
- LO_WITNESS | LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE);
+ MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
+ SX_NOPROFILE | SX_ADAPTIVESPIN)) == 0);
+
+ bzero(sx, sizeof(*sx));
+
+ flags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
+ if (opts & SX_DUPOK)
+ flags |= LO_DUPOK;
+ if (!(opts & SX_NOWITNESS))
+ flags |= LO_WITNESS;
+ if (opts & SX_QUIET)
+ flags |= LO_QUIET;
+
+ flags |= opts & (SX_ADAPTIVESPIN | SX_RECURSE);
+ sx->sx_lock = SX_LOCK_UNLOCKED;
+ sx->sx_recurse = 0;
+ lock = &sx->lock_object;
+ lock->lo_class = &lock_class_sx;
+ lock->lo_flags = flags;
+ lock->lo_name = lock->lo_type = description;
+ LOCK_LOG_INIT(lock, opts);
+ WITNESS_INIT(lock);
}
void
sx_destroy(struct sx *sx)
{
+ LOCK_LOG_DESTROY(&sx->lock_object, 0);
- KASSERT((sx->sx_cnt == 0 && sx->sx_shrd_wcnt == 0 && sx->sx_excl_wcnt ==
- 0), ("%s (%s): holders or waiters\n", __func__,
- sx->sx_object.lo_name));
-
- sx->sx_lock = NULL;
- cv_destroy(&sx->sx_shrd_cv);
- cv_destroy(&sx->sx_excl_cv);
-
- lock_destroy(&sx->sx_object);
+ KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
+ KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
+ sx->sx_lock = SX_LOCK_DESTROYED;
+ WITNESS_DESTROY(&sx->lock_object);
}
-void
-_sx_slock(struct sx *sx, const char *file, int line)
+int
+_sx_slock(struct sx *sx, int opts, const char *file, int line)
{
+ int error = 0;
- mtx_lock(sx->sx_lock);
- KASSERT(sx->sx_xholder != curthread,
- ("%s (%s): slock while xlock is held @ %s:%d\n", __func__,
- sx->sx_object.lo_name, file, line));
- WITNESS_CHECKORDER(&sx->sx_object, LOP_NEWORDER, file, line);
-
- /*
- * Loop in case we lose the race for lock acquisition.
- */
- while (sx->sx_cnt < 0) {
- sx->sx_shrd_wcnt++;
- cv_wait(&sx->sx_shrd_cv, sx->sx_lock);
- sx->sx_shrd_wcnt--;
+ MPASS(curthread != NULL);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_slock() of destroyed sx @ %s:%d", file, line));
+ WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line);
+ error = __sx_slock(sx, opts, file, line);
+ if (!error) {
+ LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
+ WITNESS_LOCK(&sx->lock_object, 0, file, line);
+ curthread->td_locks++;
}
- /* Acquire a shared lock. */
- sx->sx_cnt++;
-
- LOCK_LOG_LOCK("SLOCK", &sx->sx_object, 0, 0, file, line);
- WITNESS_LOCK(&sx->sx_object, 0, file, line);
- curthread->td_locks++;
-
- mtx_unlock(sx->sx_lock);
+ return (error);
}
int
_sx_try_slock(struct sx *sx, const char *file, int line)
{
+ uintptr_t x;
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt >= 0) {
- sx->sx_cnt++;
- LOCK_LOG_TRY("SLOCK", &sx->sx_object, 0, 1, file, line);
- WITNESS_LOCK(&sx->sx_object, LOP_TRYLOCK, file, line);
+ x = sx->sx_lock;
+ KASSERT(x != SX_LOCK_DESTROYED,
+ ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
+ if ((x & SX_LOCK_SHARED) && atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+ x + SX_ONE_SHARER)) {
+ LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
+ WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
curthread->td_locks++;
- mtx_unlock(sx->sx_lock);
return (1);
- } else {
- LOCK_LOG_TRY("SLOCK", &sx->sx_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
}
+
+ LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
+ return (0);
}
-void
-_sx_xlock(struct sx *sx, const char *file, int line)
+int
+_sx_xlock(struct sx *sx, int opts, const char *file, int line)
{
+ int error = 0;
- mtx_lock(sx->sx_lock);
-
- /*
- * With sx locks, we're absolutely not permitted to recurse on
- * xlocks, as it is fatal (deadlock). Normally, recursion is handled
- * by WITNESS, but as it is not semantically correct to hold the
- * xlock while in here, we consider it API abuse and put it under
- * INVARIANTS.
- */
- KASSERT(sx->sx_xholder != curthread,
- ("%s (%s): xlock already held @ %s:%d", __func__,
- sx->sx_object.lo_name, file, line));
- WITNESS_CHECKORDER(&sx->sx_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
+ MPASS(curthread != NULL);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_xlock() of destroyed sx @ %s:%d", file, line));
+ WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
line);
-
- /* Loop in case we lose the race for lock acquisition. */
- while (sx->sx_cnt != 0) {
- sx->sx_excl_wcnt++;
- cv_wait(&sx->sx_excl_cv, sx->sx_lock);
- sx->sx_excl_wcnt--;
+ error = __sx_xlock(sx, curthread, opts, file, line);
+ if (!error) {
+ LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
+ file, line);
+ WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
+ curthread->td_locks++;
}
- MPASS(sx->sx_cnt == 0);
-
- /* Acquire an exclusive lock. */
- sx->sx_cnt--;
- sx->sx_xholder = curthread;
-
- LOCK_LOG_LOCK("XLOCK", &sx->sx_object, 0, 0, file, line);
- WITNESS_LOCK(&sx->sx_object, LOP_EXCLUSIVE, file, line);
- curthread->td_locks++;
-
- mtx_unlock(sx->sx_lock);
+ return (error);
}
int
_sx_try_xlock(struct sx *sx, const char *file, int line)
{
+ int rval;
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt == 0) {
- sx->sx_cnt--;
- sx->sx_xholder = curthread;
- LOCK_LOG_TRY("XLOCK", &sx->sx_object, 0, 1, file, line);
- WITNESS_LOCK(&sx->sx_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file,
- line);
+ MPASS(curthread != NULL);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
+
+ if (sx_xlocked(sx) && (sx->lock_object.lo_flags & SX_RECURSE) != 0) {
+ sx->sx_recurse++;
+ atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ rval = 1;
+ } else
+ rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
+ (uintptr_t)curthread);
+ LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
+ if (rval) {
+ WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+ file, line);
curthread->td_locks++;
- mtx_unlock(sx->sx_lock);
- return (1);
- } else {
- LOCK_LOG_TRY("XLOCK", &sx->sx_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
}
+
+ return (rval);
}
void
_sx_sunlock(struct sx *sx, const char *file, int line)
{
- _sx_assert(sx, SX_SLOCKED, file, line);
- mtx_lock(sx->sx_lock);
+ MPASS(curthread != NULL);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
+ _sx_assert(sx, SA_SLOCKED, file, line);
+ curthread->td_locks--;
+ WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
+ LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
+#ifdef LOCK_PROFILING_SHARED
+ if (SX_SHARERS(sx->sx_lock) == 1)
+ lock_profile_release_lock(&sx->lock_object);
+#endif
+ __sx_sunlock(sx, file, line);
+}
+
+void
+_sx_xunlock(struct sx *sx, const char *file, int line)
+{
+ MPASS(curthread != NULL);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
+ _sx_assert(sx, SA_XLOCKED, file, line);
curthread->td_locks--;
- WITNESS_UNLOCK(&sx->sx_object, 0, file, line);
+ WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
+ LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
+ line);
+ if (!sx_recursed(sx))
+ lock_profile_release_lock(&sx->lock_object);
+ __sx_xunlock(sx, curthread, file, line);
+}
- /* Release. */
- sx->sx_cnt--;
+/*
+ * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
+ * This will only succeed if this thread holds a single shared lock.
+ * Return 1 if the upgrade succeeds, 0 otherwise.
+ */
+int
+_sx_try_upgrade(struct sx *sx, const char *file, int line)
+{
+ uintptr_t x;
+ int success;
+
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
+ _sx_assert(sx, SA_SLOCKED, file, line);
/*
- * If we just released the last shared lock, wake any waiters up, giving
- * exclusive lockers precedence. In order to make sure that exclusive
- * lockers won't be blocked forever, don't wake shared lock waiters if
- * there are exclusive lock waiters.
+ * Try to switch from one shared lock to an exclusive lock. We need
+ * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
+ * we will wake up the exclusive waiters when we drop the lock.
*/
- if (sx->sx_excl_wcnt > 0) {
- if (sx->sx_cnt == 0)
- cv_signal(&sx->sx_excl_cv);
- } else if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
-
- LOCK_LOG_LOCK("SUNLOCK", &sx->sx_object, 0, 0, file, line);
-
- mtx_unlock(sx->sx_lock);
+ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
+ success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
+ (uintptr_t)curthread | x);
+ LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
+ if (success)
+ WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+ file, line);
+ return (success);
}
+/*
+ * Downgrade an unrecursed exclusive lock into a single shared lock.
+ */
void
-_sx_xunlock(struct sx *sx, const char *file, int line)
+_sx_downgrade(struct sx *sx, const char *file, int line)
{
+ uintptr_t x;
- _sx_assert(sx, SX_XLOCKED, file, line);
- mtx_lock(sx->sx_lock);
- MPASS(sx->sx_cnt == -1);
+ KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
+ _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
+#ifndef INVARIANTS
+ if (sx_recursed(sx))
+ panic("downgrade of a recursed lock");
+#endif
- curthread->td_locks--;
- WITNESS_UNLOCK(&sx->sx_object, LOP_EXCLUSIVE, file, line);
+ WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
- /* Release. */
- sx->sx_cnt++;
- sx->sx_xholder = NULL;
+ /*
+ * Try to switch from an exclusive lock with no shared waiters
+ * to one sharer with no shared waiters. If there are
+ * exclusive waiters, we don't need to lock the sleep queue so
+ * long as we preserve the flag. We do one quick try and if
+ * that fails we grab the sleepq lock to keep the flags from
+ * changing and do it the slow way.
+ *
+ * We have to lock the sleep queue if there are shared waiters
+ * so we can wake them up.
+ */
+ x = sx->sx_lock;
+ if (!(x & SX_LOCK_SHARED_WAITERS) &&
+ atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
+ (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
+ LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
+ return;
+ }
/*
- * Wake up waiters if there are any. Give precedence to slock waiters.
+ * Lock the sleep queue so we can read the waiters bits
+ * without any races and wakeup any shared waiters.
*/
- if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
- else if (sx->sx_excl_wcnt > 0)
- cv_signal(&sx->sx_excl_cv);
+ sleepq_lock(&sx->lock_object);
- LOCK_LOG_LOCK("XUNLOCK", &sx->sx_object, 0, 0, file, line);
+ /*
+ * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
+ * shared lock. If there are any shared waiters, wake them up.
+ */
+ x = sx->sx_lock;
+ atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
+ (x & SX_LOCK_EXCLUSIVE_WAITERS));
+ if (x & SX_LOCK_SHARED_WAITERS)
+ sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1,
+ SQ_SHARED_QUEUE);
+ else
+ sleepq_release(&sx->lock_object);
- mtx_unlock(sx->sx_lock);
+ LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
}
+/*
+ * This function represents the so-called 'hard case' for sx_xlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
int
-_sx_try_upgrade(struct sx *sx, const char *file, int line)
+_sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
+ int line)
{
+ GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+ volatile struct thread *owner;
+#endif
+ /* uint64_t waittime = 0; */
+ uintptr_t x;
+ int /* contested = 0, */error = 0;
+
+ /* If we already hold an exclusive lock, then recurse. */
+ if (sx_xlocked(sx)) {
+ KASSERT((sx->lock_object.lo_flags & SX_RECURSE) != 0,
+ ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
+ sx->lock_object.lo_name, file, line));
+ sx->sx_recurse++;
+ atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
+ return (0);
+ }
- _sx_assert(sx, SX_SLOCKED, file, line);
- mtx_lock(sx->sx_lock);
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
+ sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
- if (sx->sx_cnt == 1) {
- sx->sx_cnt = -1;
- sx->sx_xholder = curthread;
+ while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+#ifdef ADAPTIVE_SX
+ /*
+ * If the lock is write locked and the owner is
+ * running on another CPU, spin until the owner stops
+ * running or the state of the lock changes.
+ */
+ x = sx->sx_lock;
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ x = SX_OWNER(x);
+ owner = (struct thread *)x;
+ if (TD_IS_RUNNING(owner)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK,
+ "%s: spinning on %p held by %p",
+ __func__, sx, owner);
+ GIANT_SAVE();
+ lock_profile_obtain_lock_failed(
+ &sx->lock_object, &contested, &waittime);
+ while (SX_OWNER(sx->sx_lock) == x &&
+ TD_IS_RUNNING(owner))
+ cpu_spinwait();
+ continue;
+ }
+ }
+#endif
- LOCK_LOG_TRY("XUPGRADE", &sx->sx_object, 0, 1, file, line);
- WITNESS_UPGRADE(&sx->sx_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
- file, line);
+ sleepq_lock(&sx->lock_object);
+ x = sx->sx_lock;
- mtx_unlock(sx->sx_lock);
- return (1);
- } else {
- LOCK_LOG_TRY("XUPGRADE", &sx->sx_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
+ /*
+ * If the lock was released while spinning on the
+ * sleep queue chain lock, try again.
+ */
+ if (x == SX_LOCK_UNLOCKED) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * The current lock owner might have started executing
+ * on another CPU (or the lock could have changed
+ * owners) while we were waiting on the sleep queue
+ * chain lock. If so, drop the sleep queue lock and try
+ * again.
+ */
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ owner = (struct thread *)SX_OWNER(x);
+ if (TD_IS_RUNNING(owner)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * If an exclusive lock was released with both shared
+ * and exclusive waiters and a shared waiter hasn't
+ * woken up and acquired the lock yet, sx_lock will be
+ * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
+ * If we see that value, try to acquire it once. Note
+ * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
+ * as there are other exclusive waiters still. If we
+ * fail, restart the loop.
+ */
+ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ if (atomic_cmpset_acq_ptr(&sx->sx_lock,
+ SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
+ tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ CTR2(KTR_LOCK, "%s: %p claimed by new writer",
+ __func__, sx);
+ break;
+ }
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+ /*
+ * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag. If we fail,
+ * then loop back and retry.
+ */
+ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+ if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+ x | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
+ __func__, sx);
+ }
+
+ /*
+ * Since we have been unable to acquire the exclusive
+ * lock and the exclusive waiters flag is set, we have
+ * to sleep.
+ */
+#if 0
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+ __func__, sx);
+#endif
+
+ GIANT_SAVE();
+ lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
+ &waittime);
+ sleepq_add_queue(&sx->lock_object, NULL, sx->lock_object.lo_name,
+ SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
+ SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
+ if (!(opts & SX_INTERRUPTIBLE))
+ sleepq_wait(&sx->lock_object);
+ else
+ error = sleepq_wait_sig(&sx->lock_object);
+
+ if (error) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK,
+ "%s: interruptible sleep by %p suspended by signal",
+ __func__, sx);
+ break;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+ __func__, sx);
}
+
+ GIANT_RESTORE();
+ if (!error)
+ lock_profile_obtain_lock_success(&sx->lock_object, contested,
+ waittime, file, line);
+ return (error);
}
+/*
+ * This function represents the so-called 'hard case' for sx_xunlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
void
-_sx_downgrade(struct sx *sx, const char *file, int line)
+_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
+{
+ uintptr_t x;
+ int queue;
+
+ MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
+
+ /* If the lock is recursed, then unrecurse one level. */
+ if (sx_xlocked(sx) && sx_recursed(sx)) {
+ if ((--sx->sx_recurse) == 0)
+ atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
+ return;
+ }
+ MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
+ SX_LOCK_EXCLUSIVE_WAITERS));
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
+
+ sleepq_lock(&sx->lock_object);
+ x = SX_LOCK_UNLOCKED;
+
+ /*
+ * The wake up algorithm here is quite simple and probably not
+ * ideal. It gives precedence to shared waiters if they are
+ * present. For this condition, we have to preserve the
+ * state of the exclusive waiters flag.
+ */
+ if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
+ queue = SQ_SHARED_QUEUE;
+ x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
+ } else
+ queue = SQ_EXCLUSIVE_QUEUE;
+
+ /* Wake up all the waiters for the specific queue. */
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
+ __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
+ "exclusive");
+ atomic_store_rel_ptr(&sx->sx_lock, x);
+ sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1, queue);
+}
+
+/*
+ * This function represents the so-called 'hard case' for sx_slock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
+int
+_sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
+{
+ GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+ volatile struct thread *owner;
+#endif
+#ifdef LOCK_PROFILING_SHARED
+ uint64_t waittime = 0;
+ int contested = 0;
+#endif
+ uintptr_t x;
+ int error = 0;
+
+ /*
+ * As with rwlocks, we don't make any attempt to try to block
+ * shared locks once there is an exclusive waiter.
+ */
+ for (;;) {
+ x = sx->sx_lock;
+
+ /*
+ * If no other thread has an exclusive lock then try to bump up
+ * the count of sharers. Since we have to preserve the state
+ * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
+ * shared lock loop back and retry.
+ */
+ if (x & SX_LOCK_SHARED) {
+ MPASS(!(x & SX_LOCK_SHARED_WAITERS));
+ if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+ x + SX_ONE_SHARER)) {
+#ifdef LOCK_PROFILING_SHARED
+ if (SX_SHARERS(x) == 0)
+ lock_profile_obtain_lock_success(
+ &sx->lock_object, contested,
+ waittime, file, line);
+#endif
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR4(KTR_LOCK,
+ "%s: %p succeed %p -> %p", __func__,
+ sx, (void *)x,
+ (void *)(x + SX_ONE_SHARER));
+ break;
+ }
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * If the owner is running on another CPU, spin until
+ * the owner stops running or the state of the lock
+ * changes.
+ */
+ else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
+ x = SX_OWNER(x);
+ owner = (struct thread *)x;
+ if (TD_IS_RUNNING(owner)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK,
+ "%s: spinning on %p held by %p",
+ __func__, sx, owner);
+ GIANT_SAVE();
+#ifdef LOCK_PROFILING_SHARED
+ lock_profile_obtain_lock_failed(
+ &sx->lock_object, &contested, &waittime);
+#endif
+ while (SX_OWNER(sx->sx_lock) == x &&
+ TD_IS_RUNNING(owner))
+ cpu_spinwait();
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * Some other thread already has an exclusive lock, so
+ * start the process of blocking.
+ */
+ sleepq_lock(&sx->lock_object);
+ x = sx->sx_lock;
+
+ /*
+ * The lock could have been released while we spun.
+ * In this case loop back and retry.
+ */
+ if (x & SX_LOCK_SHARED) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * If the owner is running on another CPU, spin until
+ * the owner stops running or the state of the lock
+ * changes.
+ */
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ owner = (struct thread *)SX_OWNER(x);
+ if (TD_IS_RUNNING(owner)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * Try to set the SX_LOCK_SHARED_WAITERS flag. If we
+ * fail to set it drop the sleep queue lock and loop
+ * back.
+ */
+ if (!(x & SX_LOCK_SHARED_WAITERS)) {
+ if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+ x | SX_LOCK_SHARED_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
+ __func__, sx);
+ }
+
+ /*
+ * Since we have been unable to acquire the shared lock,
+ * we have to sleep.
+ */
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+ __func__, sx);
+
+ GIANT_SAVE();
+#ifdef LOCK_PROFILING_SHARED
+ lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
+ &waittime);
+#endif
+ sleepq_add_queue(&sx->lock_object, NULL, sx->lock_object.lo_name,
+ SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
+ SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
+ if (!(opts & SX_INTERRUPTIBLE))
+ sleepq_wait(&sx->lock_object);
+ else
+ error = sleepq_wait_sig(&sx->lock_object);
+
+ if (error) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK,
+ "%s: interruptible sleep by %p suspended by signal",
+ __func__, sx);
+ break;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+ __func__, sx);
+ }
+
+ GIANT_RESTORE();
+ return (error);
+}
+
+/*
+ * This function represents the so-called 'hard case' for sx_sunlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
+void
+_sx_sunlock_hard(struct sx *sx, const char *file, int line)
{
+ uintptr_t x;
+
+ for (;;) {
+ x = sx->sx_lock;
+
+ /*
+ * We should never have shared waiters while at least one
+ * thread holds a shared lock.
+ */
+ KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
+ ("%s: waiting sharers", __func__));
- _sx_assert(sx, SX_XLOCKED, file, line);
- mtx_lock(sx->sx_lock);
- MPASS(sx->sx_cnt == -1);
+ /*
+ * See if there is more than one shared lock held. If
+ * so, just drop one and return.
+ */
+ if (SX_SHARERS(x) > 1) {
+ if (atomic_cmpset_ptr(&sx->sx_lock, x,
+ x - SX_ONE_SHARER)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR4(KTR_LOCK,
+ "%s: %p succeeded %p -> %p",
+ __func__, sx, (void *)x,
+ (void *)(x - SX_ONE_SHARER));
+ break;
+ }
+ continue;
+ }
- WITNESS_DOWNGRADE(&sx->sx_object, 0, file, line);
+ /*
+ * If there aren't any waiters for an exclusive lock,
+ * then try to drop it quickly.
+ */
+ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+ MPASS(x == SX_SHARERS_LOCK(1));
+ if (atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1),
+ SX_LOCK_UNLOCKED)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p last succeeded",
+ __func__, sx);
+ break;
+ }
+ continue;
+ }
- sx->sx_cnt = 1;
- sx->sx_xholder = NULL;
- if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
+ /*
+ * At this point, there should just be one sharer with
+ * exclusive waiters.
+ */
+ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
- LOCK_LOG_LOCK("XDOWNGRADE", &sx->sx_object, 0, 0, file, line);
+ sleepq_lock(&sx->lock_object);
- mtx_unlock(sx->sx_lock);
+ /*
+ * The wakeup semantics here are quite simple:
+ * just wake up all the exclusive waiters.
+ * Note that the state of the lock could have changed, so if
+ * the cmpset fails, loop back and retry.
+ */
+ if (!atomic_cmpset_ptr(&sx->sx_lock,
+ SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
+ SX_LOCK_UNLOCKED)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p waking up all threads on"
+ " exclusive queue", __func__, sx);
+ sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1,
+ SQ_EXCLUSIVE_QUEUE);
+ break;
+ }
}
#ifdef INVARIANT_SUPPORT
@@ -327,44 +861,76 @@
void
_sx_assert(struct sx *sx, int what, const char *file, int line)
{
+#ifndef WITNESS
+ int slocked = 0;
+#endif
if (panicstr != NULL)
return;
switch (what) {
- case SX_LOCKED:
- case SX_SLOCKED:
+ case SA_SLOCKED:
+ case SA_SLOCKED | SA_NOTRECURSED:
+ case SA_SLOCKED | SA_RECURSED:
+#ifndef WITNESS
+ slocked = 1;
+ /* FALLTHROUGH */
+#endif
+ case SA_LOCKED:
+ case SA_LOCKED | SA_NOTRECURSED:
+ case SA_LOCKED | SA_RECURSED:
#ifdef WITNESS
- witness_assert(&sx->sx_object, what, file, line);
+ witness_assert(&sx->lock_object, what, file, line);
#else
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt <= 0 &&
- (what == SX_SLOCKED || sx->sx_xholder != curthread))
+ /*
+ * If some other thread has an exclusive lock or we
+ * have one and are asserting a shared lock, fail.
+ * Also, if no one has a lock at all, fail.
+ */
+ if (sx->sx_lock == SX_LOCK_UNLOCKED ||
+ (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
+ sx_xholder(sx) != curthread)))
panic("Lock %s not %slocked @ %s:%d\n",
- sx->sx_object.lo_name, (what == SX_SLOCKED) ?
- "share " : "", file, line);
- mtx_unlock(sx->sx_lock);
+ sx->lock_object.lo_name, slocked ? "share " : "",
+ file, line);
+
+ if (!(sx->sx_lock & SX_LOCK_SHARED)) {
+ if (sx_recursed(sx)) {
+ if (what & SA_NOTRECURSED)
+ panic("Lock %s recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file,
+ line);
+ } else if (what & SA_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
+ }
#endif
break;
- case SX_XLOCKED:
- mtx_lock(sx->sx_lock);
- if (sx->sx_xholder != curthread)
+ case SA_XLOCKED:
+ case SA_XLOCKED | SA_NOTRECURSED:
+ case SA_XLOCKED | SA_RECURSED:
+ if (sx_xholder(sx) != curthread)
panic("Lock %s not exclusively locked @ %s:%d\n",
- sx->sx_object.lo_name, file, line);
- mtx_unlock(sx->sx_lock);
+ sx->lock_object.lo_name, file, line);
+ if (sx_recursed(sx)) {
+ if (what & SA_NOTRECURSED)
+ panic("Lock %s recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
+ } else if (what & SA_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
break;
- case SX_UNLOCKED:
+ case SA_UNLOCKED:
#ifdef WITNESS
- witness_assert(&sx->sx_object, what, file, line);
+ witness_assert(&sx->lock_object, what, file, line);
#else
/*
- * We are able to check only exclusive lock here,
- * we cannot assert that *this* thread owns slock.
+ * If we hold an exclusve lock fail. We can't
+ * reliably check to see if we hold a shared lock or
+ * not.
*/
- mtx_lock(sx->sx_lock);
- if (sx->sx_xholder == curthread)
+ if (sx_xholder(sx) == curthread)
panic("Lock %s exclusively locked @ %s:%d\n",
- sx->sx_object.lo_name, file, line);
- mtx_unlock(sx->sx_lock);
+ sx->lock_object.lo_name, file, line);
#endif
break;
default:
@@ -375,7 +941,7 @@
#endif /* INVARIANT_SUPPORT */
#ifdef DDB
-void
+static void
db_show_sx(struct lock_object *lock)
{
struct thread *td;
@@ -384,16 +950,36 @@
sx = (struct sx *)lock;
db_printf(" state: ");
- if (sx->sx_cnt < 0) {
- td = sx->sx_xholder;
+ if (sx->sx_lock == SX_LOCK_UNLOCKED)
+ db_printf("UNLOCKED\n");
+ else if (sx->sx_lock == SX_LOCK_DESTROYED) {
+ db_printf("DESTROYED\n");
+ return;
+ } else if (sx->sx_lock & SX_LOCK_SHARED)
+ db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
+ else {
+ td = sx_xholder(sx);
db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
- } else if (sx->sx_cnt > 0)
- db_printf("SLOCK: %d locks\n", sx->sx_cnt);
- else
- db_printf("UNLOCKED\n");
- db_printf(" waiters: %d shared, %d exclusive\n", sx->sx_shrd_wcnt,
- sx->sx_excl_wcnt);
+ if (sx_recursed(sx))
+ db_printf(" recursed: %d\n", sx->sx_recurse);
+ }
+
+ db_printf(" waiters: ");
+ switch(sx->sx_lock &
+ (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ case SX_LOCK_SHARED_WAITERS:
+ db_printf("shared\n");
+ break;
+ case SX_LOCK_EXCLUSIVE_WAITERS:
+ db_printf("exclusive\n");
+ break;
+ case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
+ db_printf("exclusive and shared\n");
+ break;
+ default:
+ db_printf("none\n");
+ }
}
/*
@@ -405,47 +991,26 @@
sx_chain(struct thread *td, struct thread **ownerp)
{
struct sx *sx;
- struct cv *cv;
/*
- * First, see if it looks like td is blocked on a condition
- * variable.
+ * Check to see if this thread is blocked on an sx lock.
+ * First, we check the lock class. If that is ok, then we
+ * compare the lock name against the wait message.
*/
- cv = td->td_wchan;
- if (cv->cv_description != td->td_wmesg)
+#define LOCK_CLASS(lo) (lo)->lo_class
+ sx = td->td_wchan;
+ if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
+ sx->lock_object.lo_name != td->td_wmesg)
return (0);
- /*
- * Ok, see if it looks like td is blocked on the exclusive
- * condition variable.
- */
- sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_excl_cv));
- if (LOCK_CLASS(&sx->sx_object) == &lock_class_sx &&
- sx->sx_excl_wcnt > 0)
- goto ok;
-
- /*
- * Second, see if it looks like td is blocked on the shared
- * condition variable.
- */
- sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_shrd_cv));
- if (LOCK_CLASS(&sx->sx_object) == &lock_class_sx &&
- sx->sx_shrd_wcnt > 0)
- goto ok;
-
- /* Doesn't seem to be an sx lock. */
- return (0);
-
-ok:
/* We think we have an sx lock, so output some details. */
db_printf("blocked on sx \"%s\" ", td->td_wmesg);
- if (sx->sx_cnt >= 0) {
- db_printf("SLOCK (count %d)\n", sx->sx_cnt);
- *ownerp = NULL;
- } else {
+ *ownerp = sx_xholder(sx);
+ if (sx->sx_lock & SX_LOCK_SHARED)
+ db_printf("SLOCK (count %ju)\n",
+ (uintmax_t)SX_SHARERS(sx->sx_lock));
+ else
db_printf("XLOCK\n");
- *ownerp = sx->sx_xholder;
- }
return (1);
}
#endif
Index: kern/subr_sleepqueue.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/subr_sleepqueue.c,v
retrieving revision 1.18.2.4
diff -u -r1.18.2.4 subr_sleepqueue.c
--- kern/subr_sleepqueue.c 17 Aug 2006 19:53:06 -0000 1.18.2.4
+++ kern/subr_sleepqueue.c 31 Aug 2007 00:39:59 -0000
@@ -82,6 +82,12 @@
#include <ddb/ddb.h>
#endif
+#include <vm/uma.h>
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
/*
* Constants for the hash table of sleep queue chains. These constants are
* the same ones that 4BSD (and possibly earlier versions of BSD) used.
@@ -94,7 +100,7 @@
#define SC_SHIFT 8
#define SC_HASH(wc) (((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
#define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)]
-
+#define NR_SLEEPQS 2
/*
* There two different lists of sleep queues. Both lists are connected
* via the sq_hash entries. The first list is the sleep queue chain list
@@ -114,13 +120,13 @@
* c - sleep queue chain lock
*/
struct sleepqueue {
- TAILQ_HEAD(, thread) sq_blocked; /* (c) Blocked threads. */
+ TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
void *sq_wchan; /* (c) Wait channel. */
#ifdef INVARIANTS
int sq_type; /* (c) Queue type. */
- struct mtx *sq_lock; /* (c) Associated lock. */
+ struct mtx *sq_lock; /* (c) Associated lock. */
#endif
};
@@ -142,16 +148,22 @@
0, "maxmimum depth achieved of a single chain");
#endif
static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
-
-static MALLOC_DEFINE(M_SLEEPQUEUE, "sleep queues", "sleep queues");
+static uma_zone_t sleepq_zone;
/*
* Prototypes for non-exported routines.
*/
+static int sleepq_catch_signals(void *wchan);
+static int sleepq_check_signals(void);
static int sleepq_check_timeout(void);
+#ifdef INVARIANTS
+static void sleepq_dtor(void *mem, int size, void *arg);
+#endif
+static int sleepq_init(void *mem, int size, int flags);
+static void sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
+ int pri);
static void sleepq_switch(void *wchan);
static void sleepq_timeout(void *arg);
-static void sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri);
/*
* Early initialization of sleep queues that is called from the sleepinit()
@@ -182,21 +194,24 @@
NULL);
#endif
}
+ sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
+#ifdef INVARIANTS
+ NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
+#else
+ NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
+#endif
+
thread0.td_sleepqueue = sleepq_alloc();
}
/*
- * Malloc and initialize a new sleep queue for a new thread.
+ * Get a sleep queue for a new thread.
*/
struct sleepqueue *
sleepq_alloc(void)
{
- struct sleepqueue *sq;
- sq = malloc(sizeof(struct sleepqueue), M_SLEEPQUEUE, M_WAITOK | M_ZERO);
- TAILQ_INIT(&sq->sq_blocked);
- LIST_INIT(&sq->sq_free);
- return (sq);
+ return (uma_zalloc(sleepq_zone, M_WAITOK));
}
/*
@@ -206,9 +221,7 @@
sleepq_free(struct sleepqueue *sq)
{
- MPASS(sq != NULL);
- MPASS(TAILQ_EMPTY(&sq->sq_blocked));
- free(sq, M_SLEEPQUEUE);
+ uma_zfree(sleepq_zone, sq);
}
/*
@@ -262,7 +275,8 @@
* woken up.
*/
void
-sleepq_add(void *wchan, struct mtx *lock, const char *wmesg, int flags)
+sleepq_add_queue(void *wchan, struct mtx *lock, const char *wmesg, int flags,
+ int queue)
{
struct sleepqueue_chain *sc;
struct sleepqueue *sq;
@@ -273,10 +287,11 @@
mtx_assert(&sc->sc_lock, MA_OWNED);
MPASS(td->td_sleepqueue != NULL);
MPASS(wchan != NULL);
+ MPASS((queue >= 0) && (queue < NR_SLEEPQS));
/* If this thread is not allowed to sleep, die a horrible death. */
KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
- ("trying to sleep while sleeping is prohibited"));
+ ("Trying sleep, but thread marked as sleeping prohibited"));
/* Look up the sleep queue associated with the wait channel 'wchan'. */
sq = sleepq_lookup(wchan);
@@ -287,6 +302,19 @@
* into the sleep queue already in use by this wait channel.
*/
if (sq == NULL) {
+#ifdef INVARIANTS
+ int i;
+
+ sq = td->td_sleepqueue;
+ for (i = 0; i < NR_SLEEPQS; i++)
+ KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
+ ("thread's sleep queue %d is not empty", i));
+ KASSERT(LIST_EMPTY(&sq->sq_free),
+ ("thread's sleep queue has a non-empty free list"));
+ KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
+ sq->sq_lock = lock;
+ sq->sq_type = flags & SLEEPQ_TYPE;
+#endif
#ifdef SLEEPQUEUE_PROFILING
sc->sc_depth++;
if (sc->sc_depth > sc->sc_max_depth) {
@@ -297,25 +325,17 @@
#endif
sq = td->td_sleepqueue;
LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
- KASSERT(TAILQ_EMPTY(&sq->sq_blocked),
- ("thread's sleep queue has a non-empty queue"));
- KASSERT(LIST_EMPTY(&sq->sq_free),
- ("thread's sleep queue has a non-empty free list"));
- KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
sq->sq_wchan = wchan;
-#ifdef INVARIANTS
- sq->sq_lock = lock;
- sq->sq_type = flags & SLEEPQ_TYPE;
-#endif
} else {
MPASS(wchan == sq->sq_wchan);
MPASS(lock == sq->sq_lock);
MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
}
- TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
+ TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
td->td_sleepqueue = NULL;
mtx_lock_spin(&sched_lock);
+ td->td_sqqueue = queue;
td->td_wchan = wchan;
td->td_wmesg = wmesg;
if (flags & SLEEPQ_INTERRUPTIBLE) {
@@ -606,12 +626,13 @@
MPASS(td != NULL);
MPASS(sq->sq_wchan != NULL);
MPASS(td->td_wchan == sq->sq_wchan);
+ MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
sc = SC_LOOKUP(sq->sq_wchan);
mtx_assert(&sc->sc_lock, MA_OWNED);
mtx_assert(&sched_lock, MA_OWNED);
/* Remove the thread from the queue. */
- TAILQ_REMOVE(&sq->sq_blocked, td, td_slpq);
+ TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
/*
* Get a sleep queue for this thread. If this is the last waiter,
@@ -652,17 +673,51 @@
setrunnable(td);
}
+#ifdef INVARIANTS
+/*
+ * UMA zone item deallocator.
+ */
+static void
+sleepq_dtor(void *mem, int size, void *arg)
+{
+ struct sleepqueue *sq;
+ int i;
+
+ sq = mem;
+ for (i = 0; i < NR_SLEEPQS; i++)
+ MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
+}
+#endif
+
+/*
+ * UMA zone item initializer.
+ */
+static int
+sleepq_init(void *mem, int size, int flags)
+{
+ struct sleepqueue *sq;
+ int i;
+
+ bzero(mem, size);
+ sq = mem;
+ for (i = 0; i < NR_SLEEPQS; i++)
+ TAILQ_INIT(&sq->sq_blocked[i]);
+ LIST_INIT(&sq->sq_free);
+ return (0);
+}
+
/*
* Find the highest priority thread sleeping on a wait channel and resume it.
*/
void
-sleepq_signal(void *wchan, int flags, int pri)
+sleepq_signal_queue(void *wchan, int flags, int pri, int queue)
{
struct sleepqueue *sq;
struct thread *td, *besttd;
CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ MPASS((queue >= 0) && (queue < NR_SLEEPQS));
sq = sleepq_lookup(wchan);
if (sq == NULL) {
sleepq_release(wchan);
@@ -678,7 +733,7 @@
* the tail of sleep queues.
*/
besttd = NULL;
- TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) {
+ TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
if (besttd == NULL || td->td_priority < besttd->td_priority)
besttd = td;
}
@@ -693,12 +748,13 @@
* Resume all threads sleeping on a specified wait channel.
*/
void
-sleepq_broadcast(void *wchan, int flags, int pri)
+sleepq_broadcast_queue(void *wchan, int flags, int pri, int queue)
{
struct sleepqueue *sq;
CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ MPASS((queue >= 0) && (queue < NR_SLEEPQS));
sq = sleepq_lookup(wchan);
if (sq == NULL) {
sleepq_release(wchan);
@@ -709,8 +765,9 @@
/* Resume all blocked threads on the sleep queue. */
mtx_lock_spin(&sched_lock);
- while (!TAILQ_EMPTY(&sq->sq_blocked))
- sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked), pri);
+ while (!TAILQ_EMPTY(&sq->sq_blocked[queue]))
+ sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked[queue]),
+ pri);
mtx_unlock_spin(&sched_lock);
sleepq_release(wchan);
}
@@ -859,6 +916,76 @@
struct sleepqueue_chain *sc;
struct sleepqueue *sq;
#ifdef INVARIANTS
+ struct mtx *lock;
+#endif
+ struct thread *td;
+ void *wchan;
+ int i;
+
+ if (!have_addr)
+ return;
+
+ /*
+ * First, see if there is an active sleep queue for the wait channel
+ * indicated by the address.
+ */
+ wchan = (void *)addr;
+ sc = SC_LOOKUP(wchan);
+ LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
+ if (sq->sq_wchan == wchan)
+ goto found;
+
+ /*
+ * Second, see if there is an active sleep queue at the address
+ * indicated.
+ */
+ for (i = 0; i < SC_TABLESIZE; i++)
+ LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
+ if (sq == (struct sleepqueue *)addr)
+ goto found;
+ }
+
+ db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
+ return;
+found:
+ db_printf("Wait channel: %p\n", sq->sq_wchan);
+#ifdef INVARIANTS
+ db_printf("Queue type: %d\n", sq->sq_type);
+ if (sq->sq_lock) {
+ lock = sq->sq_lock;
+#define LOCK_CLASS(lock) (lock)->mtx_object.lo_class
+ db_printf("Associated Interlock: %p - (%s) %s\n", lock,
+ LOCK_CLASS(lock)->lc_name, lock->mtx_object.lo_name);
+ }
+#endif
+ db_printf("Blocked threads:\n");
+ for (i = 0; i < NR_SLEEPQS; i++) {
+ db_printf("\nQueue[%d]:\n", i);
+ if (TAILQ_EMPTY(&sq->sq_blocked[i]))
+ db_printf("\tempty\n");
+ else
+ TAILQ_FOREACH(td, &sq->sq_blocked[i],
+ td_slpq) {
+ db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
+ td->td_tid, td->td_proc->p_pid,
+ /*td->td_name[i] != '\0' ? td->td_name :*/
+ td->td_proc->p_comm);
+ }
+ }
+}
+
+#if 0
+/* Alias 'show sleepqueue' to 'show sleepq'. */
+DB_SET(sleepqueue, db_show_sleepqueue, db_show_cmd_set, 0, NULL);
+#endif
+#endif
+
+#ifdef DDB
+DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
+{
+ struct sleepqueue_chain *sc;
+ struct sleepqueue *sq;
+#ifdef INVARIANTS
struct lock_object *lock;
#endif
struct thread *td;
Index: kern/subr_turnstile.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/subr_turnstile.c,v
retrieving revision 1.152.2.5
diff -u -r1.152.2.5 subr_turnstile.c
--- kern/subr_turnstile.c 23 Jan 2007 22:16:33 -0000 1.152.2.5
+++ kern/subr_turnstile.c 31 Aug 2007 02:15:23 -0000
@@ -114,7 +114,8 @@
* q - td_contested lock
*/
struct turnstile {
- TAILQ_HEAD(, thread) ts_blocked; /* (c + q) Blocked threads. */
+ /* struct mtx ts_lock; */ /* Spin lock for self. */
+ TAILQ_HEAD(, thread) ts_blocked[2]; /* (c + q) Blocked threads. */
TAILQ_HEAD(, thread) ts_pending; /* (c) Pending threads. */
LIST_ENTRY(turnstile) ts_hash; /* (c) Chain and free list. */
LIST_ENTRY(turnstile) ts_link; /* (q) Contested locks. */
@@ -143,6 +144,12 @@
static struct mtx td_contested_lock;
static struct turnstile_chain turnstile_chains[TC_TABLESIZE];
+/* XXX: stats, remove me */
+static u_int turnstile_nullowners;
+SYSCTL_UINT(_debug, OID_AUTO, turnstile_nullowners, CTLFLAG_RD,
+ &turnstile_nullowners, 0, "called with null owner on a shared queue");
+
+
static MALLOC_DEFINE(M_TURNSTILE, "turnstiles", "turnstiles");
/*
@@ -267,6 +274,7 @@
{
struct turnstile_chain *tc;
struct thread *td1, *td2;
+ int queue;
mtx_assert(&sched_lock, MA_OWNED);
MPASS(TD_ON_LOCK(td));
@@ -300,16 +308,18 @@
* Remove thread from blocked chain and determine where
* it should be moved to.
*/
+ queue = td->td_tsqueue;
+ MPASS(queue == TS_EXCLUSIVE_QUEUE || queue == TS_SHARED_QUEUE);
mtx_lock_spin(&td_contested_lock);
- TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
- TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) {
+ TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
+ TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq) {
MPASS(td1->td_proc->p_magic == P_MAGIC);
if (td1->td_priority > td->td_priority)
break;
}
if (td1 == NULL)
- TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
+ TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
else
TAILQ_INSERT_BEFORE(td1, td, td_lockq);
mtx_unlock_spin(&td_contested_lock);
@@ -412,7 +422,10 @@
* Note that we currently don't try to revoke lent priorities
* when our priority goes up.
*/
- if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) {
+ MPASS(td->td_tsqueue == TS_EXCLUSIVE_QUEUE ||
+ td->td_tsqueue == TS_SHARED_QUEUE);
+ if (td == TAILQ_FIRST(&ts->ts_blocked[td->td_tsqueue]) &&
+ td->td_priority < oldpri) {
mtx_unlock_spin(&tc->tc_lock);
critical_enter();
propagate_priority(td);
@@ -429,8 +442,11 @@
{
mtx_assert(&td_contested_lock, MA_OWNED);
- MPASS(owner->td_proc->p_magic == P_MAGIC);
MPASS(ts->ts_owner == NULL);
+ if (owner == NULL)
+ return;
+
+ MPASS(owner->td_proc->p_magic == P_MAGIC);
ts->ts_owner = owner;
LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link);
}
@@ -444,7 +460,8 @@
struct turnstile *ts;
ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO);
- TAILQ_INIT(&ts->ts_blocked);
+ TAILQ_INIT(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
+ TAILQ_INIT(&ts->ts_blocked[TS_SHARED_QUEUE]);
TAILQ_INIT(&ts->ts_pending);
LIST_INIT(&ts->ts_free);
return (ts);
@@ -458,7 +475,8 @@
{
MPASS(ts != NULL);
- MPASS(TAILQ_EMPTY(&ts->ts_blocked));
+ MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]));
+ MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]));
MPASS(TAILQ_EMPTY(&ts->ts_pending));
free(ts, M_TURNSTILE);
}
@@ -507,6 +525,22 @@
}
/*
+ * Return a pointer to the thread waiting on this turnstile with the
+ * most important priority or NULL if the turnstile has no waiters.
+ */
+static struct thread *
+turnstile_first_waiter(struct turnstile *ts)
+{
+ struct thread *std, *xtd;
+
+ std = TAILQ_FIRST(&ts->ts_blocked[TS_SHARED_QUEUE]);
+ xtd = TAILQ_FIRST(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
+ if (xtd == NULL || (std != NULL && std->td_priority < xtd->td_priority))
+ return (std);
+ return (xtd);
+}
+
+/*
* Take ownership of a turnstile and adjust the priority of the new
* owner appropriately.
*/
@@ -527,7 +561,7 @@
turnstile_setowner(ts, owner);
mtx_unlock_spin(&td_contested_lock);
- td = TAILQ_FIRST(&ts->ts_blocked);
+ td = turnstile_first_waiter(ts);
MPASS(td != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
mtx_unlock_spin(&tc->tc_lock);
@@ -548,7 +582,7 @@
* turnstile chain locked and will return with it unlocked.
*/
void
-turnstile_wait(struct lock_object *lock, struct thread *owner)
+turnstile_wait_queue(struct lock_object *lock, struct thread *owner, int queue)
{
struct turnstile_chain *tc;
struct turnstile *ts;
@@ -558,8 +592,13 @@
tc = TC_LOOKUP(lock);
mtx_assert(&tc->tc_lock, MA_OWNED);
MPASS(td->td_turnstile != NULL);
- MPASS(owner != NULL);
- MPASS(owner->td_proc->p_magic == P_MAGIC);
+ if (owner)
+ MPASS(owner->td_proc->p_magic == P_MAGIC);
+ /* XXX: stats, remove me */
+ if (!owner && queue == TS_SHARED_QUEUE) {
+ turnstile_nullowners++;
+ }
+ MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
/* Look up the turnstile associated with the lock 'lock'. */
ts = turnstile_lookup(lock);
@@ -582,25 +621,27 @@
LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash);
KASSERT(TAILQ_EMPTY(&ts->ts_pending),
("thread's turnstile has pending threads"));
- KASSERT(TAILQ_EMPTY(&ts->ts_blocked),
- ("thread's turnstile has a non-empty queue"));
+ KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]),
+ ("thread's turnstile has exclusive waiters"));
+ KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]),
+ ("thread's turnstile has shared waiters"));
KASSERT(LIST_EMPTY(&ts->ts_free),
("thread's turnstile has a non-empty free list"));
KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer"));
ts->ts_lockobj = lock;
mtx_lock_spin(&td_contested_lock);
- TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
+ TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
turnstile_setowner(ts, owner);
mtx_unlock_spin(&td_contested_lock);
} else {
- TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq)
+ TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq)
if (td1->td_priority > td->td_priority)
break;
mtx_lock_spin(&td_contested_lock);
if (td1 != NULL)
TAILQ_INSERT_BEFORE(td1, td, td_lockq);
else
- TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
+ TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
mtx_unlock_spin(&td_contested_lock);
MPASS(td->td_turnstile != NULL);
LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
@@ -664,7 +705,7 @@
* pending list. This must be called with the turnstile chain locked.
*/
int
-turnstile_signal(struct turnstile *ts)
+turnstile_signal_queue(struct turnstile *ts, int queue)
{
struct turnstile_chain *tc;
struct thread *td;
@@ -675,15 +716,18 @@
MPASS(ts->ts_owner == curthread);
tc = TC_LOOKUP(ts->ts_lockobj);
mtx_assert(&tc->tc_lock, MA_OWNED);
+ MPASS(ts->ts_owner == curthread ||
+ (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
+ MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
/*
* Pick the highest priority thread blocked on this lock and
* move it to the pending list.
*/
- td = TAILQ_FIRST(&ts->ts_blocked);
+ td = TAILQ_FIRST(&ts->ts_blocked[queue]);
MPASS(td->td_proc->p_magic == P_MAGIC);
mtx_lock_spin(&td_contested_lock);
- TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
+ TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
mtx_unlock_spin(&td_contested_lock);
TAILQ_INSERT_TAIL(&ts->ts_pending, td, td_lockq);
@@ -692,7 +736,8 @@
* give it to the about-to-be-woken thread. Otherwise take a
* turnstile from the free list and give it to the thread.
*/
- empty = TAILQ_EMPTY(&ts->ts_blocked);
+ empty = TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
+ TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]);
if (empty) {
MPASS(LIST_EMPTY(&ts->ts_free));
#ifdef TURNSTILE_PROFILING
@@ -712,7 +757,7 @@
* the turnstile chain locked.
*/
void
-turnstile_broadcast(struct turnstile *ts)
+turnstile_broadcast_queue(struct turnstile *ts, int queue)
{
struct turnstile_chain *tc;
struct turnstile *ts1;
@@ -720,15 +765,17 @@
MPASS(ts != NULL);
MPASS(curthread->td_proc->p_magic == P_MAGIC);
- MPASS(ts->ts_owner == curthread);
+ MPASS(ts->ts_owner == curthread ||
+ (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
tc = TC_LOOKUP(ts->ts_lockobj);
mtx_assert(&tc->tc_lock, MA_OWNED);
+ MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
/*
* Transfer the blocked list to the pending list.
*/
mtx_lock_spin(&td_contested_lock);
- TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked, td_lockq);
+ TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked[queue], td_lockq);
mtx_unlock_spin(&td_contested_lock);
/*
@@ -756,15 +803,17 @@
* chain locked.
*/
void
-turnstile_unpend(struct turnstile *ts)
+turnstile_unpend_queue(struct turnstile *ts, int owner_type)
{
TAILQ_HEAD( ,thread) pending_threads;
struct turnstile_chain *tc;
+ struct turnstile *nts;
struct thread *td;
u_char cp, pri;
MPASS(ts != NULL);
- MPASS(ts->ts_owner == curthread);
+ MPASS(ts->ts_owner == curthread ||
+ (owner_type == TS_SHARED_LOCK && ts->ts_owner == NULL));
tc = TC_LOOKUP(ts->ts_lockobj);
mtx_assert(&tc->tc_lock, MA_OWNED);
MPASS(!TAILQ_EMPTY(&ts->ts_pending));
@@ -776,7 +825,8 @@
TAILQ_INIT(&pending_threads);
TAILQ_CONCAT(&pending_threads, &ts->ts_pending, td_lockq);
#ifdef INVARIANTS
- if (TAILQ_EMPTY(&ts->ts_blocked))
+ if (TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
+ TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]))
ts->ts_lockobj = NULL;
#endif
@@ -802,8 +852,8 @@
pri = PRI_MAX;
mtx_lock_spin(&sched_lock);
mtx_lock_spin(&td_contested_lock);
- LIST_FOREACH(ts, &td->td_contested, ts_link) {
- cp = TAILQ_FIRST(&ts->ts_blocked)->td_priority;
+ LIST_FOREACH(nts, &td->td_contested, ts_link) {
+ cp = turnstile_first_waiter(nts)->td_priority;
if (cp < pri)
pri = cp;
}
@@ -837,19 +887,69 @@
}
/*
+ * Give up ownership of a turnstile. This must be called with the
+ * turnstile chain locked.
+ */
+void
+turnstile_disown(struct turnstile *ts)
+{
+ struct turnstile_chain *tc;
+ struct thread *td;
+ u_char cp, pri;
+
+ MPASS(ts != NULL);
+ MPASS(ts->ts_owner == curthread);
+ tc = TC_LOOKUP(ts->ts_lockobj);
+ mtx_assert(&tc->tc_lock, MA_OWNED);
+ MPASS(TAILQ_EMPTY(&ts->ts_pending));
+ MPASS(!TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) ||
+ !TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]));
+
+ /*
+ * Remove the turnstile from this thread's list of contested locks
+ * since this thread doesn't own it anymore. New threads will
+ * not be blocking on the turnstile until it is claimed by a new
+ * owner.
+ */
+ mtx_lock_spin(&td_contested_lock);
+ ts->ts_owner = NULL;
+ LIST_REMOVE(ts, ts_link);
+ mtx_unlock_spin(&td_contested_lock);
+
+ /*
+ * Adjust the priority of curthread based on other contested
+ * locks it owns. Don't lower the priority below the base
+ * priority however.
+ */
+ td = curthread;
+ pri = PRI_MAX;
+ mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&td_contested_lock);
+ LIST_FOREACH(ts, &td->td_contested, ts_link) {
+ cp = turnstile_first_waiter(ts)->td_priority;
+ if (cp < pri)
+ pri = cp;
+ }
+ mtx_unlock_spin(&td_contested_lock);
+ sched_unlend_prio(td, pri);
+ mtx_unlock_spin(&sched_lock);
+}
+
+/*
* Return the first thread in a turnstile.
*/
struct thread *
-turnstile_head(struct turnstile *ts)
+turnstile_head_queue(struct turnstile *ts, int queue)
{
#ifdef INVARIANTS
struct turnstile_chain *tc;
MPASS(ts != NULL);
tc = TC_LOOKUP(ts->ts_lockobj);
+ MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
mtx_assert(&tc->tc_lock, MA_OWNED);
#endif
- return (TAILQ_FIRST(&ts->ts_blocked));
+ return (TAILQ_FIRST(&ts->ts_blocked[queue]));
}
#ifdef DDB
@@ -1146,7 +1246,7 @@
* Returns true if a turnstile is empty.
*/
int
-turnstile_empty(struct turnstile *ts)
+turnstile_empty_queue(struct turnstile *ts, int queue)
{
#ifdef INVARIANTS
struct turnstile_chain *tc;
@@ -1154,6 +1254,7 @@
MPASS(ts != NULL);
tc = TC_LOOKUP(ts->ts_lockobj);
mtx_assert(&tc->tc_lock, MA_OWNED);
+ MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
#endif
- return (TAILQ_EMPTY(&ts->ts_blocked));
+ return (TAILQ_EMPTY(&ts->ts_blocked[queue]));
}
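
For reference, this is roughly how an unlock slow path is expected to drive the
two wait queues through the new *_queue interface. It is only a sketch: the
helper name is made up, the lock-word bookkeeping and most of the turnstile
chain locking done by the real kern_rwlock.c in this patch are elided, and the
caller is assumed to hold the lock exclusively and to have already locked the
turnstile chain for 'lo'.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/turnstile.h>

/*
 * Sketch only: wake one class of waiters when an exclusively held
 * lock with queued waiters is released.  Writers are preferred so
 * that a steady stream of readers cannot starve them.
 */
static void
wakeup_waiters(struct lock_object *lo, int have_exclusive_waiters)
{
	struct turnstile *ts;
	int queue;

	/* The turnstile chain for 'lo' is assumed to be locked. */
	ts = turnstile_lookup(lo);
	MPASS(ts != NULL);
	queue = have_exclusive_waiters ? TS_EXCLUSIVE_QUEUE : TS_SHARED_QUEUE;
	turnstile_broadcast_queue(ts, queue);
	/* The releasing thread owned the lock exclusively. */
	turnstile_unpend_queue(ts, TS_EXCLUSIVE_LOCK);
}
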
Index: netinet6/in6_src.c
===================================================================
RCS file: /cvs/ncvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.30.2.4
diff -u -r1.30.2.4 in6_src.c
--- netinet6/in6_src.c 25 Dec 2005 14:03:37 -0000 1.30.2.4
+++ netinet6/in6_src.c 31 Aug 2007 01:23:38 -0000
@@ -76,6 +76,7 @@
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/sx.h>
#include <net/if.h>
cvs diff: sys/_rwlock.h is a new entry, no comparison available
cvs diff: sys/_sx.h is a new entry, no comparison available
cvs diff: sys/lock_profile.h is a new entry, no comparison available
Index: sys/proc.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.432.2.10
diff -u -r1.432.2.10 proc.h
--- sys/proc.h 11 Jun 2007 11:27:04 -0000 1.432.2.10
+++ sys/proc.h 31 Aug 2007 00:51:44 -0000
@@ -261,12 +261,14 @@
int td_inhibitors; /* (j) Why can not run. */
int td_pflags; /* (k) Private thread (TDP_*) flags. */
int td_dupfd; /* (k) Ret value from fdopen. XXX */
+ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */
void *td_wchan; /* (j) Sleep address. */
const char *td_wmesg; /* (j) Reason for sleep. */
u_char td_lastcpu; /* (j) Last cpu we were on. */
u_char td_oncpu; /* (j) Which cpu we are on. */
volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */
short td_locks; /* (k) Count of non-spin locks. */
+ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */
struct turnstile *td_blocked; /* (j) Lock process is blocked on. */
void *td_ithd; /* (n) Unused, kept to preserve ABI. */
const char *td_lockname; /* (j) Name of lock blocked on. */
@@ -324,7 +326,19 @@
struct mdthread td_md; /* (k) Any machine-dependent fields. */
struct td_sched *td_sched; /* (*) Scheduler-specific data. */
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
-};
+} __attribute__ ((aligned (16))); /* see comment below */
+
+/*
+ * Comment about aligned attribute on struct thread.
+ *
+ * We must force at least 16 byte alignment for "struct thread"
+ * because the rwlocks and sxlocks expect to use the bottom bits
+ * of the pointer for bookkeeping information.
+ *
+ * This causes problems for the thread0 data structure because it
+ * may not be properly aligned otherwise.
+ */
+
/*
* Flags kept in td_flags:
cvs diff: sys/rwlock.h is a new entry, no comparison available
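
The alignment requirement exists because an exclusively held sx (or rw) lock
stores the owner's thread pointer and its flag bits in a single word, so the
low four bits of every struct thread address must be zero. A minimal sketch
using the SX_* macros defined in sys/sx.h below; the helper names are
illustrative only, not part of the patch:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/sx.h>

/* Sketch: pack an exclusive owner and flag bits into one lock word. */
static uintptr_t
xlock_word(struct thread *td, uintptr_t waiter_flags)
{
	/* aligned(16) guarantees the flag bits of the pointer are zero. */
	MPASS(((uintptr_t)td & SX_LOCK_FLAGMASK) == 0);
	return ((uintptr_t)td | waiter_flags);
}

/* Sketch: recover the owner of an exclusively held lock word. */
static struct thread *
xlock_owner(uintptr_t word)
{
	MPASS((word & SX_LOCK_SHARED) == 0);
	return ((struct thread *)SX_OWNER(word));
}
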
Index: sys/sleepqueue.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/sleepqueue.h,v
retrieving revision 1.6.2.1
diff -u -r1.6.2.1 sleepqueue.h
--- sys/sleepqueue.h 27 Feb 2006 00:19:39 -0000 1.6.2.1
+++ sys/sleepqueue.h 31 Aug 2007 01:47:52 -0000
@@ -26,15 +26,16 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/sys/sleepqueue.h,v 1.6.2.1 2006/02/27 00:19:39 davidxu Exp $
+ * $FreeBSD: src/sys/sys/sleepqueue.h,v 1.12 2007/03/31 23:23:42 jhb Exp $
*/
#ifndef _SYS_SLEEPQUEUE_H_
#define _SYS_SLEEPQUEUE_H_
/*
- * Sleep queue interface. Sleep/wakeup and condition variables use a sleep
- * queue for the queue of threads blocked on a sleep channel.
+ * Sleep queue interface. Sleep/wakeup, condition variables, and sx
+ * locks use a sleep queue for the queue of threads blocked on a sleep
+ * channel.
*
* A thread calls sleepq_lock() to lock the sleep queue chain associated
 * with a given wait channel. A thread can then call sleepq_add() to
@@ -84,24 +85,33 @@
#define SLEEPQ_TYPE 0x0ff /* Mask of sleep queue types. */
#define SLEEPQ_MSLEEP 0x00 /* Used by msleep/wakeup. */
#define SLEEPQ_CONDVAR 0x01 /* Used for a cv. */
+#define SLEEPQ_SX 0x03 /* Used by an sx lock. */
#define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */
void init_sleepqueues(void);
void sleepq_abort(struct thread *td, int intrval);
-void sleepq_add(void *, struct mtx *, const char *, int);
+void sleepq_add_queue(void *, struct mtx *, const char *, int, int);
struct sleepqueue *sleepq_alloc(void);
-void sleepq_broadcast(void *, int, int);
+void sleepq_broadcast_queue(void *, int, int, int);
void sleepq_free(struct sleepqueue *);
void sleepq_lock(void *);
struct sleepqueue *sleepq_lookup(void *);
void sleepq_release(void *);
void sleepq_remove(struct thread *, void *);
-void sleepq_signal(void *, int, int);
+void sleepq_signal_queue(void *, int, int, int);
void sleepq_set_timeout(void *wchan, int timo);
int sleepq_timedwait(void *wchan);
int sleepq_timedwait_sig(void *wchan);
void sleepq_wait(void *);
int sleepq_wait_sig(void *wchan);
+/* Preserve source compat with 6.x */
+#define sleepq_add(wchan, lock, wmesg, flags) \
+ sleepq_add_queue(wchan, lock, wmesg, flags, 0)
+#define sleepq_broadcast(wchan, flags, pri) \
+ sleepq_broadcast_queue(wchan, flags, pri, 0)
+#define sleepq_signal(wchan, flags, pri) \
+ sleepq_signal_queue(wchan, flags, pri, 0)
+
#endif /* _KERNEL */
#endif /* !_SYS_SLEEPQUEUE_H_ */
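
The extra queue argument is what lets the sx slow paths keep shared and
exclusive waiters on the same wait channel but on separate queues, so a wakeup
can target just one class of waiter via sleepq_signal_queue() or
sleepq_broadcast_queue(). A minimal sketch of the blocking side only; the
queue index 1 mirrors TS_SHARED_QUEUE, the wmesg is a placeholder, and the
lock-word bookkeeping the real kern_sx.c does before sleeping is elided:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>

/* Sketch only: park the current thread on an sx lock's shared queue. */
static void
sx_sleep_shared(struct sx *sx)
{
	/* Lock the sleep queue chain for this wait channel. */
	sleepq_lock(&sx->lock_object);
	/* Queue ourselves on the shared-waiter queue of the channel. */
	sleepq_add_queue(&sx->lock_object, NULL, "sxlck", SLEEPQ_SX,
	    1 /* shared queue */);
	/* Block until woken via sleepq_signal_queue()/sleepq_broadcast_queue(). */
	sleepq_wait(&sx->lock_object);
}
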
Index: sys/sx.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/sx.h,v
retrieving revision 1.21.2.5
diff -u -r1.21.2.5 sx.h
--- sys/sx.h 27 Aug 2007 13:45:35 -0000 1.21.2.5
+++ sys/sx.h 31 Aug 2007 01:02:47 -0000
@@ -1,5 +1,7 @@
/*-
- * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>. All rights reserved.
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -24,39 +26,93 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
- * $FreeBSD: src/sys/sys/sx.h,v 1.21.2.5 2007/08/27 13:45:35 jhb Exp $
+ * $FreeBSD: src/sys/sys/sx.h,v 1.37 2007/07/06 13:20:44 attilio Exp $
*/
#ifndef _SYS_SX_H_
#define _SYS_SX_H_
-#include <sys/queue.h>
#include <sys/_lock.h>
-#include <sys/condvar.h> /* XXX */
+#include <sys/_sx.h>
+#include <sys/lock_profile.h>
-struct sx {
- struct lock_object sx_object; /* Common lock properties. */
- struct mtx *sx_lock; /* General protection lock. */
- int sx_cnt; /* -1: xlock, > 0: slock count. */
- struct cv sx_shrd_cv; /* slock waiters. */
- int sx_shrd_wcnt; /* Number of slock waiters. */
- struct cv sx_excl_cv; /* xlock waiters. */
- int sx_excl_wcnt; /* Number of xlock waiters. */
- struct thread *sx_xholder; /* Thread presently holding xlock. */
-};
+#ifdef _KERNEL
+#include <machine/atomic.h>
+#endif
+
+/*
+ * In general, the sx locks and rwlocks use very similar algorithms.
+ * The main difference in the implementations is how threads are
+ * blocked when a lock is unavailable. For this, sx locks use sleep
+ * queues which do not support priority propagation, and rwlocks use
+ * turnstiles which do.
+ *
+ * The sx_lock field consists of several fields. The low bit
+ * indicates if the lock is locked with a shared or exclusive lock. A
+ * value of 0 indicates an exclusive lock, and a value of 1 indicates
+ * a shared lock. Bit 1 is a boolean indicating if there are any
+ * threads waiting for a shared lock. Bit 2 is a boolean indicating
+ * if there are any threads waiting for an exclusive lock. Bit 3 is a
+ * boolean indicating if an exclusive lock is recursively held. The
+ * rest of the variable's definition is dependent on the value of the
+ * first bit. For an exclusive lock, it is a pointer to the thread
+ * holding the lock, similar to the mtx_lock field of mutexes. For
+ * shared locks, it is a count of read locks that are held.
+ *
+ * When the lock is not locked by any thread, it is encoded as a
+ * shared lock with zero waiters.
+ *
+ * A note about memory barriers. Exclusive locks need to use the same
+ * memory barriers as mutexes: _acq when acquiring an exclusive lock
+ * and _rel when releasing an exclusive lock. Shared locks, on the
+ * other hand, need an _acq barrier when acquiring the lock but, since
+ * they do not update any protected data, no memory barrier is needed
+ * when releasing a shared lock.
+ */
+
+#define SX_LOCK_SHARED 0x01
+#define SX_LOCK_SHARED_WAITERS 0x02
+#define SX_LOCK_EXCLUSIVE_WAITERS 0x04
+#define SX_LOCK_RECURSED 0x08
+#define SX_LOCK_FLAGMASK \
+ (SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS | \
+ SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
+
+#define SX_OWNER(x) ((x) & ~SX_LOCK_FLAGMASK)
+#define SX_SHARERS_SHIFT 4
+#define SX_SHARERS(x) (SX_OWNER(x) >> SX_SHARERS_SHIFT)
+#define SX_SHARERS_LOCK(x) \
+ ((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
+#define SX_ONE_SHARER (1 << SX_SHARERS_SHIFT)
+
+#define SX_LOCK_UNLOCKED SX_SHARERS_LOCK(0)
+#define SX_LOCK_DESTROYED \
+ (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)
#ifdef _KERNEL
+
+/*
+ * Function prototypes. Routines that start with an underscore are not part
+ * of the public interface and are wrapped with a macro.
+ */
void sx_sysinit(void *arg);
-void sx_init(struct sx *sx, const char *description);
+#define sx_init(sx, desc) sx_init_flags((sx), (desc), 0)
+void sx_init_flags(struct sx *sx, const char *description, int opts);
void sx_destroy(struct sx *sx);
-void _sx_slock(struct sx *sx, const char *file, int line);
-void _sx_xlock(struct sx *sx, const char *file, int line);
+int _sx_slock(struct sx *sx, int opts, const char *file, int line);
+int _sx_xlock(struct sx *sx, int opts, const char *file, int line);
int _sx_try_slock(struct sx *sx, const char *file, int line);
int _sx_try_xlock(struct sx *sx, const char *file, int line);
void _sx_sunlock(struct sx *sx, const char *file, int line);
void _sx_xunlock(struct sx *sx, const char *file, int line);
int _sx_try_upgrade(struct sx *sx, const char *file, int line);
void _sx_downgrade(struct sx *sx, const char *file, int line);
+int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts,
+ const char *file, int line);
+int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
+void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
+ line);
+void _sx_sunlock_hard(struct sx *sx, const char *file, int line);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
void _sx_assert(struct sx *sx, int what, const char *file, int line);
#endif
@@ -79,15 +135,119 @@
SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
sx_destroy, (sxa))
-#define sx_xlocked(sx) ((sx)->sx_cnt < 0 && (sx)->sx_xholder == curthread)
-#define sx_slock(sx) _sx_slock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_xlock(sx) _sx_xlock((sx), LOCK_FILE, LOCK_LINE)
+/*
+ * Full lock operations that are suitable to be inlined in non-debug kernels.
+ * If the lock can't be acquired or released trivially then the work is
+ * deferred to 'tougher' functions.
+ */
+
+/* Acquire an exclusive lock. */
+static __inline int
+__sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
+ int line)
+{
+ uintptr_t tid = (uintptr_t)td;
+ int error = 0;
+
+ if (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
+ error = _sx_xlock_hard(sx, tid, opts, file, line);
+ else
+ lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
+ line);
+
+ return (error);
+}
+
+/* Release an exclusive lock. */
+static __inline void
+__sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
+{
+ uintptr_t tid = (uintptr_t)td;
+
+ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+ _sx_xunlock_hard(sx, tid, file, line);
+}
+
+/* Acquire a shared lock. */
+static __inline int
+__sx_slock(struct sx *sx, int opts, const char *file, int line)
+{
+ uintptr_t x = sx->sx_lock;
+ int error = 0;
+
+ if (!(x & SX_LOCK_SHARED) ||
+ !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
+ error = _sx_slock_hard(sx, opts, file, line);
+#ifdef LOCK_PROFILING_SHARED
+ else if (SX_SHARERS(x) == 0)
+ lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
+ line);
+#endif
+
+ return (error);
+}
+
+/*
+ * Release a shared lock. We can just drop a single shared lock so
+ * long as we aren't trying to drop the last shared lock when other
+ * threads are waiting for an exclusive lock. This takes advantage of
+ * the fact that an unlocked lock is encoded as a shared lock with a
+ * count of 0.
+ */
+static __inline void
+__sx_sunlock(struct sx *sx, const char *file, int line)
+{
+ uintptr_t x = sx->sx_lock;
+
+ if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
+ !atomic_cmpset_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
+ _sx_sunlock_hard(sx, file, line);
+}
+
+/*
+ * Public interface for lock operations.
+ */
+#ifndef LOCK_DEBUG
+#error "LOCK_DEBUG not defined, include <sys/lock.h> before <sys/sx.h>"
+#endif
+#if (LOCK_DEBUG > 0) || defined(SX_NOINLINE)
+#define sx_xlock(sx) (void)_sx_xlock((sx), 0, LOCK_FILE, LOCK_LINE)
+#define sx_xlock_sig(sx) \
+ _sx_xlock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
+#define sx_xunlock(sx) _sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
+#define sx_slock(sx) (void)_sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
+#define sx_slock_sig(sx) \
+ _sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
+#define sx_sunlock(sx) _sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#else
+#define sx_xlock(sx) \
+ (void)__sx_xlock((sx), curthread, 0, LOCK_FILE, LOCK_LINE)
+#define sx_xlock_sig(sx) \
+ __sx_xlock((sx), curthread, SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
+#define sx_xunlock(sx) \
+ __sx_xunlock((sx), curthread, LOCK_FILE, LOCK_LINE)
+#define sx_slock(sx) (void)__sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
+#define sx_slock_sig(sx) \
+ __sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
+#define sx_sunlock(sx) __sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */
#define sx_try_slock(sx) _sx_try_slock((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_xlock(sx) _sx_try_xlock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_sunlock(sx) _sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_xunlock(sx) _sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_upgrade(sx) _sx_try_upgrade((sx), LOCK_FILE, LOCK_LINE)
#define sx_downgrade(sx) _sx_downgrade((sx), LOCK_FILE, LOCK_LINE)
+
+/*
+ * Return a pointer to the owning thread if the lock is exclusively
+ * locked.
+ */
+#define sx_xholder(sx) \
+ ((sx)->sx_lock & SX_LOCK_SHARED ? NULL : \
+ (struct thread *)SX_OWNER((sx)->sx_lock))
+
+#define sx_xlocked(sx) \
+ (((sx)->sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)) == \
+ (uintptr_t)curthread)
+
#define sx_unlock(sx) do { \
if (sx_xlocked(sx)) \
sx_xunlock(sx); \
@@ -95,17 +255,39 @@
sx_sunlock(sx); \
} while (0)
+#define sx_sleep(chan, sx, pri, wmesg, timo) \
+ _sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
+
+/*
+ * Options passed to sx_init_flags().
+ */
+#define SX_DUPOK 0x01
+#define SX_NOPROFILE 0x02
+#define SX_NOWITNESS 0x04
+#define SX_QUIET 0x08
+#define SX_ADAPTIVESPIN 0x10
+#define SX_RECURSE 0x20
+
+/*
+ * Options passed to sx_*lock_hard().
+ */
+#define SX_INTERRUPTIBLE 0x40
+
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
#define SA_LOCKED LA_LOCKED
#define SA_SLOCKED LA_SLOCKED
#define SA_XLOCKED LA_XLOCKED
#define SA_UNLOCKED LA_UNLOCKED
+#define SA_RECURSED LA_RECURSED
+#define SA_NOTRECURSED LA_NOTRECURSED
/* Backwards compatibility. */
#define SX_LOCKED LA_LOCKED
#define SX_SLOCKED LA_SLOCKED
#define SX_XLOCKED LA_XLOCKED
#define SX_UNLOCKED LA_UNLOCKED
+#define SX_RECURSED LA_RECURSED
+#define SX_NOTRECURSED LA_NOTRECURSED
#endif
#ifdef INVARIANTS
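
As a quick illustration of the new interface from a consumer's point of view;
the lock and function names here are placeholders, not code from this patch:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_lock;

static void
foo_init(void)
{
	/* SX_ADAPTIVESPIN opts this lock into spinning under ADAPTIVE_SX. */
	sx_init_flags(&foo_lock, "foo lock", SX_ADAPTIVESPIN);
}

static int
foo_modify(void)
{
	int error;

	/* sx_xlock_sig() returns non-zero if the sleep was interrupted. */
	error = sx_xlock_sig(&foo_lock);
	if (error != 0)
		return (error);
	/* ... modify data protected by foo_lock ... */
	sx_xunlock(&foo_lock);
	return (0);
}
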
Index: sys/turnstile.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/turnstile.h,v
retrieving revision 1.7
diff -u -r1.7 turnstile.h
--- sys/turnstile.h 7 Jan 2005 02:29:24 -0000 1.7
+++ sys/turnstile.h 31 Aug 2007 00:39:59 -0000
@@ -73,20 +73,43 @@
#ifdef _KERNEL
+/* Which queue to block on, or from which queue to wake one or more threads. */
+#define TS_EXCLUSIVE_QUEUE 0
+#define TS_SHARED_QUEUE 1
+
+/* The type of lock currently held. */
+#define TS_EXCLUSIVE_LOCK TS_EXCLUSIVE_QUEUE
+#define TS_SHARED_LOCK TS_SHARED_QUEUE
+
void init_turnstiles(void);
void turnstile_adjust(struct thread *, u_char);
struct turnstile *turnstile_alloc(void);
-void turnstile_broadcast(struct turnstile *);
+#define turnstile_wakeup(turnstile) turnstile_broadcast(turnstile)
+#define turnstile_broadcast(turnstile) \
+ turnstile_broadcast_queue(turnstile, TS_EXCLUSIVE_QUEUE)
+void turnstile_broadcast_queue(struct turnstile *, int);
void turnstile_claim(struct lock_object *);
-int turnstile_empty(struct turnstile *);
+void turnstile_disown(struct turnstile *);
+#define turnstile_empty(turnstile) \
+	turnstile_empty_queue(turnstile, TS_EXCLUSIVE_QUEUE)
+int turnstile_empty_queue(struct turnstile *, int);
void turnstile_free(struct turnstile *);
-struct thread *turnstile_head(struct turnstile *);
+#define turnstile_head(turnstile) \
+ turnstile_head_queue(turnstile, TS_EXCLUSIVE_QUEUE)
+struct thread *turnstile_head_queue(struct turnstile *, int);
void turnstile_lock(struct lock_object *);
struct turnstile *turnstile_lookup(struct lock_object *);
void turnstile_release(struct lock_object *);
-int turnstile_signal(struct turnstile *);
-void turnstile_unpend(struct turnstile *);
-void turnstile_wait(struct lock_object *, struct thread *);
+#define turnstile_signal(turnstile) \
+ turnstile_signal_queue(turnstile, TS_EXCLUSIVE_QUEUE)
+int turnstile_signal_queue(struct turnstile *, int);
+struct turnstile *turnstile_trywait(struct lock_object *);
+#define turnstile_unpend(turnstile) \
+	turnstile_unpend_queue(turnstile, TS_EXCLUSIVE_QUEUE)
+void turnstile_unpend_queue(struct turnstile *, int);
+#define turnstile_wait(lock_object, thread) \
+ turnstile_wait_queue(lock_object, thread, TS_EXCLUSIVE_QUEUE)
+void turnstile_wait_queue(struct lock_object *, struct thread *, int);
#endif /* _KERNEL */
#endif /* _SYS_TURNSTILE_H_ */
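
Illustrative only: an unmodified 6.x turnstile consumer still compiles because
the old names expand onto the exclusive queue, for example:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/turnstile.h>

static void
old_style_wakeup(struct turnstile *ts)
{
	/* Expands to turnstile_broadcast_queue(ts, TS_EXCLUSIVE_QUEUE). */
	turnstile_broadcast(ts);
	/* Expands to turnstile_unpend_queue(ts, TS_EXCLUSIVE_QUEUE). */
	turnstile_unpend(ts);
}
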
Index: vm/vm_map.c
===================================================================
RCS file: /cvs/ncvs/src/sys/vm/vm_map.c,v
retrieving revision 1.366.2.4
diff -u -r1.366.2.4 vm_map.c
--- vm/vm_map.c 30 Aug 2007 02:32:04 -0000 1.366.2.4
+++ vm/vm_map.c 31 Aug 2007 03:12:00 -0000
@@ -429,7 +429,7 @@
if (map->system_map)
_mtx_lock_flags(&map->system_mtx, 0, file, line);
else
- _sx_xlock(&map->lock, file, line);
+ (void) _sx_xlock(&map->lock, 0, file, line);
map->timestamp++;
}
@@ -450,7 +450,7 @@
if (map->system_map)
_mtx_lock_flags(&map->system_mtx, 0, file, line);
else
- _sx_xlock(&map->lock, file, line);
+ (void) _sx_xlock(&map->lock, 0, file, line);
}
void
--PPYy/fEw/8QCHSq3--