Hello,
From: Kostik Belousov <kostikbel@gmail.com>
Date: Tue, 12 Jul 2011 17:57:53 +0300
> On Tue, Jul 12, 2011 at 03:02:44PM +0200, Attilio Rao wrote:
>> 2011/7/12 Kostik Belousov <kostikbel@gmail.com>:
>> > On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote:
>> >> Hello,
>> >>
>> >> I think that devfs has a problem.
>> >> I encountered the problem that open("/dev/AAA") returned ENOENT.
>> >> Of course, /dev/AAA exists.
>> >>
>> >> ENOENT was created by the point(***) in devfs_allocv().
>> >> I think that the race condition had occurred between process A an=
d
>> >> vnlru kernel thread.
>> >>
>> >> Please check the following.
>> >>
>> >> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still exec=
ute
>> >> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode.
>> >> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED=
..=
>> >>
>> >> When I set the break point to (***), I checked that de->de_vnode =
and
>> >> vp->v_data were NULL.
>> >>
>> >>
>> >> process A: =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
vnlru:
>> >>
>> >> devfs_allocv() {
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 vgonel(vp) {
>> >> =A0 =A0... =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 ...
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 vp->v_iflag |=3D VI_DOOMED;
>> >> =A0 mtx_lock(&devfs_de_interlock); =A0 =A0 =A0 =A0 ...
>> >> =A0 vp =3D de->de_vnode;
>> >> =A0 if (vp !=3D NULL) { =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 V=
I_UNLOCK(vp);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 _____________=
/ ...
>> >> =A0 VI_LOCK(vp); ____________/ =A0 =A0 =A0 =A0 =A0 =A0 =A0if (VOP=
_RECLAIM(vp, td))
>> >> =A0 mtx_unlock(&devfs_de_interlock); =A0 =A0 =A0 ...
>> >> =A0 =A0... =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0\ =A0 =A0 =A0 =A0 devfs_reclaim(ap) {
>> >> =A0 error =3D vget(vp,...); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0\
>> >> =A0 =A0... =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0\______ =A0 mtx_lock(&devfs_de_interlock);
>> >> =A0 if (devfs_allocv_drop_refs(...)) { =A0 =A0 =A0 =A0de =3D vp->=
v_data;
>> >> =A0 =A0 ... =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 if (de !=3D NULL) {
>> >> =A0 } =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 de->de_vnode =3D NULL;
>> >> =A0 else if (error) { =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 vp->v_data =3D NULL;
>> >> =A0 =A0 ... =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> >> =A0 =A0 rturn (error); (***) =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0m=
tx_unlock(&devfs_de_interlock);
>> >> =A0 } =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 ...
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 }
>> >>
>> >>
>> >>
>> >> I think that devfs_allocv() should be fixed as below.
>> >> How do you think?
>> >>
>> >> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vn=
ode **vpp)
>> >> {
>> >> =A0 =A0 =A0 =A0 int error;
>> >> =A0 =A0 =A0 struct vnode *vp;
>> >> =A0 =A0 =A0 struct cdev *dev;
>> >> =A0 =A0 =A0 struct devfs_mount *dmp;
>> >>
>> >> =A0 =A0 =A0 dmp =3D VFSTODEVFS(mp);
>> >> +#if 1
>> >> + retry:
>> >> +#endif
>> >> =A0 =A0 =A0 =A0 if (de->de_flags & DE_DOOMED) {
>> >>
>> >> =A0 =A0 =A0 =A0 =A0 =A0...
>> >>
>> >> =A0 =A0 =A0 =A0 mtx_lock(&devfs_de_interlock);
>> >> =A0 =A0 =A0 =A0 vp =3D de->de_vnode;
>> >> =A0 =A0 =A0 =A0 if (vp !=3D NULL) {
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 VI_LOCK(vp);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 mtx_unlock(&devfs_de_interlock);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 sx_xunlock(&dmp->dm_lock);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 error =3D vget(vp, LK_EXCLUSIVE |=
LK_INTERLOCK, curthread);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 sx_xlock(&dmp->dm_lock);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (devfs_allocv_drop_refs(0, dmp=
, de)) {
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (error =3D=3D =
0)
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 v=
put(vp);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return (ENOENT);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 else if (error) {
>> >> +#if 1
>> >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (error =3D=3D ENOENT=
)
>> >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto re=
try;
>> >> +#endif
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 sx_xunlock(&dmp->dm_l=
ock);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return (error);
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> >>
>> > Thank you for the report.
>> >
>> > The proposed change would revert r179247, which also caused some i=
ssues.
>> > Are you able to reproduce the problem ?
>> >
>> > Could you try the following patch ? I cannot reproduce your situat=
ion,
>> > so the patch is untested by me.
>> >
>> > diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops=
..c
>> > index bf6dab8..bbbfff4 100644
>> > --- a/sys/fs/devfs/devfs_vnops.c
>> > +++ b/sys/fs/devfs/devfs_vnops.c
>> > @@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct m=
ount *mp, int lockmode,
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0sx_xunlock(&dmp->dm_lock);
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (ENOENT);
>> > =A0 =A0 =A0 =A0}
>> > +loop:
>> > =A0 =A0 =A0 =A0DEVFS_DE_HOLD(de);
>> > =A0 =A0 =A0 =A0DEVFS_DMP_HOLD(dmp);
>> > =A0 =A0 =A0 =A0mtx_lock(&devfs_de_interlock);
>> > @@ -412,7 +413,16 @@ devfs_allocv(struct devfs_dirent *de, struct =
mount *mp, int lockmode,
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0vpu=
t(vp);
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (ENOENT);
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
>> > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 else if (error) {
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 else if (error !=3D 0) {
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (error =3D=3D ENO=
ENT) {
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 mtx_=
lock(&devfs_de_interlock);
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 whil=
e (de->de_vnode !=3D NULL) {
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 =A0 msleep(&de->de_vnode,
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 =A0 =A0 =A0 &devfs_de_interlock, 0, "dvall", 0);
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 mtx_=
unlock(&devfs_de_interlock);
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto=
loop;
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0sx_xunlock(&dmp->dm=
_lock);
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (error);
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
>> > @@ -1259,6 +1269,7 @@ devfs_reclaim(struct vop_reclaim_args *ap)
>> > =A0 =A0 =A0 =A0if (de !=3D NULL) {
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0de->de_vnode =3D NULL;
>> > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0vp->v_data =3D NULL;
>> > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 wakeup(&de->de_vnode);
>> > =A0 =A0 =A0 =A0}
>> > =A0 =A0 =A0 =A0mtx_unlock(&devfs_de_interlock);
>> =
>> I think that this approach may starve the thread for a while.
>> As I told you privately I was able to see on field a livelock becaus=
e
>> of this check. In my case, it was a thread running for 63seconds (at=
>> least, at that point the watchdog was tripping over).
> Feasible explanation was not found at the time, AFAIR. I could believ=
e
> that this is possible with r179247 and driver stuck in the close cdev=
sw
> method.
> =
> More risky change would be to clear de_vnode early. All devfs code sh=
all
> be already safe by checking for VI_DOOMED, and if VI_DOOMED, v_data m=
ay
> be NULL.
> =
> Again, I am unable to test.
> =
> diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
> index bf6dab8..955bd8b 100644
> --- a/sys/fs/devfs/devfs_vnops.c
> +++ b/sys/fs/devfs/devfs_vnops.c
> @@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct moun=
t *mp, int lockmode,
> sx_xunlock(&dmp->dm_lock);
> return (ENOENT);
> }
> +loop:
> DEVFS_DE_HOLD(de);
> DEVFS_DMP_HOLD(dmp);
> mtx_lock(&devfs_de_interlock);
> @@ -405,16 +406,21 @@ devfs_allocv(struct devfs_dirent *de, struct mo=
unt *mp, int lockmode,
> VI_LOCK(vp);
> mtx_unlock(&devfs_de_interlock);
> sx_xunlock(&dmp->dm_lock);
> - error =3D vget(vp, lockmode | LK_INTERLOCK, curthread);
> + vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
> sx_xlock(&dmp->dm_lock);
> if (devfs_allocv_drop_refs(0, dmp, de)) {
> - if (error =3D=3D 0)
> - vput(vp);
> + vput(vp);
> return (ENOENT);
> }
> - else if (error) {
> - sx_xunlock(&dmp->dm_lock);
> - return (error);
> + else if ((vp->v_iflag & VI_DOOMED) !=3D 0) {
> + mtx_lock(&devfs_de_interlock);
> + if (de->de_vnode =3D=3D vp) {
> + de->de_vnode =3D NULL;
> + vp->v_data =3D NULL;
> + }
> + mtx_unlock(&devfs_de_interlock);
> + vput(vp);
> + goto loop;
> }
> sx_xunlock(&dmp->dm_lock);
> *vpp =3D vp;
I tried Kostik's last patch, and I checked that open() successed twice
by retry. It is difficult for me to reproduce this situation.
At least, my problem is solved by this patch.
Thanks,
Kohji Okuno.
_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscribe@freebsd.org"