Hi,

This patch is the start of mlockall/munlockall support; it adds a flags field to each vm_map to support mlockall(MCL_FUTURE) {from FreeBSD}, and modifies mmap() and brk() to test for that flag and wire in any newly ill-gotten pages. It also implements munlockall(). This code has been tested in a vkernel and seems to work okay.

Questions:

1) What permissions do we want to check for mlockall()?
2) Currently I read the vm_map flags under the per-map lock. This is probably overkill for mmap and brk; should I read the value directly instead?
3) In munlockall(), I've marked a section 'XXX' where it might be possible to hit an in-transition map entry (entry->eflags & MAP_ENTRY_IN_TRANSITION). I don't fully understand the places in the VM where that flag is tested for and the map lock released around it... I didn't see any place where the flag was set and the per-map lock then released, so perhaps I'm missing something?
4) Are automatic stack growth pages supposed to be affected by MCL_FUTURE?
5) Are pages from the 43bsd compat code supposed to be affected by MCL_FUTURE?

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 49f3a65..95253f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -486,6 +486,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max, pmap_t pmap)
 	map->first_free = &map->header;
 	map->hint = &map->header;
 	map->timestamp = 0;
+	map->flags = 0;
 	lockinit(&map->lock, "thrd_sleep", 0, 0);
 }
 
@@ -3405,6 +3406,10 @@ Retry:
 		}
 	}
 
+	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) {
+
+	}
+
 done:
 	if (use_read_lock)
 		vm_map_unlock_read(map);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5061ffb..93c6a39 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -110,6 +110,7 @@ RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry, rb_entry, rb_vm_map_compare);
  *	vm_map_entry_t		an entry in an address map.
  */
 
+typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;
 
 /*
@@ -224,12 +225,18 @@ struct vm_map {
 	vm_map_entry_t hint;		/* hint for quick lookups */
 	unsigned int timestamp;		/* Version number */
 	vm_map_entry_t first_free;	/* First free space hint */
+	vm_flags_t flags;		/* flags for this vm_map */
 	struct pmap *pmap;		/* Physical map */
 #define	min_offset		header.start
 #define max_offset		header.end
 };
 
 /*
+ * vm_flags_t values
+ */
+#define MAP_WIREFUTURE		0x01	/* wire all future pages */
+
+/*
  * Registered upcall
  */
 struct upcall;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a5beeb2..f41d4c6 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1026,29 +1026,76 @@ sys_mlock(struct mlock_args *uap)
 }
 
 /*
- * mlockall_args(int how)
- *
- * Dummy routine, doesn't actually do anything.
+ * mlockall(int how)
  *
  * No requirements
  */
 int
 sys_mlockall(struct mlockall_args *uap)
 {
-	return (ENOSYS);
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	vm_map_t map = &p->p_vmspace->vm_map;
+	int how = uap->how;
+	int rc = KERN_SUCCESS;
+
+	vm_map_lock(map);
+
+	if (how & MCL_FUTURE)
+		map->flags |= MAP_WIREFUTURE;
+
+	if (how & MCL_CURRENT) {
+		rc = ENOSYS;
+	}
+
+	vm_map_unlock(map);
+
+	return (rc == KERN_SUCCESS) ? 0 : rc;
 }
 
 /*
- * munlockall_args(void)
+ * munlockall(void)
 *
- * Dummy routine, doesn't actually do anything.
+ * Unwire all user-wired map entries, cancel MCL_FUTURE from mlockall
 *
 * No requirements
 */
 int
 sys_munlockall(struct munlockall_args *uap)
 {
-	return (ENOSYS);
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	vm_map_t map = &p->p_vmspace->vm_map;
+	vm_map_entry_t entry;
+	int rc = KERN_SUCCESS;
+
+	vm_map_lock(map);
+
+	/* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
+	map->flags &= ~MAP_WIREFUTURE;
+
+	for (entry = map->header.next;
+	     entry != &map->header;
+	     entry = entry->next) {
+		if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
+			continue;
+
+		/* XXX: Deal with MAP_ENTRY_IN_TRANSITION here? */
+
+		KASSERT(entry->wired_count > 0,
+			("wired_count was 0 with USER_WIRED set! %p", entry));
+
+		/* Drop wired count, if it hits zero, unwire the entry */
+		entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+		entry->wired_count--;
+		if (entry->wired_count == 0)
+			vm_fault_unwire(map, entry);
+	}
+
+	map->timestamp++;
+	vm_map_unlock(map);
+
+	return (rc);
 }
 
 /*
@@ -1111,6 +1158,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
 	int rv = KERN_SUCCESS;
 	off_t objsize;
 	int docow;
+	int vflags;
 
 	if (size == 0)
 		return (0);
@@ -1315,6 +1363,16 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
 	}
 
 	/*
+	 * If process has marked all future mappings to be wired, do so
+	 */
+	vm_map_lock_read(map);
+	vflags = map->flags;
+	vm_map_unlock_read(map);
+
+	if ((rv == KERN_SUCCESS) && (vflags & MAP_WIREFUTURE))
+		vm_map_unwire(map, *addr, *addr + size, FALSE);
+
+	/*
 	 * Set the access time on the vnode
 	 */
 	if (vp != NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 428c10f..2cc4638 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -75,6 +75,7 @@ sys_obreak(struct obreak_args *uap)
 	vm_offset_t new, old, base;
 	int rv;
 	int error;
+	int vflags;
 
 	error = 0;
 	lwkt_gettoken(&vm_token);
@@ -125,6 +126,14 @@ sys_obreak(struct obreak_args *uap)
 			goto done;
 		}
 		vm->vm_dsize += btoc(diff);
+
+		vm_map_lock_read(&vm->vm_map);
+		vflags = vm->vm_map.flags;
+		vm_map_unlock_read(&vm->vm_map);
+
+		if (vflags & MAP_WIREFUTURE)
+			vm_map_unwire(&vm->vm_map, old, new, FALSE);
+
	} else if (new < old) {
		rv = vm_map_remove(&vm->vm_map, new, old);
		if (rv != KERN_SUCCESS) {
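
Not part of the patch, but for reference, a rough userland sketch of the sort of test this enables; the mincore() check is only illustrative (residency is not the same as wired, it's just a quick sanity check), and the brk() path can be exercised the same way with an sbrk() call after mlockall(MCL_FUTURE):

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        size_t len = 4 * getpagesize();
        char vec[4];
        char *p;

        /* Ask the kernel to wire all future mappings. */
        if (mlockall(MCL_FUTURE) == -1)
                err(1, "mlockall");

        /* With MAP_WIREFUTURE set, this mapping should come back wired. */
        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");

        /* Rough check only: mincore() reports residency, not wiring. */
        if (mincore(p, len, vec) == -1)
                err(1, "mincore");
        printf("resident after mmap: %d %d %d %d\n",
            vec[0] & 1, vec[1] & 1, vec[2] & 1, vec[3] & 1);

        /* Cancel MCL_FUTURE and unwire everything. */
        if (munlockall() == -1)
                err(1, "munlockall");

        munmap(p, len);
        return (0);
}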
Thanks!

--
vs