Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/kern_exit.c      | 46
-rw-r--r--  sys/kern/kern_fork.c      | 10
-rw-r--r--  sys/kern/kern_lock.c      |  6
-rw-r--r--  sys/kern/kern_mutex.c     | 29
-rw-r--r--  sys/kern/kern_sx.c        | 11
-rw-r--r--  sys/kern/link_elf.c       |  6
-rw-r--r--  sys/kern/link_elf_obj.c   |  8
-rw-r--r--  sys/kern/subr_power.c     | 46
-rw-r--r--  sys/kern/sys_generic.c    | 36
-rw-r--r--  sys/kern/sys_pipe.c       |  5
-rw-r--r--  sys/kern/uipc_mqueue.c    |  6
-rw-r--r--  sys/kern/vfs_cache.c      | 52
-rw-r--r--  sys/kern/vfs_default.c    |  6
-rw-r--r--  sys/kern/vfs_lookup.c     | 14
-rw-r--r--  sys/kern/vfs_mountroot.c  |  2
-rw-r--r--  sys/kern/vfs_subr.c       | 51
-rw-r--r--  sys/kern/vfs_syscalls.c   |  4
-rw-r--r--  sys/kern/vfs_vnops.c      | 22
18 files changed, 231 insertions, 129 deletions
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index a32b5a1b3354..ab8ed32ad189 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -127,6 +127,27 @@ proc_realparent(struct proc *child)
 	return (parent);
 }
 
+static void
+reaper_clear(struct proc *p, struct proc *rp)
+{
+	struct proc *p1;
+	bool clear;
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	LIST_REMOVE(p, p_reapsibling);
+	if (p->p_reapsubtree == 1)
+		return;
+	clear = true;
+	LIST_FOREACH(p1, &rp->p_reaplist, p_reapsibling) {
+		if (p1->p_reapsubtree == p->p_reapsubtree) {
+			clear = false;
+			break;
+		}
+	}
+	if (clear)
+		proc_id_clear(PROC_ID_REAP, p->p_reapsubtree);
+}
+
 void
 reaper_abandon_children(struct proc *p, bool exiting)
 {
@@ -138,7 +159,7 @@ reaper_abandon_children(struct proc *p, bool exiting)
 		return;
 	p1 = p->p_reaper;
 	LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) {
-		LIST_REMOVE(p2, p_reapsibling);
+		reaper_clear(p2, p);
 		p2->p_reaper = p1;
 		p2->p_reapsubtree = p->p_reapsubtree;
 		LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling);
@@ -152,27 +173,6 @@ reaper_abandon_children(struct proc *p, bool exiting)
 	p->p_treeflag &= ~P_TREE_REAPER;
 }
 
-static void
-reaper_clear(struct proc *p)
-{
-	struct proc *p1;
-	bool clear;
-
-	sx_assert(&proctree_lock, SX_LOCKED);
-	LIST_REMOVE(p, p_reapsibling);
-	if (p->p_reapsubtree == 1)
-		return;
-	clear = true;
-	LIST_FOREACH(p1, &p->p_reaper->p_reaplist, p_reapsibling) {
-		if (p1->p_reapsubtree == p->p_reapsubtree) {
-			clear = false;
-			break;
-		}
-	}
-	if (clear)
-		proc_id_clear(PROC_ID_REAP, p->p_reapsubtree);
-}
-
 void
 proc_clear_orphan(struct proc *p)
 {
@@ -972,7 +972,7 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options)
 	sx_xunlock(PIDHASHLOCK(p->p_pid));
 	LIST_REMOVE(p, p_sibling);
 	reaper_abandon_children(p, true);
-	reaper_clear(p);
+	reaper_clear(p, p->p_reaper);
 	PROC_LOCK(p);
 	proc_clear_orphan(p);
 	PROC_UNLOCK(p);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 2ab9b363f8b5..7f6abae187b3 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -610,10 +610,12 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	p2->p_flag |= p1->p_flag & P_SUGID;
 	td2->td_pflags |= td->td_pflags & (TDP_ALTSTACK | TDP_SIGFASTBLOCK);
 	td2->td_pflags2 |= td->td_pflags2 & TDP2_UEXTERR;
-	SESS_LOCK(p1->p_session);
-	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
-		p2->p_flag |= P_CONTROLT;
-	SESS_UNLOCK(p1->p_session);
+	if (p1->p_flag & P_CONTROLT) {
+		SESS_LOCK(p1->p_session);
+		if (p1->p_session->s_ttyvp != NULL)
+			p2->p_flag |= P_CONTROLT;
+		SESS_UNLOCK(p1->p_session);
+	}
 	if (fr->fr_flags & RFPPWAIT)
 		p2->p_flag |= P_PPWAIT;
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 31bff6d2c1aa..76f68677e292 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -1780,9 +1780,11 @@ lockmgr_chain(struct thread *td, struct thread **ownerp)
 
 	lk = td->td_wchan;
 
-	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
+	if (!TD_ON_SLEEPQ(td) || sleepq_type(td->td_wchan) != SLEEPQ_LK ||
+	    LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
 		return (0);
-	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
+	db_printf("blocked on lock %p (%s) \"%s\" ", &lk->lock_object,
+	    lock_class_lockmgr.lc_name, lk->lock_object.lo_name);
 	if (lk->lk_lock & LK_SHARE)
 		db_printf("SHARED (count %ju)\n",
 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 8b5908f5219a..d67c70984528 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -503,8 +503,8 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
 /*
  * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
  *
- * We call this if the lock is either contested (i.e. we need to go to
- * sleep waiting for it), or if we need to recurse on it.
+ * We get here if lock profiling is enabled, the lock is already held by
+ * someone else or we are recursing on it.
  */
 #if LOCK_DEBUG > 0
 void
@@ -660,13 +660,8 @@ retry_turnstile:
 	}
 #endif
 
-	/*
-	 * If the mutex isn't already contested and a failure occurs
-	 * setting the contested bit, the mutex was either released
-	 * or the state of the MTX_RECURSED bit changed.
-	 */
-	if ((v & MTX_CONTESTED) == 0 &&
-	    !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_CONTESTED)) {
+	if ((v & MTX_WAITERS) == 0 &&
+	    !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_WAITERS)) {
 		goto retry_turnstile;
 	}
 
@@ -869,7 +864,7 @@ _thread_lock(struct thread *td)
 		WITNESS_LOCK(&m->lock_object, LOP_EXCLUSIVE, file, line);
 		return;
 	}
-	_mtx_release_lock_quick(m);
+	atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED);
 slowpath_unlocked:
 	spinlock_exit();
 slowpath_noirq:
@@ -959,7 +954,7 @@ retry:
 		}
 		if (m == td->td_lock)
 			break;
-		_mtx_release_lock_quick(m);
+		atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED);
 	}
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
@@ -1029,8 +1024,8 @@ thread_lock_set(struct thread *td, struct mtx *new)
 /*
  * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
  *
- * We are only called here if the lock is recursed, contested (i.e. we
- * need to wake up a blocked thread) or lockstat probe is active.
+ * We get here if lock profiling is enabled, the lock is already held by
+ * someone else or we are recursing on it.
  */
 #if LOCK_DEBUG > 0
 void
@@ -1071,7 +1066,7 @@ __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v)
 	 * can be removed from the hash list if it is empty.
 	 */
 	turnstile_chain_lock(&m->lock_object);
-	_mtx_release_lock_quick(m);
+	atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED);
 	ts = turnstile_lookup(&m->lock_object);
 	MPASS(ts != NULL);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
@@ -1207,7 +1202,7 @@ _mtx_destroy(volatile uintptr_t *c)
 	if (!mtx_owned(m))
 		MPASS(mtx_unowned(m));
 	else {
-		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
+		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_WAITERS)) == 0);
 
 		/* Perform the non-mtx related part of mtx_unlock_spin(). */
 		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) {
@@ -1359,8 +1354,8 @@ db_show_mtx(const struct lock_object *lock)
 		db_printf("DESTROYED");
 	else {
 		db_printf("OWNED");
-		if (m->mtx_lock & MTX_CONTESTED)
-			db_printf(", CONTESTED");
+		if (m->mtx_lock & MTX_WAITERS)
+			db_printf(", WAITERS");
 		if (m->mtx_lock & MTX_RECURSED)
 			db_printf(", RECURSED");
 	}
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index c005e112d3b9..249faf5b1ec4 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -1539,16 +1539,19 @@ sx_chain(struct thread *td, struct thread **ownerp)
 
 	/*
 	 * Check to see if this thread is blocked on an sx lock.
-	 * First, we check the lock class.  If that is ok, then we
-	 * compare the lock name against the wait message.
+	 * The thread should be on a sleep queue with type SLEEPQ_SX, the
+	 * purported lock should have the lock class index of sx, and the lock
+	 * name should match the wait message.
	 */
 	sx = td->td_wchan;
-	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
+	if (!TD_ON_SLEEPQ(td) || sleepq_type(td->td_wchan) != SLEEPQ_SX ||
+	    LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
 	    sx->lock_object.lo_name != td->td_wmesg)
 		return (0);
 
 	/* We think we have an sx lock, so output some details. */
-	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
+	db_printf("blocked on lock %p (%s) \"%s\" ", &sx->lock_object,
+	    lock_class_sx.lc_name, td->td_wmesg);
 	*ownerp = sx_xholder(sx);
 	if (sx->sx_lock & SX_LOCK_SHARED)
 		db_printf("SLOCK (count %ju)\n",
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index bbebadc4c395..ebd203858b66 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -518,9 +518,15 @@ link_elf_init(void* arg)
 	(void)link_elf_link_common_finish(linker_kernel_file);
 	linker_kernel_file->flags |= LINKER_FILE_LINKED;
 	TAILQ_INIT(&set_pcpu_list);
+	ef->pcpu_start = DPCPU_START;
+	ef->pcpu_stop = DPCPU_STOP;
+	ef->pcpu_base = DPCPU_START;
 #ifdef VIMAGE
 	TAILQ_INIT(&set_vnet_list);
 	vnet_save_init((void *)VNET_START, VNET_STOP - VNET_START);
+	ef->vnet_start = VNET_START;
+	ef->vnet_stop = VNET_STOP;
+	ef->vnet_base = VNET_START;
 #endif
 }
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 151aab96f9be..a3a53a39bfd6 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -70,6 +70,7 @@
 
 typedef struct {
 	void		*addr;
+	void		*origaddr;	/* Used by debuggers. */
 	Elf_Off		size;
 	int		flags;	/* Section flags. */
 	int		sec;	/* Original section number. */
@@ -492,7 +493,8 @@ link_elf_link_preload(linker_class_t cls, const char *filename,
 		case SHT_FINI_ARRAY:
 			if (shdr[i].sh_addr == 0)
 				break;
-			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
+			ef->progtab[pb].addr = ef->progtab[pb].origaddr =
+			    (void *)shdr[i].sh_addr;
 			if (shdr[i].sh_type == SHT_PROGBITS)
 				ef->progtab[pb].name = "<<PROGBITS>>";
 #ifdef __amd64__
@@ -1088,6 +1090,8 @@ link_elf_load_file(linker_class_t cls, const char *filename,
 				ef->progtab[pb].name = "<<NOBITS>>";
 			if (ef->progtab[pb].name != NULL &&
 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
+				ef->progtab[pb].origaddr =
+				    (void *)(uintptr_t)mapbase;
 				ef->progtab[pb].addr =
 				    dpcpu_alloc(shdr[i].sh_size);
 				if (ef->progtab[pb].addr == NULL) {
@@ -1101,6 +1105,8 @@ link_elf_load_file(linker_class_t cls, const char *filename,
 #ifdef VIMAGE
 			else if (ef->progtab[pb].name != NULL &&
 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
+				ef->progtab[pb].origaddr =
+				    (void *)(uintptr_t)mapbase;
 				ef->progtab[pb].addr =
 				    vnet_data_alloc(shdr[i].sh_size);
 				if (ef->progtab[pb].addr == NULL) {
diff --git a/sys/kern/subr_power.c b/sys/kern/subr_power.c
index eb5bd03f5018..f5a581e42bf3 100644
--- a/sys/kern/subr_power.c
+++ b/sys/kern/subr_power.c
@@ -39,13 +39,14 @@
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 
-enum power_stype	power_standby_stype = POWER_STYPE_STANDBY;
-enum power_stype	power_suspend_stype = POWER_STYPE_SUSPEND_TO_IDLE;
-enum power_stype	power_hibernate_stype = POWER_STYPE_HIBERNATE;
+enum power_stype	power_standby_stype = POWER_STYPE_UNKNOWN;
+enum power_stype	power_suspend_stype = POWER_STYPE_UNKNOWN;
+enum power_stype	power_hibernate_stype = POWER_STYPE_UNKNOWN;
 
 static u_int		power_pm_type = POWER_PM_TYPE_NONE;
 static power_pm_fn_t	power_pm_fn = NULL;
 static void		*power_pm_arg = NULL;
+static bool		power_pm_supported[POWER_STYPE_COUNT] = {0};
 static struct task	power_pm_task;
 
 enum power_stype
@@ -71,6 +72,26 @@ power_stype_to_name(enum power_stype stype)
 }
 
 static int
+sysctl_supported_stypes(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf sb;
+	enum power_stype stype;
+
+	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
+	for (stype = 0; stype < POWER_STYPE_COUNT; stype++) {
+		if (power_pm_supported[stype])
+			sbuf_printf(&sb, "%s ", power_stype_to_name(stype));
+	}
+	sbuf_trim(&sb);
+	sbuf_finish(&sb);
+	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+	sbuf_delete(&sb);
+
+	return (error);
+}
+
+static int
 power_sysctl_stype(SYSCTL_HANDLER_ARGS)
 {
 	char name[10];
@@ -86,7 +107,8 @@ power_sysctl_stype(SYSCTL_HANDLER_ARGS)
 	new_stype = power_name_to_stype(name);
 	if (new_stype == POWER_STYPE_UNKNOWN)
 		return (EINVAL);
-	/* TODO Check to see if the new stype is supported. */
+	if (!power_pm_supported[new_stype])
+		return (EOPNOTSUPP);
 	if (new_stype != old_stype)
 		*(enum power_stype *)oidp->oid_arg1 = new_stype;
 	return (0);
@@ -95,6 +117,9 @@ power_sysctl_stype(SYSCTL_HANDLER_ARGS)
 static SYSCTL_NODE(_kern, OID_AUTO, power, CTLFLAG_RW, 0,
     "Generic power management related sysctls");
 
+SYSCTL_PROC(_kern_power, OID_AUTO, supported_stype,
+    CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_supported_stypes, "A",
+    "List supported sleep types");
 SYSCTL_PROC(_kern_power, OID_AUTO, standby, CTLTYPE_STRING | CTLFLAG_RW,
     &power_standby_stype, 0, power_sysctl_stype, "A",
     "Sleep type to enter on standby");
@@ -114,7 +139,8 @@ power_pm_deferred_fn(void *arg, int pending)
 }
 
 int
-power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg)
+power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg,
+    bool pm_supported[static POWER_STYPE_COUNT])
 {
 	int error;
 
@@ -123,6 +149,16 @@ power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg)
 		power_pm_type = pm_type;
 		power_pm_fn = pm_fn;
 		power_pm_arg = pm_arg;
+		memcpy(power_pm_supported, pm_supported,
+		    sizeof(power_pm_supported));
+		if (power_pm_supported[POWER_STYPE_STANDBY])
+			power_standby_stype = POWER_STYPE_STANDBY;
+		if (power_pm_supported[POWER_STYPE_SUSPEND_TO_MEM])
+			power_suspend_stype = POWER_STYPE_SUSPEND_TO_MEM;
+		else if (power_pm_supported[POWER_STYPE_SUSPEND_TO_IDLE])
+			power_suspend_stype = POWER_STYPE_SUSPEND_TO_IDLE;
+		if (power_pm_supported[POWER_STYPE_HIBERNATE])
+			power_hibernate_stype = POWER_STYPE_HIBERNATE;
 		error = 0;
 		TASK_INIT(&power_pm_task, 0, power_pm_deferred_fn, NULL);
 	} else {
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 5606b36f772f..7d666da9f88b 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -729,7 +729,7 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
 {
 	struct file *fp;
 	struct filedesc *fdp;
-	int error, tmp, locked;
+	int error, f_flag, tmp, locked;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_CMD(com);
@@ -782,30 +782,36 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
 		goto out;
 	}
 
+	f_flag = 0;
 	switch (com) {
 	case FIONCLEX:
 		fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE;
-		goto out;
+		break;
 	case FIOCLEX:
 		fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
-		goto out;
-	case FIONBIO:
-		if ((tmp = *(int *)data))
-			atomic_set_int(&fp->f_flag, FNONBLOCK);
-		else
-			atomic_clear_int(&fp->f_flag, FNONBLOCK);
-		data = (void *)&tmp;
 		break;
+	case FIONBIO:
 	case FIOASYNC:
-		if ((tmp = *(int *)data))
-			atomic_set_int(&fp->f_flag, FASYNC);
-		else
-			atomic_clear_int(&fp->f_flag, FASYNC);
-		data = (void *)&tmp;
+		f_flag = com == FIONBIO ? FNONBLOCK : FASYNC;
+		tmp = *(int *)data;
+		fsetfl_lock(fp);
+		if (((fp->f_flag & f_flag) != 0) != (tmp != 0)) {
+			error = fo_ioctl(fp, com, (void *)&tmp, td->td_ucred,
+			    td);
+			if (error == 0) {
+				if (tmp != 0)
+					atomic_set_int(&fp->f_flag, f_flag);
+				else
+					atomic_clear_int(&fp->f_flag, f_flag);
+			}
+		}
+		fsetfl_unlock(fp);
+		break;
+	default:
+		error = fo_ioctl(fp, com, data, td->td_ucred, td);
 		break;
 	}
-	error = fo_ioctl(fp, com, data, td->td_ucred, td);
 out:
 	switch (locked) {
 	case LA_XLOCKED:
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 30527fdd4fd0..57ebe8dc85f0 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -567,7 +567,7 @@ pipespace_new(struct pipe *cpipe, int size)
 	static int curfail = 0;
 	static struct timeval lastfail;
 
-	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
+	PIPE_LOCK_ASSERT(cpipe, MA_NOTOWNED);
 	KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
 	    ("pipespace: resize of direct writes not allowed"));
retry:
@@ -1679,8 +1679,7 @@ static void
 pipe_free_kmem(struct pipe *cpipe)
 {
 
-	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
-	    ("pipe_free_kmem: pipe mutex locked"));
+	PIPE_LOCK_ASSERT(cpipe, MA_NOTOWNED);
 
 	if (cpipe->pipe_buffer.buffer != NULL) {
 		atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size);
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index 6f2760635bad..a8aec397b352 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -867,7 +867,7 @@ mqfs_lookupx(struct vop_cachedlookup_args *ap)
 	pd = VTON(dvp);
 	pn = NULL;
 	mqfs = pd->mn_info;
-	*vpp = NULLVP;
+	*vpp = NULL;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
@@ -886,7 +886,7 @@ mqfs_lookupx(struct vop_cachedlookup_args *ap)
 			return (EINVAL);
 		pn = pd;
 		*vpp = dvp;
-		VREF(dvp);
+		vref(dvp);
 		return (0);
 	}
 
@@ -921,7 +921,7 @@ mqfs_lookupx(struct vop_cachedlookup_args *ap)
 			return (error);
 		}
 		if (*vpp == dvp) {
-			VREF(dvp);
+			vref(dvp);
 			*vpp = dvp;
 			mqnode_release(pn);
 			return (0);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 13abb9171234..557e451f9a45 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -3340,12 +3340,10 @@ sys___realpathat(struct thread *td, struct __realpathat_args *uap)
 	    uap->flags, UIO_USERSPACE));
 }
 
-/*
- * Retrieve the full filesystem path that correspond to a vnode from the name
- * cache (if available)
- */
-int
-vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf)
+static int
+vn_fullpath_up_to_pwd_vnode(struct vnode *vp,
+    struct vnode *(*const get_pwd_vnode)(const struct pwd *),
+    char **retbuf, char **freebuf)
 {
 	struct pwd *pwd;
 	char *buf;
@@ -3359,11 +3357,13 @@ vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf)
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	vfs_smr_enter();
 	pwd = pwd_get_smr();
-	error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, &buflen, 0);
+	error = vn_fullpath_any_smr(vp, get_pwd_vnode(pwd), buf, retbuf,
+	    &buflen, 0);
 	VFS_SMR_ASSERT_NOT_ENTERED();
 	if (error < 0) {
 		pwd = pwd_hold(curthread);
-		error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
+		error = vn_fullpath_any(vp, get_pwd_vnode(pwd), buf, retbuf,
+		    &buflen);
 		pwd_drop(pwd);
 	}
 	if (error == 0)
@@ -3373,6 +3373,42 @@ vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf)
 	return (error);
 }
 
+static inline struct vnode *
+get_rdir(const struct pwd *pwd)
+{
+	return (pwd->pwd_rdir);
+}
+
+/*
+ * Produce a filesystem path that starts from the current chroot directory and
+ * corresponds to the passed vnode, using the name cache (if available).
+ */
+int
+vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf)
+{
+	return (vn_fullpath_up_to_pwd_vnode(vp, get_rdir, retbuf, freebuf));
+}
+
+static inline struct vnode *
+get_jdir(const struct pwd *pwd)
+{
+	return (pwd->pwd_jdir);
+}
+
+/*
+ * Produce a filesystem path that starts from the current jail's root directory
+ * and corresponds to the passed vnode, using the name cache (if available).
+ *
+ * This function allows to ignore chroots done inside a jail (or the host),
+ * allowing path checks to remain unaffected by privileged or unprivileged
+ * chroot calls.
+ */
+int
+vn_fullpath_jail(struct vnode *vp, char **retbuf, char **freebuf)
+{
+	return (vn_fullpath_up_to_pwd_vnode(vp, get_jdir, retbuf, freebuf));
+}
+
 /*
  * This function is similar to vn_fullpath, but it attempts to lookup the
  * pathname relative to the global root mount point.  This is required for the
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 85f67731e1cc..05d1120030f3 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -708,7 +708,7 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
 	if (error)
 		return (error);
 
-	VREF(vp);
+	vref(vp);
 	locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
@@ -727,10 +727,10 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
 	    ((*dvp)->v_vflag & VV_ROOT) &&
 	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
 		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
-		VREF(mvp);
+		vref(mvp);
 		VOP_UNLOCK(mvp);
 		vn_close(mvp, FREAD, cred, td);
-		VREF(*dvp);
+		vref(*dvp);
 		vn_lock(*dvp, LK_SHARED | LK_RETRY);
 		covered = 1;
 	}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index fb3e6a7a2534..39c7da803de1 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -883,7 +883,7 @@ vfs_lookup_degenerate(struct nameidata *ndp, struct vnode *dp, int wantparent)
 	}
 	if (wantparent) {
 		ndp->ni_dvp = dp;
-		VREF(dp);
+		vref(dp);
 	}
 	ndp->ni_vp = dp;
 	cnp->cn_namelen = 0;
@@ -1121,7 +1121,7 @@ vfs_lookup(struct nameidata *ndp)
 	cnp->cn_lkflags = LK_SHARED;
 
 	dp = ndp->ni_startdir;
-	ndp->ni_startdir = NULLVP;
+	ndp->ni_startdir = NULL;
 
 	/*
	 * Leading slashes, if any, are supposed to be skipped by the caller.
@@ -1284,7 +1284,7 @@ dirloop:
 		    (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
 			ndp->ni_dvp = dp;
 			ndp->ni_vp = dp;
-			VREF(dp);
+			vref(dp);
 			goto nextname;
 		}
 		if ((dp->v_vflag & VV_ROOT) == 0)
@@ -1295,7 +1295,7 @@ dirloop:
 		}
 		tdp = dp;
 		dp = dp->v_mount->mnt_vnodecovered;
-		VREF(dp);
+		vref(dp);
 		vput(tdp);
 		vn_lock(dp,
 		    enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
@@ -1343,7 +1343,7 @@ unionlookup:
 	    (dp->v_mount->mnt_flag & MNT_UNION)) {
 		tdp = dp;
 		dp = dp->v_mount->mnt_vnodecovered;
-		VREF(dp);
+		vref(dp);
 		vput(tdp);
 		vn_lock(dp,
 		    enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
@@ -1615,7 +1615,7 @@ vfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		}
 		/* ASSERT(dvp == ndp->ni_startdir) */
 		if (refstart)
-			VREF(dvp);
+			vref(dvp);
 		if ((cnp->cn_flags & LOCKPARENT) == 0)
 			VOP_UNLOCK(dp);
 		/*
@@ -1653,7 +1653,7 @@ vfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 
 	/* ASSERT(dvp == ndp->ni_startdir) */
 	if (refstart)
-		VREF(dvp);
+		vref(dvp);
 	if ((cnp->cn_flags & LOCKLEAF) == 0)
 		VOP_UNLOCK(dp);
diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
index e0d1cec5bd71..dd2364f5bf6a 100644
--- a/sys/kern/vfs_mountroot.c
+++ b/sys/kern/vfs_mountroot.c
@@ -266,7 +266,7 @@ vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
 	if (vfsp == NULL)
 		return (ENOENT);
 
-	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
+	mp = vfs_mount_alloc(NULL, vfsp, "/dev", td->td_ucred);
 
 	error = VFS_MOUNT(mp);
 	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index fe299ecc9c56..73e110c05bc1 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -3352,13 +3352,22 @@ vget_abort(struct vnode *vp, enum vgetstate vs)
 	switch (vs) {
 	case VGET_USECOUNT:
 		vrele(vp);
-		break;
+		goto out_ok;
 	case VGET_HOLDCNT:
 		vdrop(vp);
+		goto out_ok;
+	case VGET_NONE:
 		break;
-	default:
-		__assert_unreachable();
 	}
+
+	__assert_unreachable();
+
+	/*
+	 * This is a goto label should the cases above have more in common than
+	 * just the 'return' statement.
+	 */
+out_ok:
+	return;
 }
 
 int
@@ -3644,26 +3653,26 @@ vput_final(struct vnode *vp, enum vput_op func)
 		}
 		break;
 	}
-	if (error == 0) {
-		if (func == VUNREF) {
-			VNASSERT((vp->v_vflag & VV_UNREF) == 0, vp,
-			    ("recursive vunref"));
-			vp->v_vflag |= VV_UNREF;
-		}
-		for (;;) {
-			error = vinactive(vp);
-			if (want_unlock)
-				VOP_UNLOCK(vp);
-			if (error != ERELOOKUP || !want_unlock)
-				break;
-			VOP_LOCK(vp, LK_EXCLUSIVE);
-		}
-		if (func == VUNREF)
-			vp->v_vflag &= ~VV_UNREF;
-		vdropl(vp);
-	} else {
+	if (error != 0) {
 		vdefer_inactive(vp);
+		return;
+	}
+	if (func == VUNREF) {
+		VNASSERT((vp->v_vflag & VV_UNREF) == 0, vp,
+		    ("recursive vunref"));
+		vp->v_vflag |= VV_UNREF;
 	}
+	for (;;) {
+		error = vinactive(vp);
+		if (want_unlock)
+			VOP_UNLOCK(vp);
+		if (error != ERELOOKUP || !want_unlock)
+			break;
+		VOP_LOCK(vp, LK_EXCLUSIVE);
+	}
+	if (func == VUNREF)
+		vp->v_vflag &= ~VV_UNREF;
+	vdropl(vp);
 	return;
out:
 	if (func == VPUT)
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index bf3ed9d515dc..9e1275359715 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1932,7 +1932,7 @@ restart:
 	if (error != 0)
 		return (error);
 
-	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
+	if (nd.ni_vp != NULL || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
@@ -4363,7 +4363,7 @@ unionread:
 		struct vnode *tvp = vp;
 
 		vp = vp->v_mount->mnt_vnodecovered;
-		VREF(vp);
+		vref(vp);
 		fp->f_vnode = vp;
 		foffset = 0;
 		vput(tvp);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 3d4567b6ab1e..a53df50c06bd 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -806,9 +806,12 @@ file_v_lock(struct file *fp, short lock_bit, short lock_wait_bit)
 
 	flagsp = &fp->f_vflags;
 	state = atomic_load_16(flagsp);
-	if ((state & lock_bit) == 0 &&
-	    atomic_cmpset_acq_16(flagsp, state, state | lock_bit))
-		return;
+	for (;;) {
+		if ((state & lock_bit) != 0)
+			break;
+		if (atomic_fcmpset_acq_16(flagsp, &state, state | lock_bit))
+			return;
+	}
 
 	sleepq_lock(flagsp);
 	state = atomic_load_16(flagsp);
@@ -842,9 +845,12 @@ file_v_unlock(struct file *fp, short lock_bit, short lock_wait_bit)
 
 	flagsp = &fp->f_vflags;
 	state = atomic_load_16(flagsp);
-	if ((state & lock_wait_bit) == 0 &&
-	    atomic_cmpset_rel_16(flagsp, state, state & ~lock_bit))
-		return;
+	for (;;) {
+		if ((state & lock_wait_bit) != 0)
+			break;
+		if (atomic_fcmpset_rel_16(flagsp, &state, state & ~lock_bit))
+			return;
+	}
 
 	sleepq_lock(flagsp);
 	MPASS((*flagsp & lock_bit) != 0);
@@ -864,10 +870,6 @@ foffset_lock(struct file *fp, int flags)
 		    FILE_V_FOFFSET_LOCK_WAITING);
 	}
 
-	/*
-	 * According to McKusick the vn lock was protecting f_offset here.
-	 * It is now protected by the FOFFSET_LOCKED flag.
-	 */
 	return (atomic_load_long(&fp->f_offset));
 }
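
Note on the subr_power.c change: power_pm_register() now takes a fourth argument through which the platform driver declares which sleep types it can service; the generic layer copies the array, derives default stypes from it (preferring suspend-to-RAM over suspend-to-idle), and rejects sysctl selection of an unsupported type with EOPNOTSUPP. A minimal sketch of a registrant under the new contract follows. The callback body, its exact signature, and the softc argument are illustrative assumptions; only the pm_supported parameter comes from this diff.

/*
 * Hypothetical registrant for the extended power_pm_register().
 * POWER_PM_TYPE_ACPI comes from <sys/power.h>; example_pm_fn() is a
 * placeholder using an assumed power_pm_fn_t shape.
 */
#include <sys/types.h>
#include <sys/power.h>

static void
example_pm_fn(void *arg, int type)
{
	/* Service the standby/suspend/hibernate request here. */
}

static void
example_pm_attach(void *softc)
{
	bool supported[POWER_STYPE_COUNT] = { false };

	supported[POWER_STYPE_STANDBY] = true;		/* e.g. ACPI S1 */
	supported[POWER_STYPE_SUSPEND_TO_MEM] = true;	/* e.g. ACPI S3 */
	supported[POWER_STYPE_HIBERNATE] = true;	/* e.g. ACPI S4 */

	/*
	 * The array is copied into power_pm_supported[]; the defaults
	 * become standby, suspend-to-mem (else suspend-to-idle) and
	 * hibernate, and kern.power.supported_stype lists the set.
	 */
	(void)power_pm_register(POWER_PM_TYPE_ACPI, example_pm_fn, softc,
	    supported);
}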
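
Note on the vfs_cache.c change: vn_fullpath() becomes a thin wrapper over vn_fullpath_up_to_pwd_vnode(), parameterized by an accessor that returns either pwd_rdir or pwd_jdir, and the new vn_fullpath_jail() resolves a path up to the jail root so that later chroot calls do not affect the result. A hedged sketch of an in-kernel consumer, following the usual retbuf/freebuf convention of vn_fullpath(); the consumer function name is hypothetical.

/*
 * Hypothetical consumer of the vn_fullpath_jail() added above.  As
 * with vn_fullpath(), on success *retbuf points into the allocation
 * returned via *freebuf, which the caller frees with M_TEMP;
 * free(NULL, ...) is a no-op.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vnode.h>

static void
example_print_jail_path(struct vnode *vp)
{
	char *fullpath, *freepath;

	fullpath = freepath = NULL;
	if (vn_fullpath_jail(vp, &fullpath, &freepath) == 0)
		printf("path from jail root: %s\n", fullpath);
	free(freepath, M_TEMP);
}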