diff options
Diffstat (limited to 'sys/ufs/ffs/ffs_alloc.c')
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 112 |
1 files changed, 85 insertions, 27 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 1e1b4f1350a1..3101f618b059 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -110,8 +110,6 @@ static ufs2_daddr_t static void ffs_blkfree_cg(struct ufsmount *, struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, struct workhead *); -static void ffs_blkfree_trim_completed(struct buf *); -static void ffs_blkfree_trim_task(void *ctx, int pending __unused); #ifdef INVARIANTS static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); #endif @@ -395,8 +393,23 @@ retry: if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) + /* + * The usual case is that a smaller fragment that + * was just allocated has been replaced with a bigger + * fragment or a full-size block. If it is marked as + * B_DELWRI, the current contents have not been written + * to disk. It is possible that the block was written + * earlier, but very uncommon. If the block has never + * been written, there is no need to send a BIO_DELETE + * for it when it is freed. The gain from avoiding the + * TRIMs for the common case of unwritten blocks far + * exceeds the cost of the write amplification for the + * uncommon case of failing to send a TRIM for a block + * that had been written. + */ ffs_blkfree(ump, fs, ump->um_devvp, bprev, (long)osize, - ip->i_number, vp->v_type, NULL); + ip->i_number, vp->v_type, NULL, + (bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON); delta = btodb(nsize - osize); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) @@ -521,7 +534,7 @@ ffs_reallocblks_ufs1(ap) struct fs *fs; struct inode *ip; struct vnode *vp; - struct buf *sbp, *ebp; + struct buf *sbp, *ebp, *bp; ufs1_daddr_t *bap, *sbap, *ebap; struct cluster_save *buflist; struct ufsmount *ump; @@ -730,14 +743,29 @@ ffs_reallocblks_ufs1(ap) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + bp = buflist->bs_children[i]; if (!DOINGSOFTDEP(vp)) + /* + * The usual case is that a set of N-contiguous blocks + * that was just allocated has been replaced with a + * set of N+1-contiguous blocks. If they are marked as + * B_DELWRI, the current contents have not been written + * to disk. It is possible that the blocks were written + * earlier, but very uncommon. If the blocks have never + * been written, there is no need to send a BIO_DELETE + * for them when they are freed. The gain from avoiding + * the TRIMs for the common case of unwritten blocks + * far exceeds the cost of the write amplification for + * the uncommon case of failing to send a TRIM for the + * blocks that had been written. + */ ffs_blkfree(ump, fs, ump->um_devvp, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, vp->v_type, NULL); - buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + dbtofsb(fs, bp->b_blkno), + fs->fs_bsize, ip->i_number, vp->v_type, NULL, + (bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON); + bp->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 3"); #endif #ifdef DEBUG @@ -771,7 +799,7 @@ ffs_reallocblks_ufs2(ap) struct fs *fs; struct inode *ip; struct vnode *vp; - struct buf *sbp, *ebp; + struct buf *sbp, *ebp, *bp; ufs2_daddr_t *bap, *sbap, *ebap; struct cluster_save *buflist; struct ufsmount *ump; @@ -978,14 +1006,29 @@ ffs_reallocblks_ufs2(ap) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + bp = buflist->bs_children[i]; if (!DOINGSOFTDEP(vp)) + /* + * The usual case is that a set of N-contiguous blocks + * that was just allocated has been replaced with a + * set of N+1-contiguous blocks. If they are marked as + * B_DELWRI, the current contents have not been written + * to disk. It is possible that the blocks were written + * earlier, but very uncommon. If the blocks have never + * been written, there is no need to send a BIO_DELETE + * for them when they are freed. The gain from avoiding + * the TRIMs for the common case of unwritten blocks + * far exceeds the cost of the write amplification for + * the uncommon case of failing to send a TRIM for the + * blocks that had been written. + */ ffs_blkfree(ump, fs, ump->um_devvp, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, vp->v_type, NULL); - buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + dbtofsb(fs, bp->b_blkno), + fs->fs_bsize, ip->i_number, vp->v_type, NULL, + (bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON); + bp->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 3"); #endif #ifdef DEBUG @@ -1823,8 +1866,7 @@ gotit: /* XXX Fixme. */ UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, - size, 0); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, size, 0); UFS_LOCK(ump); return (blkno); } @@ -2254,6 +2296,17 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd) bdwrite(bp); } +/* + * Structures and routines associated with trim management. + */ +MALLOC_DEFINE(M_TRIM, "ufs_trim", "UFS trim structures"); + +#define TRIMLIST_HASH(ump, inum) \ + (&(ump)->um_trimhash[(inum) & (ump)->um_trimlisthashsize]) + +static void ffs_blkfree_trim_completed(struct buf *); +static void ffs_blkfree_trim_task(void *ctx, int pending __unused); + struct ffs_blkfree_trim_params { struct task task; struct ufsmount *ump; @@ -2277,7 +2330,7 @@ ffs_blkfree_trim_task(ctx, pending) tp->inum, tp->pdephd); vn_finished_secondary_write(UFSTOVFS(tp->ump)); atomic_add_int(&tp->ump->um_trim_inflight, -1); - free(tp, M_TEMP); + free(tp, M_TRIM); } static void @@ -2287,13 +2340,13 @@ ffs_blkfree_trim_completed(bp) struct ffs_blkfree_trim_params *tp; tp = bp->b_fsprivate1; - free(bp, M_TEMP); + free(bp, M_TRIM); TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp); taskqueue_enqueue(tp->ump->um_trim_tq, &tp->task); } void -ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) +ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd, trimtype) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; @@ -2302,6 +2355,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) ino_t inum; enum vtype vtype; struct workhead *dephd; + int trimtype; { struct mount *mp; struct buf *bp; @@ -2319,10 +2373,11 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) return; } /* - * Nothing to delay if TRIM is disabled, or the operation is - * performed on the snapshot. + * Nothing to delay if TRIM is not required for this block or TRIM + * is disabled or the operation is performed on a snapshot. */ - if (((ump->um_flags) & UM_CANDELETE) == 0 || devvp->v_type == VREG) { + if (trimtype == NOTRIM || ((ump->um_flags & UM_CANDELETE) == 0) || + devvp->v_type == VREG) { ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd); return; } @@ -2334,7 +2389,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) * and write some new data into it. */ atomic_add_int(&ump->um_trim_inflight, 1); - tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK); + tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TRIM, M_WAITOK); tp->ump = ump; tp->devvp = devvp; tp->bno = bno; @@ -2347,7 +2402,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) } else tp->pdephd = NULL; - bp = malloc(sizeof(*bp), M_TEMP, M_WAITOK | M_ZERO); + bp = malloc(sizeof(*bp), M_TRIM, M_WAITOK | M_ZERO); bp->b_iocmd = BIO_DELETE; bp->b_iooffset = dbtob(fsbtodb(fs, bno)); bp->b_iodone = ffs_blkfree_trim_completed; @@ -2824,7 +2879,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) long blkcnt, blksize; struct file *fp, *vfp; cap_rights_t rights; - int filetype, error; + int filetype, trimtype, error; static struct fileops *origops, bufferedops; if (req->newlen > sizeof cmd) @@ -2956,14 +3011,17 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) blkno = cmd.value; blkcnt = cmd.size; blksize = fs->fs_frag - (blkno % fs->fs_frag); + trimtype = (blksize < blkcnt) ? STARTFREE : SINGLETON; while (blkcnt > 0) { if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, UFS_ROOTINO, VDIR, NULL); + blksize * fs->fs_fsize, UFS_ROOTINO, + VDIR, NULL, trimtype); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; + trimtype = (blksize < blkcnt) ? CONTINUEFREE : ENDFREE; } break; |