diff options
author | Ka Ho Ng <khng@FreeBSD.org> | 2021-08-04 19:20:37 +0000 |
---|---|---|
committer | Ka Ho Ng <khng@FreeBSD.org> | 2021-08-05 14:52:26 +0000 |
commit | de2e152959668756333db8a502a3d17a19dac393 (patch) | |
tree | ff67c9360b90c0f33b117e29a8c296ddd14bae36 | |
parent | 98215005b747fef67f44794ca64abd473b98bade (diff) | |
download | src-de2e15295966.tar.gz src-de2e15295966.zip |
Add vnode_pager_purge_range(9) KPI
This KPI is created in addition to the existing vnode_pager_setsize(9)
KPI. It is intended for file systems that are able to turn a range of a
file into a sparse range, an operation also known as hole-punching.
Sponsored by: The FreeBSD Foundation
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D27194
-rw-r--r-- | share/man/man9/Makefile | 1 | ||||
-rw-r--r-- | share/man/man9/vnode_pager_purge_range.9 | 85 | ||||
-rw-r--r-- | sys/vm/vm_extern.h | 1 | ||||
-rw-r--r-- | sys/vm/vnode_pager.c | 140 |
4 files changed, 194 insertions, 33 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index a335f53b27f3..d0012301d889 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -409,6 +409,7 @@ MAN= accept_filter.9 \ vnet.9 \ vnode.9 \ vnode_pager_setsize.9 \ + vnode_pager_purge_range.9 \ VOP_ACCESS.9 \ VOP_ACLCHECK.9 \ VOP_ADVISE.9 \ diff --git a/share/man/man9/vnode_pager_purge_range.9 b/share/man/man9/vnode_pager_purge_range.9 new file mode 100644 index 000000000000..16a240c2a34b --- /dev/null +++ b/share/man/man9/vnode_pager_purge_range.9 @@ -0,0 +1,85 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD +.\" +.\" Copyright (c) 2021 The FreeBSD Foundation +.\" +.\" This manual page was written by Ka Ho Ng under sponsorship from +.\" the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd August 2, 2021 +.Dt VNODE_PAGER_PURGE_RANGE 9 +.Os +.Sh NAME +.Nm vnode_pager_purge_range +.Nd "invalidate the cached contents within the given byte range" +.Sh SYNOPSIS +.In sys/param.h +.In vm/vm.h +.In vm/vm_extern.h +.Ft void +.Fo vnode_pager_purge_range +.Fa "struct vnode *vp" +.Fa "vm_ooffset_t start" +.Fa "vm_ooffset_t end" +.Fc +.Sh DESCRIPTION +.Nm +invalidates the cached contents within the given byte range from the +specified vnode +.Fa vp . +The range to be purged is +.Eo [ +.Fa start , end +.Ec ) . +If the +.Fa end +parameter is the value zero, the affected range starts from +.Fa start +continues to the end of the object. +Pages within the specified range will be removed from the object's queue. +If +.Fa start +or +.Fa end +is not aligned to a page boundary, the invalidated part of the page is zeroed. +This function only cleans the resident pages in the affected region, it is up to +the callers to ensure reading the backing store gets back zeroes. +.Pp +In case the vnode +.Fa vp +does not have a VM object allocated, the effect of calling this function is a +no-op. +.Sh LOCKS +The vnode must be locked on entry and will still be locked on exit. +.Sh SEE ALSO +.Xr vnode 9 +.Sh HISTORY +The +.Nm +manual page first appeared in +.Fx 14 . +.Sh AUTHORS +This +manual page was written by +.An Ka Ho Ng Aq Mt khng@FreeBSD.org . 
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index acdb361d3262..ed365bd41689 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -120,6 +120,7 @@ void vmspace_free(struct vmspace *); void vmspace_exitfree(struct proc *); void vmspace_switch_aio(struct vmspace *); void vnode_pager_setsize(struct vnode *, vm_ooffset_t); +void vnode_pager_purge_range(struct vnode *, vm_ooffset_t, vm_ooffset_t); int vslock(void *, size_t); void vsunlock(void *, size_t); struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index d167fcc555fb..4330c17c2033 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -428,6 +428,53 @@ vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, } /* + * Internal routine clearing partial-page content + */ +static void +vnode_pager_subpage_purge(struct vm_page *m, int base, int end) +{ + int size; + + KASSERT(end > base && end <= PAGE_SIZE, + ("%s: start %d end %d", __func__, base, end)); + size = end - base; + + /* + * Clear out partial-page garbage in case + * the page has been mapped. + */ + pmap_zero_page_area(m, base, size); + + /* + * Update the valid bits to reflect the blocks + * that have been zeroed. Some of these valid + * bits may have already been set. + */ + vm_page_set_valid_range(m, base, size); + + /* + * Round up "base" to the next block boundary so + * that the dirty bit for a partially zeroed + * block is not cleared. + */ + base = roundup2(base, DEV_BSIZE); + end = rounddown2(end, DEV_BSIZE); + + if (end > base) { + /* + * Clear out partial-page dirty bits. + * + * note that we do not clear out the + * valid bits. This would prevent + * bogus_page replacement from working + * properly. + */ + vm_page_clear_dirty(m, base, end - base); + } + +} + +/* * Lets the VM system know about a change in size for a file. 
* We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. @@ -489,39 +536,9 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) m = vm_page_grab(object, OFF_TO_IDX(nsize), VM_ALLOC_NOCREAT); if (m == NULL) goto out; - if (!vm_page_none_valid(m)) { - int base = (int)nsize & PAGE_MASK; - int size = PAGE_SIZE - base; - - /* - * Clear out partial-page garbage in case - * the page has been mapped. - */ - pmap_zero_page_area(m, base, size); - - /* - * Update the valid bits to reflect the blocks that - * have been zeroed. Some of these valid bits may - * have already been set. - */ - vm_page_set_valid_range(m, base, size); - - /* - * Round "base" to the next block boundary so that the - * dirty bit for a partially zeroed block is not - * cleared. - */ - base = roundup2(base, DEV_BSIZE); - - /* - * Clear out partial-page dirty bits. - * - * note that we do not clear out the valid - * bits. This would prevent bogus_page - * replacement from working properly. - */ - vm_page_clear_dirty(m, base, PAGE_SIZE - base); - } + if (!vm_page_none_valid(m)) + vnode_pager_subpage_purge(m, (int)nsize & PAGE_MASK, + PAGE_SIZE); vm_page_xunbusy(m); } out: @@ -535,6 +552,63 @@ out: } /* + * Lets the VM system know about the purged range for a file. We toss away any + * cached pages in the associated object that are affected by the purge + * operation. Partial-page area not aligned to page boundaries will be zeroed + * and the dirty blocks in DEV_BSIZE unit within a page will not be flushed. + */ +void +vnode_pager_purge_range(struct vnode *vp, vm_ooffset_t start, vm_ooffset_t end) +{ + struct vm_page *m; + struct vm_object *object; + vm_pindex_t pi, pistart, piend; + bool same_page; + int base, pend; + + ASSERT_VOP_LOCKED(vp, "vnode_pager_purge_range"); + + object = vp->v_object; + pi = start + PAGE_MASK < start ? OBJ_MAX_SIZE : + OFF_TO_IDX(start + PAGE_MASK); + pistart = OFF_TO_IDX(start); + piend = end == 0 ? 
OBJ_MAX_SIZE : OFF_TO_IDX(end); + same_page = pistart == piend; + if ((end != 0 && end <= start) || object == NULL) + return; + + VM_OBJECT_WLOCK(object); + + if (pi < piend) + vm_object_page_remove(object, pi, piend, 0); + + if ((start & PAGE_MASK) != 0) { + base = (int)start & PAGE_MASK; + pend = same_page ? (int)end & PAGE_MASK : PAGE_SIZE; + m = vm_page_grab(object, pistart, VM_ALLOC_NOCREAT); + if (m != NULL) { + if (!vm_page_none_valid(m)) + vnode_pager_subpage_purge(m, base, pend); + vm_page_xunbusy(m); + } + if (same_page) + goto out; + } + if ((end & PAGE_MASK) != 0) { + base = same_page ? (int)start & PAGE_MASK : 0 ; + pend = (int)end & PAGE_MASK; + m = vm_page_grab(object, piend, VM_ALLOC_NOCREAT); + if (m != NULL) { + if (!vm_page_none_valid(m)) + vnode_pager_subpage_purge(m, base, pend); + vm_page_xunbusy(m); + } + } +out: + VM_OBJECT_WUNLOCK(object); +} + +/* * calculate the linear (byte) disk address of specified virtual * file address */ |