aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ka Ho Ng <khng@FreeBSD.org> 2021-08-04 19:20:37 +0000
committer Ka Ho Ng <khng@FreeBSD.org> 2021-08-05 14:52:26 +0000
commit de2e152959668756333db8a502a3d17a19dac393 (patch)
tree ff67c9360b90c0f33b117e29a8c296ddd14bae36
parent 98215005b747fef67f44794ca64abd473b98bade (diff)
downloadsrc-de2e15295966.tar.gz
src-de2e15295966.zip
Add vnode_pager_purge_range(9) KPI
This KPI is created in addition to the existing vnode_pager_setsize(9) KPI. The KPI is intended for file systems that are able to turn a range of a file into a sparse range, also known as hole-punching. Sponsored by: The FreeBSD Foundation Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D27194
-rw-r--r-- share/man/man9/Makefile 1
-rw-r--r-- share/man/man9/vnode_pager_purge_range.9 85
-rw-r--r-- sys/vm/vm_extern.h 1
-rw-r--r-- sys/vm/vnode_pager.c 140
4 files changed, 194 insertions, 33 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index a335f53b27f3..d0012301d889 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -409,6 +409,7 @@ MAN= accept_filter.9 \
vnet.9 \
vnode.9 \
vnode_pager_setsize.9 \
+ vnode_pager_purge_range.9 \
VOP_ACCESS.9 \
VOP_ACLCHECK.9 \
VOP_ADVISE.9 \
diff --git a/share/man/man9/vnode_pager_purge_range.9 b/share/man/man9/vnode_pager_purge_range.9
new file mode 100644
index 000000000000..16a240c2a34b
--- /dev/null
+++ b/share/man/man9/vnode_pager_purge_range.9
@@ -0,0 +1,85 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2021 The FreeBSD Foundation
+.\"
+.\" This manual page was written by Ka Ho Ng under sponsorship from
+.\" the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd August 2, 2021
+.Dt VNODE_PAGER_PURGE_RANGE 9
+.Os
+.Sh NAME
+.Nm vnode_pager_purge_range
+.Nd "invalidate the cached contents within the given byte range"
+.Sh SYNOPSIS
+.In sys/param.h
+.In vm/vm.h
+.In vm/vm_extern.h
+.Ft void
+.Fo vnode_pager_purge_range
+.Fa "struct vnode *vp"
+.Fa "vm_ooffset_t start"
+.Fa "vm_ooffset_t end"
+.Fc
+.Sh DESCRIPTION
+.Nm
+invalidates the cached contents within the given byte range from the
+specified vnode
+.Fa vp .
+The range to be purged is
+.Eo [
+.Fa start , end
+.Ec ) .
+If the
+.Fa end
+parameter is the value zero, the affected range starts from
+.Fa start
+and continues to the end of the object.
+Pages within the specified range will be removed from the object's queue.
+If
+.Fa start
+or
+.Fa end
+is not aligned to a page boundary, the invalidated part of the page is zeroed.
+This function only cleans the resident pages in the affected region; it is up to
+the callers to ensure that reading the backing store returns zeroes.
+.Pp
+In case the vnode
+.Fa vp
+does not have a VM object allocated, the effect of calling this function is a
+no-op.
+.Sh LOCKS
+The vnode must be locked on entry and will still be locked on exit.
+.Sh SEE ALSO
+.Xr vnode 9
+.Sh HISTORY
+The
+.Nm
+manual page first appeared in
+.Fx 14 .
+.Sh AUTHORS
+This
+manual page was written by
+.An Ka Ho Ng Aq Mt khng@FreeBSD.org .
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index acdb361d3262..ed365bd41689 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -120,6 +120,7 @@ void vmspace_free(struct vmspace *);
void vmspace_exitfree(struct proc *);
void vmspace_switch_aio(struct vmspace *);
void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
+void vnode_pager_purge_range(struct vnode *, vm_ooffset_t, vm_ooffset_t);
int vslock(void *, size_t);
void vsunlock(void *, size_t);
struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index d167fcc555fb..4330c17c2033 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -428,6 +428,53 @@ vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
}
/*
+ * Internal routine clearing the partial-page byte range [base, end) of page m
+ */
+static void
+vnode_pager_subpage_purge(struct vm_page *m, int base, int end)
+{
+ int size;
+
+ KASSERT(end > base && end <= PAGE_SIZE,
+ ("%s: start %d end %d", __func__, base, end));
+ size = end - base; /* byte length of [base, end) */
+
+ /*
+ * Clear out partial-page garbage in case
+ * the page has been mapped.
+ */
+ pmap_zero_page_area(m, base, size);
+
+ /*
+ * Update the valid bits to reflect the blocks
+ * that have been zeroed. Some of these valid
+ * bits may have already been set.
+ */
+ vm_page_set_valid_range(m, base, size);
+
+ /*
+ * Round "base" up and "end" down to DEV_BSIZE block
+ * boundaries so that the dirty bit for a partially
+ * zeroed block is not cleared.
+ */
+ base = roundup2(base, DEV_BSIZE);
+ end = rounddown2(end, DEV_BSIZE);
+
+ if (end > base) { /* at least one block is wholly covered */
+ /*
+ * Clear out partial-page dirty bits.
+ *
+ * Note that we do not clear out the
+ * valid bits. This would prevent
+ * bogus_page replacement from working
+ * properly.
+ */
+ vm_page_clear_dirty(m, base, end - base);
+ }
+
+}
+
+/*
* Lets the VM system know about a change in size for a file.
* We adjust our own internal size and flush any cached pages in
* the associated object that are affected by the size change.
@@ -489,39 +536,9 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
m = vm_page_grab(object, OFF_TO_IDX(nsize), VM_ALLOC_NOCREAT);
if (m == NULL)
goto out;
- if (!vm_page_none_valid(m)) {
- int base = (int)nsize & PAGE_MASK;
- int size = PAGE_SIZE - base;
-
- /*
- * Clear out partial-page garbage in case
- * the page has been mapped.
- */
- pmap_zero_page_area(m, base, size);
-
- /*
- * Update the valid bits to reflect the blocks that
- * have been zeroed. Some of these valid bits may
- * have already been set.
- */
- vm_page_set_valid_range(m, base, size);
-
- /*
- * Round "base" to the next block boundary so that the
- * dirty bit for a partially zeroed block is not
- * cleared.
- */
- base = roundup2(base, DEV_BSIZE);
-
- /*
- * Clear out partial-page dirty bits.
- *
- * note that we do not clear out the valid
- * bits. This would prevent bogus_page
- * replacement from working properly.
- */
- vm_page_clear_dirty(m, base, PAGE_SIZE - base);
- }
+ if (!vm_page_none_valid(m))
+ vnode_pager_subpage_purge(m, (int)nsize & PAGE_MASK,
+ PAGE_SIZE);
vm_page_xunbusy(m);
}
out:
@@ -535,6 +552,63 @@ out:
}
/*
+ * Lets the VM system know about the purged range [start, end) for a file; an
+ * end of 0 means "up to OBJ_MAX_SIZE". Cached pages wholly inside the range
+ * are removed from the associated object. Partial pages at either edge are
+ * zeroed in place and only their fully covered DEV_BSIZE blocks lose dirty bits.
+ */
+void
+vnode_pager_purge_range(struct vnode *vp, vm_ooffset_t start, vm_ooffset_t end)
+{
+ struct vm_page *m;
+ struct vm_object *object;
+ vm_pindex_t pi, pistart, piend;
+ bool same_page;
+ int base, pend;
+
+ ASSERT_VOP_LOCKED(vp, "vnode_pager_purge_range");
+
+ object = vp->v_object;
+ pi = start + PAGE_MASK < start ? OBJ_MAX_SIZE : /* rounding up wrapped */
+ OFF_TO_IDX(start + PAGE_MASK); /* index of start rounded up to a page */
+ pistart = OFF_TO_IDX(start); /* index of the page containing start */
+ piend = end == 0 ? OBJ_MAX_SIZE : OFF_TO_IDX(end); /* end == 0: no upper bound */
+ same_page = pistart == piend; /* start and end fall in the same page */
+ if ((end != 0 && end <= start) || object == NULL) /* empty range or no object */
+ return;
+
+ VM_OBJECT_WLOCK(object);
+
+ if (pi < piend) /* some page lies wholly inside the range */
+ vm_object_page_remove(object, pi, piend, 0);
+
+ if ((start & PAGE_MASK) != 0) { /* unaligned head of the range */
+ base = (int)start & PAGE_MASK;
+ pend = same_page ? (int)end & PAGE_MASK : PAGE_SIZE; /* stop at end or page end */
+ m = vm_page_grab(object, pistart, VM_ALLOC_NOCREAT);
+ if (m != NULL) {
+ if (!vm_page_none_valid(m))
+ vnode_pager_subpage_purge(m, base, pend);
+ vm_page_xunbusy(m);
+ }
+ if (same_page) /* the tail was covered by the head page above */
+ goto out;
+ }
+ if ((end & PAGE_MASK) != 0) { /* unaligned tail of the range */
+ base = same_page ? (int)start & PAGE_MASK : 0 ;
+ pend = (int)end & PAGE_MASK;
+ m = vm_page_grab(object, piend, VM_ALLOC_NOCREAT);
+ if (m != NULL) {
+ if (!vm_page_none_valid(m))
+ vnode_pager_subpage_purge(m, base, pend);
+ vm_page_xunbusy(m);
+ }
+ }
+out:
+ VM_OBJECT_WUNLOCK(object);
+}
+
+/*
* calculate the linear (byte) disk address of specified virtual
* file address
*/