aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Makefile1
-rw-r--r--lib/libstats/Makefile14
-rw-r--r--share/man/man3/Makefile23
-rw-r--r--share/man/man3/arb.314
-rw-r--r--share/man/man3/stats.3962
-rw-r--r--share/mk/bsd.libnames.mk1
-rw-r--r--share/mk/src.libnames.mk2
-rw-r--r--share/mk/src.opts.mk1
-rw-r--r--sys/amd64/conf/NOTES2
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/options1
-rw-r--r--sys/kern/subr_stats.c3912
-rw-r--r--sys/sys/arb.h3
-rw-r--r--sys/sys/stats.h1252
-rw-r--r--tools/build/options/WITHOUT_STATS4
-rw-r--r--tools/build/options/WITH_STATS4
16 files changed, 6194 insertions, 3 deletions
diff --git a/lib/Makefile b/lib/Makefile
index d5ee01327e8c..17ae8d56d0d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -152,6 +152,7 @@ SUBDIR.${MK_GSSAPI}+= libgssapi librpcsec_gss
SUBDIR.${MK_ICONV}+= libiconv_modules
SUBDIR.${MK_KERBEROS_SUPPORT}+= libcom_err
SUBDIR.${MK_LDNS}+= libldns
+SUBDIR.${MK_STATS}+= libstats
# The libraries under libclang_rt can only be built by clang, and only make
# sense to build when clang is enabled at all. Furthermore, they can only be
diff --git a/lib/libstats/Makefile b/lib/libstats/Makefile
new file mode 100644
index 000000000000..da7ec10c2b0b
--- /dev/null
+++ b/lib/libstats/Makefile
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+LIB= stats
+SHLIBDIR?= /lib
+SHLIB_MAJOR= 0
+SRCS= subr_stats.c
+
+# To debug, comment WITHOUT_ASSERT_DEBUG= and uncomment CFLAGS:=
+WITHOUT_ASSERT_DEBUG=
+#CFLAGS:=${CFLAGS:C/-O[0-9]/-O0 -g3/} -DDIAGNOSTIC
+
+.PATH: ${.CURDIR}/../../sys/kern
+
+.include <bsd.lib.mk>
diff --git a/share/man/man3/Makefile b/share/man/man3/Makefile
index d114496cd734..02243e3b27ec 100644
--- a/share/man/man3/Makefile
+++ b/share/man/man3/Makefile
@@ -27,6 +27,7 @@ MAN= arb.3 \
queue.3 \
sigevent.3 \
siginfo.3 \
+ stats.3 \
stdarg.3 \
sysexits.3 \
tgmath.3 \
@@ -67,6 +68,7 @@ MLINKS= arb.3 ARB8_ENTRY.3 \
arb.3 ARB_PREV.3 \
arb.3 ARB_REINSERT.3 \
arb.3 ARB_REMOVE.3 \
+ arb.3 ARB_RESET_TREE.3 \
arb.3 ARB_RIGHT.3 \
arb.3 ARB_RIGHTIDX.3 \
arb.3 ARB_ROOT.3
@@ -269,6 +271,27 @@ MLINKS+= queue.3 LIST_CLASS_ENTRY.3 \
queue.3 TAILQ_PREV.3 \
queue.3 TAILQ_REMOVE.3 \
queue.3 TAILQ_SWAP.3
+MLINKS+= stats.3 stats_tpl_alloc.3 \
+ stats.3 stats_tpl_fetch_allocid.3 \
+ stats.3 stats_tpl_fetch.3 \
+ stats.3 stats_tpl_id2name.3 \
+ stats.3 stats_tpl_sample_rates.3 \
+ stats.3 stats_tpl_sample_rollthedice.3 \
+ stats.3 STATS_VSS_SUM.3 \
+ stats.3 STATS_VSS_MAX.3 \
+ stats.3 STATS_VSS_MIN.3 \
+ stats.3 STATS_VSS_CRHIST32_LIN.3 \
+ stats.3 STATS_VSS_CRHIST64_LIN.3 \
+ stats.3 stats_tpl_add_voistats.3 \
+ stats.3 stats_blob_alloc.3 \
+ stats.3 stats_blob_init.3 \
+ stats.3 stats_blob_clone.3 \
+ stats.3 stats_blob_destroy.3 \
+ stats.3 stats_voistat_fetch_dptr.3 \
+ stats.3 stats_blob_snapshot.3 \
+ stats.3 stats_blob_tostr.3 \
+ stats.3 stats_voistatdata_tostr.3 \
+ stats.3 stats_blob_visit.3
MLINKS+= stdarg.3 va_arg.3 \
stdarg.3 va_copy.3 \
stdarg.3 va_end.3 \
diff --git a/share/man/man3/arb.3 b/share/man/man3/arb.3
index ba81532b151a..4f144e9f80d9 100644
--- a/share/man/man3/arb.3
+++ b/share/man/man3/arb.3
@@ -30,7 +30,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 28, 2019
+.Dd October 2, 2019
.Dt ARB 3
.Os
.Sh NAME
@@ -94,7 +94,8 @@
.Nm ARB_INIT ,
.Nm ARB_INSERT ,
.Nm ARB_REMOVE ,
-.Nm ARB_REINSERT
+.Nm ARB_REINSERT ,
+.Nm ARB_RESET_TREE
.Nd "array-based red-black trees"
.Sh SYNOPSIS
.In sys/arb.h
@@ -179,6 +180,8 @@
.Fn ARB_REMOVE NAME "ARB_HEAD *head" "struct TYPE *elm"
.Ft "struct TYPE *"
.Fn ARB_REINSERT NAME "ARB_HEAD *head" "struct TYPE *elm"
+.Ft void
+.Fn ARB_RESET_TREE "ARB_HEAD *head" NAME "int<8|16|32>_t maxnodes"
.Sh DESCRIPTION
These macros define data structures for array-based red-black trees.
They use a single, continuous chunk of memory, and are useful
@@ -475,7 +478,7 @@ returns the pointer to the removed element otherwise they return
to indicate an error.
.Pp
The
-.Fn RB_REINSERT
+.Fn ARB_REINSERT
macro updates the position of the element
.Fa elm
in the tree.
@@ -485,6 +488,11 @@ is modified in a way that affects comparison, such as by modifying
a node's key.
This is a lower overhead alternative to removing the element
and reinserting it again.
+.Pp
+The
+.Fn ARB_RESET_TREE
+macro discards the tree topology.
+It does not modify embedded object values or the free list.
.Sh SEE ALSO
.Xr queue 3 ,
.Xr tree 3
diff --git a/share/man/man3/stats.3 b/share/man/man3/stats.3
new file mode 100644
index 000000000000..d1f0c3a1d240
--- /dev/null
+++ b/share/man/man3/stats.3
@@ -0,0 +1,962 @@
+.\"
+.\" Copyright (c) 2016-2018 Netflix, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions, and the following disclaimer,
+.\" without modification, immediately at the beginning of the file.
+.\" 2. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 7, 2019
+.Dt STATS 3
+.Os
+.Sh NAME
+.Nm stats
+.Nd statistics gathering
+.Sh LIBRARY
+.Lb libstats
+.Sh SYNOPSIS
+.In sys/arb.h
+.In sys/qmath.h
+.In sys/stats.h
+.Ss Stats Blob Template Management Functions
+.Ft int
+.Fo stats_tpl_alloc
+.Fa "const char *name"
+.Fa "uint32_t flags"
+.Fc
+.Ft int
+.Fo stats_tpl_fetch_allocid
+.Fa "const char *name"
+.Fa "uint32_t hash"
+.Fc
+.Ft int
+.Fo stats_tpl_fetch
+.Fa "int tpl_id"
+.Fa "struct statsblob_tpl **tpl"
+.Fc
+.Ft int
+.Fo stats_tpl_id2name
+.Fa "uint32_t tpl_id"
+.Fa "char *buf"
+.Fa "size_t len"
+.Fc
+.Ft int
+.Fo stats_tpl_sample_rates
+.Fa "SYSCTL_HANDLER_ARGS"
+.Fc
+.Ft int
+.Fo stats_tpl_sample_rollthedice
+.Fa "struct stats_tpl_sample_rate *rates"
+.Fa "int nrates"
+.Fa "void *seed_bytes"
+.Fa "size_t seed_len"
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_SUM
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_MAX
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_MIN
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_CRHIST<32|64>_LIN
+.Fa "lb"
+.Fa "ub"
+.Fa "stepinc"
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_CRHIST<32|64>_EXP
+.Fa "lb"
+.Fa "ub"
+.Fa "stepbase"
+.Fa "stepexp"
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo "STATS_VSS_CRHIST<32|64>_LINEXP"
+.Fa "lb"
+.Fa "ub"
+.Fa "nlinsteps"
+.Fa "stepbase"
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo "STATS_VSS_CRHIST<32|64>_USR"
+.Fa Sy "HBKTS" Ns Pq Sy "CRBKT" Ns ( Em "lb" ) , "..." Pc ,
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo "STATS_VSS_DRHIST<32|64>_USR"
+.Fa Sy "HBKTS" Ns Pq Sy "DRBKT" Ns ( Em "lb" , "ub" ) , "..." Pc ,
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo "STATS_VSS_DVHIST<32|64>_USR"
+.Fa Sy "HBKTS" Ns Pq Sy "DVBKT" Ns ( Em "val" ) , "..." Pc ,
+.Fa "vsdflags"
+.Fc
+.Ft struct voistatspec
+.Fo STATS_VSS_TDGSTCLUST<32|64>
+.Fa "nctroids"
+.Fa "prec"
+.Fc
+.Ft int
+.Fo stats_tpl_add_voistats
+.Fa "uint32_t tpl_id"
+.Fa "int32_t voi_id"
+.Fa "const char *voi_name"
+.Fa "enum vsd_dtype voi_dtype"
+.Fa "uint32_t nvss"
+.Fa "struct voistatspec *vss"
+.Fa "uint32_t flags"
+.Fc
+.Ss Stats Blob Data Gathering Functions
+.Ft int
+.Fo stats_voi_update_<abs|rel>_<dtype>
+.Fa "struct statsblob *sb"
+.Fa "int32_t voi_id"
+.Fa "<dtype> voival"
+.Fc
+.Ss Stats Blob Utility Functions
+.Ft struct statsblob *
+.Fo stats_blob_alloc
+.Fa "uint32_t tpl_id"
+.Fa "uint32_t flags"
+.Fc
+.Ft int
+.Fo stats_blob_init
+.Fa "struct statsblob *sb"
+.Fa "uint32_t tpl_id"
+.Fa "uint32_t flags"
+.Fc
+.Ft int
+.Fo stats_blob_clone
+.Fa "struct statsblob **dst"
+.Fa "size_t dstmaxsz"
+.Fa "struct statsblob *src"
+.Fa "uint32_t flags"
+.Fc
+.Ft void
+.Fo stats_blob_destroy
+.Fa "struct statsblob *sb"
+.Fc
+.Ft int
+.Fo stats_voistat_fetch_dptr
+.Fa "struct statsblob *sb"
+.Fa "int32_t voi_id"
+.Fa "enum voi_stype stype"
+.Fa "enum vsd_dtype *retdtype"
+.Fa "struct voistatdata **retvsd"
+.Fa "size_t *retvsdsz"
+.Fc
+.Ft int
+.Fo stats_voistat_fetch_<dtype>
+.Fa "struct statsblob *sb"
+.Fa "int32_t voi_id"
+.Fa "enum voi_stype stype"
+.Fa "<dtype> *ret"
+.Fc
+.Ft int
+.Fo stats_blob_snapshot
+.Fa "struct statsblob **dst"
+.Fa "size_t dstmaxsz"
+.Fa "struct statsblob *src"
+.Fa "uint32_t flags"
+.Fc
+.Ft int
+.Fo stats_blob_tostr
+.Fa "struct statsblob *sb"
+.Fa "struct sbuf *buf"
+.Fa "enum sb_str_fmt fmt"
+.Fa "uint32_t flags"
+.Fc
+.Ft int
+.Fo stats_voistatdata_tostr
+.Fa "const struct voistatdata *vsd"
+.Fa "enum vsd_dtype dtype"
+.Fa "enum sb_str_fmt fmt"
+.Fa "struct sbuf *buf"
+.Fa "int objdump"
+.Fc
+.Ft typedef int
+.Fn "\*(lp*stats_blob_visitcb_t\*(rp" "struct sb_visit *sbv" "void *usrctx"
+.Ft int
+.Fo stats_blob_visit
+.Fa "struct statsblob *sb"
+.Fa "stats_blob_visitcb_t func"
+.Fa "void *usrctx"
+.Fc
+.Sh DESCRIPTION
+The
+.Nm
+framework facilitates real-time kernel and user space statistics gathering.
+The framework is built around the
+.Dq statsblob ,
+an object embedded within a contiguous memory allocation that is mostly opaque
+to consumers and stores all required state.
+A
+.Dq statsblob
+object can itself be embedded within other objects either directly or indirectly
+using a pointer.
+.Pp
+Objects or subsystems for which statistics are to be gathered are initialized
+from a template
+.Dq statsblob ,
+which acts as the blueprint for an arbitrary set of
+Variables Of Interest (VOIs) and their associated statistics.
+Each template defines a schema plus associated metadata, which are kept separate
+to minimize the memory footprint of blobs.
+.Pp
+Data gathering hook functions added at appropriate locations within the code
+base of interest feed VOI data into the framework for processing.
+.Pp
+Each
+.Dq statsblob ,
+consists of a
+.Vt struct statsblob
+header and opaque internal blob structure per the following diagram:
+.Bd -literal -offset indent
+---------------------------------------------------------
+| struct | uint8_t |
+| statsblob | opaque[] |
+---------------------------------------------------------
+.Ed
+.Pp
+The publicly visible 8-byte header is defined as:
+.Bd -literal -offset indent
+struct statsblob {
+ uint8_t abi;
+ uint8_t endian;
+ uint16_t flags;
+ uint16_t maxsz;
+ uint16_t cursz;
+ uint8_t opaque[];
+};
+.Ed
+.Pp
+.Va abi
+specifies which API version the blob's
+.Va opaque
+internals conform to
+.Pq Dv STATS_ABI_V1 is the only version currently defined .
+.Va endian
+specifies the endianness of the blob's fields
+.Po
+.Dv SB_LE
+for little endian,
+.Dv SB_BE
+for big endian, or
+.Dv SB_UE
+for unknown endianness
+.Pc .
+.Va cursz
+specifies the size of the blob, while
+.Va maxsz
+specifies the size of the underlying memory allocation in which the
+blob is embedded.
+Both
+.Va cursz
+and
+.Va maxsz
+default to units of bytes, unless a flag is set in
+.Va flags
+that dictates otherwise.
+.Pp
+Templates are constructed by associating arbitrary VOI IDs with a set of
+statistics, where each statistic is specified using a
+.Vt struct voistatspec
+per the definition below:
+.Bd -literal -offset indent
+struct voistatspec {
+ vss_hlpr_fn hlpr;
+ struct vss_hlpr_info *hlprinfo;
+ struct voistatdata *iv;
+ size_t vsdsz;
+ uint32_t flags;
+ enum vsd_dtype vs_dtype : 8;
+ enum voi_stype stype : 8;
+};
+.Ed
+.Pp
+It is generally expected that consumers will not work with
+.Vt struct voistatspec
+directly, and instead use the
+.Fn STATS_VSS_*
+helper macros.
+.Pp
+The
+.Nm
+framework offers the following statistics for association with VOIs:
+.Bl -tag -width ".Dv VS_STYPE_TDGST"
+.It Dv VS_STYPE_SUM
+The sum of VOI values.
+.It Dv VS_STYPE_MAX
+The maximum VOI value.
+.It Dv VS_STYPE_MIN
+The minimum VOI value.
+.It Dv VS_STYPE_HIST
+A static bucket histogram of VOI values, including a count of
+.Dq out-of-band/bucket Dc
+values which did not match any bucket.
+Histograms can be specified as
+.Dq Em C Ns ontinuous Em R Ns ange Dc
+.Pq CRHIST Pc ,
+.Dq Em D Ns iscrete Em R Ns ange Dc
+.Pq DRHIST Pc
+or
+.Dq Em D Ns iscrete Em V Ns alue Dc
+.Pq DVHIST Pc ,
+with 32 or 64 bit bucket counters, depending on the VOI semantics.
+.It Dv VS_STYPE_TDGST
+A dynamic bucket histogram of VOI values based on the t-digest method
+.Po refer to the t-digest paper in the
+.Sx SEE ALSO
+section below
+.Pc .
+.El
+.Pp
+A
+.Dq visitor software design pattern Ns
+-like scheme is employed to facilitate iterating over a blob's data without
+concern for the blob's structure.
+The data provided to visitor callback functions is encapsulated in
+.Vt struct sb_visit
+per the definition below:
+.Bd -literal -offset indent
+struct sb_visit {
+ struct voistatdata *vs_data;
+ uint32_t tplhash;
+ uint32_t flags;
+ int16_t voi_id;
+ int16_t vs_dsz;
+ enum vsd_dtype voi_dtype : 8;
+ enum vsd_dtype vs_dtype : 8;
+ int8_t vs_stype;
+ uint16_t vs_errs;
+};
+.Ed
+.Pp
+The
+.Fn stats_tpl_sample_rates
+and
+.Fn stats_tpl_sample_rollthedice
+functions utilize
+.Vt struct stats_tpl_sample_rate
+to encapsulate per-template sample rate information per the definition below:
+.Bd -literal -offset indent
+struct stats_tpl_sample_rate {
+ int32_t tpl_slot_id;
+ uint32_t tpl_sample_pct;
+};
+.Ed
+.Pp
+The
+.Va tpl_slot_id
+member holds the template's slot ID obtained from
+.Fn stats_tpl_alloc
+or
+.Fn stats_tpl_fetch_allocid .
+The
+.Va tpl_sample_pct
+member holds the template's sample rate as an integer percentage in the range
+[0,100].
+.Pp
+The
+.Vt stats_tpl_sr_cb_t
+conformant function pointer that is required as the
+.Fa arg1
+of
+.Fn stats_tpl_sample_rates
+is defined as:
+.Bd -literal -offset indent
+enum stats_tpl_sr_cb_action {
+ TPL_SR_UNLOCKED_GET,
+ TPL_SR_RLOCKED_GET,
+ TPL_SR_RUNLOCK,
+ TPL_SR_PUT
+};
+typedef int (*stats_tpl_sr_cb_t)(enum stats_tpl_sr_cb_action action,
+ struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
+.Ed
+.Pp
+It is required that a conformant function:
+.Bl -dash
+.It
+Return an appropriate
+.Xr errno 2
+on error, otherwise 0.
+.It
+When called with
+.Qq action == TPL_SR_*_GET ,
+return the subsystem's rates list ptr and count, locked or unlocked as
+requested.
+.It
+When called with
+.Qq action == TPL_SR_RUNLOCK ,
+unlock the subsystem's rates list ptr and count.
+Pair with a prior
+.Qq action == TPL_SR_RLOCKED_GET
+call.
+.It
+When called with
+.Qq action == TPL_SR_PUT ,
+update the subsystem's rates list ptr and count to the sysctl processed values
+and return the inactive list details in
+.Fa rates
+and
+.Fa nrates
+for garbage collection by
+.Fn stats_tpl_sample_rates .
+.El
+.Pp
+Where templates need to be referenced via textual means, for example via a MIB
+variable, the following string based template spec formats can be used:
+.Bl -enum
+.It
+.Qq <tplname> Qc Ns
+:<tplhash>
+.Ns , for example
+.Qq TCP_DEFAULT Qc Ns
+:1731235399
+.It
+.Qq <tplname> Qc
+.Ns , for example
+.Qq TCP_DEFAULT Qc
+.It
+:<tplhash>
+.Ns , for example
+:1731235399
+.El
+.Pp
+The first form is the normative spec format generated by the framework, while
+the second and third forms are convenience formats primarily for user input.
+The use of inverted commas around the template name is optional.
+.Ss MIB Variables
+The in-kernel
+.Nm
+framework exposes the following framework-specific variables in the
+.Va kern.stats
+branch of the
+.Xr sysctl 3
+MIB.
+.Bl -tag -width "templates"
+.It templates
+Read-only CSV list of registered templates in normative template spec form.
+.El
+.Ss Template Management Functions
+The
+.Fn stats_tpl_alloc
+function allocates a new template with the specified unique name and returns its
+runtime-stable template slot ID for use with other API functions.
+The
+.Fa flags
+argument is currently unused.
+.Pp
+The
+.Fn stats_tpl_fetch_allocid
+function returns the runtime-stable template slot ID of any registered template
+matching the specified name and hash.
+.Pp
+The
+.Fn stats_tpl_fetch
+function returns the pointer to the registered template object at the specified
+template slot ID.
+.Pp
+The
+.Fn stats_tpl_id2name
+function returns the name of the registered template object at the specified
+template slot ID.
+.Pp
+The
+.Fn stats_tpl_sample_rates
+function provides a generic handler for template sample rates management and
+reporting via
+.Xr sysctl 3
+MIB variables.
+Subsystems can use this function to create a subsystem-specific
+.Xr SYSCTL_PROC 9
+MIB variable that manages and reports subsystem-specific template sampling
+rates.
+Subsystems must supply a
+.Vt stats_tpl_sr_cb_t
+conformant function pointer as the sysctl's
+.Fa arg1 ,
+which is a callback used to interact with the subsystem's stats template sample
+rates list.
+Subsystems can optionally specify the sysctl's
+.Fa arg2
+as non-zero, which causes a zero-initialized allocation of arg2-sized contextual
+memory to be heap-allocated and passed in to all subsystem callbacks made during
+the operation of
+.Fn stats_tpl_sample_rates .
+.Pp
+The
+.Fn stats_tpl_sample_rollthedice
+function makes a weighted random template selection from the supplied array of
+template sampling rates.
+The cumulative percentage of all sampling rates should not exceed 100.
+If no seed is supplied, a PRNG is used to generate a true random number so that
+every selection is independent.
+If a seed is supplied, selection will be made randomly across different seeds, but
+deterministically given the same seed.
+.Pp
+The
+.Fn stats_tpl_add_voistats
+function is used to add a VOI and associated set of statistics to the registered
+template object at the specified template slot ID.
+The set of statistics is passed as an array of
+.Vt struct voistatspec
+which can be initialized using the
+.Fn STATS_VSS_*
+helper macros or manually for non-standard use cases.
+For static
+.Fa vss
+arrays, the
+.Fa nvss
+count of array elements can be determined by passing
+.Fa vss
+to the
+.Fn NVSS
+macro.
+The
+.Dv SB_VOI_RELUPDATE
+flag can be passed to configure the VOI for use with
+.Fn stats_voi_update_rel_<dtype> ,
+which entails maintaining an extra 8 bytes of state in the blob at each update.
+.Ss Data Gathering Functions
+The
+.Fn stats_voi_update_abs_<dtype>
+and
+.Fn stats_voi_update_rel_<dtype>
+functions both update all the statistics associated with the VOI identified by
+.Fa voi_id .
+The
+.Dq abs
+call uses
+.Fa voival
+as an absolute value, whereas the
+.Dq rel
+call uses
+.Fa voival
+as a value relative to that of the previous update function call, by adding it
+to the previous value and using the result for the update.
+Relative updates are only possible for VOIs that were added to the template with
+the
+.Dv SB_VOI_RELUPDATE
+flag specified to
+.Fn stats_tpl_add_voistats .
+.Ss Utility Functions
+The
+.Fn stats_blob_alloc
+function allocates and initializes a new blob based on the registered template
+object at the specified template slot ID.
+.Pp
+The
+.Fn stats_blob_init
+function initializes a new blob in an existing memory allocation based on the
+registered template object at the specified template slot ID.
+.Pp
+The
+.Fn stats_blob_clone
+function duplicates the
+.Fa src
+blob into
+.Fa dst ,
+leaving only the
+.Va maxsz
+field of
+.Fa dst
+untouched.
+The
+.Dv SB_CLONE_ALLOCDST
+flag can be passed to instruct the function to allocate a new blob of
+appropriate size into which to clone
+.Fa src ,
+storing the new pointer in
+.Fa *dst .
+The
+.Dv SB_CLONE_USRDSTNOFAULT
+or
+.Dv SB_CLONE_USRDST
+flags can be set to respectively signal that
+.Xr copyout_nofault 9
+or
+.Xr copyout 9
+should be used because
+.Fa *dst
+is a user space address.
+.Pp
+The
+.Fn stats_blob_snapshot
+function calls
+.Fn stats_blob_clone
+to obtain a copy of
+.Fa src
+and then performs any additional functions required to produce a coherent
+blob snapshot.
+The flags interpreted by
+.Fn stats_blob_clone
+also apply to
+.Fn stats_blob_snapshot .
+Additionally, the
+.Dv SB_CLONE_RSTSRC
+flag can be used to effect a reset of the
+.Fa src
+blob's statistics after a snapshot is successfully taken.
+.Pp
+The
+.Fn stats_blob_destroy
+function destroys a blob previously created with
+.Fn stats_blob_alloc ,
+.Fn stats_blob_clone
+or
+.Fn stats_blob_snapshot .
+.Pp
+The
+.Fn stats_blob_visit
+function allows the caller to iterate over the contents of a blob.
+The callback function
+.Fa func
+is called for every VOI and statistic in the blob, passing a
+.Vt struct sb_visit
+and the user context argument
+.Fa usrctx
+to the callback function.
+The
+.Fa sbv
+passed to the callback function may have one or more of the following flags set
+in the
+.Va flags
+struct member to provide useful metadata about the iteration:
+.Dv SB_IT_FIRST_CB ,
+.Dv SB_IT_LAST_CB ,
+.Dv SB_IT_FIRST_VOI ,
+.Dv SB_IT_LAST_VOI ,
+.Dv SB_IT_FIRST_VOISTAT ,
+.Dv SB_IT_LAST_VOISTAT ,
+.Dv SB_IT_NULLVOI
+and
+.Dv SB_IT_NULLVOISTAT .
+Returning a non-zero value from the callback function terminates the iteration.
+.Pp
+The
+.Fn stats_blob_tostr
+function renders a string representation of a blob into the
+.Xr sbuf 9
+.Fa buf .
+Currently supported render formats are
+.Dv SB_STRFMT_FREEFORM
+and
+.Dv SB_STRFMT_JSON .
+The
+.Dv SB_TOSTR_OBJDUMP
+flag can be passed to render version specific opaque implementation detail for
+debugging or string-to-binary blob reconstruction purposes.
+The
+.Dv SB_TOSTR_META
+flag can be passed to render template metadata into the string representation,
+using the blob's template hash to lookup the corresponding template.
+.Pp
+The
+.Fn stats_voistatdata_tostr
+function renders a string representation of an individual statistic's data into
+the
+.Xr sbuf 9
+.Fa buf .
+The same render formats supported by the
+.Fn stats_blob_tostr
+function can be specified, and the
+.Fa objdump
+boolean has the same meaning as the
+.Dv SB_TOSTR_OBJDUMP
+flag.
+.Pp
+The
+.Fn stats_voistat_fetch_dptr
+function returns an internal blob pointer to the specified
+.Fa stype
+statistic data for the VOI
+.Fa voi_id .
+The
+.Fn stats_voistat_fetch_<dtype>
+functions are convenience wrappers around
+.Fn stats_voistat_fetch_dptr
+to perform the extraction for simple data types.
+.Sh IMPLEMENTATION NOTES
+The following notes apply to STATS_ABI_V1 format statsblobs.
+.Ss Space-Time Complexity
+Blobs are laid out as three distinct memory regions following the header:
+.Bd -literal -offset indent
+------------------------------------------------------
+| struct | struct | struct | struct |
+| statsblobv1 | voi [] | voistat [] | voistatdata [] |
+------------------------------------------------------
+.Ed
+.Pp
+Blobs store VOI and statistic blob state
+.Po
+8 bytes for
+.Vt struct voi
+and 8 bytes for
+.Vt struct voistat
+respectively
+.Pc
+in sparse arrays, using the
+.Fa voi_id
+and
+.Vt enum voi_stype
+as array indices.
+This allows O(1) access to any voi/voistat pair in the blob, at the expense of
+8 bytes of wasted memory per vacant slot for templates which do not specify
+contiguously numbered VOIs and/or statistic types.
+Data storage for statistics is only allocated for non-vacant slot pairs.
+.Pp
+To provide a concrete example, a blob with the following specification:
+.Bl -dash
+.It
+Two VOIs; ID 0 and 2; added to the template in that order
+.It
+VOI 0 is of data type
+.Vt int64_t ,
+is configured with
+.Dv SB_VOI_RELUPDATE
+to enable support for relative updates using
+.Fn stats_voi_update_rel_<dtype> ,
+and has a
+.Dv VS_STYPE_MIN
+statistic associated with it.
+.It
+VOI 2 is of data type
+.Vt uint32_t
+with
+.Dv VS_STYPE_SUM
+and
+.Dv VS_STYPE_MAX
+statistics associated with it.
+.El
+.Pp
+would have the following memory layout:
+.Bd -literal
+--------------------------------------
+| header | struct statsblobv1, 32 bytes
+|------------------------------------|
+| voi[0] | struct voi, 8 bytes
+| voi[1] (vacant) | struct voi, 8 bytes
+| voi[2] | struct voi, 8 bytes
+|------------------------------------|
+| voi[2]voistat[VOISTATE] (vacant) | struct voistat, 8 bytes
+| voi[2]voistat[SUM] | struct voistat, 8 bytes
+| voi[2]voistat[MAX] | struct voistat, 8 bytes
+| voi[0]voistat[VOISTATE] | struct voistat, 8 bytes
+| voi[0]voistat[SUM] (vacant) | struct voistat, 8 bytes
+| voi[0]voistat[MAX] (vacant) | struct voistat, 8 bytes
+| voi[0]voistat[MIN] | struct voistat, 8 bytes
+|------------------------------------|
+| voi[2]voistat[SUM]voistatdata | struct voistatdata_int32, 4 bytes
+| voi[2]voistat[MAX]voistatdata | struct voistatdata_int32, 4 bytes
+| voi[0]voistat[VOISTATE]voistatdata | struct voistatdata_numeric, 8 bytes
+| voi[0]voistat[MIN]voistatdata | struct voistatdata_int64, 8 bytes
+--------------------------------------
+ TOTAL 136 bytes
+.Ed
+.Pp
+When rendered to string format using
+.Fn stats_blob_tostr ,
+the
+.Dv SB_STRFMT_FREEFORM
+.Fa fmt
+and the
+.Dv SB_TOSTR_OBJDUMP
+flag, the rendered output is:
+.Bd -literal
+struct statsblobv1@0x8016250a0, abi=1, endian=1, maxsz=136, cursz=136, \\
+ created=6294158585626144, lastrst=6294158585626144, flags=0x0000, \\
+ stats_off=56, statsdata_off=112, tplhash=2994056564
+ vois[0]: id=0, name="", flags=0x0001, dtype=INT_S64, voistatmaxid=3, \\
+ stats_off=80
+ vois[0]stat[0]: stype=VOISTATE, flags=0x0000, dtype=VOISTATE, \\
+ dsz=8, data_off=120
+ voistatdata: prev=0
+ vois[0]stat[1]: stype=-1
+ vois[0]stat[2]: stype=-1
+ vois[0]stat[3]: stype=MIN, flags=0x0000, dtype=INT_S64, \\
+ dsz=8, data_off=128
+ voistatdata: 9223372036854775807
+ vois[1]: id=-1
+ vois[2]: id=2, name="", flags=0x0000, dtype=INT_U32, voistatmaxid=2, \\
+ stats_off=56
+ vois[2]stat[0]: stype=-1
+ vois[2]stat[1]: stype=SUM, flags=0x0000, dtype=INT_U32, dsz=4, \\
+ data_off=112
+ voistatdata: 0
+ vois[2]stat[2]: stype=MAX, flags=0x0000, dtype=INT_U32, dsz=4, \\
+ data_off=116
+ voistatdata: 0
+.Ed
+.Pp
+Note: The
+.Qq \e
+present in the rendered output above indicates a manual line break inserted to
+keep the man page within 80 columns and is not part of the actual output.
+.Ss Locking
+The
+.Nm
+framework does not provide any concurrency protection at the individual blob
+level, instead requiring that consumers guarantee mutual exclusion when calling
+API functions that reference a non-template blob.
+.Pp
+The list of templates is protected with a
+.Xr rwlock 9
+in-kernel, and
+.Xr pthread 3
+rw lock in user space to support concurrency between template management and
+blob initialization operations.
+.Sh RETURN VALUES
+.Fn stats_tpl_alloc
+returns a runtime-stable template slot ID on success, or a negative errno on
+failure.
+-EINVAL is returned if any problems are detected with the arguments.
+-EEXIST is returned if an existing template is registered with the same name.
+-ENOMEM is returned if a required memory allocation fails.
+.Pp
+.Fn stats_tpl_fetch_allocid
+returns a runtime-stable template slot ID, or negative errno on failure.
+-ESRCH is returned if no registered template matches the specified name and/or
+hash.
+.Pp
+.Fn stats_tpl_fetch
+returns 0 on success, or ENOENT if an invalid
+.Fa tpl_id
+is specified.
+.Pp
+.Fn stats_tpl_id2name
+returns 0 on success, or an errno on failure.
+EOVERFLOW is returned if the length of
+.Fa buf
+specified by
+.Fa len
+is too short to hold the template's name.
+ENOENT is returned if an invalid
+.Fa tpl_id
+is specified.
+.Pp
+.Fn stats_tpl_sample_rollthedice
+returns a valid template slot ID selected from
+.Fa rates
+or -1 if a NULL selection was made, that is, no stats collection this roll.
+.Pp
+.Fn stats_tpl_add_voistats
+returns 0 on success, or an errno on failure.
+EINVAL is returned if any problems are detected with the arguments.
+EFBIG is returned if the resulting blob would have exceeded the maximum size.
+EOPNOTSUPP is returned if an attempt is made to add more VOI stats to a
+previously configured VOI.
+ENOMEM is returned if a required memory allocation fails.
+.Pp
+.Fn stats_voi_update_abs_<dtype>
+and
+.Fn stats_voi_update_rel_<dtype>
+return 0 on success, or EINVAL if any problems are detected with the arguments.
+.Pp
+.Fn stats_blob_init
+returns 0 on success, or an errno on failure.
+EINVAL is returned if any problems are detected with the arguments.
+EOVERFLOW is returned if the template blob's
+.Fa cursz
+is larger than the
+.Fa maxsz
+of the blob being initialized.
+.Pp
+.Fn stats_blob_alloc
+returns a pointer to a newly allocated and initialized blob based on the
+specified template with slot ID
+.Fa tpl_id ,
+or NULL if the memory allocation failed.
+.Pp
+.Fn stats_blob_clone
+and
+.Fn stats_blob_snapshot
+return 0 on success, or an errno on failure.
+EINVAL is returned if any problems are detected with the arguments.
+ENOMEM is returned if the SB_CLONE_ALLOCDST flag was specified and the memory
+allocation for
+.Fa dst
+fails.
+EOVERFLOW is returned if the src blob's
+.Fa cursz
+is larger than the
+.Fa maxsz
+of the
+.Fa dst
+blob.
+.Pp
+.Fn stats_blob_visit
+returns 0 on success, or EINVAL if any problems are detected with the arguments.
+.Pp
+.Fn stats_blob_tostr
+and
+.Fn stats_voistatdata_tostr
+return 0 on success, or an errno on failure.
+EINVAL is returned if any problems are detected with the arguments, otherwise
+any error returned by
+.Fn sbuf_error
+for
+.Fa buf
+is returned.
+.Pp
+.Fn stats_voistat_fetch_dptr
+returns 0 on success, or EINVAL if any problems are detected with the arguments.
+.Pp
+.Fn stats_voistat_fetch_<dtype>
+returns 0 on success, or an errno on failure.
+EINVAL is returned if any problems are detected with the arguments.
+EFTYPE is returned if the requested data type does not match the blob's data
+type for the specified
+.Fa voi_id
+and
+.Fa stype .
+.Sh SEE ALSO
+.Xr errno 2 ,
+.Xr arb 3 ,
+.Xr qmath 3 ,
+.Xr tcp 4 ,
+.Xr sbuf 9
+.Rs
+.%A "Ted Dunning"
+.%A "Otmar Ertl"
+.%T "Computing Extremely Accurate Quantiles Using t-digests"
+.%U "https://github.com/tdunning/t-digest/raw/master/docs/t-digest-paper/histo.pdf"
+.Re
+.Sh HISTORY
+The
+.Nm
+framework first appeared in
+.Fx 13.0 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+framework and this manual page were written by
+.An Lawrence Stewart Aq lstewart@FreeBSD.org
+and sponsored by Netflix, Inc.
diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk
index 1bea2ff10786..0f97e9c29bac 100644
--- a/share/mk/bsd.libnames.mk
+++ b/share/mk/bsd.libnames.mk
@@ -137,6 +137,7 @@ LIBSDP?= ${LIBDESTDIR}${LIBDIR_BASE}/libsdp.a
LIBSMB?= ${LIBDESTDIR}${LIBDIR_BASE}/libsmb.a
LIBSSL?= ${LIBDESTDIR}${LIBDIR_BASE}/libssl.a
LIBSSP_NONSHARED?= ${LIBDESTDIR}${LIBDIR_BASE}/libssp_nonshared.a
+LIBSTATS?= ${LIBDESTDIR}${LIBDIR_BASE}/libstats.a
LIBSTDCPLUSPLUS?= ${LIBDESTDIR}${LIBDIR_BASE}/libstdc++.a
LIBSTDTHREADS?= ${LIBDESTDIR}${LIBDIR_BASE}/libstdthreads.a
LIBSYSDECODE?= ${LIBDESTDIR}${LIBDIR_BASE}/libsysdecode.a
diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk
index 1f4294cb6a9a..98ca8780801e 100644
--- a/share/mk/src.libnames.mk
+++ b/share/mk/src.libnames.mk
@@ -165,6 +165,7 @@ _LIBRARIES= \
smb \
ssl \
ssp_nonshared \
+ stats \
stdthreads \
supcplusplus \
sysdecode \
@@ -346,6 +347,7 @@ _DP_c= compiler_rt
.if ${MK_SSP} != "no"
_DP_c+= ssp_nonshared
.endif
+_DP_stats= sbuf pthread
_DP_stdthreads= pthread
_DP_tacplus= md
_DP_panel= ncurses
diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk
index dc928e2e19b3..42f62dd7afca 100644
--- a/share/mk/src.opts.mk
+++ b/share/mk/src.opts.mk
@@ -170,6 +170,7 @@ __DEFAULT_YES_OPTIONS = \
SOURCELESS \
SOURCELESS_HOST \
SOURCELESS_UCODE \
+ STATS \
SVNLITE \
SYSCONS \
SYSTEM_COMPILER \
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index 86308526749e..51e599d701ae 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -671,3 +671,5 @@ device ndis
options LINDEBUGFS
options GCOV
+
+options STATS
diff --git a/sys/conf/files b/sys/conf/files
index ff25d99a3168..602ff988e96d 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3832,6 +3832,7 @@ kern/subr_sglist.c standard
kern/subr_sleepqueue.c standard
kern/subr_smp.c standard
kern/subr_stack.c optional ddb | stack | ktr
+kern/subr_stats.c optional stats
kern/subr_taskqueue.c standard
kern/subr_terminal.c optional vt
kern/subr_trap.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 35cbe1910551..96af0bf1e24d 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -418,6 +418,7 @@ RATELIMIT opt_ratelimit.h
RATELIMIT_DEBUG opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
+STATS opt_global.h
IPDIVERT
IPFILTER opt_ipfilter.h
IPFILTER_DEFAULT_BLOCK opt_ipfilter.h
diff --git a/sys/kern/subr_stats.c b/sys/kern/subr_stats.c
new file mode 100644
index 000000000000..10e5eaef4e27
--- /dev/null
+++ b/sys/kern/subr_stats.c
@@ -0,0 +1,3912 @@
+/*-
+ * Copyright (c) 2014-2018 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Author: Lawrence Stewart <lstewart@netflix.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/arb.h>
+#include <sys/ctype.h>
+#include <sys/errno.h>
+#include <sys/hash.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/qmath.h>
+#include <sys/sbuf.h>
+#if defined(DIAGNOSTIC)
+#include <sys/tree.h>
+#endif
+#include <sys/stats.h> /* Must come after qmath.h and arb.h */
+#include <sys/stddef.h>
+#include <sys/stdint.h>
+#include <sys/time.h>
+
+#ifdef _KERNEL
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#else /* ! _KERNEL */
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif /* _KERNEL */
+
+struct voistatdata_voistate {
+ /* Previous VOI value for diff calculation. */
+ struct voistatdata_numeric prev;
+};
+
+#define VS_VSDVALID 0x0001 /* Stat's voistatdata updated at least once. */
+struct voistat {
+ int8_t stype; /* Type of stat e.g. VS_STYPE_SUM. */
+ enum vsd_dtype dtype : 8; /* Data type of this stat's data. */
+ uint16_t data_off; /* Blob offset for this stat's data. */
+ uint16_t dsz; /* Size of stat's data. */
+#define VS_EBITS 8
+ uint16_t errs : VS_EBITS;/* Non-wrapping error count. */
+ uint16_t flags : 16 - VS_EBITS;
+};
+/* The voistat error count is capped to avoid wrapping. */
+#define VS_INCERRS(vs) do { \
+ if ((vs)->errs < (1U << VS_EBITS) - 1) \
+ (vs)->errs++; \
+} while (0)
+
+/*
+ * Ideas for flags:
+ * - Global or entity specific (global would imply use of counter(9)?)
+ * - Whether to reset stats on read or not
+ * - Signal an overflow?
+ * - Compressed voistat array
+ */
+#define VOI_REQSTATE 0x0001 /* VOI requires VS_STYPE_VOISTATE. */
+struct voi {
+ int16_t id; /* VOI id. */
+ enum vsd_dtype dtype : 8; /* Data type of the VOI itself. */
+ int8_t voistatmaxid; /* Largest allocated voistat index. */
+ uint16_t stats_off; /* Blob offset for this VOIs stats. */
+ uint16_t flags;
+};
+
+/*
+ * Memory for the entire blob is allocated as a slab and then offsets are
+ * maintained to carve up the slab into sections holding different data types.
+ *
+ * Ideas for flags:
+ * - Compressed voi array (trade off memory usage vs search time)
+ * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
+ */
+struct statsblobv1 {
+ uint8_t abi;
+ uint8_t endian;
+ uint16_t flags;
+ uint16_t maxsz;
+ uint16_t cursz;
+ /* Fields from here down are opaque to consumers. */
+ uint32_t tplhash; /* Base template hash ID. */
+ uint16_t stats_off; /* voistat array blob offset. */
+ uint16_t statsdata_off; /* voistatdata array blob offset. */
+ sbintime_t created; /* Blob creation time. */
+ sbintime_t lastrst; /* Time of last reset. */
+ struct voi vois[]; /* Array indexed by [voi_id]. */
+} __aligned(sizeof(void *));
+_Static_assert(offsetof(struct statsblobv1, cursz) +
+ SIZEOF_MEMBER(struct statsblobv1, cursz) == sizeof(struct statsblob),
+ "statsblobv1 ABI mismatch");
+
+struct statsblobv1_tpl {
+ struct metablob *mb;
+ struct statsblobv1 *sb;
+};
+
+/* Context passed to iterator callbacks. */
+struct sb_iter_ctx {
+ void *usrctx; /* Caller supplied context. */
+ uint32_t flags; /* Flags for current iteration. */
+ int16_t vslot; /* struct voi slot index. */
+ int8_t vsslot; /* struct voistat slot index. */
+};
+
+struct sb_tostrcb_ctx {
+ struct sbuf *buf;
+ struct statsblob_tpl *tpl;
+ enum sb_str_fmt fmt;
+ uint32_t flags;
+};
+
+struct sb_visitcb_ctx {
+ stats_blob_visitcb_t cb;
+ void *usrctx;
+};
+
+/* Stats blob iterator callback. */
+typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
+ struct voistat *vs, struct sb_iter_ctx *ctx);
+
+#ifdef _KERNEL
+static struct rwlock tpllistlock;
+RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
+#define TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
+#define TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
+#define TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
+#define TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
+#define TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
+#define TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
+#define TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
+MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
+#define stats_free(ptr) free((ptr), M_STATS)
+#else /* ! _KERNEL */
+static void stats_constructor(void);
+static void stats_destructor(void);
+static pthread_rwlock_t tpllistlock;
+#define TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
+#define TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
+#define TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
+#define TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
+#define TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
+#define TPL_LIST_LOCK_ASSERT() do { } while (0)
+#define TPL_LIST_RLOCK_ASSERT() do { } while (0)
+#define TPL_LIST_WLOCK_ASSERT() do { } while (0)
+#ifdef NDEBUG
+#define KASSERT(cond, msg) do {} while (0)
+#define stats_abort() do {} while (0)
+#else /* ! NDEBUG */
+#define KASSERT(cond, msg) do { \
+ if (!(cond)) { \
+ panic msg; \
+ } \
+} while (0)
+#define stats_abort() abort()
+#endif /* NDEBUG */
+#define stats_free(ptr) free(ptr)
+#define panic(fmt, ...) do { \
+ fprintf(stderr, (fmt), ##__VA_ARGS__); \
+ stats_abort(); \
+} while (0)
+#endif /* _KERNEL */
+
+#define SB_V1_MAXSZ 65535
+
+/* Obtain a blob offset pointer. */
+#define BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))
+
+/*
+ * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
+ * power of 2 size, we can shift instead of divide. The shift amount must be
+ * updated if sizeof(struct voi) ever changes, which the assert should catch.
+ */
+#define NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
+ sizeof(struct statsblobv1)) >> 3))
+_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
+
+/* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
+const char *vs_stype2name[VS_NUM_STYPES] = {
+ [VS_STYPE_VOISTATE] = "VOISTATE",
+ [VS_STYPE_SUM] = "SUM",
+ [VS_STYPE_MAX] = "MAX",
+ [VS_STYPE_MIN] = "MIN",
+ [VS_STYPE_HIST] = "HIST",
+ [VS_STYPE_TDGST] = "TDGST",
+};
+
+const char *vs_stype2desc[VS_NUM_STYPES] = {
+ [VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
+ [VS_STYPE_SUM] = "Simple arithmetic accumulator",
+ [VS_STYPE_MAX] = "Maximum observed VOI value",
+ [VS_STYPE_MIN] = "Minimum observed VOI value",
+ [VS_STYPE_HIST] = "Histogram of observed VOI values",
+ [VS_STYPE_TDGST] = "t-digest of observed VOI values",
+};
+
+const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
+ [VSD_DTYPE_VOISTATE] = "VOISTATE",
+ [VSD_DTYPE_INT_S32] = "INT_S32",
+ [VSD_DTYPE_INT_U32] = "INT_U32",
+ [VSD_DTYPE_INT_S64] = "INT_S64",
+ [VSD_DTYPE_INT_U64] = "INT_U64",
+ [VSD_DTYPE_INT_SLONG] = "INT_SLONG",
+ [VSD_DTYPE_INT_ULONG] = "INT_ULONG",
+ [VSD_DTYPE_Q_S32] = "Q_S32",
+ [VSD_DTYPE_Q_U32] = "Q_U32",
+ [VSD_DTYPE_Q_S64] = "Q_S64",
+ [VSD_DTYPE_Q_U64] = "Q_U64",
+ [VSD_DTYPE_CRHIST32] = "CRHIST32",
+ [VSD_DTYPE_DRHIST32] = "DRHIST32",
+ [VSD_DTYPE_DVHIST32] = "DVHIST32",
+ [VSD_DTYPE_CRHIST64] = "CRHIST64",
+ [VSD_DTYPE_DRHIST64] = "DRHIST64",
+ [VSD_DTYPE_DVHIST64] = "DVHIST64",
+ [VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
+ [VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
+};
+
+const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
+ [VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
+ [VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
+ [VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
+ [VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
+ [VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
+ [VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
+ [VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
+ [VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
+ [VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
+ [VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
+ [VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
+ [VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
+ [VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
+ [VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
+ [VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
+ [VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
+ [VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
+ [VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
+ [VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
+};
+
+static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
+ [VSD_DTYPE_VOISTATE] = true,
+ [VSD_DTYPE_INT_S32] = false,
+ [VSD_DTYPE_INT_U32] = false,
+ [VSD_DTYPE_INT_S64] = false,
+ [VSD_DTYPE_INT_U64] = false,
+ [VSD_DTYPE_INT_SLONG] = false,
+ [VSD_DTYPE_INT_ULONG] = false,
+ [VSD_DTYPE_Q_S32] = false,
+ [VSD_DTYPE_Q_U32] = false,
+ [VSD_DTYPE_Q_S64] = false,
+ [VSD_DTYPE_Q_U64] = false,
+ [VSD_DTYPE_CRHIST32] = true,
+ [VSD_DTYPE_DRHIST32] = true,
+ [VSD_DTYPE_DVHIST32] = true,
+ [VSD_DTYPE_CRHIST64] = true,
+ [VSD_DTYPE_DRHIST64] = true,
+ [VSD_DTYPE_DVHIST64] = true,
+ [VSD_DTYPE_TDGSTCLUST32] = true,
+ [VSD_DTYPE_TDGSTCLUST64] = true,
+};
+
+const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
+ [LIM_MIN] = {
+ [VSD_DTYPE_VOISTATE] = {0},
+ [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
+ [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
+ [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
+ [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
+ [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
+ [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
+ [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
+ [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
+ [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
+ [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
+ },
+ [LIM_MAX] = {
+ [VSD_DTYPE_VOISTATE] = {0},
+ [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
+ [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
+ [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
+ [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
+ [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
+ [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
+ [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
+ [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
+ [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
+ [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
+ }
+};
+
+/* tpllistlock protects tpllist and ntpl */
+static uint32_t ntpl;
+static struct statsblob_tpl **tpllist;
+
+static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
+ int flags);
+//static void stats_v1_blob_finalise(struct statsblobv1 *sb);
+static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
+ uint32_t flags);
+static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
+ int newvoistatbytes, int newvoistatdatabytes);
+static void stats_v1_blob_iter(struct statsblobv1 *sb,
+ stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
+static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
+ struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
+
+/*
+ * Ordering comparator for 32-bit t-digest centroid tree nodes, keyed on the
+ * centroid mean (mu). Deliberately never returns 0, so centroids with equal
+ * mu remain distinct tree entries; both mus must share the same Q precision
+ * (asserted).
+ */
+static inline int
+ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
+{
+
+	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
+	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
+	    Q_RELPREC(c1->mu, c2->mu)));
+
+	return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
+}
+ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
+
+/*
+ * Ordering comparator for 64-bit t-digest centroid tree nodes, keyed on the
+ * centroid mean (mu). Deliberately never returns 0, so centroids with equal
+ * mu remain distinct tree entries; both mus must share the same Q precision
+ * (asserted).
+ */
+static inline int
+ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
+{
+
+	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
+	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
+	    Q_RELPREC(c1->mu, c2->mu)));
+
+	return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
+}
+ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
+
+#ifdef DIAGNOSTIC
+RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
+RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
+#endif
+
+/*
+ * Return the monotonic uptime as an sbintime_t. Wraps sbinuptime(9)
+ * in-kernel and clock_gettime(CLOCK_MONOTONIC_FAST) in userland.
+ */
+static inline sbintime_t
+stats_sbinuptime(void)
+{
+	sbintime_t sbt;
+#ifdef _KERNEL
+
+	sbt = sbinuptime();
+#else /* ! _KERNEL */
+	struct timespec tp;
+
+	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
+	sbt = tstosbt(tp);
+#endif /* _KERNEL */
+
+	return (sbt);
+}
+
+/*
+ * realloc() wrapper usable in both kernel and userland. In-kernel it uses
+ * realloc(9) with M_STATS, defaulting to M_NOWAIT if the caller set neither
+ * M_NOWAIT nor M_WAITOK. In userland it emulates M_ZERO by zeroing the
+ * newly grown region (or the whole buffer when oldsz == 0, i.e. a fresh
+ * allocation). Returns NULL on failure; like realloc(3), the original
+ * buffer is not freed on failure, so callers must keep their own pointer.
+ */
+static inline void *
+stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
+{
+
+#ifdef _KERNEL
+	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
+	if (!(flags & (M_WAITOK | M_NOWAIT)))
+		flags |= M_NOWAIT;
+	ptr = realloc(ptr, newsz, M_STATS, flags);
+#else /* ! _KERNEL */
+	ptr = realloc(ptr, newsz);
+	if ((flags & M_ZERO) && ptr != NULL) {
+		if (oldsz == 0)
+			memset(ptr, '\0', newsz);
+		else if (newsz > oldsz)
+			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
+	}
+#endif /* _KERNEL */
+
+	return (ptr);
+}
+
+/*
+ * strdup() usable in both kernel and userland. In-kernel it allocates from
+ * M_STATS (defaulting to M_NOWAIT if neither M_NOWAIT nor M_WAITOK is set);
+ * in userland it simply defers to strdup(3), where flags is unused.
+ * Returns NULL on allocation failure; caller frees with stats_free().
+ */
+static inline char *
+stats_strdup(const char *s,
+#ifdef _KERNEL
+    int flags)
+{
+	char *copy;
+	size_t len;
+
+	if (!(flags & (M_WAITOK | M_NOWAIT)))
+		flags |= M_NOWAIT;
+
+	len = strlen(s) + 1;
+	if ((copy = malloc(len, M_STATS, flags)) != NULL)
+		bcopy(s, copy, len);
+
+	return (copy);
+#else
+	int flags __unused)
+{
+	return (strdup(s));
+#endif
+}
+
+/*
+ * Recompute the template's hash ID by chain-hashing the template name, each
+ * non-NULL VOI name, and finally the raw template blob contents. Requires
+ * the template list write lock (asserted).
+ */
+static inline void
+stats_tpl_update_hash(struct statsblob_tpl *tpl)
+{
+
+	TPL_LIST_WLOCK_ASSERT();
+	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
+	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
+		if (tpl->mb->voi_meta[voi_id].name != NULL)
+			tpl->mb->tplhash = hash32_str(
+			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
+	}
+	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
+	    tpl->mb->tplhash);
+}
+
+/*
+ * Compute base^exp by binary exponentiation (exponentiation by squaring).
+ * Unsigned arithmetic, so overflow silently wraps modulo 2^64; callers are
+ * responsible for keeping base/exp within range.
+ */
+static inline uint64_t
+stats_pow_u64(uint64_t base, uint64_t exp)
+{
+	uint64_t result = 1;
+
+	while (exp) {
+		if (exp & 1)
+			result *= base;
+		exp >>= 1;
+		base *= base;
+	}
+
+	return (result);
+}
+
+/*
+ * Compute the bounds of histogram bucket "curbkt" for the automated bucket
+ * construction schemes (BKT_LIN, BKT_EXP, BKT_LINEXP), or copy the caller
+ * supplied bounds for BKT_USR. On entry, *bkt_lb holds the bucket's lower
+ * bound (the previous bucket's upper bound); on return, *bkt_ub has been
+ * advanced from *bkt_ub's previous value by the scheme's step.
+ *
+ * Returns 0 on success, EINVAL if a non-BKT_USR scheme computed a zero step
+ * (which would yield an empty bucket and an infinite loop in callers), or a
+ * qmath(3) error from Q_QADDI() when stepping a Q-type upper bound.
+ */
+static inline int
+stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
+    struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
+{
+	uint64_t step = 0;
+	int error = 0;
+
+	switch (info->scheme) {
+	case BKT_LIN:
+		/* Fixed-width buckets. */
+		step = info->lin.stepinc;
+		break;
+	case BKT_EXP:
+		/* Bucket width grows as stepbase^(stepexp + bucket index). */
+		step = stats_pow_u64(info->exp.stepbase,
+		    info->exp.stepexp + curbkt);
+		break;
+	case BKT_LINEXP:
+	{
+		uint64_t curstepexp = 1;
+
+		/*
+		 * Linear steps within exponentially growing ranges: find the
+		 * smallest power of stepbase strictly greater than the
+		 * bucket's lower bound, then divide that range linearly.
+		 */
+		switch (info->voi_dtype) {
+		case VSD_DTYPE_INT_S32:
+			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->int32.s32)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_INT_U32:
+			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->int32.u32)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_INT_S64:
+			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->int64.s64)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_INT_U64:
+			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->int64.u64)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_INT_SLONG:
+			while ((long)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->intlong.slong)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_INT_ULONG:
+			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= bkt_lb->intlong.ulong)
+				curstepexp++;
+			break;
+		case VSD_DTYPE_Q_S32:
+			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
+				curstepexp++;
+			break;
+		case VSD_DTYPE_Q_U32:
+			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
+				curstepexp++;
+			break;
+		case VSD_DTYPE_Q_S64:
+			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
+				curstepexp++;
+			break;
+		case VSD_DTYPE_Q_U64:
+			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
+			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
+				curstepexp++;
+			break;
+		default:
+			break;
+		}
+
+		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
+		    info->linexp.linstepdiv;
+		if (step == 0)
+			step = 1;
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (info->scheme == BKT_USR) {
+		/* User supplied explicit per-bucket bounds. */
+		*bkt_lb = info->usr.bkts[curbkt].lb;
+		*bkt_ub = info->usr.bkts[curbkt].ub;
+	} else if (step != 0) {
+		/* Advance the upper bound by the computed step. */
+		switch (info->voi_dtype) {
+		case VSD_DTYPE_INT_S32:
+			bkt_ub->int32.s32 += (int32_t)step;
+			break;
+		case VSD_DTYPE_INT_U32:
+			bkt_ub->int32.u32 += (uint32_t)step;
+			break;
+		case VSD_DTYPE_INT_S64:
+			bkt_ub->int64.s64 += (int64_t)step;
+			break;
+		case VSD_DTYPE_INT_U64:
+			bkt_ub->int64.u64 += (uint64_t)step;
+			break;
+		case VSD_DTYPE_INT_SLONG:
+			bkt_ub->intlong.slong += (long)step;
+			break;
+		case VSD_DTYPE_INT_ULONG:
+			bkt_ub->intlong.ulong += (unsigned long)step;
+			break;
+		case VSD_DTYPE_Q_S32:
+			error = Q_QADDI(&bkt_ub->q32.sq32, step);
+			break;
+		case VSD_DTYPE_Q_U32:
+			error = Q_QADDI(&bkt_ub->q32.uq32, step);
+			break;
+		case VSD_DTYPE_Q_S64:
+			error = Q_QADDI(&bkt_ub->q64.sq64, step);
+			break;
+		case VSD_DTYPE_Q_U64:
+			error = Q_QADDI(&bkt_ub->q64.uq64, step);
+			break;
+		default:
+			break;
+		}
+	} else { /* info->scheme != BKT_USR && step == 0 */
+		return (EINVAL);
+	}
+
+	return (error);
+}
+
+/*
+ * Calculate the number of buckets described by the helper info, by walking
+ * bucket bounds from info->lb until a bucket's upper bound exceeds info->ub
+ * (for BKT_USR, until all user supplied buckets are consumed). One extra
+ * bucket is added for each of the optional -/+ infinity buckets. Returns 0
+ * on error (bad dtype, or a bucket bounds computation failure).
+ */
+static uint32_t
+stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
+{
+	struct voistatdata_numeric bkt_lb, bkt_ub;
+	uint32_t nbkts;
+	int done;
+
+	if (info->scheme == BKT_USR) {
+		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
+		info->lb = info->usr.bkts[0].lb;
+		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
+	}
+
+	nbkts = 0;
+	done = 0;
+	bkt_ub = info->lb;
+
+	do {
+		/* Each bucket's lower bound is the previous upper bound. */
+		bkt_lb = bkt_ub;
+		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
+			return (0);
+
+		if (info->scheme == BKT_USR)
+			done = (nbkts == info->usr.nbkts);
+		else {
+			switch (info->voi_dtype) {
+			case VSD_DTYPE_INT_S32:
+				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
+				break;
+			case VSD_DTYPE_INT_U32:
+				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
+				break;
+			case VSD_DTYPE_INT_S64:
+				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
+				break;
+			case VSD_DTYPE_INT_U64:
+				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
+				break;
+			case VSD_DTYPE_INT_SLONG:
+				done = (bkt_ub.intlong.slong >
+				    info->ub.intlong.slong);
+				break;
+			case VSD_DTYPE_INT_ULONG:
+				done = (bkt_ub.intlong.ulong >
+				    info->ub.intlong.ulong);
+				break;
+			case VSD_DTYPE_Q_S32:
+				done = Q_QGTQ(bkt_ub.q32.sq32,
+				    info->ub.q32.sq32);
+				break;
+			case VSD_DTYPE_Q_U32:
+				done = Q_QGTQ(bkt_ub.q32.uq32,
+				    info->ub.q32.uq32);
+				break;
+			case VSD_DTYPE_Q_S64:
+				done = Q_QGTQ(bkt_ub.q64.sq64,
+				    info->ub.q64.sq64);
+				break;
+			case VSD_DTYPE_Q_U64:
+				done = Q_QGTQ(bkt_ub.q64.uq64,
+				    info->ub.q64.uq64);
+				break;
+			default:
+				return (0);
+			}
+		}
+	} while (!done);
+
+	/* Account for the optional out-of-band infinity buckets. */
+	if (info->flags & VSD_HIST_LBOUND_INF)
+		nbkts++;
+	if (info->flags & VSD_HIST_UBOUND_INF)
+		nbkts++;
+
+	return (nbkts);
+}
+
+/*
+ * Histogram voistat spec helper: determine the required voistat data size
+ * for the chosen histogram dtype and bucket count, allocate and zero the
+ * initial value buffer, and populate each bucket's bounds (range types) or
+ * value (DVHIST types). If the optional -/+ infinity buckets were
+ * requested, the first/last buckets are then patched to the dtype's
+ * min/max limits, taking care to preserve the user's Q control bits.
+ * Returns 0 on success, EINVAL on bad/unsupported arguments, or ENOMEM.
+ * On error after allocation, vss->iv is left for the caller to release
+ * via stats_vss_hlpr_cleanup().
+ */
+int
+stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+    struct vss_hist_hlpr_info *info)
+{
+	struct voistatdata_hist *hist;
+	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
+	    *ubinfbktlb, *ubinfbktub;
+	uint32_t bkt, nbkts, nloop;
+
+	/* Infinity buckets are meaningless for discrete-value histograms. */
+	if (vss == NULL || info == NULL || (info->flags &
+	    (VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
+	    VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
+		return (EINVAL);
+
+	info->voi_dtype = voi_dtype;
+
+	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
+		return (EINVAL);
+
+	switch (info->hist_dtype) {
+	case VSD_DTYPE_CRHIST32:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
+		break;
+	case VSD_DTYPE_DRHIST32:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
+		break;
+	case VSD_DTYPE_DVHIST32:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
+		break;
+	case VSD_DTYPE_CRHIST64:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
+		break;
+	case VSD_DTYPE_DRHIST64:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
+		break;
+	case VSD_DTYPE_DVHIST64:
+		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
+	if (vss->iv == NULL)
+		return (ENOMEM);
+
+	hist = (struct voistatdata_hist *)vss->iv;
+	bkt_ub = info->lb;
+
+	/*
+	 * "bkt" indexes the bucket being written (skipping slot 0 if it is
+	 * reserved for the -inf bucket); "nloop" counts computed buckets.
+	 */
+	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
+	    bkt < nbkts;
+	    bkt++, nloop++) {
+		bkt_lb = bkt_ub;
+		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
+			return (EINVAL);
+
+		switch (info->hist_dtype) {
+		case VSD_DTYPE_CRHIST32:
+			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
+			break;
+		case VSD_DTYPE_DRHIST32:
+			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
+			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
+			break;
+		case VSD_DTYPE_DVHIST32:
+			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
+			break;
+		case VSD_DTYPE_CRHIST64:
+			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
+			break;
+		case VSD_DTYPE_DRHIST64:
+			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
+			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
+			break;
+		case VSD_DTYPE_DVHIST64:
+			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
+			break;
+		default:
+			return (EINVAL);
+		}
+	}
+
+	/* Locate the bound fields of the first/last (infinity) buckets. */
+	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
+
+	switch (info->hist_dtype) {
+	case VSD_DTYPE_CRHIST32:
+		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
+		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
+		break;
+	case VSD_DTYPE_DRHIST32:
+		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
+		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
+		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
+		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
+		break;
+	case VSD_DTYPE_CRHIST64:
+		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
+		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
+		break;
+	case VSD_DTYPE_DRHIST64:
+		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
+		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
+		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
+		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
+		break;
+	case VSD_DTYPE_DVHIST32:
+	case VSD_DTYPE_DVHIST64:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
+		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
+		/*
+		 * Assignment from numeric_limit array for Q types assigns max
+		 * possible integral/fractional value for underlying data type,
+		 * but we must set control bits for this specific histogram per
+		 * the user's choice of fractional bits, which we extract from
+		 * info->lb.
+		 */
+		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
+		    info->voi_dtype == VSD_DTYPE_Q_U32) {
+			/* Signedness doesn't matter for setting control bits. */
+			Q_SCVAL(lbinfbktlb->q32.sq32,
+			    Q_GCVAL(info->lb.q32.sq32));
+		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
+		    info->voi_dtype == VSD_DTYPE_Q_U64) {
+			/* Signedness doesn't matter for setting control bits. */
+			Q_SCVAL(lbinfbktlb->q64.sq64,
+			    Q_GCVAL(info->lb.q64.sq64));
+		}
+		if (lbinfbktub)
+			*lbinfbktub = info->lb;
+	}
+	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
+		*ubinfbktlb = bkt_lb;
+		if (ubinfbktub) {
+			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
+			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
+			    info->voi_dtype == VSD_DTYPE_Q_U32) {
+				Q_SCVAL(ubinfbktub->q32.sq32,
+				    Q_GCVAL(info->lb.q32.sq32));
+			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
+			    info->voi_dtype == VSD_DTYPE_Q_U64) {
+				Q_SCVAL(ubinfbktub->q64.sq64,
+				    Q_GCVAL(info->lb.q64.sq64));
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * t-digest voistat spec helper: size and allocate the voistat data for the
+ * requested number of centroids (info->nctds), then initialise the centroid
+ * ARB tree, setting each centroid's mu to zero at the requested Q precision
+ * (info->prec). Returns 0 on success, EINVAL for an unsupported t-digest
+ * dtype, or ENOMEM.
+ */
+int
+stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+    struct vss_tdgst_hlpr_info *info)
+{
+	struct voistatdata_tdgst *tdgst;
+	struct ctdth32 *ctd32tree;
+	struct ctdth64 *ctd64tree;
+	struct voistatdata_tdgstctd32 *ctd32;
+	struct voistatdata_tdgstctd64 *ctd64;
+
+	info->voi_dtype = voi_dtype;
+
+	switch (info->tdgst_dtype) {
+	case VSD_DTYPE_TDGSTCLUST32:
+		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
+		break;
+	case VSD_DTYPE_TDGSTCLUST64:
+		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
+	if (vss->iv == NULL)
+		return (ENOMEM);
+
+	tdgst = (struct voistatdata_tdgst *)vss->iv;
+
+	switch (info->tdgst_dtype) {
+	case VSD_DTYPE_TDGSTCLUST32:
+		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
+		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
+			Q_INI(&ctd32->mu, 0, 0, info->prec);
+		}
+		break;
+	case VSD_DTYPE_TDGSTCLUST64:
+		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
+		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
+			Q_INI(&ctd64->mu, 0, 0, info->prec);
+		}
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Simple numeric voistat spec helper (SUM/MIN/MAX stats): allocate the
+ * initial value buffer and seed it with 0 for SUM, the dtype's max for MIN
+ * and the dtype's min for MAX (so the first update always replaces it).
+ * Q types additionally receive control bits encoding info->prec.
+ * Returns 0 on success, EINVAL for an unsupported stat type, or ENOMEM.
+ */
+int
+stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+    struct vss_numeric_hlpr_info *info)
+{
+	struct voistatdata_numeric iv;
+
+	switch (vss->stype) {
+	case VS_STYPE_SUM:
+		iv = stats_ctor_vsd_numeric(0);
+		break;
+	case VS_STYPE_MIN:
+		iv = numeric_limits[LIM_MAX][voi_dtype];
+		break;
+	case VS_STYPE_MAX:
+		iv = numeric_limits[LIM_MIN][voi_dtype];
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
+	if (vss->iv == NULL)
+		return (ENOMEM);
+
+	vss->vs_dtype = voi_dtype;
+	vss->vsdsz = vsd_dtype2size[voi_dtype];
+	switch (voi_dtype) {
+	case VSD_DTYPE_INT_S32:
+		*((int32_t *)vss->iv) = iv.int32.s32;
+		break;
+	case VSD_DTYPE_INT_U32:
+		*((uint32_t *)vss->iv) = iv.int32.u32;
+		break;
+	case VSD_DTYPE_INT_S64:
+		*((int64_t *)vss->iv) = iv.int64.s64;
+		break;
+	case VSD_DTYPE_INT_U64:
+		*((uint64_t *)vss->iv) = iv.int64.u64;
+		break;
+	case VSD_DTYPE_INT_SLONG:
+		*((long *)vss->iv) = iv.intlong.slong;
+		break;
+	case VSD_DTYPE_INT_ULONG:
+		*((unsigned long *)vss->iv) = iv.intlong.ulong;
+		break;
+	case VSD_DTYPE_Q_S32:
+		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
+		    Q_CTRLINI(info->prec));
+		break;
+	case VSD_DTYPE_Q_U32:
+		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
+		    Q_CTRLINI(info->prec));
+		break;
+	case VSD_DTYPE_Q_S64:
+		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
+		    Q_CTRLINI(info->prec));
+		break;
+	case VSD_DTYPE_Q_U64:
+		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
+		    Q_CTRLINI(info->prec));
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Run each voistatspec entry's helper (if set) to populate its initial
+ * value buffer, iterating in reverse array order. Stops and returns the
+ * first helper's non-zero error; 0 on success. Partial allocations from
+ * earlier helpers are the caller's to release via stats_vss_hlpr_cleanup().
+ */
+int
+stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
+    struct voistatspec *vss)
+{
+	int i, ret;
+
+	for (i = nvss - 1; i >= 0; i--) {
+		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
+		    vss[i].hlprinfo)) != 0)
+			return (ret);
+	}
+
+	return (0);
+}
+
+/*
+ * Release the initial value buffers allocated by stats_vss_hlpr_init().
+ * Only entries with a helper set are freed (helper-less entries own their
+ * iv elsewhere); freed pointers are NULLed to guard against double free.
+ */
+void
+stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
+{
+	int i;
+
+	for (i = nvss - 1; i >= 0; i--) {
+		if (vss[i].hlpr) {
+			stats_free((void *)vss[i].iv);
+			vss[i].iv = NULL;
+		}
+	}
+}
+
+/*
+ * Fetch a pointer to the template at list slot tpl_id. Returns 0 and sets
+ * *tpl on success, ENOENT if tpl_id is out of range.
+ * NOTE(review): takes the list write lock for a read-only lookup —
+ * presumably in anticipation of the refcount acquisition flagged below;
+ * confirm before relaxing to the read lock.
+ */
+int
+stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
+{
+	int error;
+
+	error = 0;
+
+	TPL_LIST_WLOCK();
+	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
+		error = ENOENT;
+	} else {
+		*tpl = tpllist[tpl_id];
+		/* XXXLAS: Acquire refcount on tpl. */
+	}
+	TPL_LIST_WUNLOCK();
+
+	return (error);
+}
+
+/*
+ * Return the template list slot id matching the given name and/or hash,
+ * searching newest-first, or -ESRCH if no match. With a non-NULL name, a
+ * hash of 0 means "match on name alone"; otherwise both must match. With a
+ * NULL name, the lookup is by hash only.
+ */
+int
+stats_tpl_fetch_allocid(const char *name, uint32_t hash)
+{
+	int i, tpl_id;
+
+	tpl_id = -ESRCH;
+
+	TPL_LIST_RLOCK();
+	for (i = ntpl - 1; i >= 0; i--) {
+		if (name != NULL) {
+			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
+			    strncmp(name, tpllist[i]->mb->tplname,
+			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
+			    tpllist[i]->mb->tplhash)) {
+				tpl_id = i;
+				break;
+			}
+		} else if (hash == tpllist[i]->mb->tplhash) {
+			tpl_id = i;
+			break;
+		}
+	}
+	TPL_LIST_RUNLOCK();
+
+	return (tpl_id);
+}
+
+/*
+ * Copy the name of template tpl_id into buf (always NUL terminated via
+ * strlcpy). Returns 0 on success, EOVERFLOW if buf is NULL or too small to
+ * hold the full name, ENOENT if tpl_id is not a valid slot.
+ */
+int
+stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
+{
+	int error;
+
+	error = 0;
+
+	TPL_LIST_RLOCK();
+	if (tpl_id < ntpl) {
+		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
+			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
+		else
+			error = EOVERFLOW;
+	} else
+		error = ENOENT;
+	TPL_LIST_RUNLOCK();
+
+	return (error);
+}
+
+/*
+ * Make a weighted sampling decision across the rates[] array: returns the
+ * selected entry's tpl_slot_id, or -1 for "no template" when the random
+ * percentage lands beyond the cumulative sum of sampling percentages.
+ * The per-entry percentages must cumulatively sum to <= 100 (asserted).
+ */
+int
+stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
+    void *seed_bytes, size_t seed_len)
+{
+	uint32_t cum_pct, rnd_pct;
+	int i;
+
+	cum_pct = 0;
+
+	/*
+	 * Choose a pseudorandom or seeded number in range [0,100] and use
+	 * it to make a sampling decision and template selection where required.
+	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
+	 * number so that every selection is independent. If a seed is supplied,
+	 * the caller desires random selection across different seeds, but
+	 * deterministic selection given the same seed. This is achieved by
+	 * hashing the seed and using the hash as the random number source.
+	 *
+	 * XXXLAS: Characterise hash function output distribution.
+	 */
+	if (seed_bytes == NULL)
+		rnd_pct = random() / (INT32_MAX / 100);
+	else
+		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
+		    (UINT32_MAX / 100U);
+
+	/*
+	 * We map the randomly selected percentage on to the interval [0,100]
+	 * consisting of the cumulatively summed template sampling percentages.
+	 * The difference between the cumulative sum of all template sampling
+	 * percentages and 100 is treated as a NULL assignment i.e. no stats
+	 * template will be assigned, and -1 returned instead.
+	 */
+	for (i = 0; i < nrates; i++) {
+		cum_pct += rates[i].tpl_sample_pct;
+
+		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
+		    cum_pct));
+		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
+			continue;
+
+		return (rates[i].tpl_slot_id);
+	}
+
+	return (-1);
+}
+
/*
 * Clone blob "src" into "*dst", preserving the destination's maxsz field.
 * Flags:
 * - SB_CLONE_ALLOCDST: allocate *dst (src->cursz bytes); mutually exclusive
 *   with the user-destination flags below.
 * - SB_CLONE_USRDST / SB_CLONE_USRDSTNOFAULT (kernel only): *dst is a user
 *   space address; copy with copyout()/copyout_nofault() instead of memcpy().
 * Returns 0, EINVAL on bad arguments/flag combination, ENOMEM on allocation
 * failure, or EOVERFLOW if dst could only hold src's header.
 */
int
stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
    struct statsblobv1 *src, uint32_t flags)
{
    int error;

    error = 0;

    /* ALLOCDST and the user-destination flags are mutually exclusive. */
    if (src == NULL || dst == NULL ||
        src->cursz < sizeof(struct statsblob) ||
        ((flags & SB_CLONE_ALLOCDST) &&
        (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
        error = EINVAL;
    } else if (flags & SB_CLONE_ALLOCDST) {
        *dst = stats_realloc(NULL, 0, src->cursz, 0);
        if (*dst)
            (*dst)->maxsz = dstmaxsz = src->cursz;
        else
            error = ENOMEM;
    } else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
        error = EINVAL;
    }

    if (!error) {
        size_t postcurszlen;

        /*
         * Clone src into dst except for the maxsz field. If dst is too
         * small to hold all of src, only copy src's header and return
         * EOVERFLOW.
         */
        /*
         * NOTE(review): the copyout()/copyout_nofault() return values
         * below are ignored, so a faulting user destination goes
         * undetected by the caller — confirm this is intentional.
         */
#ifdef _KERNEL
        if (flags & SB_CLONE_USRDSTNOFAULT)
            copyout_nofault(src, *dst,
                offsetof(struct statsblob, maxsz));
        else if (flags & SB_CLONE_USRDST)
            copyout(src, *dst, offsetof(struct statsblob, maxsz));
        else
#endif
            memcpy(*dst, src, offsetof(struct statsblob, maxsz));

        /* Everything from cursz onwards, or just the header remainder. */
        if (dstmaxsz >= src->cursz) {
            postcurszlen = src->cursz -
                offsetof(struct statsblob, cursz);
        } else {
            error = EOVERFLOW;
            postcurszlen = sizeof(struct statsblob) -
                offsetof(struct statsblob, cursz);
        }
#ifdef _KERNEL
        if (flags & SB_CLONE_USRDSTNOFAULT)
            copyout_nofault(&(src->cursz), &((*dst)->cursz),
                postcurszlen);
        else if (flags & SB_CLONE_USRDST)
            copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
        else
#endif
            memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
    }

    return (error);
}
+
+int
+stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
+{
+ struct statsblobv1_tpl *tpl, **newtpllist;
+ struct statsblobv1 *tpl_sb;
+ struct metablob *tpl_mb;
+ int tpl_id;
+
+ if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
+ return (-EINVAL);
+
+ if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
+ return (-EEXIST);
+
+ tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
+ tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
+ tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
+
+ if (tpl_mb != NULL && name != NULL)
+ tpl_mb->tplname = stats_strdup(name, 0);
+
+ if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
+ tpl_mb->tplname == NULL) {
+ stats_free(tpl);
+ stats_free(tpl_sb);
+ if (tpl_mb != NULL) {
+ stats_free(tpl_mb->tplname);
+ stats_free(tpl_mb);
+ }
+ return (-ENOMEM);
+ }
+
+ tpl->mb = tpl_mb;
+ tpl->sb = tpl_sb;
+
+ tpl_sb->abi = STATS_ABI_V1;
+ tpl_sb->endian =
+#if BYTE_ORDER == LITTLE_ENDIAN
+ SB_LE;
+#elif BYTE_ORDER == BIG_ENDIAN
+ SB_BE;
+#else
+ SB_UE;
+#endif
+ tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
+ tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
+
+ TPL_LIST_WLOCK();
+ newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
+ (ntpl + 1) * sizeof(void *), 0);
+ if (newtpllist != NULL) {
+ tpl_id = ntpl++;
+ tpllist = (struct statsblob_tpl **)newtpllist;
+ tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
+ stats_tpl_update_hash(tpllist[tpl_id]);
+ } else {
+ stats_free(tpl);
+ stats_free(tpl_sb);
+ if (tpl_mb != NULL) {
+ stats_free(tpl_mb->tplname);
+ stats_free(tpl_mb);
+ }
+ tpl_id = -ENOMEM;
+ }
+ TPL_LIST_WUNLOCK();
+
+ return (tpl_id);
+}
+
/*
 * Add VOI "voi_id" (or extend an existing one) with the "nvss" stats specs
 * in "vss" to template "tpl_id", growing the template blob and its metadata
 * in place under the template list write lock.
 *
 * Returns 0 on success; EINVAL on bad arguments or unknown template;
 * EOPNOTSUPP when asked to extend an already-populated VOI (unimplemented,
 * see XXXLAS below); ENOMEM on allocation failure.
 */
int
stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
    enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
    uint32_t flags)
{
    struct voi *voi;
    struct voistat *tmpstat;
    struct statsblobv1 *tpl_sb;
    struct metablob *tpl_mb;
    int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
        newvoistatdatabytes, newvoistatmaxid;
    uint32_t nbytes;

    if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
        nvss == 0 || vss == NULL)
        return (EINVAL);

    error = nbytes = newvoibytes = newvoistatbytes =
        newvoistatdatabytes = 0;
    newvoistatmaxid = -1;

    /* Calculate the number of bytes required for the new voistats. */
    for (i = nvss - 1; i >= 0; i--) {
        if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
            vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
            vss[i].iv == NULL || vss[i].vsdsz == 0)
            return (EINVAL);
        /* The voistat array is indexed by stype, so it must span
         * slots 0..max requested stype. */
        if ((int)vss[i].stype > newvoistatmaxid)
            newvoistatmaxid = vss[i].stype;
        newvoistatdatabytes += vss[i].vsdsz;
    }

    if (flags & SB_VOI_RELUPDATE) {
        /* XXXLAS: VOI state bytes may need to vary based on stat types. */
        newvoistatdatabytes += sizeof(struct voistatdata_voistate);
    }
    nbytes += newvoistatdatabytes;

    TPL_LIST_WLOCK();
    if (tpl_id < ntpl) {
        tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
        tpl_mb = tpllist[tpl_id]->mb;

        if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
            /* Adding a new VOI and associated stats. */
            if (voi_id >= NVOIS(tpl_sb)) {
                /* We need to grow the tpl_sb->vois array. */
                newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
                    sizeof(struct voi);
                nbytes += newvoibytes;
            }
            newvoistatbytes =
                (newvoistatmaxid + 1) * sizeof(struct voistat);
        } else {
            /* Adding stats to an existing VOI. */
            if (newvoistatmaxid >
                tpl_sb->vois[voi_id].voistatmaxid) {
                newvoistatbytes = (newvoistatmaxid -
                    tpl_sb->vois[voi_id].voistatmaxid) *
                    sizeof(struct voistat);
            }
            /* XXXLAS: KPI does not yet support expanding VOIs. */
            error = EOPNOTSUPP;
        }
        nbytes += newvoistatbytes;

        if (!error && newvoibytes > 0) {
            struct voi_meta *voi_meta = tpl_mb->voi_meta;

            /* Grow per-VOI metadata in step with the vois array.
             * NB: old size uses NVOIS() computed before expansion. */
            voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
                0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
                (1 + voi_id) * sizeof(struct voi_meta),
                M_ZERO);

            if (voi_meta == NULL)
                error = ENOMEM;
            else
                tpl_mb->voi_meta = voi_meta;
        }

        if (!error) {
            /* NB: Resizing can change where tpl_sb points. */
            error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
                newvoistatbytes, newvoistatdatabytes);
        }

        if (!error) {
            tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
                0);
            if (tpl_mb->voi_meta[voi_id].name == NULL)
                error = ENOMEM;
        }

        if (!error) {
            /* Update the template list with the resized pointer. */
            tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;

            /* Update the template. */
            voi = &tpl_sb->vois[voi_id];

            if (voi->id < 0) {
                /* VOI is new and needs to be initialised. */
                voi->id = voi_id;
                voi->dtype = voi_dtype;
                voi->stats_off = tpl_sb->stats_off;
                if (flags & SB_VOI_RELUPDATE)
                    voi->flags |= VOI_REQSTATE;
            } else {
                /*
                 * XXXLAS: When this else block is written, the
                 * "KPI does not yet support expanding VOIs"
                 * error earlier in this function can be
                 * removed. What is required here is to shuffle
                 * the voistat array such that the new stats for
                 * the voi are contiguous, which will displace
                 * stats for other vois that reside after the
                 * voi being updated. The other vois then need
                 * to have their stats_off adjusted post
                 * shuffle.
                 */
            }

            voi->voistatmaxid = newvoistatmaxid;
            newstatdataidx = 0;

            if (voi->flags & VOI_REQSTATE) {
                /* Initialise the voistate stat in slot 0. */
                tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
                tmpstat->stype = VS_STYPE_VOISTATE;
                tmpstat->flags = 0;
                tmpstat->dtype = VSD_DTYPE_VOISTATE;
                newstatdataidx = tmpstat->dsz =
                    sizeof(struct voistatdata_numeric);
                tmpstat->data_off = tpl_sb->statsdata_off;
            }

            /* Populate one voistat slot (indexed by stype) per
             * spec, copying each spec's initial value into the
             * data region. */
            for (i = 0; (uint32_t)i < nvss; i++) {
                tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
                    (vss[i].stype * sizeof(struct voistat)));
                KASSERT(tmpstat->stype < 0, ("voistat %p "
                    "already initialised", tmpstat));
                tmpstat->stype = vss[i].stype;
                tmpstat->flags = vss[i].flags;
                tmpstat->dtype = vss[i].vs_dtype;
                tmpstat->dsz = vss[i].vsdsz;
                tmpstat->data_off = tpl_sb->statsdata_off +
                    newstatdataidx;
                memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
                    vss[i].iv, vss[i].vsdsz);
                newstatdataidx += vss[i].vsdsz;
            }

            /* Update the template version hash. */
            stats_tpl_update_hash(tpllist[tpl_id]);
            /* XXXLAS: Confirm tpl name/hash pair remains unique. */
        }
    } else
        error = EINVAL;
    TPL_LIST_WUNLOCK();

    return (error);
}
+
+struct statsblobv1 *
+stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
+{
+ struct statsblobv1 *sb;
+ int error;
+
+ sb = NULL;
+
+ TPL_LIST_RLOCK();
+ if (tpl_id < ntpl) {
+ sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
+ if (sb != NULL) {
+ sb->maxsz = tpllist[tpl_id]->sb->maxsz;
+ error = stats_v1_blob_init_locked(sb, tpl_id, 0);
+ } else
+ error = ENOMEM;
+
+ if (error) {
+ stats_free(sb);
+ sb = NULL;
+ }
+ }
+ TPL_LIST_RUNLOCK();
+
+ return (sb);
+}
+
/*
 * Release a blob previously obtained from stats_v1_blob_alloc().
 * Presumably stats_free() tolerates NULL like free(3) — confirm against its
 * definition elsewhere in this file.
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

    stats_free(sb);
}
+
+int
+stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
+ enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
+ size_t *retvsdsz)
+{
+ struct voi *v;
+ struct voistat *vs;
+
+ if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
+ voi_id >= NVOIS(sb))
+ return (EINVAL);
+
+ v = &sb->vois[voi_id];
+ if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
+ return (EINVAL);
+
+ vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
+ *retvsd = BLOB_OFFSET(sb, vs->data_off);
+ if (retdtype != NULL)
+ *retdtype = vs->dtype;
+ if (retvsdsz != NULL)
+ *retvsdsz = vs->dsz;
+
+ return (0);
+}
+
+int
+stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
+{
+ int error;
+
+ error = 0;
+
+ TPL_LIST_RLOCK();
+ if (sb == NULL || tpl_id >= ntpl) {
+ error = EINVAL;
+ } else {
+ error = stats_v1_blob_init_locked(sb, tpl_id, flags);
+ }
+ TPL_LIST_RUNLOCK();
+
+ return (error);
+}
+
+static inline int
+stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
+ uint32_t flags __unused)
+{
+ int error;
+
+ TPL_LIST_RLOCK_ASSERT();
+ error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
+ KASSERT(!error,
+ ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
+
+ if (!error) {
+ memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
+ sb->created = sb->lastrst = stats_sbinuptime();
+ sb->tplhash = tpllist[tpl_id]->mb->tplhash;
+ }
+
+ return (error);
+}
+
/*
 * Grow blob "*sbpp" in place to make room for "newvoibytes" of new voi
 * structs, "newvoistatbytes" of new voistat structs and
 * "newvoistatdatabytes" of new stat data, reallocating if the current maxsz
 * is insufficient and shuffling the existing regions to open the gaps.
 * May change where *sbpp points.  Returns 0, EINVAL on misaligned byte
 * counts, EFBIG if the result would exceed SB_V1_MAXSZ, or ENOMEM.
 */
static int
stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes)
{
    struct statsblobv1 *sb;
    struct voi *tmpvoi;
    struct voistat *tmpvoistat, *voistat_array;
    int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;

    KASSERT(newvoibytes % sizeof(struct voi) == 0,
        ("Bad newvoibytes %d", newvoibytes));
    KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
        ("Bad newvoistatbytes %d", newvoistatbytes));

    /* Same checks as the KASSERTs, enforced in non-DIAGNOSTIC builds. */
    error = ((newvoibytes % sizeof(struct voi) == 0) &&
        (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
    sb = *sbpp;
    nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;

    /*
     * XXXLAS: Required until we gain support for flags which alter the
     * units of size/offset fields in key structs.
     */
    if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
        error = EFBIG;

    if (!error && (sb->cursz + nbytes > sb->maxsz)) {
        /* Need to expand our blob. */
        sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
        if (sb != NULL) {
            sb->maxsz = sb->cursz + nbytes;
            *sbpp = sb;
        } else
            error = ENOMEM;
    }

    if (!error) {
        /*
         * Shuffle memory within the expanded blob working from the end
         * backwards, leaving gaps for the new voistat and voistatdata
         * structs at the beginning of their respective blob regions,
         * and for the new voi structs at the end of their blob region.
         */
        memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
            BLOB_OFFSET(sb, sb->statsdata_off),
            sb->cursz - sb->statsdata_off);
        memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
            newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
            sb->statsdata_off - sb->stats_off);

        /* First index of new voi/voistat structs to be initialised. */
        idxnewvois = NVOIS(sb);
        /* NB: -1 when newvoistatbytes == 0, so the "i <= idxnewvoistats"
         * test below marks no slots in that case. */
        idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;

        /* Update housekeeping variables and offsets. */
        sb->cursz += nbytes;
        sb->stats_off += newvoibytes;
        sb->statsdata_off += newvoibytes + newvoistatbytes;

        /* XXXLAS: Zeroing not strictly needed but aids debugging. */
        memset(&sb->vois[idxnewvois], '\0', newvoibytes);
        memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
            newvoistatbytes);
        memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
            newvoistatdatabytes);

        /* Initialise new voi array members and update offsets. */
        for (i = 0; i < NVOIS(sb); i++) {
            tmpvoi = &sb->vois[i];
            if (i >= idxnewvois) {
                /* New slot: mark unused. */
                tmpvoi->id = tmpvoi->voistatmaxid = -1;
            } else if (tmpvoi->id > -1) {
                /* Existing voi: its stats moved with the shuffle. */
                tmpvoi->stats_off += newvoibytes +
                    newvoistatbytes;
            }
        }

        /* Initialise new voistat array members and update offsets. */
        nvoistats = (sb->statsdata_off - sb->stats_off) /
            sizeof(struct voistat);
        voistat_array = BLOB_OFFSET(sb, sb->stats_off);
        for (i = 0; i < nvoistats; i++) {
            tmpvoistat = &voistat_array[i];
            if (i <= idxnewvoistats) {
                /* New slot (gap is at region start): mark unused. */
                tmpvoistat->stype = -1;
            } else if (tmpvoistat->stype > -1) {
                /* Existing stat: its data moved by nbytes. */
                tmpvoistat->data_off += nbytes;
            }
        }
    }

    return (error);
}
+
/*
 * Hook for any final bookkeeping required before a v1 blob is released.
 * Currently a placeholder (see XXXLAS below); intentionally a no-op.
 */
static void
stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
{

    /* XXXLAS: Fill this in. */
}
+
+static void
+stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
+ void *usrctx, uint32_t flags)
+{
+ struct voi *v;
+ struct voistat *vs;
+ struct sb_iter_ctx ctx;
+ int i, j, firstvoi;
+
+ ctx.usrctx = usrctx;
+ ctx.flags |= SB_IT_FIRST_CB;
+ ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
+ SB_IT_LAST_VOISTAT);
+ firstvoi = 1;
+
+ for (i = 0; i < NVOIS(sb); i++) {
+ v = &sb->vois[i];
+ ctx.vslot = i;
+ ctx.vsslot = -1;
+ ctx.flags |= SB_IT_FIRST_VOISTAT;
+
+ if (firstvoi)
+ ctx.flags |= SB_IT_FIRST_VOI;
+ else if (i == (NVOIS(sb) - 1))
+ ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
+
+ if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
+ if (icb(sb, v, NULL, &ctx))
+ return;
+ firstvoi = 0;
+ ctx.flags &= ~SB_IT_FIRST_CB;
+ }
+
+ /* If NULL voi, v->voistatmaxid == -1 */
+ for (j = 0; j <= v->voistatmaxid; j++) {
+ vs = &((struct voistat *)BLOB_OFFSET(sb,
+ v->stats_off))[j];
+ if (vs->stype < 0 &&
+ !(flags & SB_IT_NULLVOISTAT))
+ continue;
+
+ if (j == v->voistatmaxid) {
+ ctx.flags |= SB_IT_LAST_VOISTAT;
+ if (i == (NVOIS(sb) - 1))
+ ctx.flags |=
+ SB_IT_LAST_CB;
+ } else
+ ctx.flags &= ~SB_IT_LAST_CB;
+
+ ctx.vsslot = j;
+ if (icb(sb, v, vs, &ctx))
+ return;
+
+ ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
+ SB_IT_LAST_VOISTAT);
+ }
+ ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
+ }
+}
+
/*
 * Render a clustered t-digest stat data payload into "buf" in freeform or
 * JSON format.  "objdump" selects a raw index-order walk of every centroid
 * array slot instead of an in-order tree walk of active centroids.
 */
static inline void
stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
    const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
    size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
    const struct ctdth32 *ctd32tree;
    const struct ctdth64 *ctd64tree;
    const struct voistatdata_tdgstctd32 *ctd32;
    const struct voistatdata_tdgstctd64 *ctd64;
    const char *fmtstr;
    uint64_t smplcnt, compcnt;
    int is32bit, qmaxstrlen;
    uint16_t maxctds, curctds;

    /* Pick up counts and the first centroid for the 32/64-bit variant. */
    switch (tdgst_dtype) {
    case VSD_DTYPE_TDGSTCLUST32:
        smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
        compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
        maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
        curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
        ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
        ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
            ARB_CMIN(ctdth32, ctd32tree));
        /* NOTE(review): qstr below is sized from the first centroid's
         * mu only; assumes Q_MAXSTRLEN is invariant across centroids
         * of the same type — confirm. */
        qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
        is32bit = 1;
        ctd64tree = NULL;
        ctd64 = NULL;
        break;
    case VSD_DTYPE_TDGSTCLUST64:
        smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
        compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
        maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
        curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
        ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
        ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
            ARB_CMIN(ctdth64, ctd64tree));
        qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
        is32bit = 0;
        ctd32tree = NULL;
        ctd32 = NULL;
        break;
    default:
        return;
    }

    /* Header: counts and centroid capacity/occupancy. */
    switch (fmt) {
    case SB_STRFMT_FREEFORM:
        fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
        break;
    case SB_STRFMT_JSON:
    default:
        fmtstr =
            "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
            "\"nctds\":%hu,\"ctds\":[";
        break;
    }
    sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
        maxctds, curctds);

    /* Emit each centroid: optional index, mu (Q number), count. */
    while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
        char qstr[qmaxstrlen];

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = "\n\t\t\t\t";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = "{";
            break;
        }
        sbuf_cat(buf, fmtstr);

        if (objdump) {
            switch (fmt) {
            case SB_STRFMT_FREEFORM:
                fmtstr = "ctd[%hu].";
                break;
            case SB_STRFMT_JSON:
            default:
                fmtstr = "\"ctd\":%hu,";
                break;
            }
            sbuf_printf(buf, fmtstr, is32bit ?
                ARB_SELFIDX(ctd32tree, ctd32) :
                ARB_SELFIDX(ctd64tree, ctd64));
        }

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = "{mu=";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = "\"mu\":";
            break;
        }
        sbuf_cat(buf, fmtstr);
        Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
            sizeof(qstr));
        sbuf_cat(buf, qstr);

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
            break;
        }
        sbuf_printf(buf, fmtstr,
            is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);

        /* Advance: next array slot (objdump) or in-order successor. */
        if (is32bit)
            ctd32 = (objdump ? ARB_CNODE(ctd32tree,
                ARB_SELFIDX(ctd32tree, ctd32) + 1) :
                ARB_CNEXT(ctdth32, ctd32tree, ctd32));
        else
            ctd64 = (objdump ? ARB_CNODE(ctd64tree,
                ARB_SELFIDX(ctd64tree, ctd64) + 1) :
                ARB_CNEXT(ctdth64, ctd64tree, ctd64));

        /* JSON: comma-separate all but the last centroid. */
        if (fmt == SB_STRFMT_JSON &&
            (is32bit ? NULL != ctd32 : NULL != ctd64))
            sbuf_putc(buf, ',');
    }
    if (fmt == SB_STRFMT_JSON)
        sbuf_cat(buf, "]");
}
+
/*
 * Render a histogram stat data payload (continuous/discrete range or
 * discrete value; 32 or 64 bit counters) into "buf" in freeform or JSON
 * format.  Each bucket is emitted with its lower/upper bound (equal for
 * discrete-value histograms) and count.  "objdump" adds bucket indices.
 */
static inline void
stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
    const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
    size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
    const struct voistatdata_numeric *bkt_lb, *bkt_ub;
    const char *fmtstr;
    int is32bit;
    uint16_t i, nbkts;

    /* Derive the bucket count from the payload size for the variant. */
    switch (hist_dtype) {
    case VSD_DTYPE_CRHIST32:
        nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
        is32bit = 1;
        break;
    case VSD_DTYPE_DRHIST32:
        nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
        is32bit = 1;
        break;
    case VSD_DTYPE_DVHIST32:
        nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
        is32bit = 1;
        break;
    case VSD_DTYPE_CRHIST64:
        nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
        is32bit = 0;
        break;
    case VSD_DTYPE_DRHIST64:
        nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
        is32bit = 0;
        break;
    case VSD_DTYPE_DVHIST64:
        nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
        is32bit = 0;
        break;
    default:
        return;
    }

    switch (fmt) {
    case SB_STRFMT_FREEFORM:
        fmtstr = "nbkts=%hu, ";
        break;
    case SB_STRFMT_JSON:
    default:
        fmtstr = "\"nbkts\":%hu,";
        break;
    }
    sbuf_printf(buf, fmtstr, nbkts);

    /* Out-of-bounds counter, then the bucket list. */
    switch (fmt) {
    case SB_STRFMT_FREEFORM:
        fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
        break;
    case SB_STRFMT_JSON:
    default:
        fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
            "\"oob\":%ju,\"bkts\":[");
        break;
    }
    sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
        hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
        hist_dtype, oob));

    for (i = 0; i < nbkts; i++) {
        /* Select this bucket's bounds per histogram flavour. */
        switch (hist_dtype) {
        case VSD_DTYPE_CRHIST32:
        case VSD_DTYPE_CRHIST64:
            /* Continuous ranges: ub is the next bucket's lb, or
             * the dtype's max for the final bucket. */
            bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
                bkts[i].lb);
            if (i < nbkts - 1)
                bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
                    hist_dtype, bkts[i + 1].lb);
            else
                bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
            break;
        case VSD_DTYPE_DRHIST32:
        case VSD_DTYPE_DRHIST64:
            bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
                bkts[i].lb);
            bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
                bkts[i].ub);
            break;
        case VSD_DTYPE_DVHIST32:
        case VSD_DTYPE_DVHIST64:
            bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
                hist_dtype, bkts[i].val);
            break;
        default:
            /* Unreachable: hist_dtype was validated above.  NB:
             * bkt_lb/bkt_ub would be uninitialised if reached. */
            break;
        }

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = "\n\t\t\t\t";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = "{";
            break;
        }
        sbuf_cat(buf, fmtstr);

        if (objdump) {
            switch (fmt) {
            case SB_STRFMT_FREEFORM:
                fmtstr = "bkt[%hu].";
                break;
            case SB_STRFMT_JSON:
            default:
                fmtstr = "\"bkt\":%hu,";
                break;
            }
            sbuf_printf(buf, fmtstr, i);
        }

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = "{lb=";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = "\"lb\":";
            break;
        }
        sbuf_cat(buf, fmtstr);
        /* Bounds render as plain numerics of the voi's dtype. */
        stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
            voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
            fmt, buf, objdump);

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = ",ub=";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = ",\"ub\":";
            break;
        }
        sbuf_cat(buf, fmtstr);
        stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
            voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
            fmt, buf, objdump);

        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
            break;
        }
        sbuf_printf(buf, fmtstr, is32bit ?
            VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
            (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
            bkts[i].cnt));

        if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
            sbuf_putc(buf, ',');
    }
    if (fmt == SB_STRFMT_JSON)
        sbuf_cat(buf, "]");
}
+
/*
 * Render a single voistat data payload of type "vsd_dtype" into "buf" in
 * the requested format, dispatching compound types (histograms, t-digests,
 * voistate) to their dedicated helpers.  Returns sbuf_error(buf) so callers
 * learn of any sbuf drain/overflow problem, or EINVAL on bad arguments.
 */
int
stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
    enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
    struct sbuf *buf, int objdump)
{
    const char *fmtstr;

    if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
        vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
        return (EINVAL);

    switch (vsd_dtype) {
    case VSD_DTYPE_VOISTATE:
        switch (fmt) {
        case SB_STRFMT_FREEFORM:
            fmtstr = "prev=";
            break;
        case SB_STRFMT_JSON:
        default:
            fmtstr = "\"prev\":";
            break;
        }
        sbuf_cat(buf, fmtstr);
        /*
         * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
         */
        stats_voistatdata_tostr(
            (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
            voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
        break;
    case VSD_DTYPE_INT_S32:
        sbuf_printf(buf, "%d", vsd->int32.s32);
        break;
    case VSD_DTYPE_INT_U32:
        sbuf_printf(buf, "%u", vsd->int32.u32);
        break;
    case VSD_DTYPE_INT_S64:
        sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
        break;
    case VSD_DTYPE_INT_U64:
        sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
        break;
    case VSD_DTYPE_INT_SLONG:
        sbuf_printf(buf, "%ld", vsd->intlong.slong);
        break;
    case VSD_DTYPE_INT_ULONG:
        sbuf_printf(buf, "%lu", vsd->intlong.ulong);
        break;
    /* Q (fixed-point) types format via Q_TOSTR into a stack buffer
     * sized by Q_MAXSTRLEN for the value's type. */
    case VSD_DTYPE_Q_S32:
    {
        char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
        Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
        sbuf_cat(buf, qstr);
    }
        break;
    case VSD_DTYPE_Q_U32:
    {
        char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
        Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
        sbuf_cat(buf, qstr);
    }
        break;
    case VSD_DTYPE_Q_S64:
    {
        char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
        Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
        sbuf_cat(buf, qstr);
    }
        break;
    case VSD_DTYPE_Q_U64:
    {
        char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
        Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
        sbuf_cat(buf, qstr);
    }
        break;
    case VSD_DTYPE_CRHIST32:
    case VSD_DTYPE_DRHIST32:
    case VSD_DTYPE_DVHIST32:
    case VSD_DTYPE_CRHIST64:
    case VSD_DTYPE_DRHIST64:
    case VSD_DTYPE_DVHIST64:
        stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
            vsd_dtype, vsd_sz, fmt, buf, objdump);
        break;
    case VSD_DTYPE_TDGSTCLUST32:
    case VSD_DTYPE_TDGSTCLUST64:
        stats_voistatdata_tdgst_tostr(voi_dtype,
            CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
            objdump);
        break;
    default:
        break;
    }

    return (sbuf_error(buf));
}
+
/*
 * Blob-iterator callback rendering one voi/voistat pair as human-readable
 * freeform text.  Blob header fields are emitted on the first callback,
 * per-voi fields on each voi's first voistat.  SB_TOSTR_OBJDUMP in the
 * caller-supplied flags adds raw struct fields and NULL slots.
 */
static void
stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx)
{
    struct sb_tostrcb_ctx *sctx;
    struct metablob *tpl_mb;
    struct sbuf *buf;
    void *vsd;
    uint8_t dump;

    sctx = ctx->usrctx;
    buf = sctx->buf;
    /* Template metadata (names) is only available with SB_TOSTR_META. */
    tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
    dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

    if (ctx->flags & SB_IT_FIRST_CB) {
        sbuf_printf(buf, "struct statsblobv1@%p", sb);
        if (dump) {
            sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
                "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
                "stats_off=%hu, statsdata_off=%hu",
                sb->abi, sb->endian, sb->maxsz, sb->cursz,
                sb->created, sb->lastrst, sb->flags, sb->stats_off,
                sb->statsdata_off);
        }
        sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
    }

    if (ctx->flags & SB_IT_FIRST_VOISTAT) {
        sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
        /* NULL voi slot (only delivered in objdump mode): no detail. */
        if (v->id < 0)
            return;
        sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
            tpl_mb->voi_meta[v->id].name);
        if (dump)
            sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
                "voistatmaxid=%hhd, stats_off=%hu", v->flags,
                vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
    }

    /* Skip voistate (stype 0) and NULL slots unless objdumping. */
    if (!dump && vs->stype <= 0)
        return;

    sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
    if (vs->stype < 0) {
        sbuf_printf(buf, "%hhd", vs->stype);
        return;
    } else
        sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
            vs->errs);
    vsd = BLOB_OFFSET(sb, vs->data_off);
    if (dump)
        sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
            "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
            vs->dsz, vs->data_off);

    sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
    stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
        sctx->fmt, buf, dump);
}
+
/*
 * Blob-iterator callback rendering one voi/voistat pair as JSON.  The
 * document is opened on the first callback and closed on the last; vois and
 * stats are keyed by name when template metadata is available, otherwise by
 * slot index.  SB_TOSTR_OBJDUMP adds raw struct fields and NULL slots.
 */
static void
stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
    struct sb_iter_ctx *ctx)
{
    struct sb_tostrcb_ctx *sctx;
    struct metablob *tpl_mb;
    struct sbuf *buf;
    const char *fmtstr;
    void *vsd;
    uint8_t dump;

    sctx = ctx->usrctx;
    buf = sctx->buf;
    tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
    dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

    if (ctx->flags & SB_IT_FIRST_CB) {
        sbuf_putc(buf, '{');
        if (dump) {
            sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
                "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
                "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
                "\"statsdata_off\":%hu,", sb->abi,
                sb->endian, sb->maxsz, sb->cursz, sb->created,
                sb->lastrst, sb->flags, sb->stats_off,
                sb->statsdata_off);
        }

        /* Without metadata, emit the JSON literal null (unquoted). */
        if (tpl_mb == NULL)
            fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
        else
            fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";

        sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
            sb->tplhash);
    }

    if (ctx->flags & SB_IT_FIRST_VOISTAT) {
        if (dump) {
            sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
                v->id);
            /* NULL voi slot: close the object and move on. */
            if (v->id < 0) {
                sbuf_printf(buf, "},");
                return;
            }

            if (tpl_mb == NULL)
                fmtstr = ",\"name\":%s,\"flags\":%hu,"
                    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
                    "\"stats_off\":%hu,";
            else
                fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
                    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
                    "\"stats_off\":%hu,";

            sbuf_printf(buf, fmtstr, tpl_mb ?
                tpl_mb->voi_meta[v->id].name : "null", v->flags,
                vsd_dtype2name[v->dtype], v->voistatmaxid,
                v->stats_off);
        } else {
            /* Key by name when known, slot id otherwise. */
            if (tpl_mb == NULL) {
                sbuf_printf(buf, "\"[%hd]\":{", v->id);
            } else {
                sbuf_printf(buf, "\"%s\":{",
                    tpl_mb->voi_meta[v->id].name);
            }
        }
        sbuf_cat(buf, "\"stats\":{");
    }

    vsd = BLOB_OFFSET(sb, vs->data_off);
    if (dump) {
        sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
        if (vs->stype < 0) {
            sbuf_printf(buf, "{\"stype\":-1},");
            return;
        }
        sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
            "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
            vs_stype2name[vs->stype], vs->errs, vs->flags,
            vsd_dtype2name[vs->dtype], vs->data_off);
    } else if (vs->stype > 0) {
        if (tpl_mb == NULL)
            sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
        else
            sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
    } else
        /* Voistate (stype 0) is internal; not rendered unless dumping. */
        return;

    if ((vs->flags & VS_VSDVALID) || dump) {
        if (!dump)
            sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
        /* Simple non-compound VSD types need a key. */
        if (!vsd_compoundtype[vs->dtype])
            sbuf_cat(buf, "\"val\":");
        stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
            sctx->fmt, buf, dump);
        sbuf_cat(buf, dump ? "}}" : "}");
    } else
        sbuf_cat(buf, dump ? "null}" : "null");

    /* Close the stats and voi objects / the whole document. */
    if (ctx->flags & SB_IT_LAST_VOISTAT)
        sbuf_cat(buf, "}}");

    if (ctx->flags & SB_IT_LAST_CB)
        sbuf_cat(buf, "}}");
    else
        sbuf_putc(buf, ',');
}
+
+static int
+stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
+ struct sb_iter_ctx *ctx)
+{
+ struct sb_tostrcb_ctx *sctx;
+
+ sctx = ctx->usrctx;
+
+ switch (sctx->fmt) {
+ case SB_STRFMT_FREEFORM:
+ stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
+ break;
+ case SB_STRFMT_JSON:
+ stats_v1_itercb_tostr_json(sb, v, vs, ctx);
+ break;
+ default:
+ break;
+ }
+
+ return (sbuf_error(sctx->buf));
+}
+
+int
+stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
+ enum sb_str_fmt fmt, uint32_t flags)
+{
+ struct sb_tostrcb_ctx sctx;
+ uint32_t iflags;
+
+ if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
+ fmt >= SB_STRFMT_NUM_FMTS)
+ return (EINVAL);
+
+ sctx.buf = buf;
+ sctx.fmt = fmt;
+ sctx.flags = flags;
+
+ if (flags & SB_TOSTR_META) {
+ if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
+ &sctx.tpl))
+ return (EINVAL);
+ } else
+ sctx.tpl = NULL;
+
+ iflags = 0;
+ if (flags & SB_TOSTR_OBJDUMP)
+ iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
+ stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
+
+ return (sbuf_error(buf));
+}
+
+static int
+stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
+ struct voistat *vs, struct sb_iter_ctx *ctx)
+{
+ struct sb_visitcb_ctx *vctx;
+ struct sb_visit sbv;
+
+ vctx = ctx->usrctx;
+
+ sbv.tplhash = sb->tplhash;
+ sbv.voi_id = v->id;
+ sbv.voi_dtype = v->dtype;
+ sbv.vs_stype = vs->stype;
+ sbv.vs_dtype = vs->dtype;
+ sbv.vs_dsz = vs->dsz;
+ sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
+ sbv.vs_errs = vs->errs;
+ sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
+ SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
+ SB_IT_LAST_VOISTAT);
+
+ return (vctx->cb(&sbv, vctx->usrctx));
+}
+
+int
+stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
+ void *usrctx)
+{
+ struct sb_visitcb_ctx vctx;
+
+ if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
+ return (EINVAL);
+
+ vctx.cb = func;
+ vctx.usrctx = usrctx;
+
+ stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
+
+ return (0);
+}
+
+/*
+ * Blob iterator callback: reset a single voistat's data to its
+ * "no samples yet" state. SUMs are zeroed, MAX/MIN are seeded with the
+ * dtype's minimum/maximum so any sample replaces them, histogram bucket
+ * and out-of-band counts are zeroed (bucket bounds are preserved), and
+ * t-digest centroid trees are re-initialised. VOISTATE internal state
+ * is deliberately left untouched so relative updates survive a reset.
+ * Always returns 0 so iteration continues over the whole blob.
+ */
+static int
+stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
+ struct voistat *vs, struct sb_iter_ctx *ctx __unused)
+{
+ void *vsd;
+
+ /* Internal VOI state is not sample data; never reset it. */
+ if (vs->stype == VS_STYPE_VOISTATE)
+ return (0);
+
+ vsd = BLOB_OFFSET(sb, vs->data_off);
+
+ /* Perform the stat type's default reset action. */
+ switch (vs->stype) {
+ case VS_STYPE_SUM:
+ switch (vs->dtype) {
+ case VSD_DTYPE_Q_S32:
+ Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
+ break;
+ case VSD_DTYPE_Q_U32:
+ Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
+ break;
+ case VSD_DTYPE_Q_S64:
+ Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
+ break;
+ case VSD_DTYPE_Q_U64:
+ Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
+ break;
+ default:
+ /* Plain integer types can simply be zeroed. */
+ bzero(vsd, vs->dsz);
+ break;
+ }
+ break;
+ case VS_STYPE_MAX:
+ /* Seed with the dtype's minimum so any sample wins. */
+ switch (vs->dtype) {
+ case VSD_DTYPE_Q_S32:
+ Q_SIFVAL(VSD(q32, vsd)->sq32,
+ Q_IFMINVAL(VSD(q32, vsd)->sq32));
+ break;
+ case VSD_DTYPE_Q_U32:
+ Q_SIFVAL(VSD(q32, vsd)->uq32,
+ Q_IFMINVAL(VSD(q32, vsd)->uq32));
+ break;
+ case VSD_DTYPE_Q_S64:
+ Q_SIFVAL(VSD(q64, vsd)->sq64,
+ Q_IFMINVAL(VSD(q64, vsd)->sq64));
+ break;
+ case VSD_DTYPE_Q_U64:
+ Q_SIFVAL(VSD(q64, vsd)->uq64,
+ Q_IFMINVAL(VSD(q64, vsd)->uq64));
+ break;
+ default:
+ memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
+ vs->dsz);
+ break;
+ }
+ break;
+ case VS_STYPE_MIN:
+ /* Seed with the dtype's maximum so any sample wins. */
+ switch (vs->dtype) {
+ case VSD_DTYPE_Q_S32:
+ Q_SIFVAL(VSD(q32, vsd)->sq32,
+ Q_IFMAXVAL(VSD(q32, vsd)->sq32));
+ break;
+ case VSD_DTYPE_Q_U32:
+ Q_SIFVAL(VSD(q32, vsd)->uq32,
+ Q_IFMAXVAL(VSD(q32, vsd)->uq32));
+ break;
+ case VSD_DTYPE_Q_S64:
+ Q_SIFVAL(VSD(q64, vsd)->sq64,
+ Q_IFMAXVAL(VSD(q64, vsd)->sq64));
+ break;
+ case VSD_DTYPE_Q_U64:
+ Q_SIFVAL(VSD(q64, vsd)->uq64,
+ Q_IFMAXVAL(VSD(q64, vsd)->uq64));
+ break;
+ default:
+ memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
+ vs->dsz);
+ break;
+ }
+ break;
+ case VS_STYPE_HIST:
+ {
+ /* Reset bucket counts. */
+ struct voistatdata_hist *hist;
+ int i, is32bit;
+ uint16_t nbkts;
+
+ hist = VSD(hist, vsd);
+ switch (vs->dtype) {
+ case VSD_DTYPE_CRHIST32:
+ nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
+ is32bit = 1;
+ break;
+ case VSD_DTYPE_DRHIST32:
+ nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
+ is32bit = 1;
+ break;
+ case VSD_DTYPE_DVHIST32:
+ nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
+ is32bit = 1;
+ break;
+ case VSD_DTYPE_CRHIST64:
+ nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
+ is32bit = 0;
+ break;
+ case VSD_DTYPE_DRHIST64:
+ nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
+ is32bit = 0;
+ break;
+ case VSD_DTYPE_DVHIST64:
+ nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
+ is32bit = 0;
+ break;
+ default:
+ return (0);
+ }
+
+ /* Zero the out-of-band counter, then each bucket's count. */
+ bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
+ is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
+ for (i = nbkts - 1; i >= 0; i--) {
+ bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
+ bkts[i].cnt), is32bit ? sizeof(uint32_t) :
+ sizeof(uint64_t));
+ }
+ break;
+ }
+ case VS_STYPE_TDGST:
+ {
+ /* Reset sample count centroids array/tree. */
+ struct voistatdata_tdgst *tdgst;
+ struct ctdth32 *ctd32tree;
+ struct ctdth64 *ctd64tree;
+ struct voistatdata_tdgstctd32 *ctd32;
+ struct voistatdata_tdgstctd64 *ctd64;
+
+ tdgst = VSD(tdgst, vsd);
+ switch (vs->dtype) {
+ case VSD_DTYPE_TDGSTCLUST32:
+ VSD(tdgstclust32, tdgst)->smplcnt = 0;
+ VSD(tdgstclust32, tdgst)->compcnt = 0;
+ ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
+ ARB_INIT(ctd32, ctdlnk, ctd32tree,
+ ARB_MAXNODES(ctd32tree)) {
+ ctd32->cnt = 0;
+ Q_SIFVAL(ctd32->mu, 0);
+ }
+#ifdef DIAGNOSTIC
+ RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
+#endif
+ break;
+ case VSD_DTYPE_TDGSTCLUST64:
+ VSD(tdgstclust64, tdgst)->smplcnt = 0;
+ VSD(tdgstclust64, tdgst)->compcnt = 0;
+ ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
+ ARB_INIT(ctd64, ctdlnk, ctd64tree,
+ ARB_MAXNODES(ctd64tree)) {
+ ctd64->cnt = 0;
+ Q_SIFVAL(ctd64->mu, 0);
+ }
+#ifdef DIAGNOSTIC
+ RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
+#endif
+ break;
+ default:
+ return (0);
+ }
+ break;
+ }
+ default:
+ KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
+ break;
+ }
+
+ /* Clear latched errors and mark the data as not-yet-valid. */
+ vs->errs = 0;
+ vs->flags &= ~VS_VSDVALID;
+
+ return (0);
+}
+
+/*
+ * Snapshot an ABI v1 blob into *dst (cloning per 'flags'), optionally
+ * resetting the source's stats afterwards when SB_CLONE_RSTSRC is set,
+ * and finalise the destination copy. Returns 0 on success, EINVAL for
+ * a NULL/non-v1 source, or the error from stats_v1_blob_clone().
+ */
+int
+stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
+ struct statsblobv1 *src, uint32_t flags)
+{
+ int error;
+
+ /* Only ABI v1 blobs can be snapshotted. */
+ if (src == NULL || src->abi != STATS_ABI_V1)
+ return (EINVAL);
+
+ error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
+ if (error)
+ return (error);
+
+ if (flags & SB_CLONE_RSTSRC) {
+ /* Reset the source's stats and stamp the reset time. */
+ stats_v1_blob_iter(src, stats_v1_icb_reset_voistat, NULL, 0);
+ src->lastrst = stats_sbinuptime();
+ }
+ stats_v1_blob_finalise(*dst);
+
+ return (0);
+}
+
+/*
+ * Update a VS_STYPE_MAX voistat with a new VOI sample, keeping the
+ * larger of the stored and new values. VS_VSDVALID is set once valid
+ * data has been recorded. Returns 0 on success, EINVAL for an unknown
+ * voistat dtype, or a Q-type copy error (value left unchanged then).
+ */
+static inline int
+stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
+ struct voistatdata *voival, struct voistat *vs, void *vsd)
+{
+ int error;
+
+ KASSERT(vs->dtype < VSD_NUM_DTYPES,
+ ("Unknown VSD dtype %d", vs->dtype));
+
+ error = 0;
+
+ switch (vs->dtype) {
+ case VSD_DTYPE_INT_S32:
+ if (VSD(int32, vsd)->s32 < voival->int32.s32) {
+ VSD(int32, vsd)->s32 = voival->int32.s32;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_U32:
+ if (VSD(int32, vsd)->u32 < voival->int32.u32) {
+ VSD(int32, vsd)->u32 = voival->int32.u32;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_S64:
+ if (VSD(int64, vsd)->s64 < voival->int64.s64) {
+ VSD(int64, vsd)->s64 = voival->int64.s64;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_U64:
+ if (VSD(int64, vsd)->u64 < voival->int64.u64) {
+ VSD(int64, vsd)->u64 = voival->int64.u64;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
+ VSD(intlong, vsd)->slong = voival->intlong.slong;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
+ VSD(intlong, vsd)->ulong = voival->intlong.ulong;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ /*
+ * Q-types: copy only when the sample is strictly greater; a
+ * failed copy latches 'error' and leaves the stored max as-is.
+ */
+ case VSD_DTYPE_Q_S32:
+ if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
+ voival->q32.sq32)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_U32:
+ if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
+ voival->q32.uq32)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_S64:
+ if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
+ voival->q64.sq64)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_U64:
+ if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
+ voival->q64.uq64)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Update a VS_STYPE_MIN voistat with a new VOI sample, keeping the
+ * smaller of the stored and new values. Mirror image of
+ * stats_v1_voi_update_max(); see it for the error/VS_VSDVALID contract.
+ */
+static inline int
+stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
+ struct voistatdata *voival, struct voistat *vs, void *vsd)
+{
+ int error;
+
+ KASSERT(vs->dtype < VSD_NUM_DTYPES,
+ ("Unknown VSD dtype %d", vs->dtype));
+
+ error = 0;
+
+ switch (vs->dtype) {
+ case VSD_DTYPE_INT_S32:
+ if (VSD(int32, vsd)->s32 > voival->int32.s32) {
+ VSD(int32, vsd)->s32 = voival->int32.s32;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_U32:
+ if (VSD(int32, vsd)->u32 > voival->int32.u32) {
+ VSD(int32, vsd)->u32 = voival->int32.u32;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_S64:
+ if (VSD(int64, vsd)->s64 > voival->int64.s64) {
+ VSD(int64, vsd)->s64 = voival->int64.s64;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_U64:
+ if (VSD(int64, vsd)->u64 > voival->int64.u64) {
+ VSD(int64, vsd)->u64 = voival->int64.u64;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
+ VSD(intlong, vsd)->slong = voival->intlong.slong;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
+ VSD(intlong, vsd)->ulong = voival->intlong.ulong;
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ /*
+ * Q-types: copy only when the sample is strictly smaller; a
+ * failed copy latches 'error' and leaves the stored min as-is.
+ */
+ case VSD_DTYPE_Q_S32:
+ if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
+ voival->q32.sq32)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_U32:
+ if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
+ voival->q32.uq32)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_S64:
+ if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
+ voival->q64.sq64)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ case VSD_DTYPE_Q_U64:
+ if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
+ (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
+ voival->q64.uq64)))) {
+ vs->flags |= VS_VSDVALID;
+ }
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Update a VS_STYPE_SUM voistat by adding the new VOI sample to the
+ * running total. Integer types wrap per C unsigned/implementation
+ * semantics; Q-types report addition failure via the Q_QADDQ() error,
+ * in which case VS_VSDVALID is not set for this update. Returns 0 on
+ * success, EINVAL for an unknown dtype, or the Q-type add error.
+ */
+static inline int
+stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
+ struct voistatdata *voival, struct voistat *vs, void *vsd)
+{
+ int error;
+
+ KASSERT(vs->dtype < VSD_NUM_DTYPES,
+ ("Unknown VSD dtype %d", vs->dtype));
+
+ error = 0;
+
+ switch (vs->dtype) {
+ case VSD_DTYPE_INT_S32:
+ VSD(int32, vsd)->s32 += voival->int32.s32;
+ break;
+ case VSD_DTYPE_INT_U32:
+ VSD(int32, vsd)->u32 += voival->int32.u32;
+ break;
+ case VSD_DTYPE_INT_S64:
+ VSD(int64, vsd)->s64 += voival->int64.s64;
+ break;
+ case VSD_DTYPE_INT_U64:
+ VSD(int64, vsd)->u64 += voival->int64.u64;
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ VSD(intlong, vsd)->slong += voival->intlong.slong;
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ VSD(intlong, vsd)->ulong += voival->intlong.ulong;
+ break;
+ case VSD_DTYPE_Q_S32:
+ error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
+ break;
+ case VSD_DTYPE_Q_U32:
+ error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
+ break;
+ case VSD_DTYPE_Q_S64:
+ error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
+ break;
+ case VSD_DTYPE_Q_U64:
+ error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ /* Mark the sum valid only if the add itself succeeded. */
+ if (!error)
+ vs->flags |= VS_VSDVALID;
+
+ return (error);
+}
+
+/*
+ * Update a VS_STYPE_HIST voistat: find the bucket the new VOI sample
+ * belongs to and bump its count, or bump the out-of-band counter if no
+ * bucket matches. Histogram flavours differ in bucket semantics:
+ * CRHIST (continuous range, lower bound only), DRHIST (discrete range,
+ * lower+upper bounds, 'has_ub'), DVHIST (discrete value, exact match,
+ * 'eq_only'), each in 32- and 64-bit counter variants ('is32bit').
+ * Returns 0, or EINVAL for an unknown histogram dtype.
+ */
+static inline int
+stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
+ struct voistat *vs, struct voistatdata_hist *hist)
+{
+ struct voistatdata_numeric *bkt_lb, *bkt_ub;
+ uint64_t *oob64, *cnt64;
+ uint32_t *oob32, *cnt32;
+ int error, i, found, is32bit, has_ub, eq_only;
+
+ error = 0;
+
+ /*
+ * 'i' initially holds the bucket count; it becomes the search
+ * index after the decrement below.
+ */
+ switch (vs->dtype) {
+ case VSD_DTYPE_CRHIST32:
+ i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
+ is32bit = 1;
+ has_ub = eq_only = 0;
+ oob32 = &VSD(crhist32, hist)->oob;
+ break;
+ case VSD_DTYPE_DRHIST32:
+ i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
+ is32bit = has_ub = 1;
+ eq_only = 0;
+ oob32 = &VSD(drhist32, hist)->oob;
+ break;
+ case VSD_DTYPE_DVHIST32:
+ i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
+ is32bit = eq_only = 1;
+ has_ub = 0;
+ oob32 = &VSD(dvhist32, hist)->oob;
+ break;
+ case VSD_DTYPE_CRHIST64:
+ i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
+ is32bit = has_ub = eq_only = 0;
+ oob64 = &VSD(crhist64, hist)->oob;
+ break;
+ case VSD_DTYPE_DRHIST64:
+ i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
+ is32bit = eq_only = 0;
+ has_ub = 1;
+ oob64 = &VSD(drhist64, hist)->oob;
+ break;
+ case VSD_DTYPE_DVHIST64:
+ i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
+ is32bit = has_ub = 0;
+ eq_only = 1;
+ oob64 = &VSD(dvhist64, hist)->oob;
+ break;
+ default:
+ return (EINVAL);
+ }
+ i--; /* Adjust for 0-based array index. */
+
+ /* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
+ /* Linear scan from the last bucket down towards the first. */
+ for (found = 0; i >= 0 && !found; i--) {
+ /* Pick up this bucket's bound(s) and count pointer. */
+ switch (vs->dtype) {
+ case VSD_DTYPE_CRHIST32:
+ bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
+ cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
+ break;
+ case VSD_DTYPE_DRHIST32:
+ bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
+ bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
+ cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
+ break;
+ case VSD_DTYPE_DVHIST32:
+ bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
+ cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
+ break;
+ case VSD_DTYPE_CRHIST64:
+ bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
+ cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
+ break;
+ case VSD_DTYPE_DRHIST64:
+ bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
+ bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
+ cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
+ break;
+ case VSD_DTYPE_DVHIST64:
+ bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
+ cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /*
+ * Membership test per VOI dtype: exact match for eq_only
+ * histograms, else lb <= val (< ub when has_ub).
+ */
+ switch (voi_dtype) {
+ case VSD_DTYPE_INT_S32:
+ if (voival->int32.s32 >= bkt_lb->int32.s32) {
+ if ((eq_only && voival->int32.s32 ==
+ bkt_lb->int32.s32) ||
+ (!eq_only && (!has_ub ||
+ voival->int32.s32 < bkt_ub->int32.s32)))
+ found = 1;
+ }
+ break;
+ case VSD_DTYPE_INT_U32:
+ if (voival->int32.u32 >= bkt_lb->int32.u32) {
+ if ((eq_only && voival->int32.u32 ==
+ bkt_lb->int32.u32) ||
+ (!eq_only && (!has_ub ||
+ voival->int32.u32 < bkt_ub->int32.u32)))
+ found = 1;
+ }
+ break;
+ case VSD_DTYPE_INT_S64:
+ if (voival->int64.s64 >= bkt_lb->int64.s64)
+ if ((eq_only && voival->int64.s64 ==
+ bkt_lb->int64.s64) ||
+ (!eq_only && (!has_ub ||
+ voival->int64.s64 < bkt_ub->int64.s64)))
+ found = 1;
+ break;
+ case VSD_DTYPE_INT_U64:
+ if (voival->int64.u64 >= bkt_lb->int64.u64)
+ if ((eq_only && voival->int64.u64 ==
+ bkt_lb->int64.u64) ||
+ (!eq_only && (!has_ub ||
+ voival->int64.u64 < bkt_ub->int64.u64)))
+ found = 1;
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ if (voival->intlong.slong >= bkt_lb->intlong.slong)
+ if ((eq_only && voival->intlong.slong ==
+ bkt_lb->intlong.slong) ||
+ (!eq_only && (!has_ub ||
+ voival->intlong.slong <
+ bkt_ub->intlong.slong)))
+ found = 1;
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
+ if ((eq_only && voival->intlong.ulong ==
+ bkt_lb->intlong.ulong) ||
+ (!eq_only && (!has_ub ||
+ voival->intlong.ulong <
+ bkt_ub->intlong.ulong)))
+ found = 1;
+ break;
+ case VSD_DTYPE_Q_S32:
+ if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
+ if ((eq_only && Q_QEQ(voival->q32.sq32,
+ bkt_lb->q32.sq32)) ||
+ (!eq_only && (!has_ub ||
+ Q_QLTQ(voival->q32.sq32,
+ bkt_ub->q32.sq32))))
+ found = 1;
+ break;
+ case VSD_DTYPE_Q_U32:
+ if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
+ if ((eq_only && Q_QEQ(voival->q32.uq32,
+ bkt_lb->q32.uq32)) ||
+ (!eq_only && (!has_ub ||
+ Q_QLTQ(voival->q32.uq32,
+ bkt_ub->q32.uq32))))
+ found = 1;
+ break;
+ case VSD_DTYPE_Q_S64:
+ if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
+ if ((eq_only && Q_QEQ(voival->q64.sq64,
+ bkt_lb->q64.sq64)) ||
+ (!eq_only && (!has_ub ||
+ Q_QLTQ(voival->q64.sq64,
+ bkt_ub->q64.sq64))))
+ found = 1;
+ break;
+ case VSD_DTYPE_Q_U64:
+ if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
+ if ((eq_only && Q_QEQ(voival->q64.uq64,
+ bkt_lb->q64.uq64)) ||
+ (!eq_only && (!has_ub ||
+ Q_QLTQ(voival->q64.uq64,
+ bkt_ub->q64.uq64))))
+ found = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Bump the matched bucket's count, or the out-of-band count. */
+ if (found) {
+ if (is32bit)
+ *cnt32 += 1;
+ else
+ *cnt64 += 1;
+ } else {
+ if (is32bit)
+ *oob32 += 1;
+ else
+ *oob64 += 1;
+ }
+
+ vs->flags |= VS_VSDVALID;
+ return (error);
+}
+
+/*
+ * Compress a full clustered t-digest in place. The centroid ARB is
+ * reset and every old centroid's mu/cnt is re-inserted (as a weighted
+ * sample via stats_v1_vsd_tdgst_add()) in pseudorandom order, which
+ * lets nearby centroids merge and frees capacity for new samples.
+ * No-op if the tree is not full. Returns 0, EINVAL for a non-t-digest
+ * dtype, or a latched error from the re-insertion passes.
+ */
+static inline int
+stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
+ struct voistatdata_tdgst *tdgst, int attempt)
+{
+ struct ctdth32 *ctd32tree;
+ struct ctdth64 *ctd64tree;
+ struct voistatdata_tdgstctd32 *ctd32;
+ struct voistatdata_tdgstctd64 *ctd64;
+ uint64_t ebits, idxmask;
+ uint32_t bitsperidx, nebits;
+ int error, idx, is32bit, maxctds, remctds, tmperr;
+
+ error = 0;
+
+ switch (vs_dtype) {
+ case VSD_DTYPE_TDGSTCLUST32:
+ ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
+ if (!ARB_FULL(ctd32tree))
+ return (0);
+ VSD(tdgstclust32, tdgst)->compcnt++;
+ maxctds = remctds = ARB_MAXNODES(ctd32tree);
+ ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
+ VSD(tdgstclust32, tdgst)->smplcnt = 0;
+ is32bit = 1;
+ ctd64tree = NULL;
+ ctd64 = NULL;
+#ifdef DIAGNOSTIC
+ RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
+#endif
+ break;
+ case VSD_DTYPE_TDGSTCLUST64:
+ ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
+ if (!ARB_FULL(ctd64tree))
+ return (0);
+ VSD(tdgstclust64, tdgst)->compcnt++;
+ maxctds = remctds = ARB_MAXNODES(ctd64tree);
+ ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
+ VSD(tdgstclust64, tdgst)->smplcnt = 0;
+ is32bit = 0;
+ ctd32tree = NULL;
+ ctd32 = NULL;
+#ifdef DIAGNOSTIC
+ RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
+#endif
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /*
+ * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
+ * re-inserting the mu/cnt of each as a value and corresponding weight.
+ */
+
+#define bitsperrand 31 /* Per random(3). */
+ ebits = 0;
+ nebits = 0;
+ bitsperidx = fls(maxctds);
+ KASSERT(bitsperidx <= sizeof(ebits) << 3,
+ ("%s: bitsperidx=%d, ebits=%d",
+ __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
+ idxmask = (UINT64_C(1) << bitsperidx) - 1;
+ srandom(stats_sbinuptime());
+
+ /* Initialise the free list with randomised centroid indices. */
+ for (; remctds > 0; remctds--) {
+ /* Refill the entropy pool 'ebits' bitsperidx bits at a time. */
+ while (nebits < bitsperidx) {
+ ebits |= ((uint64_t)random()) << nebits;
+ nebits += bitsperrand;
+ if (nebits > (sizeof(ebits) << 3))
+ nebits = sizeof(ebits) << 3;
+ }
+ idx = ebits & idxmask;
+ nebits -= bitsperidx;
+ ebits >>= bitsperidx;
+
+ /*
+ * Select the next centroid to put on the ARB free list. We
+ * start with the centroid at our randomly selected array index,
+ * and work our way forwards until finding one (the latter
+ * aspect reduces re-insertion randomness, but is good enough).
+ */
+ do {
+ if (idx >= maxctds)
+ idx %= maxctds;
+
+ if (is32bit)
+ ctd32 = ARB_NODE(ctd32tree, idx);
+ else
+ ctd64 = ARB_NODE(ctd64tree, idx);
+ } while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
+ ARB_ISFREE(ctd64, ctdlnk)) && ++idx);
+
+ /* Put the centroid on the ARB free list. */
+ if (is32bit)
+ ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
+ else
+ ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
+ }
+
+ /*
+ * The free list now contains the randomised indices of every centroid.
+ * Walk the free list from start to end, re-inserting each centroid's
+ * mu/cnt. The tdgst_add() call may or may not consume the free centroid
+ * we re-insert values from during each loop iteration, so we must latch
+ * the index of the next free list centroid before the re-insertion
+ * call. The previous loop above should have left the centroid pointer
+ * pointing to the element at the head of the free list.
+ */
+ KASSERT((is32bit ?
+ ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
+ ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
+ ("%s: t-digest ARB@%p free list bug", __func__,
+ (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
+ remctds = maxctds;
+ while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
+ tmperr = 0;
+ if (is32bit) {
+ s64q_t x;
+
+ idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
+ /* Cloning a s32q_t into a s64q_t should never fail. */
+ tmperr = Q_QCLONEQ(&x, ctd32->mu);
+ tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
+ vs_dtype, tdgst, x, ctd32->cnt, attempt);
+ ctd32 = ARB_NODE(ctd32tree, idx);
+ KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
+ ("%s: t-digest ARB@%p free list bug", __func__,
+ ctd32tree));
+ } else {
+ idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
+ tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
+ ctd64->mu, ctd64->cnt, attempt);
+ ctd64 = ARB_NODE(ctd64tree, idx);
+ KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
+ ("%s: t-digest ARB@%p free list bug", __func__,
+ ctd64tree));
+ }
+ /*
+ * This process should not produce errors, bugs notwithstanding.
+ * Just in case, latch any errors and attempt all re-insertions.
+ */
+ error = tmperr ? tmperr : error;
+ remctds--;
+ }
+
+ KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
+ (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
+
+ return (error);
+}
+
+/*
+ * Add a sample of value 'x' and weight 'weight' to a clustered
+ * t-digest. Finds the set of centroids closest to x, merges into one
+ * of them (chosen by reservoir sampling) if its size constraint 'k'
+ * permits, otherwise allocates a new centroid from the ARB free list.
+ * When the digest is full and no merge candidate exists, the digest is
+ * compressed and the search retried ('attempt' bounds the retries).
+ * Returns 0 on success, or EOVERFLOW/EAGAIN/EINVAL/Q-arithmetic errors.
+ */
+static inline int
+stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
+ s64q_t x, uint64_t weight, int attempt)
+{
+#ifdef DIAGNOSTIC
+ char qstr[Q_MAXSTRLEN(x, 10)];
+#endif
+ struct ctdth32 *ctd32tree;
+ struct ctdth64 *ctd64tree;
+ void *closest, *cur, *lb, *ub;
+ struct voistatdata_tdgstctd32 *ctd32;
+ struct voistatdata_tdgstctd64 *ctd64;
+ uint64_t cnt, smplcnt, sum, tmpsum;
+ s64q_t k, minz, q, z;
+ int error, is32bit, n;
+
+ error = 0;
+ minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
+
+ /* Reject sample-count overflow before touching the digest. */
+ switch (vs_dtype) {
+ case VSD_DTYPE_TDGSTCLUST32:
+ if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
+ error = EOVERFLOW;
+ smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
+ ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
+ is32bit = 1;
+ ctd64tree = NULL;
+ ctd64 = NULL;
+ break;
+ case VSD_DTYPE_TDGSTCLUST64:
+ if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
+ error = EOVERFLOW;
+ smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
+ ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
+ is32bit = 0;
+ ctd32tree = NULL;
+ ctd32 = NULL;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error)
+ return (error);
+
+ /*
+ * Inspired by Ted Dunning's AVLTreeDigest.java
+ */
+ do {
+#if defined(DIAGNOSTIC)
+ KASSERT(attempt < 5,
+ ("%s: Too many attempts", __func__));
+#endif
+ if (attempt >= 5)
+ return (EAGAIN);
+
+ Q_SIFVAL(minz, Q_IFMAXVAL(minz));
+ closest = ub = NULL;
+ sum = tmpsum = 0;
+
+ if (is32bit)
+ lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
+ else
+ lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
+
+ if (lb == NULL) /* Empty tree. */
+ lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
+ (void *)ARB_ROOT(ctd64tree));
+
+ /*
+ * Find the set of centroids with minimum distance to x and
+ * compute the sum of counts for all centroids with mean less
+ * than the first centroid in the set.
+ */
+ for (; cur != NULL;
+ cur = (is32bit ?
+ (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
+ (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
+ if (is32bit) {
+ cnt = ctd32->cnt;
+ KASSERT(Q_PRECEQ(ctd32->mu, x),
+ ("%s: Q_RELPREC(mu,x)=%d", __func__,
+ Q_RELPREC(ctd32->mu, x)));
+ /* Ok to assign as both have same precision. */
+ z = ctd32->mu;
+ } else {
+ cnt = ctd64->cnt;
+ KASSERT(Q_PRECEQ(ctd64->mu, x),
+ ("%s: Q_RELPREC(mu,x)=%d", __func__,
+ Q_RELPREC(ctd64->mu, x)));
+ /* Ok to assign as both have same precision. */
+ z = ctd64->mu;
+ }
+
+ /* z = |mu - x|, the centroid's distance from x. */
+ error = Q_QSUBQ(&z, x);
+#if defined(DIAGNOSTIC)
+ KASSERT(!error, ("%s: unexpected error %d", __func__,
+ error))
+#endif
+ if (error)
+ return (error);
+
+ z = Q_QABS(z);
+ if (Q_QLTQ(z, minz)) {
+ minz = z;
+ lb = cur;
+ sum = tmpsum;
+ tmpsum += cnt;
+ } else if (Q_QGTQ(z, minz)) {
+ ub = cur;
+ break;
+ }
+ }
+
+ cur = (is32bit ?
+ (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
+ (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
+
+ /* Reservoir-sample a merge candidate from [lb, ub). */
+ for (n = 0; cur != ub; cur = (is32bit ?
+ (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
+ (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
+ if (is32bit)
+ cnt = ctd32->cnt;
+ else
+ cnt = ctd64->cnt;
+
+ q = Q_CTRLINI(16);
+ if (smplcnt == 1)
+ error = Q_QFRACI(&q, 1, 2);
+ else
+ /* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
+ error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
+ (smplcnt - 1) << 1);
+ k = q;
+ /* k = q x 4 x smplcnt x attempt */
+ error |= Q_QMULI(&k, 4 * smplcnt * attempt);
+ /* k = k x (1 - q) */
+ error |= Q_QSUBI(&q, 1);
+ q = Q_QABS(q);
+ error |= Q_QMULQ(&k, q);
+#if defined(DIAGNOSTIC)
+#if !defined(_KERNEL)
+ double q_dbl, k_dbl, q2d, k2d;
+ q2d = Q_Q2D(q);
+ k2d = Q_Q2D(k);
+ q_dbl = smplcnt == 1 ? 0.5 :
+ (sum + ((cnt - 1) / 2.0)) / (double)(smplcnt - 1);
+ k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
+ /*
+ * If the difference between q and q_dbl is greater than
+ * the fractional precision of q, something is off.
+ * NB: q is holding the value of 1 - q
+ */
+ q_dbl = 1.0 - q_dbl;
+ KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
+ (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
+ ("Q-type q bad precision"));
+ KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
+ 1.0 + (0.01 * smplcnt),
+ ("Q-type k bad precision"));
+#endif /* !_KERNEL */
+ KASSERT(!error, ("%s: unexpected error %d", __func__,
+ error));
+#endif /* DIAGNOSTIC */
+ if (error)
+ return (error);
+ if ((is32bit && ((ctd32->cnt + weight) <=
+ (uint64_t)Q_GIVAL(k))) ||
+ (!is32bit && ((ctd64->cnt + weight) <=
+ (uint64_t)Q_GIVAL(k)))) {
+ n++;
+ /* random() produces 31 bits. */
+ if (random() < (INT32_MAX / n))
+ closest = cur;
+ }
+ sum += cnt;
+ }
+ } while (closest == NULL &&
+ (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
+ (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
+ attempt++)) == 0);
+
+ if (error)
+ return (error);
+
+ if (closest != NULL) {
+ /* Merge with an existing centroid. */
+ if (is32bit) {
+ ctd32 = (struct voistatdata_tdgstctd32 *)closest;
+ /* mu += (x - mu) / (cnt + weight): weighted mean. */
+ error = Q_QSUBQ(&x, ctd32->mu);
+ error = error ? error :
+ Q_QDIVI(&x, ctd32->cnt + weight);
+ if (error || (error = Q_QADDQ(&ctd32->mu, x))) {
+#ifdef DIAGNOSTIC
+ KASSERT(!error, ("%s: unexpected error %d",
+ __func__, error));
+#endif
+ return (error);
+ }
+ ctd32->cnt += weight;
+ error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
+ NULL ? 0 : EALREADY;
+#ifdef DIAGNOSTIC
+ RB_REINSERT(rbctdth32,
+ &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
+#endif
+ } else {
+ ctd64 = (struct voistatdata_tdgstctd64 *)closest;
+ error = Q_QSUBQ(&x, ctd64->mu);
+ error = error ? error :
+ Q_QDIVI(&x, ctd64->cnt + weight);
+ if (error || (error = Q_QADDQ(&ctd64->mu, x))) {
+ KASSERT(!error, ("%s: unexpected error %d",
+ __func__, error));
+ return (error);
+ }
+ ctd64->cnt += weight;
+ error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
+ NULL ? 0 : EALREADY;
+#ifdef DIAGNOSTIC
+ RB_REINSERT(rbctdth64,
+ &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
+#endif
+ }
+ } else {
+ /*
+ * Add a new centroid. If digest compression is working
+ * correctly, there should always be at least one free.
+ */
+ if (is32bit) {
+ ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
+#ifdef DIAGNOSTIC
+ KASSERT(ctd32 != NULL,
+ ("%s: t-digest@%p has no free centroids",
+ __func__, tdgst));
+#endif
+ if (ctd32 == NULL)
+ return (EAGAIN);
+ if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
+ return (error);
+ ctd32->cnt = weight;
+ error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
+ 0 : EALREADY;
+#ifdef DIAGNOSTIC
+ RB_INSERT(rbctdth32,
+ &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
+#endif
+ } else {
+ ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
+#ifdef DIAGNOSTIC
+ KASSERT(ctd64 != NULL,
+ ("%s: t-digest@%p has no free centroids",
+ __func__, tdgst));
+#endif
+ if (ctd64 == NULL) /* Should not happen. */
+ return (EAGAIN);
+ /* Direct assignment ok as both have same type/prec. */
+ ctd64->mu = x;
+ ctd64->cnt = weight;
+ error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
+ 0 : EALREADY;
+#ifdef DIAGNOSTIC
+ RB_INSERT(rbctdth64, &VSD(tdgstclust64,
+ tdgst)->rbctdtree, ctd64);
+#endif
+ }
+ }
+
+ if (is32bit)
+ VSD(tdgstclust32, tdgst)->smplcnt += weight;
+ else {
+ VSD(tdgstclust64, tdgst)->smplcnt += weight;
+
+#ifdef DIAGNOSTIC
+ /* Cross-check the ARB against the shadow RB tree. */
+ struct rbctdth64 *rbctdtree =
+ &VSD(tdgstclust64, tdgst)->rbctdtree;
+ struct voistatdata_tdgstctd64 *rbctd64;
+ int i = 0;
+ ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
+ rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
+ RB_NEXT(rbctdth64, rbctdtree, rbctd64));
+
+ if (i >= ARB_CURNODES(ctd64tree)
+ || ctd64 != rbctd64
+ || ARB_MIN(ctdth64, ctd64tree) !=
+ RB_MIN(rbctdth64, rbctdtree)
+ || ARB_MAX(ctdth64, ctd64tree) !=
+ RB_MAX(rbctdth64, rbctdtree)
+ || ARB_LEFTIDX(ctd64, ctdlnk) !=
+ ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
+ || ARB_RIGHTIDX(ctd64, ctdlnk) !=
+ ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
+ || ARB_PARENTIDX(ctd64, ctdlnk) !=
+ ARB_SELFIDX(ctd64tree,
+ RB_PARENT(rbctd64, rblnk))) {
+ Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
+ printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
+ "mu=%s\n",
+ (int)ARB_SELFIDX(ctd64tree, ctd64),
+ ARB_PARENTIDX(ctd64, ctdlnk),
+ ARB_LEFTIDX(ctd64, ctdlnk),
+ ARB_RIGHTIDX(ctd64, ctdlnk),
+ ARB_COLOR(ctd64, ctdlnk),
+ qstr);
+
+ Q_TOSTR(rbctd64->mu, -1, 10, qstr,
+ sizeof(qstr));
+ printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
+ "mu=%s\n",
+ (int)ARB_SELFIDX(ctd64tree, rbctd64),
+ (int)ARB_SELFIDX(ctd64tree,
+ RB_PARENT(rbctd64, rblnk)),
+ (int)ARB_SELFIDX(ctd64tree,
+ RB_LEFT(rbctd64, rblnk)),
+ (int)ARB_SELFIDX(ctd64tree,
+ RB_RIGHT(rbctd64, rblnk)),
+ RB_COLOR(rbctd64, rblnk),
+ qstr);
+
+ panic("RB@%p and ARB@%p trees differ\n",
+ rbctdtree, ctd64tree);
+ }
+ i++;
+ }
+#endif /* DIAGNOSTIC */
+ }
+
+ return (error);
+}
+
+/*
+ * Update a VS_STYPE_TDGST voistat with a new VOI sample: convert the
+ * sample to an s64q_t with the same fractional precision as the
+ * digest's centroids, then add it with weight 1. Sets VS_VSDVALID on
+ * success. Returns 0, EINVAL for a non-t-digest voistat dtype, or a
+ * conversion/insertion error (e.g. EOVERFLOW for out-of-range values).
+ */
+static inline int
+stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
+ struct voistat *vs, struct voistatdata_tdgst *tdgst)
+{
+ s64q_t x;
+ int error;
+
+ error = 0;
+
+ switch (vs->dtype) {
+ case VSD_DTYPE_TDGSTCLUST32:
+ /* Use same precision as the user's centroids. */
+ Q_INI(&x, 0, 0, Q_NFBITS(
+ ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
+ break;
+ case VSD_DTYPE_TDGSTCLUST64:
+ /* Use same precision as the user's centroids. */
+ Q_INI(&x, 0, 0, Q_NFBITS(
+ ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
+ break;
+ default:
+ KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
+ vs->dtype == VSD_DTYPE_TDGSTCLUST64,
+ ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
+ __func__, vs->dtype));
+ return (EINVAL);
+ }
+
+ /*
+ * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
+ * returning EOVERFLOW if the voival would have fit in a u64q_t.
+ */
+ switch (voi_dtype) {
+ case VSD_DTYPE_INT_S32:
+ error = Q_QCPYVALI(&x, voival->int32.s32);
+ break;
+ case VSD_DTYPE_INT_U32:
+ error = Q_QCPYVALI(&x, voival->int32.u32);
+ break;
+ case VSD_DTYPE_INT_S64:
+ error = Q_QCPYVALI(&x, voival->int64.s64);
+ break;
+ case VSD_DTYPE_INT_U64:
+ error = Q_QCPYVALI(&x, voival->int64.u64);
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ error = Q_QCPYVALI(&x, voival->intlong.slong);
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ error = Q_QCPYVALI(&x, voival->intlong.ulong);
+ break;
+ case VSD_DTYPE_Q_S32:
+ error = Q_QCPYVALQ(&x, voival->q32.sq32);
+ break;
+ case VSD_DTYPE_Q_U32:
+ error = Q_QCPYVALQ(&x, voival->q32.uq32);
+ break;
+ case VSD_DTYPE_Q_S64:
+ error = Q_QCPYVALQ(&x, voival->q64.sq64);
+ break;
+ case VSD_DTYPE_Q_U64:
+ error = Q_QCPYVALQ(&x, voival->q64.uq64);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error ||
+ (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
+ return (error);
+
+ vs->flags |= VS_VSDVALID;
+ return (0);
+}
+
+/*
+ * Public ABI v1 update entry point: push a new sample for VOI 'voi_id'
+ * into every enabled voistat tracking that VOI. With SB_VOI_RELUPDATE
+ * (valid only for VOIs created with VOI_REQSTATE), 'voival' is treated
+ * as a delta relative to the previously stored sample. Per-voistat
+ * update errors are latched into the voistat's error count and the
+ * last error is returned; remaining voistats are still updated.
+ * Returns 0 on success or EINVAL for invalid blob/VOI/dtype arguments.
+ */
+int
+stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
+ enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
+{
+ struct voi *v;
+ struct voistat *vs;
+ void *statevsd, *vsd;
+ int error, i, tmperr;
+
+ error = 0;
+
+ /*
+ * Reject a negative voi_id here as well: it would index
+ * sb->vois[] out of bounds before the v->id sanity check below.
+ */
+ if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id < 0 ||
+ voi_id >= NVOIS(sb) || voi_dtype == 0 ||
+ voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
+ return (EINVAL);
+ v = &sb->vois[voi_id];
+ if (voi_dtype != v->dtype || v->id < 0 ||
+ ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
+ return (EINVAL);
+
+ /* The VOISTATE voistat, when present, is always at index 0. */
+ vs = BLOB_OFFSET(sb, v->stats_off);
+ if (v->flags & VOI_REQSTATE)
+ statevsd = BLOB_OFFSET(sb, vs->data_off);
+ else
+ statevsd = NULL;
+
+ if (flags & SB_VOI_RELUPDATE) {
+ /* Convert the relative sample to absolute: add prev value. */
+ switch (voi_dtype) {
+ case VSD_DTYPE_INT_S32:
+ voival->int32.s32 +=
+ VSD(voistate, statevsd)->prev.int32.s32;
+ break;
+ case VSD_DTYPE_INT_U32:
+ voival->int32.u32 +=
+ VSD(voistate, statevsd)->prev.int32.u32;
+ break;
+ case VSD_DTYPE_INT_S64:
+ voival->int64.s64 +=
+ VSD(voistate, statevsd)->prev.int64.s64;
+ break;
+ case VSD_DTYPE_INT_U64:
+ voival->int64.u64 +=
+ VSD(voistate, statevsd)->prev.int64.u64;
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ voival->intlong.slong +=
+ VSD(voistate, statevsd)->prev.intlong.slong;
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ voival->intlong.ulong +=
+ VSD(voistate, statevsd)->prev.intlong.ulong;
+ break;
+ case VSD_DTYPE_Q_S32:
+ error = Q_QADDQ(&voival->q32.sq32,
+ VSD(voistate, statevsd)->prev.q32.sq32);
+ break;
+ case VSD_DTYPE_Q_U32:
+ error = Q_QADDQ(&voival->q32.uq32,
+ VSD(voistate, statevsd)->prev.q32.uq32);
+ break;
+ case VSD_DTYPE_Q_S64:
+ error = Q_QADDQ(&voival->q64.sq64,
+ VSD(voistate, statevsd)->prev.q64.sq64);
+ break;
+ case VSD_DTYPE_Q_U64:
+ error = Q_QADDQ(&voival->q64.uq64,
+ VSD(voistate, statevsd)->prev.q64.uq64);
+ break;
+ default:
+ KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
+ break;
+ }
+ }
+
+ if (error)
+ return (error);
+
+ /* Dispatch the sample to each enabled voistat for this VOI. */
+ for (i = v->voistatmaxid; i > 0; i--) {
+ vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
+ if (vs->stype < 0)
+ continue;
+
+ vsd = BLOB_OFFSET(sb, vs->data_off);
+
+ /*
+ * Initialise tmperr so an unknown stype in the default
+ * case (KASSERT compiles to nothing in non-DIAGNOSTIC
+ * builds) cannot leave it uninitialised below.
+ */
+ tmperr = 0;
+
+ switch (vs->stype) {
+ case VS_STYPE_MAX:
+ tmperr = stats_v1_voi_update_max(voi_dtype, voival,
+ vs, vsd);
+ break;
+ case VS_STYPE_MIN:
+ tmperr = stats_v1_voi_update_min(voi_dtype, voival,
+ vs, vsd);
+ break;
+ case VS_STYPE_SUM:
+ tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
+ vs, vsd);
+ break;
+ case VS_STYPE_HIST:
+ tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
+ vs, vsd);
+ break;
+ case VS_STYPE_TDGST:
+ tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
+ vs, vsd);
+ break;
+ default:
+ KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
+ break;
+ }
+
+ if (tmperr) {
+ error = tmperr;
+ VS_INCERRS(vs);
+ }
+ }
+
+ if (statevsd) {
+ /* Latch the absolute sample for future relative updates. */
+ switch (voi_dtype) {
+ case VSD_DTYPE_INT_S32:
+ VSD(voistate, statevsd)->prev.int32.s32 =
+ voival->int32.s32;
+ break;
+ case VSD_DTYPE_INT_U32:
+ VSD(voistate, statevsd)->prev.int32.u32 =
+ voival->int32.u32;
+ break;
+ case VSD_DTYPE_INT_S64:
+ VSD(voistate, statevsd)->prev.int64.s64 =
+ voival->int64.s64;
+ break;
+ case VSD_DTYPE_INT_U64:
+ VSD(voistate, statevsd)->prev.int64.u64 =
+ voival->int64.u64;
+ break;
+ case VSD_DTYPE_INT_SLONG:
+ VSD(voistate, statevsd)->prev.intlong.slong =
+ voival->intlong.slong;
+ break;
+ case VSD_DTYPE_INT_ULONG:
+ VSD(voistate, statevsd)->prev.intlong.ulong =
+ voival->intlong.ulong;
+ break;
+ case VSD_DTYPE_Q_S32:
+ error = Q_QCPYVALQ(
+ &VSD(voistate, statevsd)->prev.q32.sq32,
+ voival->q32.sq32);
+ break;
+ case VSD_DTYPE_Q_U32:
+ error = Q_QCPYVALQ(
+ &VSD(voistate, statevsd)->prev.q32.uq32,
+ voival->q32.uq32);
+ break;
+ case VSD_DTYPE_Q_S64:
+ error = Q_QCPYVALQ(
+ &VSD(voistate, statevsd)->prev.q64.sq64,
+ voival->q64.sq64);
+ break;
+ case VSD_DTYPE_Q_U64:
+ error = Q_QCPYVALQ(
+ &VSD(voistate, statevsd)->prev.q64.uq64,
+ voival->q64.uq64);
+ break;
+ default:
+ KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
+ break;
+ }
+ }
+
+ return (error);
+}
+
+#ifdef _KERNEL
+
+/*
+ * Subsystem initialisation hook, run at SI_SUB_KDTRACE time via the SYSINIT
+ * below. Currently an empty placeholder for future framework setup.
+ */
+static void
+stats_init(void *arg)
+{
+
+}
+SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
+
+/*
+ * Sysctl handler to display the list of available stats templates.
+ *
+ * Renders each template as a comma-separated "name":hash pair into a
+ * fixed-size sbuf and returns the string via sysctl_handle_string().
+ * Returns 0 on success, ENOMEM if the sbuf cannot be allocated, or
+ * EOVERFLOW if the (unlocked, possibly stale) ntpl-based sizing lost a
+ * race with template additions and the sbuf overflowed.
+ */
+static int
+stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf *s;
+	int err, i;
+
+	err = 0;
+
+	/* We can tolerate ntpl being stale, so do not take the lock. */
+	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
+	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
+	if (s == NULL)
+		return (ENOMEM);
+
+	TPL_LIST_RLOCK();
+	for (i = 0; i < ntpl; i++) {
+		/* Render ,-separated "name":hash pairs, no leading comma. */
+		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
+		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
+		if (err) {
+			/* Sbuf overflow condition. */
+			err = EOVERFLOW;
+			break;
+		}
+	}
+	TPL_LIST_RUNLOCK();
+
+	if (!err) {
+		sbuf_finish(s);
+		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
+	}
+
+	sbuf_delete(s);
+	return (err);
+}
+
+/*
+ * Called by subsystem-specific sysctls to report and/or parse the list of
+ * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
+ * conformant function pointer must be passed in as arg1, which is used to
+ * interact with the subsystem's stats template sample rates list. If arg2 > 0,
+ * a zero-initialised allocation of arg2-sized contextual memory is
+ * heap-allocated and passed in to all subsystem callbacks made during the
+ * operation of stats_tpl_sample_rates().
+ *
+ * Returns 0 on success, an errno on failure (from the subsystem callback,
+ * the sysctl transfer routines, or EINVAL/EOVERFLOW on parse/size errors).
+ *
+ * XXXLAS: Assumes templates are never removed, which is currently true but may
+ * need to be reworked in future if dynamic template management becomes a
+ * requirement e.g. to support kernel module based templates.
+ */
+int
+stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
+{
+	char kvpair_fmt[16], tplspec_fmt[16];
+	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
+	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
+	stats_tpl_sr_cb_t subsys_cb;
+	void *subsys_ctx;
+	char *buf, *new_rates_usr_str, *tpl_name_p;
+	struct stats_tpl_sample_rate *rates;
+	struct sbuf *s, _s;
+	uint32_t cum_pct, pct, tpl_hash;
+	int err, i, off, len, newlen, nrates;
+
+	buf = NULL;
+	rates = NULL;
+	err = nrates = 0;
+	subsys_cb = (stats_tpl_sr_cb_t)arg1;
+	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
+	if (arg2 > 0)
+		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
+	else
+		subsys_ctx = NULL;
+
+	/* Grab current count of subsystem rates. */
+	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
+	if (err)
+		goto done;
+
+	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
+	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
+
+	/* Shared scratch buffer for both the read render and the write parse. */
+	if (req->oldptr != NULL || req->newptr != NULL)
+		buf = malloc(len, M_TEMP, M_WAITOK);
+
+	if (req->oldptr != NULL) {
+		if (nrates == 0) {
+			/* No rates, so return an empty string via oldptr. */
+			err = SYSCTL_OUT(req, "", 1);
+			if (err)
+				goto done;
+			goto process_new;
+		}
+
+		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
+
+		/* Grab locked count of, and ptr to, subsystem rates. */
+		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
+		    subsys_ctx);
+		if (err)
+			goto done;
+		TPL_LIST_RLOCK();
+		for (i = 0; i < nrates && !err; i++) {
+			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
+			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
+			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
+			    rates[i].tpl_sample_pct);
+		}
+		TPL_LIST_RUNLOCK();
+		/* Tell subsystem that we're done with its rates list. */
+		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
+		if (err)
+			goto done;
+
+		err = sbuf_finish(s);
+		if (err)
+			goto done; /* We lost a race for buf to be too small. */
+
+		/* Return the rendered string data via oldptr. */
+		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
+	} else {
+		/* Return the upper bound size for buffer sizing requests. */
+		err = SYSCTL_OUT(req, NULL, len);
+	}
+
+/* Parse and apply a new rates list supplied via newptr, if any. */
+process_new:
+	if (err || req->newptr == NULL)
+		goto done;
+
+	newlen = req->newlen - req->newidx;
+	err = SYSCTL_IN(req, buf, newlen);
+	if (err)
+		goto done;
+
+	/*
+	 * Initialise format strings at run time.
+	 *
+	 * Write the max template spec string length into the
+	 * template_spec=percent key-value pair parsing format string as:
+	 * " %<width>[^=]=%u %n"
+	 *
+	 * Write the max template name string length into the tplname:tplhash
+	 * parsing format string as:
+	 * "%<width>[^:]:%u"
+	 *
+	 * Subtract 1 for \0 appended by sscanf().
+	 */
+	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
+	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
+
+	/*
+	 * Parse each CSV key-value pair specifying a template and its sample
+	 * percentage. Whitespace either side of a key-value pair is ignored.
+	 * Templates can be specified by name, hash, or name and hash per the
+	 * following formats (chars in [] are optional):
+	 *    ["]<tplname>["]=<percent>
+	 *    :hash=pct
+	 *    ["]<tplname>["]:hash=<percent>
+	 */
+	cum_pct = nrates = 0;
+	rates = NULL;
+	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
+	new_rates_usr_str = buf;
+	while (isspace(*new_rates_usr_str))
+		new_rates_usr_str++; /* Skip leading whitespace. */
+	while (*new_rates_usr_str != '\0') {
+		tpl_name_p = tpl_name;
+		tpl_name[0] = '\0';
+		tpl_hash = 0;
+		off = 0;
+
+		/*
+		 * Parse key-value pair which must perform 2 conversions, then
+		 * parse the template spec to extract either name, hash, or name
+		 * and hash depending on the three possible spec formats. The
+		 * tplspec_fmt format specifier parses name or name and hash
+		 * template specs, while the ":%u" format specifier parses
+		 * hash-only template specs. If parsing is successful, ensure
+		 * the cumulative sampling percentage does not exceed 100.
+		 */
+		err = EINVAL;
+		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
+		    &off))
+			break;
+		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
+		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
+			break;
+		if ((cum_pct += pct) > 100)
+			break;
+		err = 0;
+
+		/* Strip surrounding "" from template name if present. */
+		len = strlen(tpl_name);
+		if (len > 0) {
+			if (tpl_name[len - 1] == '"')
+				tpl_name[--len] = '\0';
+			if (tpl_name[0] == '"') {
+				tpl_name_p++;
+				len--;
+			}
+		}
+
+		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
+		    (nrates + 1) * sizeof(*rates), M_WAITOK);
+		rates[nrates].tpl_slot_id =
+		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
+		if (rates[nrates].tpl_slot_id < 0) {
+			err = -rates[nrates].tpl_slot_id;
+			break;
+		}
+		rates[nrates].tpl_sample_pct = pct;
+		nrates++;
+		new_rates_usr_str += off;
+		if (*new_rates_usr_str != ',')
+			break; /* End-of-input or malformed. */
+		new_rates_usr_str++; /* Move past comma to next pair. */
+	}
+
+	if (!err) {
+		if ((new_rates_usr_str - buf) < newlen) {
+			/* Entire input has not been consumed. */
+			err = EINVAL;
+		} else {
+			/*
+			 * Give subsystem the new rates. They'll return the
+			 * appropriate rates pointer for us to garbage collect.
+			 */
+			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
+			    subsys_ctx);
+		}
+	}
+	/* Free the inactive rates list (new list on error, old on success). */
+	stats_free(rates);
+
+done:
+	free(buf, M_TEMP);
+	free(subsys_ctx, M_TEMP);
+	return (err);
+}
+
+/* Parent sysctl node for the stats(9) framework (kern.stats). */
+SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW, NULL,
+    "stats(9) MIB");
+
+/* Read-only string sysctl backed by stats_tpl_list_available(). */
+SYSCTL_PROC(_kern_stats, OID_AUTO, templates, CTLTYPE_STRING|CTLFLAG_RD,
+    NULL, 0, stats_tpl_list_available, "A",
+    "list the name/hash of all available stats(9) templates");
+
+#else /* ! _KERNEL */
+
+/* Userland-only: initialise the template list rwlock at library load. */
+static void __attribute__ ((constructor))
+stats_constructor(void)
+{
+
+	pthread_rwlock_init(&tpllistlock, NULL);
+}
+
+/* Userland-only: tear down the template list rwlock at library unload. */
+static void __attribute__ ((destructor))
+stats_destructor(void)
+{
+
+	pthread_rwlock_destroy(&tpllistlock);
+}
+
+#endif /* _KERNEL */
diff --git a/sys/sys/arb.h b/sys/sys/arb.h
index e5f0a450cb3a..4bcd17bd2f74 100644
--- a/sys/sys/arb.h
+++ b/sys/sys/arb.h
@@ -776,4 +776,7 @@ name##_ARB_REINSERT(struct name *head, struct type *elm) \
#define ARB_ARRFOREACH_REVERSE(x, field, head) \
ARB_ARRFOREACH_REVWCOND(x, field, head, 1)
+/* Reset an ARB tree head to its freshly-initialised, empty state. */
+#define ARB_RESET_TREE(head, name, maxn) \
+	*(head) = ARB_INITIALIZER(name, maxn)
+
#endif /* _SYS_ARB_H_ */
diff --git a/sys/sys/stats.h b/sys/sys/stats.h
new file mode 100644
index 000000000000..30b1073cfa99
--- /dev/null
+++ b/sys/sys/stats.h
@@ -0,0 +1,1252 @@
+/*-
+ * Copyright (c) 2014-2018 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * A kernel and user space statistics gathering API + infrastructure.
+ *
+ * Author: Lawrence Stewart <lstewart@netflix.com>
+ *
+ * Things to ponder:
+ * - Register callbacks for events e.g. counter stat passing a threshold
+ *
+ * - How could this become SIFTRv2? Perhaps publishing records to a ring
+ * mapped between userspace and kernel?
+ *
+ * - Potential stat types:
+ * RATE: events per unit time
+ * TIMESERIES: timestamped records. Stored in voistate?
+ * EWMA: Exponential weighted moving average.
+ *
+ * - How should second order stats work e.g. stat "A" depends on "B"
+ *
+ * - How do variable time windows work e.g. give me per-RTT stats
+ *
+ * - Should the API always require the caller to manage locking? Or should the
+ * API provide optional functionality to lock a blob during operations.
+ *
+ * - Should we continue to store unpacked naturally aligned structs in the
+ * blob or move to packed structs? Relates to inter-host
+ * serialisation/endian issues.
+ */
+
+#ifndef _SYS_STATS_H_
+#define _SYS_STATS_H_
+
+#include <sys/limits.h>
+
+#ifndef _KERNEL
+/*
+ * XXXLAS: Hacks to enable sharing template creation code between kernel and
+ * userland e.g. tcp_stats.c
+ */
+#define VNET(n) n
+#define VNET_DEFINE(t, n) static t n __unused
+#endif /* ! _KERNEL */
+
+#define TPL_MAX_NAME_LEN 64
+
+/*
+ * The longest template string spec format i.e. the normative spec format, is:
+ *
+ * "<tplname>":<tplhash>
+ *
+ * Therefore, the max string length of a template string spec is:
+ *
+ * - TPL_MAX_NAME_LEN
+ * - 2 chars for ""
+ * - 1 char for : separating name and hash
+ * - 10 chars for 32bit hash
+ */
+#define STATS_TPL_MAX_STR_SPEC_LEN (TPL_MAX_NAME_LEN + 13)
+
+struct sbuf;
+struct sysctl_oid;
+struct sysctl_req;
+
+enum sb_str_fmt {
+ SB_STRFMT_FREEFORM = 0,
+ SB_STRFMT_JSON,
+ SB_STRFMT_NUM_FMTS /* +1 to highest numbered format type. */
+};
+
+/* VOI stat types. */
+enum voi_stype {
+ VS_STYPE_VOISTATE = 0, /* Reserved for internal API use. */
+ VS_STYPE_SUM,
+ VS_STYPE_MAX,
+ VS_STYPE_MIN,
+ VS_STYPE_HIST,
+ VS_STYPE_TDGST,
+ VS_NUM_STYPES /* +1 to highest numbered stat type. */
+};
+
+/*
+ * VOI stat data types used as storage for certain stat types and to marshall
+ * data through various API calls.
+ */
+enum vsd_dtype {
+ VSD_DTYPE_VOISTATE = 0, /* Reserved for internal API use. */
+ VSD_DTYPE_INT_S32, /* int32_t */
+ VSD_DTYPE_INT_U32, /* uint32_t */
+ VSD_DTYPE_INT_S64, /* int64_t */
+ VSD_DTYPE_INT_U64, /* uint64_t */
+ VSD_DTYPE_INT_SLONG, /* long */
+ VSD_DTYPE_INT_ULONG, /* unsigned long */
+ VSD_DTYPE_Q_S32, /* s32q_t */
+ VSD_DTYPE_Q_U32, /* u32q_t */
+ VSD_DTYPE_Q_S64, /* s64q_t */
+ VSD_DTYPE_Q_U64, /* u64q_t */
+ VSD_DTYPE_CRHIST32, /* continuous range histogram, 32bit buckets */
+ VSD_DTYPE_DRHIST32, /* discrete range histogram, 32bit buckets */
+ VSD_DTYPE_DVHIST32, /* discrete value histogram, 32bit buckets */
+ VSD_DTYPE_CRHIST64, /* continuous range histogram, 64bit buckets */
+ VSD_DTYPE_DRHIST64, /* discrete range histogram, 64bit buckets */
+ VSD_DTYPE_DVHIST64, /* discrete value histogram, 64bit buckets */
+ VSD_DTYPE_TDGSTCLUST32, /* clustering variant t-digest, 32bit buckets */
+ VSD_DTYPE_TDGSTCLUST64, /* clustering variant t-digest, 64bit buckets */
+ VSD_NUM_DTYPES /* +1 to highest numbered data type. */
+};
+
+struct voistatdata_int32 {
+ union {
+ int32_t s32;
+ uint32_t u32;
+ };
+};
+
+struct voistatdata_int64 {
+ union {
+ int64_t s64;
+ uint64_t u64;
+ //counter_u64_t u64pcpu;
+ };
+};
+
+struct voistatdata_intlong {
+ union {
+ long slong;
+ unsigned long ulong;
+ };
+};
+
+struct voistatdata_q32 {
+ union {
+ s32q_t sq32;
+ u32q_t uq32;
+ };
+};
+
+struct voistatdata_q64 {
+ union {
+ s64q_t sq64;
+ u64q_t uq64;
+ };
+};
+
+struct voistatdata_numeric {
+ union {
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ uint32_t pad;
+#endif
+ union {
+ int32_t s32;
+ uint32_t u32;
+ };
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint32_t pad;
+#endif
+ } int32;
+
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ uint32_t pad;
+#endif
+ union {
+ s32q_t sq32;
+ u32q_t uq32;
+ };
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint32_t pad;
+#endif
+ } q32;
+
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN && LONG_BIT == 32
+ uint32_t pad;
+#endif
+ union {
+ long slong;
+ unsigned long ulong;
+ };
+#if BYTE_ORDER == LITTLE_ENDIAN && LONG_BIT == 32
+ uint32_t pad;
+#endif
+ } intlong;
+
+ struct voistatdata_int64 int64;
+ struct voistatdata_q64 q64;
+ };
+};
+
+/* Continuous range histogram with 32bit buckets. */
+struct voistatdata_crhist32 {
+ uint32_t oob;
+ struct {
+ struct voistatdata_numeric lb;
+ uint32_t cnt;
+ } bkts[];
+};
+
+/* Continuous range histogram with 64bit buckets. */
+struct voistatdata_crhist64 {
+ uint64_t oob;
+ struct {
+ struct voistatdata_numeric lb;
+ uint64_t cnt;
+ } bkts[];
+};
+
+/* Discrete range histogram with 32bit buckets. */
+struct voistatdata_drhist32 {
+ uint32_t oob;
+ struct {
+ struct voistatdata_numeric lb, ub;
+ uint32_t cnt;
+ } bkts[];
+};
+
+/* Discrete range histogram with 64bit buckets. */
+struct voistatdata_drhist64 {
+ uint64_t oob;
+ struct {
+ struct voistatdata_numeric lb, ub;
+ uint64_t cnt;
+ } bkts[];
+};
+
+/* Discrete value histogram with 32bit buckets. */
+struct voistatdata_dvhist32 {
+ uint32_t oob;
+ struct {
+ struct voistatdata_numeric val;
+ uint32_t cnt;
+ } bkts[];
+};
+
+/* Discrete value histogram with 64bit buckets. */
+struct voistatdata_dvhist64 {
+ uint64_t oob;
+ struct {
+ struct voistatdata_numeric val;
+ uint64_t cnt;
+ } bkts[];
+};
+
+struct voistatdata_hist {
+ union {
+ struct voistatdata_crhist32 crhist32;
+ struct voistatdata_crhist64 crhist64;
+ struct voistatdata_dvhist32 dvhist32;
+ struct voistatdata_dvhist64 dvhist64;
+ struct voistatdata_drhist32 drhist32;
+ struct voistatdata_drhist64 drhist64;
+ };
+};
+
+struct voistatdata_tdgstctd32 {
+ ARB16_ENTRY() ctdlnk;
+#ifdef DIAGNOSTIC
+ RB_ENTRY(voistatdata_tdgstctd32) rblnk;
+#endif
+ s32q_t mu;
+ int32_t cnt;
+};
+
+struct voistatdata_tdgstctd64 {
+ ARB16_ENTRY() ctdlnk;
+#ifdef DIAGNOSTIC
+ RB_ENTRY(voistatdata_tdgstctd64) rblnk;
+#endif
+ s64q_t mu;
+ int64_t cnt;
+};
+
+struct voistatdata_tdgstctd {
+ union {
+ struct voistatdata_tdgstctd32 tdgstctd32;
+ struct voistatdata_tdgstctd64 tdgstctd64;
+ };
+};
+
+/* Clustering variant, fixed-point t-digest with 32bit mu/counts. */
+struct voistatdata_tdgstclust32 {
+ uint32_t smplcnt; /* Count of samples. */
+ uint32_t compcnt; /* Count of digest compressions. */
+#ifdef DIAGNOSTIC
+ RB_HEAD(rbctdth32, voistatdata_tdgstctd32) rbctdtree;
+#endif
+ /* Array-based red-black tree of centroids. */
+ ARB16_HEAD(ctdth32, voistatdata_tdgstctd32) ctdtree;
+};
+
+/* Clustering variant, fixed-point t-digest with 64bit mu/counts. */
+struct voistatdata_tdgstclust64 {
+ uint64_t smplcnt; /* Count of samples. */
+ uint32_t compcnt; /* Count of digest compressions. */
+#ifdef DIAGNOSTIC
+ RB_HEAD(rbctdth64, voistatdata_tdgstctd64) rbctdtree;
+#endif
+ /* Array-based red-black tree of centroids. */
+ ARB16_HEAD(ctdth64, voistatdata_tdgstctd64) ctdtree;
+};
+
+struct voistatdata_tdgst {
+ union {
+ struct voistatdata_tdgstclust32 tdgstclust32;
+ struct voistatdata_tdgstclust64 tdgstclust64;
+ };
+};
+
+struct voistatdata {
+ union {
+ struct voistatdata_int32 int32;
+ struct voistatdata_int64 int64;
+ struct voistatdata_intlong intlong;
+ struct voistatdata_q32 q32;
+ struct voistatdata_q64 q64;
+ struct voistatdata_crhist32 crhist32;
+ struct voistatdata_crhist64 crhist64;
+ struct voistatdata_dvhist32 dvhist32;
+ struct voistatdata_dvhist64 dvhist64;
+ struct voistatdata_drhist32 drhist32;
+ struct voistatdata_drhist64 drhist64;
+ struct voistatdata_tdgstclust32 tdgstclust32;
+ struct voistatdata_tdgstclust64 tdgstclust64;
+ };
+};
+
+#define VSD_HIST_LBOUND_INF 0x01
+#define VSD_HIST_UBOUND_INF 0x02
+struct vss_hist_hlpr_info {
+ enum hist_bkt_alloc {
+ BKT_LIN, /* Linear steps. */
+ BKT_EXP, /* Exponential steps. */
+ BKT_LINEXP, /* Exponential steps, linear sub-steps. */
+ BKT_USR /* User specified buckets. */
+ } scheme;
+ enum vsd_dtype voi_dtype;
+ enum vsd_dtype hist_dtype;
+ uint32_t flags;
+ struct voistatdata_numeric lb;
+ struct voistatdata_numeric ub;
+ union {
+ struct {
+ const uint64_t stepinc;
+ } lin;
+ struct {
+ const uint64_t stepbase;
+ const uint64_t stepexp;
+ } exp;
+ struct {
+ const uint64_t stepbase;
+ const uint64_t linstepdiv;
+ } linexp;
+ struct {
+ const uint16_t nbkts;
+ const struct {
+ struct voistatdata_numeric lb, ub;
+ } *bkts;
+ } usr;
+ };
+};
+
+struct vss_tdgst_hlpr_info {
+ enum vsd_dtype voi_dtype;
+ enum vsd_dtype tdgst_dtype;
+ uint32_t nctds;
+ uint32_t prec;
+} __aligned(sizeof(void *));
+
+struct vss_numeric_hlpr_info {
+ uint32_t prec;
+};
+
+struct vss_hlpr_info {
+ union {
+ struct vss_tdgst_hlpr_info tdgst;
+ struct vss_hist_hlpr_info hist;
+ struct vss_numeric_hlpr_info numeric;
+ };
+};
+
+struct voistatspec;
+typedef int (*vss_hlpr_fn)(enum vsd_dtype, struct voistatspec *,
+ struct vss_hlpr_info *);
+
+struct voistatspec {
+ vss_hlpr_fn hlpr; /* iv helper function. */
+ struct vss_hlpr_info *hlprinfo; /* Helper function context. */
+ struct voistatdata *iv; /* Initialisation value. */
+ size_t vsdsz; /* Size of iv. */
+ uint32_t flags; /* Stat flags. */
+ enum vsd_dtype vs_dtype : 8; /* Stat's dtype. */
+ enum voi_stype stype : 8; /* Stat type. */
+};
+
+extern const char *vs_stype2name[VS_NUM_STYPES];
+extern const char *vs_stype2desc[VS_NUM_STYPES];
+extern const char *vsd_dtype2name[VSD_NUM_DTYPES];
+extern const size_t vsd_dtype2size[VSD_NUM_DTYPES];
+#define LIM_MIN 0
+#define LIM_MAX 1
+extern const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1];
+
+#define TYPEOF_MEMBER(type, member) __typeof(((type *)0)->member)
+#define TYPEOF_MEMBER_PTR(type, member) __typeof(*(((type *)0)->member))
+#define SIZEOF_MEMBER(type, member) sizeof(TYPEOF_MEMBER(type, member))
+
+/* Cast a pointer to a voistatdata struct of requested type. */
+#define _VSD(cnst, type, ptr) ((cnst struct voistatdata_##type *)(ptr))
+#define VSD(type, ptr) _VSD(, type, ptr)
+#define CONSTVSD(type, ptr) _VSD(const, type, ptr)
+
+#define NVSS(vss_slots) (sizeof((vss_slots)) / sizeof(struct voistatspec))
+#define STATS_VSS(st, vsf, dt, hlp, hlpi) \
+((struct voistatspec){ \
+ .stype = (st), \
+ .flags = (vsf), \
+ .vs_dtype = (dt), \
+ .hlpr = (hlp), \
+ .hlprinfo = (hlpi), \
+})
+
+#define STATS_VSS_SUM() STATS_VSS(VS_STYPE_SUM, 0, 0, \
+ (vss_hlpr_fn)&stats_vss_numeric_hlpr, NULL)
+
+#define STATS_VSS_MAX() STATS_VSS(VS_STYPE_MAX, 0, 0, \
+ (vss_hlpr_fn)&stats_vss_numeric_hlpr, NULL)
+
+#define STATS_VSS_MIN() STATS_VSS(VS_STYPE_MIN, 0, 0, \
+ (vss_hlpr_fn)&stats_vss_numeric_hlpr, NULL)
+
+#define STATS_VSS_HIST(htype, hist_hlpr_info) STATS_VSS(VS_STYPE_HIST, 0, \
+ htype, (vss_hlpr_fn)&stats_vss_hist_hlpr, \
+ (struct vss_hlpr_info *)(hist_hlpr_info))
+
+#define STATS_VSS_TDIGEST(tdtype, tdgst_hlpr_info) STATS_VSS(VS_STYPE_TDGST, \
+ 0, tdtype, (vss_hlpr_fn)&stats_vss_tdgst_hlpr, \
+ (struct vss_hlpr_info *)(tdgst_hlpr_info))
+
+#define TDGST_NCTRS2VSDSZ(tdtype, nctds) (sizeof(struct voistatdata_##tdtype) + \
+ ((nctds) * sizeof(TYPEOF_MEMBER_PTR(struct voistatdata_##tdtype, \
+ ctdtree.arb_nodes))))
+
+#define TDGST_HLPR_INFO(dt, nc, nf) \
+(&(struct vss_tdgst_hlpr_info){ \
+ .tdgst_dtype = (dt), \
+ .nctds = (nc), \
+ .prec = (nf) \
+})
+
+#define STATS_VSS_TDGSTCLUST32(nctds, prec) \
+ STATS_VSS_TDIGEST(VSD_DTYPE_TDGSTCLUST32, \
+ TDGST_HLPR_INFO(VSD_DTYPE_TDGSTCLUST32, nctds, prec))
+
+#define STATS_VSS_TDGSTCLUST64(nctds, prec) \
+ STATS_VSS_TDIGEST(VSD_DTYPE_TDGSTCLUST64, \
+ TDGST_HLPR_INFO(VSD_DTYPE_TDGSTCLUST64, nctds, prec))
+
+/*
+ * Convert a histogram VSD size in bytes to the number of buckets it holds.
+ * "dsz" is parenthesised so expression arguments (e.g. "a + b") evaluate
+ * correctly, matching the hygiene of HIST_NBKTS2VSDSZ() below.
+ */
+#define HIST_VSDSZ2NBKTS(htype, dsz) \
+    (((dsz) - sizeof(struct voistatdata_##htype)) / \
+    sizeof(TYPEOF_MEMBER(struct voistatdata_##htype, bkts[0])))
+
+#define HIST_NBKTS2VSDSZ(htype, nbkts) (sizeof(struct voistatdata_##htype) + \
+ ((nbkts) * sizeof(TYPEOF_MEMBER_PTR(struct voistatdata_##htype, bkts))))
+
+#define HIST_HLPR_INFO_LIN_FIELDS(si) .lin.stepinc = (si)
+
+#define HIST_HLPR_INFO_EXP_FIELDS(sb, se) \
+ .exp.stepbase = (sb), .exp.stepexp = (se)
+
+#define HIST_HLPR_INFO_LINEXP_FIELDS(nss, sb) \
+ .linexp.linstepdiv = (nss), .linexp.stepbase = (sb)
+
+#define HIST_HLPR_INFO_USR_FIELDS(bbs) \
+ .usr.bkts = (TYPEOF_MEMBER(struct vss_hist_hlpr_info, usr.bkts))(bbs), \
+ .usr.nbkts = (sizeof(bbs) / sizeof(struct voistatdata_numeric[2]))
+
+#define HIST_HLPR_INFO(dt, sch, f, lbd, ubd, bkthlpr_fields) \
+(&(struct vss_hist_hlpr_info){ \
+ .scheme = (sch), \
+ .hist_dtype = (dt), \
+ .flags = (f), \
+ .lb = stats_ctor_vsd_numeric(lbd), \
+ .ub = stats_ctor_vsd_numeric(ubd), \
+ bkthlpr_fields \
+})
+
+#define STATS_VSS_CRHIST32_LIN(lb, ub, stepinc, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST32, HIST_HLPR_INFO(VSD_DTYPE_CRHIST32, \
+ BKT_LIN, vsdflags, lb, ub, HIST_HLPR_INFO_LIN_FIELDS(stepinc)))
+#define STATS_VSS_CRHIST64_LIN(lb, ub, stepinc, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST64, HIST_HLPR_INFO(VSD_DTYPE_CRHIST64, \
+ BKT_LIN, vsdflags, lb, ub, HIST_HLPR_INFO_LIN_FIELDS(stepinc)))
+
+#define STATS_VSS_CRHIST32_EXP(lb, ub, stepbase, stepexp, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST32, HIST_HLPR_INFO(VSD_DTYPE_CRHIST32, \
+ BKT_EXP, vsdflags, lb, ub, HIST_HLPR_INFO_EXP_FIELDS(stepbase, stepexp)))
+#define STATS_VSS_CRHIST64_EXP(lb, ub, stepbase, stepexp, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST64, HIST_HLPR_INFO(VSD_DTYPE_CRHIST64, \
+ BKT_EXP, vsdflags, lb, ub, HIST_HLPR_INFO_EXP_FIELDS(stepbase, stepexp)))
+
+#define STATS_VSS_CRHIST32_LINEXP(lb, ub, nlinsteps, stepbase, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST32, HIST_HLPR_INFO(VSD_DTYPE_CRHIST32, \
+ BKT_LINEXP, vsdflags, lb, ub, HIST_HLPR_INFO_LINEXP_FIELDS(nlinsteps, \
+ stepbase)))
+#define STATS_VSS_CRHIST64_LINEXP(lb, ub, nlinsteps, stepbase, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST64, HIST_HLPR_INFO(VSD_DTYPE_CRHIST64, \
+ BKT_LINEXP, vsdflags, lb, ub, HIST_HLPR_INFO_LINEXP_FIELDS(nlinsteps, \
+ stepbase)))
+
+#define STATS_VSS_CRHIST32_USR(bkts, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST32, HIST_HLPR_INFO(VSD_DTYPE_CRHIST32, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(bkts)))
+#define STATS_VSS_CRHIST64_USR(bkts, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_CRHIST64, HIST_HLPR_INFO(VSD_DTYPE_CRHIST64, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(bkts)))
+
+#define STATS_VSS_DRHIST32_USR(bkts, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_DRHIST32, HIST_HLPR_INFO(VSD_DTYPE_DRHIST32, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(bkts)))
+#define STATS_VSS_DRHIST64_USR(bkts, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_DRHIST64, HIST_HLPR_INFO(VSD_DTYPE_DRHIST64, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(bkts)))
+
+#define STATS_VSS_DVHIST32_USR(vals, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_DVHIST32, HIST_HLPR_INFO(VSD_DTYPE_DVHIST32, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(vals)))
+#define STATS_VSS_DVHIST64_USR(vals, vsdflags) \
+ STATS_VSS_HIST(VSD_DTYPE_DVHIST64, HIST_HLPR_INFO(VSD_DTYPE_DVHIST64, \
+ BKT_USR, vsdflags, 0, 0, HIST_HLPR_INFO_USR_FIELDS(vals)))
+
+#define DRBKT(lb, ub) { stats_ctor_vsd_numeric(lb), stats_ctor_vsd_numeric(ub) }
+#define DVBKT(val) DRBKT(val, val)
+#define CRBKT(lb) DRBKT(lb, lb)
+#define HBKTS(...) ((struct voistatdata_numeric [][2]){__VA_ARGS__})
+
+#define VSD_HIST_FIELD(hist, cnst, hist_dtype, op, field) \
+ (VSD_DTYPE_CRHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, crhist32, hist)->field) : \
+ (VSD_DTYPE_DRHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, drhist32, hist)->field) : \
+ (VSD_DTYPE_DVHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, dvhist32, hist)->field) : \
+ (VSD_DTYPE_CRHIST64 == (hist_dtype) ? \
+ op(_VSD(cnst, crhist64, hist)->field) : \
+ (VSD_DTYPE_DRHIST64 == (hist_dtype) ? \
+ op(_VSD(cnst, drhist64, hist)->field) : \
+ (op(_VSD(cnst, dvhist64, hist)->field)))))))
+#define VSD_HIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_HIST_FIELD(hist, , hist_dtype, ,field)
+#define VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_HIST_FIELD(hist, const, hist_dtype, ,field)
+#define VSD_HIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_HIST_FIELD(hist, , hist_dtype, (void *)&,field)
+#define VSD_CONSTHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_HIST_FIELD(hist, const, hist_dtype, (void *)&,field)
+
+#define VSD_CRHIST_FIELD(hist, cnst, hist_dtype, op, field) \
+ (VSD_DTYPE_CRHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, crhist32, hist)->field) : \
+ op(_VSD(cnst, crhist64, hist)->field))
+#define VSD_CRHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_CRHIST_FIELD(hist, , hist_dtype, , field)
+#define VSD_CONSTCRHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_CRHIST_FIELD(hist, const, hist_dtype, , field)
+#define VSD_CRHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_CRHIST_FIELD(hist, , hist_dtype, &, field)
+#define VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_CRHIST_FIELD(hist, const, hist_dtype, &, field)
+
+#define VSD_DRHIST_FIELD(hist, cnst, hist_dtype, op, field) \
+ (VSD_DTYPE_DRHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, drhist32, hist)->field) : \
+ op(_VSD(cnst, drhist64, hist)->field))
+#define VSD_DRHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_DRHIST_FIELD(hist, , hist_dtype, , field)
+#define VSD_CONSTDRHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_DRHIST_FIELD(hist, const, hist_dtype, , field)
+#define VSD_DRHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_DRHIST_FIELD(hist, , hist_dtype, &, field)
+#define VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_DRHIST_FIELD(hist, const, hist_dtype, &, field)
+
+#define VSD_DVHIST_FIELD(hist, cnst, hist_dtype, op, field) \
+ (VSD_DTYPE_DVHIST32 == (hist_dtype) ? \
+ op(_VSD(cnst, dvhist32, hist)->field) : \
+ op(_VSD(cnst, dvhist64, hist)->field))
+#define VSD_DVHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_DVHIST_FIELD(hist, , hist_dtype, , field)
+#define VSD_CONSTDVHIST_FIELDVAL(hist, hist_dtype, field) \
+ VSD_DVHIST_FIELD(hist, const, hist_dtype, , field)
+#define VSD_DVHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_DVHIST_FIELD(hist, , hist_dtype, &, field)
+#define VSD_CONSTDVHIST_FIELDPTR(hist, hist_dtype, field) \
+ VSD_DVHIST_FIELD(hist, const, hist_dtype, &, field)
+
+#define STATS_ABI_V1 1
+struct statsblobv1;
+
+enum sb_endianness {
+ SB_UE = 0, /* Unknown endian. */
+ SB_LE, /* Little endian. */
+ SB_BE /* Big endian. */
+};
+
+/* ABI-agnostic header common to all stats blobs. */
+struct statsblob {
+	uint8_t		abi;	/* Blob ABI version (e.g. STATS_ABI_V1). */
+	uint8_t		endian;	/* Byte order of blob data (enum sb_endianness). */
+	uint16_t	flags;
+	uint16_t	maxsz;	/* Allocated blob size. */
+	uint16_t	cursz;	/* Currently used blob size. */
+	uint8_t		opaque[];	/* ABI-specific payload follows the header. */
+} __aligned(sizeof(void *));
+
+struct metablob {
+ char *tplname;
+ uint32_t tplhash;
+ struct voi_meta {
+ char *name;
+ char *desc;
+ } *voi_meta;
+};
+
+struct statsblob_tpl {
+ struct metablob *mb; /* Template metadata */
+ struct statsblob *sb; /* Template schema */
+};
+
+struct stats_tpl_sample_rate {
+ /* XXXLAS: Storing slot_id assumes templates are never removed. */
+ int32_t tpl_slot_id;
+ uint32_t tpl_sample_pct;
+};
+
+/* Template sample rates list management callback actions. */
+enum stats_tpl_sr_cb_action {
+ TPL_SR_UNLOCKED_GET,
+ TPL_SR_RLOCKED_GET,
+ TPL_SR_RUNLOCK,
+ TPL_SR_PUT
+};
+
+/*
+ * Callback function pointer passed as arg1 to stats_tpl_sample_rates(). ctx is
+ * a heap-allocated, zero-initialised blob of contextual memory valid during a
+ * single stats_tpl_sample_rates() call and sized per the value passed as arg2.
+ * Returns 0 on success, an errno on error.
+ * - When called with "action == TPL_SR_*_GET", return the subsystem's rates
+ * list ptr and count, locked or unlocked as requested.
+ * - When called with "action == TPL_SR_RUNLOCK", unlock the subsystem's rates
+ * list ptr and count. Pair with a prior "action == TPL_SR_RLOCKED_GET" call.
+ * - When called with "action == TPL_SR_PUT", update the subsystem's rates list
+ * ptr and count to the sysctl processed values and return the inactive list
+ * details in rates/nrates for garbage collection by stats_tpl_sample_rates().
+ */
+typedef int (*stats_tpl_sr_cb_t)(enum stats_tpl_sr_cb_action action,
+ struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
+
+/* Flags related to iterating over a stats blob. */
+#define SB_IT_FIRST_CB 0x0001
+#define SB_IT_LAST_CB 0x0002
+#define SB_IT_FIRST_VOI 0x0004
+#define SB_IT_LAST_VOI 0x0008
+#define SB_IT_FIRST_VOISTAT 0x0010
+#define SB_IT_LAST_VOISTAT 0x0020
+#define SB_IT_NULLVOI 0x0040
+#define SB_IT_NULLVOISTAT 0x0080
+
+/* Per-stat visitation state handed to a stats_blob_visitcb_t callback. */
+struct sb_visit {
+	struct voistatdata	*vs_data;	/* Ptr to the stat's data. */
+	uint32_t		tplhash;	/* Hash of the blob's template. */
+	uint32_t		flags;		/* SB_IT_* iteration flags. */
+	int16_t			voi_id;		/* ID of the VOI being visited. */
+	int16_t			vs_dsz;		/* Size of the stat's data. */
+	uint16_t		vs_errs;	/* Stat's error count — presumably
+						 * mirrors VS_INCERRS(); confirm. */
+	enum vsd_dtype		voi_dtype : 8;	/* Data type of the VOI. */
+	enum vsd_dtype		vs_dtype : 8;	/* Data type of the stat. */
+	int8_t			vs_stype;	/* Stat type (enum voi_stype). */
+};
+
+/* Stats blob iterator callback called for each struct voi. */
+typedef int (*stats_blob_visitcb_t)(struct sb_visit *sbv, void *usrctx);
+
+/* ABI specific functions. */
+int stats_v1_tpl_alloc(const char *name, uint32_t flags);
+int stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id,
+ const char *voi_name, enum vsd_dtype voi_dtype, uint32_t nvss,
+ struct voistatspec *vss, uint32_t flags);
+int stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags);
+struct statsblobv1 * stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags);
+int stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
+ struct statsblobv1 *src, uint32_t flags);
+void stats_v1_blob_destroy(struct statsblobv1 *sb);
+#define SB_CLONE_RSTSRC 0x0001 /* Reset src blob if clone successful. */
+#define SB_CLONE_ALLOCDST 0x0002 /* Allocate src->cursz memory for dst. */
+#define SB_CLONE_USRDSTNOFAULT 0x0004 /* Clone to wired userspace dst. */
+#define SB_CLONE_USRDST 0x0008 /* Clone to unwired userspace dst. */
+int stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
+ struct statsblobv1 *src, uint32_t flags);
+#define SB_TOSTR_OBJDUMP 0x00000001
+#define SB_TOSTR_META 0x00000002 /* Lookup metablob and render metadata */
+int stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
+ enum sb_str_fmt fmt, uint32_t flags);
+int stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
+ void *usrctx);
+/* VOI related function flags. */
+#define SB_VOI_RELUPDATE 0x00000001 /* voival is relative to previous value. */
+int stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
+ enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags);
+int stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
+ enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
+ size_t *retvsdsz);
+
+/* End ABI specific functions. */
+
+/* ABI agnostic functions. */
+int stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
+ struct voistatspec *vss);
+void stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss);
+int stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+ struct vss_hist_hlpr_info *info);
+int stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+ struct vss_numeric_hlpr_info *info);
+int stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
+ struct vss_tdgst_hlpr_info *info);
+int stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl);
+int stats_tpl_fetch_allocid(const char *name, uint32_t hash);
+int stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len);
+int stats_tpl_sample_rates(struct sysctl_oid *oidp, void *arg1, intmax_t arg2,
+ struct sysctl_req *req);
+int stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates,
+ int nrates, void *seed_bytes, size_t seed_len);
+int stats_voistatdata_tostr(const struct voistatdata *vsd,
+ enum vsd_dtype voi_dtype, enum vsd_dtype vsd_dtype, size_t vsd_sz,
+ enum sb_str_fmt fmt, struct sbuf *buf, int objdump);
+
+/*
+ * By-value constructor for a struct voistatdata_numeric; only the int64.u64
+ * union member is explicitly assigned.
+ */
+static inline struct voistatdata_numeric
+stats_ctor_vsd_numeric(uint64_t val)
+{
+	struct voistatdata_numeric tmp;
+
+	tmp.int64.u64 = val;
+
+	return (tmp);
+}
+
+/*
+ * ABI-agnostic template allocation; currently always dispatches to the v1
+ * ABI implementation.
+ */
+static inline int
+stats_tpl_alloc(const char *name, uint32_t flags)
+{
+
+	return (stats_v1_tpl_alloc(name, flags));
+}
+
+/*
+ * ABI-agnostic helper to add a VOI and its stats to template "tpl_id": run
+ * the vss helper initialisers and, only if they all succeed, forward to the
+ * v1 ABI add routine.  Helper resources are cleaned up on both the success
+ * and failure paths.  Returns 0 on success, otherwise the non-zero return
+ * of the failing step.
+ */
+static inline int
+stats_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
+    enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
+    uint32_t flags)
+{
+	int ret;
+
+	if ((ret = stats_vss_hlpr_init(voi_dtype, nvss, vss)) == 0) {
+		ret = stats_v1_tpl_add_voistats(tpl_id, voi_id, voi_name,
+		    voi_dtype, nvss, vss, flags);
+	}
+	stats_vss_hlpr_cleanup(nvss, vss);
+
+	return (ret);
+}
+
+/*
+ * ABI-agnostic wrappers: each casts the opaque struct statsblob to the v1
+ * ABI type and forwards unchanged to the corresponding stats_v1_*()
+ * function declared above.
+ */
+static inline int
+stats_blob_init(struct statsblob *sb, uint32_t tpl_id, uint32_t flags)
+{
+
+	return (stats_v1_blob_init((struct statsblobv1 *)sb, tpl_id, flags));
+}
+
+static inline struct statsblob *
+stats_blob_alloc(uint32_t tpl_id, uint32_t flags)
+{
+
+	return ((struct statsblob *)stats_v1_blob_alloc(tpl_id, flags));
+}
+
+static inline int
+stats_blob_clone(struct statsblob **dst, size_t dstmaxsz, struct statsblob *src,
+    uint32_t flags)
+{
+
+	return (stats_v1_blob_clone((struct statsblobv1 **)dst, dstmaxsz,
+	    (struct statsblobv1 *)src, flags));
+}
+
+static inline void
+stats_blob_destroy(struct statsblob *sb)
+{
+
+	stats_v1_blob_destroy((struct statsblobv1 *)sb);
+}
+
+static inline int
+stats_blob_visit(struct statsblob *sb, stats_blob_visitcb_t func, void *usrctx)
+{
+
+	return (stats_v1_blob_visit((struct statsblobv1 *)sb, func, usrctx));
+}
+
+static inline int
+stats_blob_tostr(struct statsblob *sb, struct sbuf *buf,
+    enum sb_str_fmt fmt, uint32_t flags)
+{
+
+	return (stats_v1_blob_tostr((struct statsblobv1 *)sb, buf, fmt, flags));
+}
+
+static inline int
+stats_voistat_fetch_dptr(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
+    size_t *retvsdsz)
+{
+
+	return (stats_v1_voistat_fetch_dptr((struct statsblobv1 *)sb,
+	    voi_id, stype, retdtype, retvsd, retvsdsz));
+}
+
+/*
+ * Typed convenience fetchers: each looks up the requested stat via
+ * stats_voistat_fetch_dptr() and copies its value out through *ret.
+ * Return 0 on success, EFTYPE if the stored data type does not match the
+ * requested fixed-width type, or the non-zero return of the fetch itself.
+ */
+static inline int
+stats_voistat_fetch_s64(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, int64_t *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_S64 != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->int64.s64;
+	return (0);
+}
+
+static inline int
+stats_voistat_fetch_u64(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, uint64_t *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_U64 != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->int64.u64;
+	return (0);
+}
+
+static inline int
+stats_voistat_fetch_s32(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, int32_t *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_S32 != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->int32.s32;
+	return (0);
+}
+
+static inline int
+stats_voistat_fetch_u32(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, uint32_t *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_U32 != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->int32.u32;
+	return (0);
+}
+
+static inline int
+stats_voistat_fetch_slong(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, long *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_SLONG != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->intlong.slong;
+	return (0);
+}
+
+static inline int
+stats_voistat_fetch_ulong(struct statsblob *sb, int32_t voi_id,
+    enum voi_stype stype, unsigned long *ret)
+{
+	struct voistatdata *vsd;
+	enum vsd_dtype vs_dtype;
+	int error;
+
+	if ((error = stats_voistat_fetch_dptr(sb, voi_id, stype, &vs_dtype, &vsd,
+	    NULL)))
+		return (error);
+	else if (VSD_DTYPE_INT_ULONG != vs_dtype)
+		return (EFTYPE);
+
+	*ret = vsd->intlong.ulong;
+	return (0);
+}
+
+/* ABI-agnostic snapshot wrapper; forwards to stats_v1_blob_snapshot(). */
+static inline int
+stats_blob_snapshot(struct statsblob **dst, size_t dstmaxsz,
+    struct statsblob *src, uint32_t flags)
+{
+
+	return (stats_v1_blob_snapshot((struct statsblobv1 **)dst, dstmaxsz,
+	    (struct statsblobv1 *)src, flags));
+}
+
+/*
+ * Typed VOI update helpers.  "abs" variants submit voival as the new
+ * absolute value; "rel" variants pass SB_VOI_RELUPDATE so voival is
+ * interpreted relative to the VOI's previous value.  A NULL sb is treated
+ * as a successful no-op (returns 0).
+ */
+static inline int
+stats_voi_update_abs_s32(struct statsblob *sb, int32_t voi_id, int32_t voival)
+{
+
+	if (sb == NULL)
+		return (0);
+
+	struct voistatdata tmp;
+	tmp.int32.s32 = voival;
+
+	return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+	    VSD_DTYPE_INT_S32, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_s32(struct statsblob *sb, int32_t voi_id, int32_t voival)
+{
+
+	if (sb == NULL)
+		return (0);
+
+	struct voistatdata tmp;
+	tmp.int32.s32 = voival;
+
+	return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+	    VSD_DTYPE_INT_S32, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_u32(struct statsblob *sb, int32_t voi_id, uint32_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int32.u32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_U32, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_u32(struct statsblob *sb, int32_t voi_id, uint32_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int32.u32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_U32, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_s64(struct statsblob *sb, int32_t voi_id, int64_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int64.s64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_S64, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_s64(struct statsblob *sb, int32_t voi_id, int64_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int64.s64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_S64, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_u64(struct statsblob *sb, int32_t voi_id, uint64_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int64.u64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_U64, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_u64(struct statsblob *sb, int32_t voi_id, uint64_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.int64.u64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_U64, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_slong(struct statsblob *sb, int32_t voi_id, long voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.intlong.slong = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_SLONG, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_slong(struct statsblob *sb, int32_t voi_id, long voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.intlong.slong = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_SLONG, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_ulong(struct statsblob *sb, int32_t voi_id,
+ unsigned long voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.intlong.ulong = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_ULONG, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_ulong(struct statsblob *sb, int32_t voi_id,
+ unsigned long voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.intlong.ulong = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_INT_ULONG, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_sq32(struct statsblob *sb, int32_t voi_id, s32q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q32.sq32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_S32, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_sq32(struct statsblob *sb, int32_t voi_id, s32q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q32.sq32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_S32, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_uq32(struct statsblob *sb, int32_t voi_id, u32q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q32.uq32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_U32, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_uq32(struct statsblob *sb, int32_t voi_id, u32q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q32.uq32 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_U32, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_sq64(struct statsblob *sb, int32_t voi_id, s64q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q64.sq64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_S64, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_sq64(struct statsblob *sb, int32_t voi_id, s64q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q64.sq64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_S64, &tmp, SB_VOI_RELUPDATE));
+}
+
+static inline int
+stats_voi_update_abs_uq64(struct statsblob *sb, int32_t voi_id, u64q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q64.uq64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_U64, &tmp, 0));
+}
+
+static inline int
+stats_voi_update_rel_uq64(struct statsblob *sb, int32_t voi_id, u64q_t voival)
+{
+
+ if (sb == NULL)
+ return (0);
+
+ struct voistatdata tmp;
+ tmp.q64.uq64 = voival;
+
+ return (stats_v1_voi_update((struct statsblobv1 *)sb, voi_id,
+ VSD_DTYPE_Q_U64, &tmp, SB_VOI_RELUPDATE));
+}
+
+/* End ABI agnostic functions. */
+
+#endif /* _SYS_STATS_H_ */
diff --git a/tools/build/options/WITHOUT_STATS b/tools/build/options/WITHOUT_STATS
new file mode 100644
index 000000000000..ebd201ef7915
--- /dev/null
+++ b/tools/build/options/WITHOUT_STATS
@@ -0,0 +1,4 @@
+.\" $FreeBSD$
+Set to neither build nor install
+.Lb libstats
+and dependent binaries.
diff --git a/tools/build/options/WITH_STATS b/tools/build/options/WITH_STATS
new file mode 100644
index 000000000000..a343670548c2
--- /dev/null
+++ b/tools/build/options/WITH_STATS
@@ -0,0 +1,4 @@
+.\" $FreeBSD$
+Set to build and install
+.Lb libstats
+and dependent binaries.