aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitchell Horne <mhorne@FreeBSD.org>2022-02-22 00:15:45 +0000
committerMitchell Horne <mhorne@FreeBSD.org>2022-02-22 00:15:45 +0000
commitda5b7e90e7409a11a04f80ef0f19e7936811678d (patch)
treeb9f1c4dcf8eabf2d2fb62618603891521083fa47
parent36637dd19dba79088e53c6f2aa026415eae9f8f0 (diff)
downloadsrc-da5b7e90e740.tar.gz
src-da5b7e90e740.zip
boottrace: a simple boot and shutdown-time tracing facility
Boottrace is a facility for capturing trace events during boot and shutdown. This includes kernel initialization, as well as rc. It has been used by NetApp internally for several years, for catching and diagnosing slow devices or subsystems. It is driven from userspace by sysctl interface, and the output is a human-readable log of events (kern.boottrace.log). This commit adds the core boottrace functionality implementing these interfaces. Adding the trace annotations themselves to kernel and userland will happen in follow-up commits. A future commit will also add a boottrace(4) man page. For now, boottrace is unconditionally compiled into the kernel but disabled by default. It can be enabled by setting the kern.boottrace.enabled tunable to 1 in loader.conf(5). There is an existing boot-time event tracing facility, which can be compiled into the kernel with 'options TSLOG'. While there is some functional overlap between this and boottrace, they are distinct. TSLOG is suitable for generating detailed timing information and flamegraphs, and has been used to great success by cperciva@ to diagnose and reduce the overall system boot time. Boottrace aims to more quickly provide an overview of timing and resource usage of the boot (and shutdown) process to a sysadmin who requires this knowledge. Sponsored by: NetApp, Inc. Sponsored by: Klara, Inc. X-NetApp-PR: #23 Differential Revision: https://reviews.freebsd.org/D30184
-rw-r--r--sys/conf/files1
-rw-r--r--sys/kern/kern_boottrace.c612
-rw-r--r--sys/sys/boottrace.h60
3 files changed, 673 insertions, 0 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 7718f9ef6525..dedff7e5b806 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3762,6 +3762,7 @@ kern/init_sysent.c standard
kern/ksched.c optional _kposix_priority_scheduling
kern/kern_acct.c standard
kern/kern_alq.c optional alq
+kern/kern_boottrace.c standard
kern/kern_clock.c standard
kern/kern_clocksource.c standard
kern/kern_condvar.c standard
diff --git a/sys/kern/kern_boottrace.c b/sys/kern/kern_boottrace.c
new file mode 100644
index 000000000000..0a25512c4b12
--- /dev/null
+++ b/sys/kern/kern_boottrace.c
@@ -0,0 +1,612 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2008-2022 NetApp, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/boottrace.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sbuf.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <machine/stdarg.h>
+
+#define dprintf(fmt, ...) \
+ do { \
+ if (dotrace_debugging) \
+ printf(fmt, ##__VA_ARGS__); \
+ } while (0);
+
+static MALLOC_DEFINE(M_BOOTTRACE, "boottrace", "memory for boot tracing");
+
+#define BT_TABLE_DEFSIZE 3000
+#define BT_TABLE_RUNSIZE 2000
+#define BT_TABLE_SHTSIZE 1000
+#define BT_TABLE_MINSIZE 500
+
+/*
+ * Boot-time & shutdown-time event.
+ */
+struct bt_event {
+ uint64_t tsc; /* Timestamp */
+ uint64_t tick; /* Kernel tick */
+ uint64_t cputime; /* Microseconds of process CPU time */
+ uint32_t cpuid; /* CPU on which the event ran */
+ uint32_t inblock; /* # of blocks in */
+ uint32_t oublock; /* # of blocks out */
+ pid_t pid; /* Current PID */
+ char name[BT_EVENT_NAMELEN]; /* Event name */
+ char tdname[BT_EVENT_TDNAMELEN]; /* Thread name */
+};
+
+struct bt_table {
+ uint32_t size; /* Trace table size */
+ uint32_t curr; /* Trace entry to use */
+ uint32_t wrap; /* Wrap-around, instead of dropping */
+ uint32_t drops_early; /* Trace entries dropped before init */
+ uint32_t drops_full; /* Trace entries dropped after full */
+ struct bt_event *table;
+};
+
+/* Boot-time tracing */
+static struct bt_table bt;
+
+/* Run-time tracing */
+static struct bt_table rt;
+
+/* Shutdown-time tracing */
+static struct bt_table st;
+
+/* Set when system boot is complete, and we've switched to runtime tracing. */
+static bool bootdone;
+
+/* Set when system shutdown has started. */
+static bool shutdown;
+
+/* Turn on dotrace() debug logging to console. */
+static bool dotrace_debugging;
+TUNABLE_BOOL("debug.boottrace.dotrace_debugging", &dotrace_debugging);
+
+/* Trace kernel events */
+static bool dotrace_kernel = true;
+TUNABLE_BOOL("kern.boottrace.dotrace_kernel", &dotrace_kernel);
+
+/* Trace userspace events */
+static bool dotrace_user = true;
+TUNABLE_BOOL("kern.boottrace.dotrace_user", &dotrace_user);
+
+static int sysctl_log(SYSCTL_HANDLER_ARGS);
+static int sysctl_boottrace(SYSCTL_HANDLER_ARGS);
+static int sysctl_runtrace(SYSCTL_HANDLER_ARGS);
+static int sysctl_shuttrace(SYSCTL_HANDLER_ARGS);
+static int sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_kern, OID_AUTO, boottrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "boottrace statistics");
+
+SYSCTL_PROC(_kern_boottrace, OID_AUTO, log,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+ 0, 0, sysctl_log, "A",
+ "Print a log of the boottrace trace data");
+SYSCTL_PROC(_kern_boottrace, OID_AUTO, boottrace,
+ CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_boottrace, "A",
+ "Capture a boot-time trace event");
+SYSCTL_PROC(_kern_boottrace, OID_AUTO, runtrace,
+ CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_runtrace, "A",
+ "Capture a run-time trace event");
+SYSCTL_PROC(_kern_boottrace, OID_AUTO, shuttrace,
+ CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_shuttrace, "A",
+ "Capture a shutdown-time trace event");
+SYSCTL_PROC(_kern_boottrace, OID_AUTO, reset,
+ CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_boottrace_reset, "A",
+ "Reset run-time tracing table");
+
+/*
+ * Global enable.
+ */
+bool __read_mostly boottrace_enabled = false;
+SYSCTL_BOOL(_kern_boottrace, OID_AUTO, enabled, CTLFLAG_RDTUN,
+ &boottrace_enabled, 0,
+ "Boot-time and shutdown-time tracing enabled");
+
+/*
+ * Enable dumping of the shutdown trace entries to console.
+ */
+bool shutdown_trace = false;
+SYSCTL_BOOL(_kern_boottrace, OID_AUTO, shutdown_trace, CTLFLAG_RWTUN,
+ &shutdown_trace, 0,
+ "Enable kernel shutdown tracing to the console");
+
+/*
+ * Set the delta threshold (ms) below which events are ignored, used in
+ * determining what to dump to the console.
+ */
+static int shutdown_trace_threshold;
+SYSCTL_INT(_kern_boottrace, OID_AUTO, shutdown_trace_threshold, CTLFLAG_RWTUN,
+ &shutdown_trace_threshold, 0,
+ "Tracing threshold (ms) below which tracing is ignored");
+
+/*
+ * Dump a trace to buffer or if buffer is NULL to console.
+ *
+ * Non-zero delta_threshold selectively prints entries based on delta
+ * with current and previous entry. Otherwise, delta_threshold of 0
+ * prints every trace entry and delta.
+ *
+ * Output something like this:
+ *
+ * CPU msecs delta process event
+ * 11 1228262715 0 init shutdown pre sync begin
+ * 3 1228265622 2907 init shutdown pre sync complete
+ * 3 1228265623 0 init shutdown turned swap off
+ * 18 1228266466 843 init shutdown unmounted all filesystems
+ *
+ * How to interpret:
+ *
+ * delta column represents the time in milliseconds between this event and the previous.
+ * Usually that means you can take the previous event, current event, match them
+ * up in the code, and whatever lies between is the culprit taking time.
+ *
+ * For example, above: Pre-Sync is taking 2907ms, and something between swap and unmount
+ * filesystems is taking 843 milliseconds.
+ *
+ * An event with a delta of 0 are 'landmark' events that simply exist in the output
+ * for the developer to know where the time measurement begins. The 0 is an arbitrary
+ * number that can effectively be ignored.
+ */
+#define BTC_DELTA_PRINT(bte, msecs, delta) do { \
+ if (sbp) { \
+ sbuf_printf(sbp, fmt, (bte)->cpuid, msecs, delta, \
+ (bte)->tdname, (bte)->name, (bte)->pid, \
+ (bte)->cputime / 1000000, \
+ ((bte)->cputime % 1000000) / 10000, \
+ (bte)->inblock, (bte)->oublock); \
+ } else { \
+ printf(fmt, (bte)->cpuid, msecs, delta, \
+ (bte)->tdname, (bte)->name, (bte)->pid, \
+ (bte)->cputime / 1000000, \
+ ((bte)->cputime % 1000000) / 10000, \
+ (bte)->inblock, (bte)->oublock); \
+ } \
+} while (0)
+
+/*
+ * Print the trace entries to the message buffer, or to an sbuf, if provided.
+ *
+ * Entries with a difference less than dthres will not be printed.
+ */
+static void
+boottrace_display(struct sbuf *sbp, struct bt_table *btp, uint64_t dthres)
+{
+ struct bt_event *evtp;
+ struct bt_event *last_evtp;
+ uint64_t msecs;
+ uint64_t first_msecs;
+ uint64_t last_msecs;
+ uint64_t dmsecs;
+ uint64_t last_dmsecs;
+ uint64_t total_dmsecs;
+ uint32_t i;
+ uint32_t curr;
+ const char *fmt = "%3u %10llu %10llu %-24s %-40s %5d %4d.%02d %5u %5u\n";
+ const char *hdr_fmt = "\n\n%3s %10s %10s %-24s %-40s %5s %6s %5s %5s\n";
+ bool printed;
+ bool last_printed;
+
+ /* Print the header */
+ if (sbp != NULL)
+ sbuf_printf(sbp, hdr_fmt,
+ "CPU", "msecs", "delta", "process",
+ "event", "PID", "CPUtime", "IBlks", "OBlks");
+ else
+ printf(hdr_fmt,
+ "CPU", "msecs", "delta", "process",
+ "event", "PID", "CPUtime", "IBlks", "OBlks");
+
+ first_msecs = 0;
+ last_evtp = NULL;
+ last_msecs = 0;
+ last_dmsecs = 0;
+ last_printed = false;
+ i = curr = btp->curr;
+
+ do {
+ evtp = &btp->table[i];
+ if (evtp->tsc == 0)
+ goto next;
+
+ msecs = cputick2usec(evtp->tick) / 1000;
+ dmsecs = (last_msecs != 0 && msecs > last_msecs) ?
+ msecs - last_msecs : 0;
+ printed = false;
+
+ /*
+ * If a threshold is defined, start filtering events by the
+ * delta of msecs.
+ */
+ if (dthres != 0 && (dmsecs > dthres)) {
+ /*
+ * Print the previous entry as a landmark, even if it
+ * falls below the threshold.
+ */
+ if (last_evtp != NULL && !last_printed)
+ BTC_DELTA_PRINT(last_evtp, last_msecs,
+ last_dmsecs);
+ BTC_DELTA_PRINT(evtp, msecs, dmsecs);
+ printed = true;
+ } else if (dthres == 0) {
+ BTC_DELTA_PRINT(evtp, msecs, dmsecs);
+ printed = true;
+ }
+ if (first_msecs == 0 || msecs < first_msecs)
+ first_msecs = msecs;
+ last_evtp = evtp;
+ last_msecs = msecs;
+ last_dmsecs = dmsecs;
+ last_printed = printed;
+ maybe_yield();
+next:
+ i = (i + 1) % btp->size;
+ } while (i != curr);
+
+ total_dmsecs = last_msecs > first_msecs ?
+ (last_msecs - first_msecs) : 0;
+ if (sbp != NULL)
+ sbuf_printf(sbp, "Total measured time: %ju msecs\n",
+ (uintmax_t)total_dmsecs);
+ else
+ printf("Total measured time: %ju msecs\n",
+ (uintmax_t)total_dmsecs);
+}
+
+/*
+ * Dump trace table entries to the console, given a delta threshold.
+ */
+void
+boottrace_dump_console(void)
+{
+ if (!boottrace_enabled)
+ return;
+
+ if (shutdown || rebooting || KERNEL_PANICKED()) {
+ boottrace_display(NULL, &st, shutdown_trace_threshold);
+ } else {
+ boottrace_display(NULL, &bt, 0);
+ boottrace_display(NULL, &rt, 0);
+ }
+}
+
+/*
+ * Records a new tracing event to the specified table.
+ *
+ * We don't use a lock because we want this to be callable from interrupt
+ * context.
+ */
+static int
+dotrace(struct bt_table *btp, const char *eventname, const char *tdname)
+{
+ uint32_t idx, nxt;
+ struct rusage usage;
+
+ MPASS(boottrace_enabled);
+ if (tdname == NULL)
+ tdname = (curproc->p_flag & P_SYSTEM) ?
+ curthread->td_name : curproc->p_comm;
+
+ dprintf("dotrace[");
+ dprintf("cpu=%u, pid=%d, tsc=%ju, tick=%d, td='%s', event='%s'",
+ PCPU_GET(cpuid), curthread->td_proc->p_pid,
+ (uintmax_t)get_cyclecount(), ticks, tdname, eventname);
+ if (btp->table == NULL) {
+ btp->drops_early++;
+ dprintf(", return=ENOSPC_1]\n");
+ return (ENOSPC);
+ }
+
+ /* Claim a slot in the table. */
+ do {
+ idx = btp->curr;
+ nxt = (idx + 1) % btp->size;
+ if (nxt == 0 && btp->wrap == 0) {
+ btp->drops_full++;
+ dprintf(", return=ENOSPC_2]\n");
+ return (ENOSPC);
+ }
+ } while (!atomic_cmpset_int(&btp->curr, idx, nxt));
+
+ btp->table[idx].cpuid = PCPU_GET(cpuid);
+ btp->table[idx].tsc = get_cyclecount(),
+ btp->table[idx].tick = cpu_ticks();
+ btp->table[idx].pid = curthread->td_proc->p_pid;
+
+ /*
+ * Get the resource usage for the process. Don't attempt this for the
+ * kernel proc0 or for critical section activities.
+ */
+ if ((curthread->td_proc == &proc0) || (curthread->td_critnest != 0)) {
+ btp->table[idx].cputime = 0;
+ btp->table[idx].inblock = 0;
+ btp->table[idx].oublock = 0;
+ } else {
+ kern_getrusage(curthread, RUSAGE_CHILDREN, &usage);
+ btp->table[idx].cputime =
+ (uint64_t)(usage.ru_utime.tv_sec * 1000000 +
+ usage.ru_utime.tv_usec + usage.ru_stime.tv_sec * 1000000 +
+ usage.ru_stime.tv_usec);
+ btp->table[idx].inblock = (uint32_t)usage.ru_inblock;
+ btp->table[idx].oublock = (uint32_t)usage.ru_oublock;
+ }
+ strlcpy(btp->table[idx].name, eventname, BT_EVENT_NAMELEN);
+ strlcpy(btp->table[idx].tdname, tdname, BT_EVENT_TDNAMELEN);
+
+ dprintf(", return=0]\n");
+ return (0);
+}
+
+/*
+ * Log various boot-time events, with the trace message encoded using
+ * printf-like arguments.
+ */
+int
+boottrace(const char *tdname, const char *fmt, ...)
+{
+ char eventname[BT_EVENT_NAMELEN];
+ struct bt_table *btp;
+ va_list ap;
+
+ if (!dotrace_kernel)
+ return (ENXIO);
+
+ va_start(ap, fmt);
+ vsnprintf(eventname, sizeof(eventname), fmt, ap);
+ va_end(ap);
+
+ btp = &bt;
+ if (bootdone)
+ btp = &rt;
+ if (shutdown || rebooting || KERNEL_PANICKED())
+ btp = &st;
+
+ return (dotrace(btp, eventname, tdname));
+}
+
+/*
+ * Log a run-time event & switch over to run-time tracing mode.
+ */
+static int
+runtrace(const char *eventname, const char *tdname)
+{
+
+ bootdone = true;
+ return (dotrace(&rt, eventname, tdname));
+}
+
+/*
+ * Parse a boottrace message from userspace.
+ *
+ * The input from may contain a ':' to denote tdname. If not, tdname is
+ * inferred from the process' name.
+ *
+ * e.g. reboot(8):SIGINT to init(8)
+ */
+static void
+boottrace_parse_message(char *message, char **eventname, char **tdname)
+{
+ char *delim;
+
+ delim = strchr(message, ':');
+ if (delim != NULL) {
+ *delim = '\0';
+ *tdname = message;
+ *eventname = ++delim;
+ } else {
+ *tdname = curproc->p_comm;
+ *eventname = message;
+ }
+}
+
+static int
+_boottrace_sysctl(struct bt_table *btp, struct sysctl_oid *oidp,
+ struct sysctl_req *req)
+{
+ char message[BT_MSGLEN];
+ char *eventname, *tdname;
+ int error;
+
+ if (!dotrace_user)
+ return (ENXIO);
+
+ message[0] = '\0';
+ error = sysctl_handle_string(oidp, message, sizeof(message), req);
+ if (error)
+ return (error);
+
+ boottrace_parse_message(message, &eventname, &tdname);
+ error = dotrace(btp, eventname, tdname);
+ if (error == ENOSPC) {
+ /* Ignore table full error. */
+ error = 0;
+ }
+ return (error);
+}
+
+static int
+sysctl_log(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf *sbuf;
+ int error;
+
+ if (!boottrace_enabled || req->newptr != NULL)
+ return (0);
+
+ sbuf = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+ boottrace_display(sbuf, &bt, 0);
+ boottrace_display(sbuf, &rt, 0);
+ sbuf_finish(sbuf);
+ error = SYSCTL_OUT(req, sbuf_data(sbuf), sbuf_len(sbuf));
+ sbuf_delete(sbuf);
+
+ return (error);
+}
+
+static int
+sysctl_boottrace(SYSCTL_HANDLER_ARGS)
+{
+
+ if (!boottrace_enabled)
+ return (0);
+
+ /* No output */
+ if (req->newptr == NULL)
+ return (0);
+
+ /* Trace to rt if we have reached multi-user. */
+ return (_boottrace_sysctl(bootdone ? &rt : &bt, oidp, req));
+}
+
+/*
+ * Log a run-time event and switch over to run-time tracing mode.
+ */
+static int
+sysctl_runtrace(SYSCTL_HANDLER_ARGS)
+{
+
+ if (!boottrace_enabled)
+ return (0);
+
+ /* No output */
+ if (req->newptr == NULL)
+ return (0);
+
+ bootdone = true;
+ return (_boottrace_sysctl(&rt, oidp, req));
+}
+
+/*
+ * Log a shutdown-time event and switch over to shutdown tracing mode.
+ */
+static int
+sysctl_shuttrace(SYSCTL_HANDLER_ARGS)
+{
+
+ if (!boottrace_enabled)
+ return (0);
+
+ /* No output */
+ if (req->newptr == NULL)
+ return (0);
+
+ shutdown = true;
+ return (_boottrace_sysctl(&st, oidp, req));
+}
+
+/*
+ * Reset the runtime tracing buffer.
+ */
+void
+boottrace_reset(const char *actor)
+{
+ char tmpbuf[64];
+
+ snprintf(tmpbuf, sizeof(tmpbuf), "reset: %s", actor);
+ runtrace(tmpbuf, NULL);
+}
+
+/*
+ * Note that a resize implies a reset, i.e., the index is reset to 0.
+ * We never shrink the array; we can only increase its size.
+ */
+int
+boottrace_resize(u_int newsize)
+{
+
+ if (newsize <= rt.size) {
+ return (EINVAL);
+ }
+ rt.table = realloc(rt.table, newsize * sizeof(struct bt_event),
+ M_BOOTTRACE, M_WAITOK | M_ZERO);
+ if (rt.table == NULL)
+ return (ENOMEM);
+
+ rt.size = newsize;
+ boottrace_reset("boottrace_resize");
+ return (0);
+}
+
+static int
+sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS)
+{
+
+ if (!boottrace_enabled)
+ return (0);
+
+ if (req->newptr != NULL)
+ boottrace_reset("sysctl_boottrace_reset");
+
+ return (0);
+}
+
+static void
+boottrace_init(void)
+{
+
+ if (!boottrace_enabled)
+ return;
+
+ /* Boottime trace table */
+ bt.size = BT_TABLE_DEFSIZE;
+ TUNABLE_INT_FETCH("kern.boottrace.table_size", &bt.size);
+ bt.size = max(bt.size, BT_TABLE_MINSIZE);
+ bt.table = malloc(bt.size * sizeof(struct bt_event), M_BOOTTRACE,
+ M_WAITOK | M_ZERO);
+
+ /* Stick in an initial entry. */
+ bt.table[0].cpuid = PCPU_GET(cpuid);
+ strlcpy(bt.table[0].tdname, "boottime", BT_EVENT_TDNAMELEN);
+ strlcpy(bt.table[0].name, "initial event", BT_EVENT_NAMELEN);
+ bt.curr = 1;
+
+ /* Run-time trace table (may wrap-around). */
+ rt.wrap = 1;
+ rt.size = BT_TABLE_RUNSIZE;
+ rt.table = malloc(rt.size * sizeof(struct bt_event), M_BOOTTRACE,
+ M_WAITOK | M_ZERO);
+
+ /* Shutdown trace table */
+ st.size = BT_TABLE_SHTSIZE;
+ st.table = malloc(st.size * sizeof(struct bt_event), M_BOOTTRACE,
+ M_WAITOK | M_ZERO);
+}
+SYSINIT(boottrace, SI_SUB_CPU, SI_ORDER_ANY, boottrace_init, 0);
diff --git a/sys/sys/boottrace.h b/sys/sys/boottrace.h
new file mode 100644
index 000000000000..b09888bff4ac
--- /dev/null
+++ b/sys/sys/boottrace.h
@@ -0,0 +1,60 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2008-2022 NetApp, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_BOOTTRACE_H_
+#define _SYS_BOOTTRACE_H_
+
+#define _BOOTTRACE_BOOTTRACE "kern.boottrace.boottrace"
+#define _BOOTTRACE_RUNTRACE "kern.boottrace.runtrace"
+#define _BOOTTRACE_SHUTTRACE "kern.boottrace.shuttrace"
+
+/* Messages are formatted as 'tdname:name' */
+#define BT_EVENT_TDNAMELEN 24
+#define BT_EVENT_NAMELEN 40
+#define BT_MSGLEN (BT_EVENT_NAMELEN + 1 + BT_EVENT_TDNAMELEN)
+
+#ifdef _KERNEL
+/*
+ * Convenience macros.
+ */
+#define _BOOTTRACE(tdname, ...) do { \
+ if (boottrace_enabled) \
+ (void)boottrace(tdname, __VA_ARGS__); \
+ } while (0)
+#define BOOTTRACE(...) _BOOTTRACE(NULL, __VA_ARGS__)
+#define BOOTTRACE_INIT(...) _BOOTTRACE("kernel", __VA_ARGS__)
+
+extern bool boottrace_enabled;
+extern bool shutdown_trace;
+
+int boottrace(const char *_tdname, const char *_eventfmt, ...)
+ __printflike(2,3);
+void boottrace_reset(const char *_actor);
+int boottrace_resize(u_int _newsize);
+void boottrace_dump_console(void);
+
+#endif /* _KERNEL */
+#endif /* _SYS_BOOTTRACE_H_ */