aboutsummaryrefslogtreecommitdiff
path: root/sbin/hastd/hast_compression.c
diff options
context:
space:
mode:
authorPawel Jakub Dawidek <pjd@FreeBSD.org>2011-03-06 23:09:33 +0000
committerPawel Jakub Dawidek <pjd@FreeBSD.org>2011-03-06 23:09:33 +0000
commit8cd3d45ad9324b8698d11125105078595b1b7b21 (patch)
tree89777d06274aeb1f7f06b2647305a98d160268c9 /sbin/hastd/hast_compression.c
parent563effb9ffa62d809ca1a97cd41a5429423446b7 (diff)
downloadsrc-8cd3d45ad9324b8698d11125105078595b1b7b21.tar.gz
src-8cd3d45ad9324b8698d11125105078595b1b7b21.zip
Allow to compress on-the-wire data using two algorithms:
- HOLE - it simply turns all-zero blocks into few bytes header; it is extremely fast, so it is turned on by default; it is mostly intended to speed up initial synchronization where we expect many zeros; - LZF - very fast algorithm by Marc Alexander Lehmann, which shows very decent compression ratio and has BSD license. MFC after: 2 weeks
Notes
Notes: svn path=/head/; revision=219354
Diffstat (limited to 'sbin/hastd/hast_compression.c')
-rw-r--r--sbin/hastd/hast_compression.c283
1 files changed, 283 insertions, 0 deletions
diff --git a/sbin/hastd/hast_compression.c b/sbin/hastd/hast_compression.c
new file mode 100644
index 000000000000..4f4a93a59ea5
--- /dev/null
+++ b/sbin/hastd/hast_compression.c
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+
+#include <hast.h>
+#include <lzf.h>
+#include <nv.h>
+#include <pjdlog.h>
+
+#include "hast_compression.h"
+
+static bool
+allzeros(const void *data, size_t size)
+{
+ const uint64_t *p = data;
+ unsigned int i;
+ uint64_t v;
+
+ PJDLOG_ASSERT((size % sizeof(*p)) == 0);
+
+ /*
+ * This is the fastest method I found for checking if the given
+ * buffer contain all zeros.
+ * Because inside the loop we don't check at every step, we would
+ * get an answer only after walking through entire buffer.
+ * To return early if the buffer doesn't contain all zeros, we probe
+ * 8 bytes at the begining, in the middle and at the end of the buffer
+ * first.
+ */
+
+ size >>= 3; /* divide by 8 */
+ if ((p[0] | p[size >> 1] | p[size - 1]) != 0)
+ return (false);
+ v = 0;
+ for (i = 0; i < size; i++)
+ v |= *p++;
+ return (v == 0);
+}
+
+static void *
+hast_hole_compress(const unsigned char *data, size_t *sizep)
+{
+ uint32_t size;
+ void *newbuf;
+
+ if (!allzeros(data, *sizep))
+ return (NULL);
+
+ newbuf = malloc(sizeof(size));
+ if (newbuf == NULL) {
+ pjdlog_warning("Unable to compress (no memory: %zu).",
+ (size_t)*sizep);
+ return (NULL);
+ }
+ size = htole32((uint32_t)*sizep);
+ bcopy(&size, newbuf, sizeof(size));
+ *sizep = sizeof(size);
+
+ return (newbuf);
+}
+
+static void *
+hast_hole_decompress(const unsigned char *data, size_t *sizep)
+{
+ uint32_t size;
+ void *newbuf;
+
+ if (*sizep != sizeof(size)) {
+ pjdlog_error("Unable to decompress (invalid size: %zu).",
+ *sizep);
+ return (NULL);
+ }
+
+ bcopy(data, &size, sizeof(size));
+ size = le32toh(size);
+
+ newbuf = malloc(size);
+ if (newbuf == NULL) {
+ pjdlog_error("Unable to decompress (no memory: %zu).",
+ (size_t)size);
+ return (NULL);
+ }
+ bzero(newbuf, size);
+ *sizep = size;
+
+ return (newbuf);
+}
+
+/* Minimum block size to try to compress. */
+#define HAST_LZF_COMPRESS_MIN 1024
+
+static void *
+hast_lzf_compress(const unsigned char *data, size_t *sizep)
+{
+ unsigned char *newbuf;
+ uint32_t origsize;
+ size_t newsize;
+
+ origsize = *sizep;
+
+ if (origsize <= HAST_LZF_COMPRESS_MIN)
+ return (NULL);
+
+ newsize = sizeof(origsize) + origsize - HAST_LZF_COMPRESS_MIN;
+ newbuf = malloc(newsize);
+ if (newbuf == NULL) {
+ pjdlog_warning("Unable to compress (no memory: %zu).",
+ newsize);
+ return (NULL);
+ }
+ newsize = lzf_compress(data, *sizep, newbuf + sizeof(origsize),
+ newsize - sizeof(origsize));
+ if (newsize == 0) {
+ free(newbuf);
+ return (NULL);
+ }
+ origsize = htole32(origsize);
+ bcopy(&origsize, newbuf, sizeof(origsize));
+
+ *sizep = sizeof(origsize) + newsize;
+ return (newbuf);
+}
+
+static void *
+hast_lzf_decompress(const unsigned char *data, size_t *sizep)
+{
+ unsigned char *newbuf;
+ uint32_t origsize;
+ size_t newsize;
+
+ PJDLOG_ASSERT(*sizep > sizeof(origsize));
+
+ bcopy(data, &origsize, sizeof(origsize));
+ origsize = le32toh(origsize);
+ PJDLOG_ASSERT(origsize > HAST_LZF_COMPRESS_MIN);
+
+ newbuf = malloc(origsize);
+ if (newbuf == NULL) {
+ pjdlog_error("Unable to decompress (no memory: %zu).",
+ (size_t)origsize);
+ return (NULL);
+ }
+ newsize = lzf_decompress(data + sizeof(origsize),
+ *sizep - sizeof(origsize), newbuf, origsize);
+ if (newsize == 0) {
+ free(newbuf);
+ pjdlog_error("Unable to decompress.");
+ return (NULL);
+ }
+ PJDLOG_ASSERT(newsize == origsize);
+
+ *sizep = newsize;
+ return (newbuf);
+}
+
+const char *
+compression_name(int num)
+{
+
+ switch (num) {
+ case HAST_COMPRESSION_NONE:
+ return ("none");
+ case HAST_COMPRESSION_HOLE:
+ return ("hole");
+ case HAST_COMPRESSION_LZF:
+ return ("lzf");
+ }
+ return ("unknown");
+}
+
+int
+compression_send(const struct hast_resource *res, struct nv *nv, void **datap,
+ size_t *sizep, bool *freedatap)
+{
+ unsigned char *newbuf;
+ int compression;
+ size_t size;
+
+ size = *sizep;
+ compression = res->hr_compression;
+
+ switch (compression) {
+ case HAST_COMPRESSION_NONE:
+ return (0);
+ case HAST_COMPRESSION_HOLE:
+ newbuf = hast_hole_compress(*datap, &size);
+ break;
+ case HAST_COMPRESSION_LZF:
+ /* Try 'hole' compression first. */
+ newbuf = hast_hole_compress(*datap, &size);
+ if (newbuf != NULL)
+ compression = HAST_COMPRESSION_HOLE;
+ else
+ newbuf = hast_lzf_compress(*datap, &size);
+ break;
+ default:
+ PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression);
+ }
+
+ if (newbuf == NULL) {
+ /* Unable to compress the data. */
+ return (0);
+ }
+ nv_add_string(nv, compression_name(compression), "compression");
+ if (nv_error(nv) != 0) {
+ free(newbuf);
+ errno = nv_error(nv);
+ return (-1);
+ }
+ if (*freedatap)
+ free(*datap);
+ *freedatap = true;
+ *datap = newbuf;
+ *sizep = size;
+
+ return (0);
+}
+
+int
+compression_recv(const struct hast_resource *res __unused, struct nv *nv,
+ void **datap, size_t *sizep, bool *freedatap)
+{
+ unsigned char *newbuf;
+ const char *algo;
+ size_t size;
+
+ algo = nv_get_string(nv, "compression");
+ if (algo == NULL)
+ return (0); /* No compression. */
+
+ newbuf = NULL;
+ size = *sizep;
+
+ if (strcmp(algo, "hole") == 0)
+ newbuf = hast_hole_decompress(*datap, &size);
+ else if (strcmp(algo, "lzf") == 0)
+ newbuf = hast_lzf_decompress(*datap, &size);
+ else {
+ pjdlog_error("Unknown compression algorithm '%s'.", algo);
+ return (-1); /* Unknown compression algorithm. */
+ }
+
+ if (newbuf == NULL)
+ return (-1);
+ if (*freedatap)
+ free(*datap);
+ *freedatap = true;
+ *datap = newbuf;
+ *sizep = size;
+
+ return (0);
+}