author    Conrad Meyer <cem@FreeBSD.org>    2019-11-06 06:42:00 +0000
committer Conrad Meyer <cem@FreeBSD.org>    2019-11-06 06:42:00 +0000
commit    ea68403922c3b53b00fc999fcb3eaef1feb50177 (patch)
tree      9870b0c695852e26fb9f8e19df8d7f5cb6616141
parent    90f4bdbe917eaf678feca2b0ff9647b5ae8bbb9d (diff)

Import Zstd 1.4.4 (tag: vendor/zstd/1.4.4)

Notes:
    svn path=/vendor/zstd/dist/; revision=354371
    svn path=/vendor/zstd/1.4.4/; revision=354372; tag=vendor/zstd/1.4.4
-rw-r--r--   CHANGELOG | 31
-rw-r--r--   Makefile | 4
-rw-r--r--   README.md | 3
-rw-r--r--   appveyor.yml | 19
-rw-r--r--   contrib/adaptive-compression/Makefile | 76
-rw-r--r--   contrib/adaptive-compression/README.md | 91
-rw-r--r--   contrib/adaptive-compression/adapt.c | 1129
-rw-r--r--   contrib/adaptive-compression/datagencli.c | 129
-rwxr-xr-x   contrib/adaptive-compression/test-correctness.sh | 252
-rwxr-xr-x   contrib/adaptive-compression/test-performance.sh | 59
-rw-r--r--   contrib/gen_html/.gitignore | 3
-rw-r--r--   contrib/pzstd/.gitignore | 2
-rw-r--r--   contrib/seekable_format/examples/.gitignore | 5
-rw-r--r--   doc/educational_decoder/Makefile | 56
-rw-r--r--   doc/educational_decoder/harness.c | 83
-rw-r--r--   doc/educational_decoder/zstd_decompress.c | 38
-rw-r--r--   doc/educational_decoder/zstd_decompress.h | 4
-rw-r--r--   doc/zstd_compression_format.md | 73
-rw-r--r--   doc/zstd_manual.html | 328
-rw-r--r--   examples/.gitignore | 15
-rw-r--r--   examples/streaming_compression.c | 8
-rw-r--r--   examples/streaming_decompression.c | 18
-rw-r--r--   lib/.gitignore | 3
-rw-r--r--   lib/Makefile | 2
-rw-r--r--   lib/README.md | 15
-rw-r--r--   lib/common/bitstream.h | 9
-rw-r--r--   lib/common/compiler.h | 22
-rw-r--r--   lib/common/fse.h | 2
-rw-r--r--   lib/common/fse_decompress.c | 2
-rw-r--r--   lib/common/mem.h | 75
-rw-r--r--   lib/common/pool.c | 10
-rw-r--r--   lib/common/threading.c | 47
-rw-r--r--   lib/common/threading.h | 33
-rw-r--r--   lib/common/xxhash.c | 10
-rw-r--r--   lib/common/zstd_internal.h | 95
-rw-r--r--   lib/compress/zstd_compress.c | 1089
-rw-r--r--   lib/compress/zstd_compress_internal.h | 124
-rw-r--r--   lib/compress/zstd_compress_literals.c | 15
-rw-r--r--   lib/compress/zstd_compress_literals.h | 2
-rw-r--r--   lib/compress/zstd_compress_sequences.c | 6
-rw-r--r--   lib/compress/zstd_compress_sequences.h | 2
-rw-r--r--   lib/compress/zstd_cwksp.h | 535
-rw-r--r--   lib/compress/zstd_double_fast.c | 23
-rw-r--r--   lib/compress/zstd_fast.c | 83
-rw-r--r--   lib/compress/zstd_lazy.c | 66
-rw-r--r--   lib/compress/zstd_ldm.c | 8
-rw-r--r--   lib/compress/zstd_opt.c | 12
-rw-r--r--   lib/compress/zstdmt_compress.c | 58
-rw-r--r--   lib/decompress/huf_decompress.c | 2
-rw-r--r--   lib/decompress/zstd_decompress.c | 33
-rw-r--r--   lib/decompress/zstd_decompress_block.c | 297
-rw-r--r--   lib/deprecated/zbuff.h | 11
-rw-r--r--   lib/dictBuilder/cover.c | 15
-rw-r--r--   lib/dictBuilder/zdict.c | 2
-rw-r--r--   lib/legacy/zstd_v01.c | 2
-rw-r--r--   lib/legacy/zstd_v02.c | 3
-rw-r--r--   lib/legacy/zstd_v03.c | 3
-rw-r--r--   lib/legacy/zstd_v04.c | 8
-rw-r--r--   lib/legacy/zstd_v05.c | 2
-rw-r--r--   lib/legacy/zstd_v06.c | 2
-rw-r--r--   lib/legacy/zstd_v07.c | 2
-rw-r--r--   lib/libzstd.pc.in | 5
-rw-r--r--   lib/zstd.h | 236
-rw-r--r--   programs/.gitignore | 37
-rw-r--r--   programs/README.md | 9
-rw-r--r--   programs/benchzstd.c | 39
-rw-r--r--   programs/benchzstd.h | 1
-rw-r--r--   programs/datagen.c | 18
-rw-r--r--   programs/dibio.c | 2
-rw-r--r--   programs/fileio.c | 556
-rw-r--r--   programs/fileio.h | 41
-rw-r--r--   programs/platform.h | 2
-rw-r--r--   programs/timefn.h | 6
-rw-r--r--   programs/util.c | 137
-rw-r--r--   programs/util.h | 14
-rw-r--r--   programs/zstd.1 | 17
-rw-r--r--   programs/zstd.1.md | 29
-rw-r--r--   programs/zstdcli.c | 91
-rw-r--r--   programs/zstdgrep.1 | 2
-rw-r--r--   programs/zstdless.1 | 2
-rw-r--r--   tests/.gitignore | 68
-rw-r--r--   tests/Makefile | 21
-rw-r--r--   tests/decodecorpus.c | 4
-rw-r--r--   tests/fullbench.c | 9
-rw-r--r--   tests/fuzz/Makefile | 19
-rw-r--r--   tests/fuzz/README.md | 21
-rw-r--r--   tests/fuzz/block_decompress.c | 2
-rw-r--r--   tests/fuzz/block_round_trip.c | 25
-rw-r--r--   tests/fuzz/dictionary_decompress.c | 23
-rw-r--r--   tests/fuzz/dictionary_loader.c | 93
-rw-r--r--   tests/fuzz/dictionary_round_trip.c | 35
-rw-r--r--   tests/fuzz/fuzz.h | 10
-rwxr-xr-x   tests/fuzz/fuzz.py | 147
-rw-r--r--   tests/fuzz/fuzz_data_producer.c | 85
-rw-r--r--   tests/fuzz/fuzz_data_producer.h | 60
-rw-r--r--   tests/fuzz/fuzz_helpers.h | 32
-rw-r--r--   tests/fuzz/regression_driver.c | 1
-rw-r--r--   tests/fuzz/simple_compress.c | 27
-rw-r--r--   tests/fuzz/simple_decompress.c | 24
-rw-r--r--   tests/fuzz/simple_round_trip.c | 26
-rw-r--r--   tests/fuzz/stream_decompress.c | 24
-rw-r--r--   tests/fuzz/stream_round_trip.c | 44
-rw-r--r--   tests/fuzz/zstd_frame_info.c | 4
-rw-r--r--   tests/fuzz/zstd_helpers.c | 74
-rw-r--r--   tests/fuzz/zstd_helpers.h | 15
-rw-r--r--   tests/fuzzer.c | 363
-rw-r--r--   tests/golden-compression/huffman-compressed-larger (renamed from tests/files/huffman-compressed-larger) | bin 143 -> 143 bytes
-rw-r--r--   tests/golden-decompression/rle-first-block.zst | bin 0 -> 45 bytes
-rwxr-xr-x   tests/playTests.sh | 212
-rw-r--r--   tests/poolTests.c | 2
-rw-r--r--   tests/regression/method.c | 33
-rw-r--r--   tests/regression/results.csv | 328
-rw-r--r--   tests/zbufftest.c | 8
-rw-r--r--   tests/zstreamtest.c | 13
-rw-r--r--   zlibWrapper/.gitignore | 28
-rw-r--r--   zlibWrapper/Makefile | 45
-rw-r--r--   zlibWrapper/examples/fitblk.c | 12
-rw-r--r--   zlibWrapper/examples/zwrapbench.c | 24
-rw-r--r--   zlibWrapper/gzclose.c | 2
-rw-r--r--   zlibWrapper/gzlib.c | 18
-rw-r--r--   zlibWrapper/gzread.c | 24
-rw-r--r--   zlibWrapper/gzwrite.c | 16
-rw-r--r--   zlibWrapper/zstd_zlibwrapper.c | 26
123 files changed, 4374 insertions, 4073 deletions
diff --git a/CHANGELOG b/CHANGELOG
index e5b5afdc83f3..3b882d4cda51 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,34 @@
+v1.4.4
+perf: Improved decompression speed, by > 10%, by @terrelln
+perf: Better compression speed when re-using a context, by @felixhandte
+perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
+perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
+perf: minor generic speed optimization, by @davidbolvansky
+api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
+api: fixed decoding of magic-less frames, by @terrelln
+api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
+cli: Named pipes support, by @bimbashrestha
+cli: short tar's extension support, by @stokito
+cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
+cli: commands --stream-size=# and --size-hint=#, by @nmagerko
+cli: command --exclude-compressed, by @shashank0791
+cli: faster `-t` test mode
+cli: improved some error messages, by @vangyzen
+cli: rare deadlock condition within dictionary builder, by @terrelln
+build: single-file decoder with emscripten compilation script, by @cwoffenden
+build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
+build: fixed deprecation warning for certain gcc version, reported by @jasonma163
+build: fix compilation on old gcc versions, by @cemeyer
+build: improved installation directories for cmake script, by Dmitri Shubin
+pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
+misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
+misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
+
+v1.4.3
+bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
+bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
+build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
+
v1.4.2
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
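The "Better compression speed when re-using a context" entry above refers to keeping a single ZSTD_CCtx alive across many compress calls instead of allocating a fresh one each time. Below is a minimal C sketch of that reuse pattern against the stable zstd API; the inputs, buffer handling and level choice are illustrative assumptions, not code from this import.

/* Sketch: one ZSTD_CCtx re-used for several independent inputs.
 * Illustrative only -- error handling is reduced to the essentials. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zstd.h>

int main(void)
{
    const char* inputs[] = { "first payload", "second payload", "third payload" };
    size_t i;
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();   /* allocated once, re-used below */
    if (cctx == NULL) return 1;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);

    for (i = 0; i < sizeof(inputs)/sizeof(inputs[0]); i++) {
        size_t const srcSize = strlen(inputs[i]);
        size_t const dstCap  = ZSTD_compressBound(srcSize);
        void* const dst      = malloc(dstCap);
        size_t cSize;
        if (dst == NULL) { ZSTD_freeCCtx(cctx); return 1; }
        /* ZSTD_compress2() resets the compression state between frames but
         * keeps the context (and its parameters) around for the next call. */
        cSize = ZSTD_compress2(cctx, dst, dstCap, inputs[i], srcSize);
        if (ZSTD_isError(cSize)) {
            fprintf(stderr, "error: %s\n", ZSTD_getErrorName(cSize));
            free(dst); ZSTD_freeCCtx(cctx); return 1;
        }
        printf("input %u: %u -> %u bytes\n", (unsigned)i, (unsigned)srcSize, (unsigned)cSize);
        free(dst);
    }
    ZSTD_freeCCtx(cctx);
    return 0;
}

Re-using the context this way amortizes the allocation of its internal workspace over all inputs, which is the usage pattern that changelog entry speaks to.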
diff --git a/Makefile b/Makefile
index f2ec8c9b1074..efb555c35b39 100644
--- a/Makefile
+++ b/Makefile
@@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror
test:
MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
$(MAKE) -C $(TESTDIR) $@
+ ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder test
## shortest: same as `make check`
.PHONY: shortest
@@ -99,8 +100,8 @@ man:
contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
- $(MAKE) -C contrib/adaptive-compression all
$(MAKE) -C contrib/largeNbDicts all
+ cd contrib/single_file_decoder/ ; ./build_test.sh
.PHONY: cleanTabs
cleanTabs:
@@ -116,7 +117,6 @@ clean:
@$(MAKE) -C contrib/gen_html $@ > $(VOID)
@$(MAKE) -C contrib/pzstd $@ > $(VOID)
@$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
- @$(MAKE) -C contrib/adaptive-compression $@ > $(VOID)
@$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
@$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
@$(RM) -r lz4
diff --git a/README.md b/README.md
index 290341cc78e7..9c5f9201307c 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
+[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/facebook/zstd
@@ -24,6 +25,8 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
+[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
+[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd
## Benchmarks
diff --git a/appveyor.yml b/appveyor.yml
index 35f019dd6b09..dd2c02ac4826 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,3 +1,7 @@
+# The following tests are run _only_ on the master branch.
+# To reproduce them, it's possible to push into a branch `appveyorTest`
+# or a branch `visual*`; these will intentionally trigger the `master` tests.
+
-
version: 1.0.{build}
branches:
@@ -169,13 +173,16 @@
sh -e playTests.sh --test-large-data &&
fullbench.exe -i1 &&
fullbench.exe -i1 -P0 &&
- fuzzer_VS2008_%PLATFORM%_Release.exe %FUZZERTEST% &&
- fuzzer_VS2010_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2012_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2013_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2015_%PLATFORM%_Release.exe %FUZZERTEST%
)
+
+# The following tests are for regular pushes
+# into `dev` or some feature branch
+# They run fewer tests, for a shorter feedback loop
+
-
version: 1.0.{build}
environment:
@@ -249,3 +256,11 @@
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
)
+
+
+ test_script:
+ - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
+ - if [%HOST%]==[mingw] (
+ set "CC=%COMPILER%" &&
+ make check
+ )
diff --git a/contrib/adaptive-compression/Makefile b/contrib/adaptive-compression/Makefile
deleted file mode 100644
index 2718e9d6e1be..000000000000
--- a/contrib/adaptive-compression/Makefile
+++ /dev/null
@@ -1,76 +0,0 @@
-
-ZSTDDIR = ../../lib
-PRGDIR = ../../programs
-ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
-ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c
-ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
-ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
-
-MULTITHREAD_LDFLAGS = -pthread
-DEBUGFLAGS= -g -DZSTD_DEBUG=1
-CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
- -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
-CFLAGS ?= -O3
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
- -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
- -Wstrict-prototypes -Wundef \
- -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
- -Wredundant-decls
-CFLAGS += $(DEBUGFLAGS)
-CFLAGS += $(MOREFLAGS)
-FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
-
-all: adapt datagen
-
-adapt: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
- $(CC) $(FLAGS) $^ -o $@
-
-adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
- $(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
-
-datagen : $(PRGDIR)/datagen.c datagencli.c
- $(CC) $(FLAGS) $^ -o $@
-
-test-adapt-correctness: datagen adapt
- @./test-correctness.sh
- @echo "test correctness complete"
-
-test-adapt-performance: datagen adapt
- @./test-performance.sh
- @echo "test performance complete"
-
-clean:
- @$(RM) -f adapt datagen
- @$(RM) -rf *.dSYM
- @$(RM) -f tmp*
- @$(RM) -f tests/*.zst
- @$(RM) -f tests/tmp*
- @echo "finished cleaning"
-
-#-----------------------------------------------------------------------------
-# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
-#-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
-
-ifneq (,$(filter $(shell uname),SunOS))
-INSTALL ?= ginstall
-else
-INSTALL ?= install
-endif
-
-PREFIX ?= /usr/local
-DESTDIR ?=
-BINDIR ?= $(PREFIX)/bin
-
-INSTALL_PROGRAM ?= $(INSTALL) -m 755
-
-install: adapt
- @echo Installing binaries
- @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/
- @$(INSTALL_PROGRAM) adapt $(DESTDIR)$(BINDIR)/zstd-adaptive
- @echo zstd-adaptive installation completed
-
-uninstall:
- @$(RM) $(DESTDIR)$(BINDIR)/zstd-adaptive
- @echo zstd-adaptive programs successfully uninstalled
-endif
diff --git a/contrib/adaptive-compression/README.md b/contrib/adaptive-compression/README.md
deleted file mode 100644
index 0929b16ba763..000000000000
--- a/contrib/adaptive-compression/README.md
+++ /dev/null
@@ -1,91 +0,0 @@
-### Summary
-
-`adapt` is a new compression tool targeted at optimizing performance across network connections and pipelines. The tool is aimed at sensing network speeds and adapting compression level based on network or pipe speeds.
-In situations where the compression level does not appropriately match the network/pipe speed, compression may bottleneck the entire pipeline, or the files may not be compressed as much as they could be, losing efficiency. It also becomes quite impractical to manually measure and set an optimal compression level (which could change over time).
-
-### Using `adapt`
-
-In order to build and use the tool, you can simply run `make adapt` in the `adaptive-compression` directory under `contrib`. This will generate an executable available for use. Another possible method of installation is running `make install`, which will create and install the binary as the command `zstd-adaptive`.
-
-Similar to many other compression utilities, `zstd-adaptive` can be invoked by using the following format:
-
-`zstd-adaptive [options] [file(s)]`
-
-Supported options for the above format are described below.
-
-`zstd-adaptive` also supports reading from `stdin` and writing to `stdout`, which is potentially more useful. By default, if no files are given, `zstd-adaptive` reads from and writes to standard I/O. Therefore, you can simply insert it within a pipeline like so:
-
-`cat FILE | zstd-adaptive | ssh "cat - > tmp.zst"`
-
-If a file is provided, it is also possible to force writing to stdout using the `-c` flag like so:
-
-`zstd-adaptive -c FILE | ssh "cat - > tmp.zst"`
-
-Several options described below can be used to control the behavior of `zstd-adaptive`. More specifically, the `-l#` and `-u#` flags set lower and upper bounds so that the compression level always stays within that range. The `-i#` flag can also be used to change the initial compression level. If an initial compression level is not provided, it is chosen so that it falls within the appropriate range (it becomes equal to the lower bound).
-
-### Options
-`-oFILE` : write output to `FILE`
-
-`-i#` : provide initial compression level (must be within the appropriate bounds)
-
-`-h` : display help/information
-
-`-f` : force the compression level to stay constant
-
-`-c` : force write to `stdout`
-
-`-p` : hide progress bar
-
-`-q` : quiet mode -- do not show progress bar or other information
-
-`-l#` : set a lower bound on the compression level (default is 1)
-
-`-u#` : set an upper bound on the compression level (default is 22)
-### Benchmarking / Test results
-#### Artificial Tests
-These artificial tests were run by using the `pv` command line utility in order to limit pipe speeds (25 MB/s read and 5 MB/s write limits were chosen to mimic severe throughput constraints). A 40 GB backup file was sent through a pipeline, compressed, and written out to a file. Compression time, size, and ratio were computed. Data for `zstd -15` was excluded from these tests because the test runs quite long.
-
-<table>
-<tr><th> 25 MB/s read limit </th></tr>
-<tr><td>
-
-| Compressor Name | Ratio | Compressed Size | Compression Time |
-|:----------------|------:|----------------:|-----------------:|
-| zstd -3 | 2.108 | 20.718 GB | 29m 48.530s |
-| zstd-adaptive | 2.230 | 19.581 GB | 29m 48.798s |
-
-</td><tr>
-</table>
-
-<table>
-<tr><th> 5 MB/s write limit </th></tr>
-<tr><td>
-
-| Compressor Name | Ratio | Compressed Size | Compression Time |
-|:----------------|------:|----------------:|-----------------:|
-| zstd -3 | 2.108 | 20.718 GB | 1h 10m 43.076s |
-| zstd-adaptive | 2.249 | 19.412 GB | 1h 06m 15.577s |
-
-</td></tr>
-</table>
-
-The commands used for this test generally followed the form:
-
-`cat FILE | pv -L 25m -q | COMPRESSION | pv -q > tmp.zst # impose 25 MB/s read limit`
-
-`cat FILE | pv -q | COMPRESSION | pv -L 5m -q > tmp.zst # impose 5 MB/s write limit`
-
-#### SSH Tests
-
-The following tests were performed by piping a relatively large backup file (approximately 80 GB) through compression and over SSH to be stored on a server. The test data includes statistics for time and compressed size on `zstd` at several compression levels, as well as `zstd-adaptive`. The data highlights the potential advantages that `zstd-adaptive` has over using a low static compression level and the negative impacts that using an excessively high static compression level can have on
-pipe throughput.
-
-| Compressor Name | Ratio | Compressed Size | Compression Time |
-|:----------------|------:|----------------:|-----------------:|
-| zstd -3 | 2.212 | 32.426 GB | 1h 17m 59.756s |
-| zstd -15 | 2.374 | 30.213 GB | 2h 56m 59.441s |
-| zstd-adaptive | 2.315 | 30.993 GB | 1h 18m 52.860s |
-
-The commands used for this test generally followed the form:
-
-`cat FILE | COMPRESSION | ssh dev "cat - > tmp.zst"`
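The adaptation rule the deleted README above describes is implemented in adapt.c (removed just below): when the job-creation or file-writing thread has to wait on compression, the level is lowered; when compression has to wait on I/O, the level is raised, always staying inside the bounds set by `-l#`/`-u#`. Below is a condensed C sketch of that decision. It borrows the thresholds from adapt.c but deliberately leaves out the cooldown and convergence counters, so it is a simplification for illustration, not the removed code.

/* Condensed sketch of the level-adaptation decision described above.
 * Thresholds match the removed adapt.c; cooldown/convergence logic omitted. */
#include <stdio.h>

#define CHANGE_BY_TWO_THRESHOLD 0.1
#define CHANGE_BY_ONE_THRESHOLD 0.65

/* Map "how finished the other side was when we had to wait on it" to a step. */
static unsigned completionToChange(double completion)
{
    if (completion < CHANGE_BY_TWO_THRESHOLD) return 2;  /* far behind: large step */
    if (completion < CHANGE_BY_ONE_THRESHOLD) return 1;  /* somewhat behind */
    return 0;                                            /* nearly done: keep level */
}

/* ioWaitOnComp : completion of the compression thread when create/write waited on it
 * compWaitOnIO : completion of the I/O threads when compression waited on them
 * Returns a new level clamped to [minLevel, maxLevel] (level is assumed in range). */
static unsigned adaptLevel(unsigned level, unsigned minLevel, unsigned maxLevel,
                           double ioWaitOnComp, double compWaitOnIO)
{
    double const threshold = 0.00001;
    if (1 - ioWaitOnComp > threshold) {            /* compression is the bottleneck */
        unsigned const step = completionToChange(ioWaitOnComp);
        unsigned const bounded = step < level - minLevel ? step : level - minLevel;
        return level - bounded;
    }
    if (1 - compWaitOnIO > threshold) {            /* network/disk is the bottleneck */
        unsigned const step = completionToChange(compWaitOnIO);
        unsigned const bounded = step < maxLevel - level ? step : maxLevel - level;
        return level + bounded;
    }
    return level;                                  /* pipeline is balanced */
}

int main(void)
{
    /* Example: the writer waited on compression, and compression was only 40%
     * done at that moment -> drop the level by one step, staying >= the bound. */
    unsigned const newLevel = adaptLevel(6 /*level*/, 1 /*min*/, 22 /*max*/,
                                         0.40 /*ioWaitOnComp*/, 1.0 /*compWaitOnIO*/);
    printf("new compression level: %u\n", newLevel);   /* prints 5 */
    return 0;
}

(The main `zstd` CLI exposes a similar behaviour through its `--adapt` option, which is presumably why this contrib prototype was retired in this release.)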
diff --git a/contrib/adaptive-compression/adapt.c b/contrib/adaptive-compression/adapt.c
deleted file mode 100644
index 8fb4047e996b..000000000000
--- a/contrib/adaptive-compression/adapt.c
+++ /dev/null
@@ -1,1129 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free */
-#include <pthread.h> /* pthread functions */
-#include <string.h> /* memset */
-#include "zstd_internal.h"
-#include "util.h"
-#include "timefn.h" /* UTIL_time_t, UTIL_getTime, UTIL_getSpanTimeMicro */
-
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define PRINT(...) fprintf(stdout, __VA_ARGS__)
-#define DEBUG(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
-#define FILE_CHUNK_SIZE 4 << 20
-#define MAX_NUM_JOBS 2
-#define stdinmark "/*stdin*\\"
-#define stdoutmark "/*stdout*\\"
-#define MAX_PATH 256
-#define DEFAULT_DISPLAY_LEVEL 1
-#define DEFAULT_COMPRESSION_LEVEL 6
-#define MAX_COMPRESSION_LEVEL_CHANGE 2
-#define CONVERGENCE_LOWER_BOUND 5
-#define CLEVEL_DECREASE_COOLDOWN 5
-#define CHANGE_BY_TWO_THRESHOLD 0.1
-#define CHANGE_BY_ONE_THRESHOLD 0.65
-
-#ifndef DEBUG_MODE
-static int g_displayLevel = DEFAULT_DISPLAY_LEVEL;
-#else
-static int g_displayLevel = DEBUG_MODE;
-#endif
-
-static unsigned g_compressionLevel = DEFAULT_COMPRESSION_LEVEL;
-static UTIL_time_t g_startTime;
-static size_t g_streamedSize = 0;
-static unsigned g_useProgressBar = 1;
-static unsigned g_forceCompressionLevel = 0;
-static unsigned g_minCLevel = 1;
-static unsigned g_maxCLevel;
-
-typedef struct {
- void* start;
- size_t size;
- size_t capacity;
-} buffer_t;
-
-typedef struct {
- size_t filled;
- buffer_t buffer;
-} inBuff_t;
-
-typedef struct {
- buffer_t src;
- buffer_t dst;
- unsigned jobID;
- unsigned lastJobPlusOne;
- size_t compressedSize;
- size_t dictSize;
-} jobDescription;
-
-typedef struct {
- pthread_mutex_t pMutex;
- int noError;
-} mutex_t;
-
-typedef struct {
- pthread_cond_t pCond;
- int noError;
-} cond_t;
-
-typedef struct {
- unsigned compressionLevel;
- unsigned numJobs;
- unsigned nextJobID;
- unsigned threadError;
-
- /*
- * JobIDs for the next jobs to be created, compressed, and written
- */
- unsigned jobReadyID;
- unsigned jobCompressedID;
- unsigned jobWriteID;
- unsigned allJobsCompleted;
-
- /*
- * counter for how many jobs in a row the compression level has not changed
- * if the counter becomes >= CONVERGENCE_LOWER_BOUND, the next time the
- * compression level tries to change (by non-zero amount) resets the counter
- * to 1 and does not apply the change
- */
- unsigned convergenceCounter;
-
- /*
- * cooldown counter in order to prevent rapid successive decreases in compression level
- * whenever compression level is decreased, cooldown is set to CLEVEL_DECREASE_COOLDOWN
- * whenever adaptCompressionLevel() is called and cooldown != 0, it is decremented
- * as long as cooldown != 0, the compression level cannot be decreased
- */
- unsigned cooldown;
-
- /*
- * XWaitYCompletion
- * Range from 0.0 to 1.0
- * if the value is not 1.0, then this implies that thread X waited on thread Y to finish
- * and thread Y was XWaitYCompletion finished at the time of the wait (i.e. compressWaitWriteCompletion=0.5
- * implies that the compression thread waited on the write thread and it was only 50% finished writing a job)
- */
- double createWaitCompressionCompletion;
- double compressWaitCreateCompletion;
- double compressWaitWriteCompletion;
- double writeWaitCompressionCompletion;
-
- /*
- * Completion values
- * Range from 0.0 to 1.0
- * Jobs are divided into mini-chunks in order to measure completion
- * these values are updated each time a thread finishes its operation on the
- * mini-chunk (i.e. finishes writing out, compressing, etc. this mini-chunk).
- */
- double compressionCompletion;
- double writeCompletion;
- double createCompletion;
-
- mutex_t jobCompressed_mutex;
- cond_t jobCompressed_cond;
- mutex_t jobReady_mutex;
- cond_t jobReady_cond;
- mutex_t allJobsCompleted_mutex;
- cond_t allJobsCompleted_cond;
- mutex_t jobWrite_mutex;
- cond_t jobWrite_cond;
- mutex_t compressionCompletion_mutex;
- mutex_t createCompletion_mutex;
- mutex_t writeCompletion_mutex;
- mutex_t compressionLevel_mutex;
- size_t lastDictSize;
- inBuff_t input;
- jobDescription* jobs;
- ZSTD_CCtx* cctx;
-} adaptCCtx;
-
-typedef struct {
- adaptCCtx* ctx;
- FILE* dstFile;
-} outputThreadArg;
-
-typedef struct {
- FILE* srcFile;
- adaptCCtx* ctx;
- outputThreadArg* otArg;
-} fcResources;
-
-static void freeCompressionJobs(adaptCCtx* ctx)
-{
- unsigned u;
- for (u=0; u<ctx->numJobs; u++) {
- jobDescription job = ctx->jobs[u];
- free(job.dst.start);
- free(job.src.start);
- }
-}
-
-static int destroyMutex(mutex_t* mutex)
-{
- if (mutex->noError) {
- int const ret = pthread_mutex_destroy(&mutex->pMutex);
- return ret;
- }
- return 0;
-}
-
-static int destroyCond(cond_t* cond)
-{
- if (cond->noError) {
- int const ret = pthread_cond_destroy(&cond->pCond);
- return ret;
- }
- return 0;
-}
-
-static int freeCCtx(adaptCCtx* ctx)
-{
- if (!ctx) return 0;
- {
- int error = 0;
- error |= destroyMutex(&ctx->jobCompressed_mutex);
- error |= destroyCond(&ctx->jobCompressed_cond);
- error |= destroyMutex(&ctx->jobReady_mutex);
- error |= destroyCond(&ctx->jobReady_cond);
- error |= destroyMutex(&ctx->allJobsCompleted_mutex);
- error |= destroyCond(&ctx->allJobsCompleted_cond);
- error |= destroyMutex(&ctx->jobWrite_mutex);
- error |= destroyCond(&ctx->jobWrite_cond);
- error |= destroyMutex(&ctx->compressionCompletion_mutex);
- error |= destroyMutex(&ctx->createCompletion_mutex);
- error |= destroyMutex(&ctx->writeCompletion_mutex);
- error |= destroyMutex(&ctx->compressionLevel_mutex);
- error |= ZSTD_isError(ZSTD_freeCCtx(ctx->cctx));
- free(ctx->input.buffer.start);
- if (ctx->jobs){
- freeCompressionJobs(ctx);
- free(ctx->jobs);
- }
- free(ctx);
- return error;
- }
-}
-
-static int initMutex(mutex_t* mutex)
-{
- int const ret = pthread_mutex_init(&mutex->pMutex, NULL);
- mutex->noError = !ret;
- return ret;
-}
-
-static int initCond(cond_t* cond)
-{
- int const ret = pthread_cond_init(&cond->pCond, NULL);
- cond->noError = !ret;
- return ret;
-}
-
-static int initCCtx(adaptCCtx* ctx, unsigned numJobs)
-{
- ctx->compressionLevel = g_compressionLevel;
- {
- int pthreadError = 0;
- pthreadError |= initMutex(&ctx->jobCompressed_mutex);
- pthreadError |= initCond(&ctx->jobCompressed_cond);
- pthreadError |= initMutex(&ctx->jobReady_mutex);
- pthreadError |= initCond(&ctx->jobReady_cond);
- pthreadError |= initMutex(&ctx->allJobsCompleted_mutex);
- pthreadError |= initCond(&ctx->allJobsCompleted_cond);
- pthreadError |= initMutex(&ctx->jobWrite_mutex);
- pthreadError |= initCond(&ctx->jobWrite_cond);
- pthreadError |= initMutex(&ctx->compressionCompletion_mutex);
- pthreadError |= initMutex(&ctx->createCompletion_mutex);
- pthreadError |= initMutex(&ctx->writeCompletion_mutex);
- pthreadError |= initMutex(&ctx->compressionLevel_mutex);
- if (pthreadError) return pthreadError;
- }
- ctx->numJobs = numJobs;
- ctx->jobReadyID = 0;
- ctx->jobCompressedID = 0;
- ctx->jobWriteID = 0;
- ctx->lastDictSize = 0;
-
-
- ctx->createWaitCompressionCompletion = 1;
- ctx->compressWaitCreateCompletion = 1;
- ctx->compressWaitWriteCompletion = 1;
- ctx->writeWaitCompressionCompletion = 1;
- ctx->createCompletion = 1;
- ctx->writeCompletion = 1;
- ctx->compressionCompletion = 1;
- ctx->convergenceCounter = 0;
- ctx->cooldown = 0;
-
- ctx->jobs = calloc(1, numJobs*sizeof(jobDescription));
-
- if (!ctx->jobs) {
- DISPLAY("Error: could not allocate space for jobs during context creation\n");
- return 1;
- }
-
- /* initializing jobs */
- {
- unsigned jobNum;
- for (jobNum=0; jobNum<numJobs; jobNum++) {
- jobDescription* job = &ctx->jobs[jobNum];
- job->src.start = malloc(2 * FILE_CHUNK_SIZE);
- job->dst.start = malloc(ZSTD_compressBound(FILE_CHUNK_SIZE));
- job->lastJobPlusOne = 0;
- if (!job->src.start || !job->dst.start) {
- DISPLAY("Could not allocate buffers for jobs\n");
- return 1;
- }
- job->src.capacity = FILE_CHUNK_SIZE;
- job->dst.capacity = ZSTD_compressBound(FILE_CHUNK_SIZE);
- }
- }
-
- ctx->nextJobID = 0;
- ctx->threadError = 0;
- ctx->allJobsCompleted = 0;
-
- ctx->cctx = ZSTD_createCCtx();
- if (!ctx->cctx) {
- DISPLAY("Error: could not allocate ZSTD_CCtx\n");
- return 1;
- }
-
- ctx->input.filled = 0;
- ctx->input.buffer.capacity = 2 * FILE_CHUNK_SIZE;
-
- ctx->input.buffer.start = malloc(ctx->input.buffer.capacity);
- if (!ctx->input.buffer.start) {
- DISPLAY("Error: could not allocate input buffer\n");
- return 1;
- }
- return 0;
-}
-
-static adaptCCtx* createCCtx(unsigned numJobs)
-{
-
- adaptCCtx* const ctx = calloc(1, sizeof(adaptCCtx));
- if (ctx == NULL) {
- DISPLAY("Error: could not allocate space for context\n");
- return NULL;
- }
- {
- int const error = initCCtx(ctx, numJobs);
- if (error) {
- freeCCtx(ctx);
- return NULL;
- }
- return ctx;
- }
-}
-
-static void signalErrorToThreads(adaptCCtx* ctx)
-{
- ctx->threadError = 1;
- pthread_mutex_lock(&ctx->jobReady_mutex.pMutex);
- pthread_cond_signal(&ctx->jobReady_cond.pCond);
- pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- pthread_cond_broadcast(&ctx->jobCompressed_cond.pCond);
- pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
- pthread_cond_signal(&ctx->jobWrite_cond.pCond);
- pthread_mutex_unlock(&ctx->jobWrite_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->allJobsCompleted_mutex.pMutex);
- pthread_cond_signal(&ctx->allJobsCompleted_cond.pCond);
- pthread_mutex_unlock(&ctx->allJobsCompleted_mutex.pMutex);
-}
-
-static void waitUntilAllJobsCompleted(adaptCCtx* ctx)
-{
- if (!ctx) return;
- pthread_mutex_lock(&ctx->allJobsCompleted_mutex.pMutex);
- while (ctx->allJobsCompleted == 0 && !ctx->threadError) {
- pthread_cond_wait(&ctx->allJobsCompleted_cond.pCond, &ctx->allJobsCompleted_mutex.pMutex);
- }
- pthread_mutex_unlock(&ctx->allJobsCompleted_mutex.pMutex);
-}
-
-/* map completion percentages to values for changing compression level */
-static unsigned convertCompletionToChange(double completion)
-{
- if (completion < CHANGE_BY_TWO_THRESHOLD) {
- return 2;
- }
- else if (completion < CHANGE_BY_ONE_THRESHOLD) {
- return 1;
- }
- else {
- return 0;
- }
-}
-
-/*
- * Compression level is changed depending on which part of the compression process is lagging
- * Currently, three threads exist for job creation, compression, and file writing respectively.
- * adaptCompressionLevel() increments or decrements compression level based on which of the threads is lagging
- * job creation or file writing lag => increased compression level
- * compression thread lag => decreased compression level
- * detecting which thread is lagging is done by keeping track of how many calls each thread makes to pthread_cond_wait
- */
-static void adaptCompressionLevel(adaptCCtx* ctx)
-{
- double createWaitCompressionCompletion;
- double compressWaitCreateCompletion;
- double compressWaitWriteCompletion;
- double writeWaitCompressionCompletion;
- double const threshold = 0.00001;
- unsigned prevCompressionLevel;
-
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- prevCompressionLevel = ctx->compressionLevel;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
-
-
- if (g_forceCompressionLevel) {
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- ctx->compressionLevel = g_compressionLevel;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
- return;
- }
-
-
- DEBUG(2, "adapting compression level %u\n", prevCompressionLevel);
-
- /* read and reset completion measurements */
- pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
- DEBUG(2, "createWaitCompressionCompletion %f\n", ctx->createWaitCompressionCompletion);
- DEBUG(2, "writeWaitCompressionCompletion %f\n", ctx->writeWaitCompressionCompletion);
- createWaitCompressionCompletion = ctx->createWaitCompressionCompletion;
- writeWaitCompressionCompletion = ctx->writeWaitCompressionCompletion;
- pthread_mutex_unlock(&ctx->compressionCompletion_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->writeCompletion_mutex.pMutex);
- DEBUG(2, "compressWaitWriteCompletion %f\n", ctx->compressWaitWriteCompletion);
- compressWaitWriteCompletion = ctx->compressWaitWriteCompletion;
- pthread_mutex_unlock(&ctx->writeCompletion_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->createCompletion_mutex.pMutex);
- DEBUG(2, "compressWaitCreateCompletion %f\n", ctx->compressWaitCreateCompletion);
- compressWaitCreateCompletion = ctx->compressWaitCreateCompletion;
- pthread_mutex_unlock(&ctx->createCompletion_mutex.pMutex);
- DEBUG(2, "convergence counter: %u\n", ctx->convergenceCounter);
-
- assert(g_minCLevel <= prevCompressionLevel && g_maxCLevel >= prevCompressionLevel);
-
- /* adaptation logic */
- if (ctx->cooldown) ctx->cooldown--;
-
- if ((1-createWaitCompressionCompletion > threshold || 1-writeWaitCompressionCompletion > threshold) && ctx->cooldown == 0) {
- /* create or write waiting on compression */
- /* use whichever one waited less because it was slower */
- double const completion = MAX(createWaitCompressionCompletion, writeWaitCompressionCompletion);
- unsigned const change = convertCompletionToChange(completion);
- unsigned const boundChange = MIN(change, prevCompressionLevel - g_minCLevel);
- if (ctx->convergenceCounter >= CONVERGENCE_LOWER_BOUND && boundChange != 0) {
- /* reset convergence counter, might have been a spike */
- ctx->convergenceCounter = 0;
- DEBUG(2, "convergence counter reset, no change applied\n");
- }
- else if (boundChange != 0) {
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- ctx->compressionLevel -= boundChange;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
- ctx->cooldown = CLEVEL_DECREASE_COOLDOWN;
- ctx->convergenceCounter = 1;
-
- DEBUG(2, "create or write threads waiting on compression, tried to decrease compression level by %u\n\n", boundChange);
- }
- }
- else if (1-compressWaitWriteCompletion > threshold || 1-compressWaitCreateCompletion > threshold) {
- /* compress waiting on write */
- double const completion = MIN(compressWaitWriteCompletion, compressWaitCreateCompletion);
- unsigned const change = convertCompletionToChange(completion);
- unsigned const boundChange = MIN(change, g_maxCLevel - prevCompressionLevel);
- if (ctx->convergenceCounter >= CONVERGENCE_LOWER_BOUND && boundChange != 0) {
- /* reset convergence counter, might have been a spike */
- ctx->convergenceCounter = 0;
- DEBUG(2, "convergence counter reset, no change applied\n");
- }
- else if (boundChange != 0) {
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- ctx->compressionLevel += boundChange;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
- ctx->cooldown = 0;
- ctx->convergenceCounter = 1;
-
- DEBUG(2, "compress waiting on write or create, tried to increase compression level by %u\n\n", boundChange);
- }
-
- }
-
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- if (ctx->compressionLevel == prevCompressionLevel) {
- ctx->convergenceCounter++;
- }
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
-}
-
-static size_t getUseableDictSize(unsigned compressionLevel)
-{
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
- unsigned const overlapLog = compressionLevel >= (unsigned)ZSTD_maxCLevel() ? 0 : 3;
- size_t const overlapSize = 1 << (params.cParams.windowLog - overlapLog);
- return overlapSize;
-}
-
-static void* compressionThread(void* arg)
-{
- adaptCCtx* const ctx = (adaptCCtx*)arg;
- unsigned currJob = 0;
- for ( ; ; ) {
- unsigned const currJobIndex = currJob % ctx->numJobs;
- jobDescription* const job = &ctx->jobs[currJobIndex];
- DEBUG(2, "starting compression for job %u\n", currJob);
-
- {
- /* check if compression thread will have to wait */
- unsigned willWaitForCreate = 0;
- unsigned willWaitForWrite = 0;
-
- pthread_mutex_lock(&ctx->jobReady_mutex.pMutex);
- if (currJob + 1 > ctx->jobReadyID) willWaitForCreate = 1;
- pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
- if (currJob - ctx->jobWriteID >= ctx->numJobs) willWaitForWrite = 1;
- pthread_mutex_unlock(&ctx->jobWrite_mutex.pMutex);
-
-
- pthread_mutex_lock(&ctx->createCompletion_mutex.pMutex);
- if (willWaitForCreate) {
- DEBUG(2, "compression will wait for create on job %u\n", currJob);
- ctx->compressWaitCreateCompletion = ctx->createCompletion;
- DEBUG(2, "create completion %f\n", ctx->compressWaitCreateCompletion);
-
- }
- else {
- ctx->compressWaitCreateCompletion = 1;
- }
- pthread_mutex_unlock(&ctx->createCompletion_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->writeCompletion_mutex.pMutex);
- if (willWaitForWrite) {
- DEBUG(2, "compression will wait for write on job %u\n", currJob);
- ctx->compressWaitWriteCompletion = ctx->writeCompletion;
- DEBUG(2, "write completion %f\n", ctx->compressWaitWriteCompletion);
- }
- else {
- ctx->compressWaitWriteCompletion = 1;
- }
- pthread_mutex_unlock(&ctx->writeCompletion_mutex.pMutex);
-
- }
-
- /* wait until job is ready */
- pthread_mutex_lock(&ctx->jobReady_mutex.pMutex);
- while (currJob + 1 > ctx->jobReadyID && !ctx->threadError) {
- pthread_cond_wait(&ctx->jobReady_cond.pCond, &ctx->jobReady_mutex.pMutex);
- }
- pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
-
- /* wait until job previously in this space is written */
- pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
- while (currJob - ctx->jobWriteID >= ctx->numJobs && !ctx->threadError) {
- pthread_cond_wait(&ctx->jobWrite_cond.pCond, &ctx->jobWrite_mutex.pMutex);
- }
- pthread_mutex_unlock(&ctx->jobWrite_mutex.pMutex);
- /* reset compression completion */
- pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
- ctx->compressionCompletion = 0;
- pthread_mutex_unlock(&ctx->compressionCompletion_mutex.pMutex);
-
- /* adapt compression level */
- if (currJob) adaptCompressionLevel(ctx);
-
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- DEBUG(2, "job %u compressed with level %u\n", currJob, ctx->compressionLevel);
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
-
- /* compress the data */
- {
- size_t const compressionBlockSize = ZSTD_BLOCKSIZE_MAX; /* 128 KB */
- unsigned cLevel;
- unsigned blockNum = 0;
- size_t remaining = job->src.size;
- size_t srcPos = 0;
- size_t dstPos = 0;
-
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- cLevel = ctx->compressionLevel;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
-
- /* reset compressed size */
- job->compressedSize = 0;
- DEBUG(2, "calling ZSTD_compressBegin()\n");
- /* begin compression */
- {
- size_t const useDictSize = MIN(getUseableDictSize(cLevel), job->dictSize);
- ZSTD_parameters params = ZSTD_getParams(cLevel, 0, useDictSize);
- params.cParams.windowLog = 23;
- {
- size_t const initError = ZSTD_compressBegin_advanced(ctx->cctx, job->src.start + job->dictSize - useDictSize, useDictSize, params, 0);
- size_t const windowSizeError = ZSTD_CCtx_setParameter(ctx->cctx, ZSTD_c_forceMaxWindow, 1);
- if (ZSTD_isError(initError) || ZSTD_isError(windowSizeError)) {
- DISPLAY("Error: something went wrong while starting compression\n");
- signalErrorToThreads(ctx);
- return arg;
- }
- }
- }
- DEBUG(2, "finished with ZSTD_compressBegin()\n");
-
- do {
- size_t const actualBlockSize = MIN(remaining, compressionBlockSize);
-
- /* continue compression */
- if (currJob != 0 || blockNum != 0) { /* not the first block of the first job: flush/overwrite the frame header */
- size_t const hSize = ZSTD_compressContinue(ctx->cctx, job->dst.start + dstPos, job->dst.capacity - dstPos, job->src.start + job->dictSize + srcPos, 0);
- if (ZSTD_isError(hSize)) {
- DISPLAY("Error: something went wrong while continuing compression\n");
- job->compressedSize = hSize;
- signalErrorToThreads(ctx);
- return arg;
- }
- ZSTD_invalidateRepCodes(ctx->cctx);
- }
- {
- size_t const ret = (job->lastJobPlusOne == currJob + 1 && remaining == actualBlockSize) ?
- ZSTD_compressEnd (ctx->cctx, job->dst.start + dstPos, job->dst.capacity - dstPos, job->src.start + job->dictSize + srcPos, actualBlockSize) :
- ZSTD_compressContinue(ctx->cctx, job->dst.start + dstPos, job->dst.capacity - dstPos, job->src.start + job->dictSize + srcPos, actualBlockSize);
- if (ZSTD_isError(ret)) {
- DISPLAY("Error: something went wrong during compression: %s\n", ZSTD_getErrorName(ret));
- signalErrorToThreads(ctx);
- return arg;
- }
- job->compressedSize += ret;
- remaining -= actualBlockSize;
- srcPos += actualBlockSize;
- dstPos += ret;
- blockNum++;
-
- /* update completion */
- pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
- ctx->compressionCompletion = 1 - (double)remaining/job->src.size;
- pthread_mutex_unlock(&ctx->compressionCompletion_mutex.pMutex);
- }
- } while (remaining != 0);
- job->dst.size = job->compressedSize;
- }
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- ctx->jobCompressedID++;
- pthread_cond_broadcast(&ctx->jobCompressed_cond.pCond);
- pthread_mutex_unlock(&ctx->jobCompressed_mutex.pMutex);
- if (job->lastJobPlusOne == currJob + 1 || ctx->threadError) {
- /* finished compressing all jobs */
- break;
- }
- DEBUG(2, "finished compressing job %u\n", currJob);
- currJob++;
- }
- return arg;
-}
-
-static void displayProgress(unsigned cLevel, unsigned last)
-{
- UTIL_time_t currTime = UTIL_getTime();
- if (!g_useProgressBar) return;
- { double const timeElapsed = (double)(UTIL_getSpanTimeMicro(g_startTime, currTime) / 1000.0);
- double const sizeMB = (double)g_streamedSize / (1 << 20);
- double const avgCompRate = sizeMB * 1000 / timeElapsed;
- fprintf(stderr, "\r| Comp. Level: %2u | Time Elapsed: %7.2f s | Data Size: %7.1f MB | Avg Comp. Rate: %6.2f MB/s |", cLevel, timeElapsed/1000.0, sizeMB, avgCompRate);
- if (last) {
- fprintf(stderr, "\n");
- } else {
- fflush(stderr);
- } }
-}
-
-static void* outputThread(void* arg)
-{
- outputThreadArg* const otArg = (outputThreadArg*)arg;
- adaptCCtx* const ctx = otArg->ctx;
- FILE* const dstFile = otArg->dstFile;
-
- unsigned currJob = 0;
- for ( ; ; ) {
- unsigned const currJobIndex = currJob % ctx->numJobs;
- jobDescription* const job = &ctx->jobs[currJobIndex];
- unsigned willWaitForCompress = 0;
- DEBUG(2, "starting write for job %u\n", currJob);
-
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- if (currJob + 1 > ctx->jobCompressedID) willWaitForCompress = 1;
- pthread_mutex_unlock(&ctx->jobCompressed_mutex.pMutex);
-
-
- pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
- if (willWaitForCompress) {
- /* write thread is waiting on compression thread */
- ctx->writeWaitCompressionCompletion = ctx->compressionCompletion;
- DEBUG(2, "writer thread waiting for nextJob: %u, writeWaitCompressionCompletion %f\n", currJob, ctx->writeWaitCompressionCompletion);
- }
- else {
- ctx->writeWaitCompressionCompletion = 1;
- }
- pthread_mutex_unlock(&ctx->compressionCompletion_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- while (currJob + 1 > ctx->jobCompressedID && !ctx->threadError) {
- pthread_cond_wait(&ctx->jobCompressed_cond.pCond, &ctx->jobCompressed_mutex.pMutex);
- }
- pthread_mutex_unlock(&ctx->jobCompressed_mutex.pMutex);
-
- /* reset write completion */
- pthread_mutex_lock(&ctx->writeCompletion_mutex.pMutex);
- ctx->writeCompletion = 0;
- pthread_mutex_unlock(&ctx->writeCompletion_mutex.pMutex);
-
- {
- size_t const compressedSize = job->compressedSize;
- size_t remaining = compressedSize;
- if (ZSTD_isError(compressedSize)) {
- DISPLAY("Error: an error occurred during compression\n");
- signalErrorToThreads(ctx);
- return arg;
- }
- {
- size_t const blockSize = MAX(compressedSize >> 7, 1 << 10);
- size_t pos = 0;
- for ( ; ; ) {
- size_t const writeSize = MIN(remaining, blockSize);
- size_t const ret = fwrite(job->dst.start + pos, 1, writeSize, dstFile);
- if (ret != writeSize) break;
- pos += ret;
- remaining -= ret;
-
- /* update completion variable for writing */
- pthread_mutex_lock(&ctx->writeCompletion_mutex.pMutex);
- ctx->writeCompletion = 1 - (double)remaining/compressedSize;
- pthread_mutex_unlock(&ctx->writeCompletion_mutex.pMutex);
-
- if (remaining == 0) break;
- }
- if (pos != compressedSize) {
- DISPLAY("Error: an error occurred during file write operation\n");
- signalErrorToThreads(ctx);
- return arg;
- }
- }
- }
- {
- unsigned cLevel;
- pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
- cLevel = ctx->compressionLevel;
- pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
- displayProgress(cLevel, job->lastJobPlusOne == currJob + 1);
- }
- pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
- ctx->jobWriteID++;
- pthread_cond_signal(&ctx->jobWrite_cond.pCond);
- pthread_mutex_unlock(&ctx->jobWrite_mutex.pMutex);
-
- if (job->lastJobPlusOne == currJob + 1 || ctx->threadError) {
- /* finished with all jobs */
- pthread_mutex_lock(&ctx->allJobsCompleted_mutex.pMutex);
- ctx->allJobsCompleted = 1;
- pthread_cond_signal(&ctx->allJobsCompleted_cond.pCond);
- pthread_mutex_unlock(&ctx->allJobsCompleted_mutex.pMutex);
- break;
- }
- DEBUG(2, "finished writing job %u\n", currJob);
- currJob++;
-
- }
- return arg;
-}
-
-static int createCompressionJob(adaptCCtx* ctx, size_t srcSize, int last)
-{
- unsigned const nextJob = ctx->nextJobID;
- unsigned const nextJobIndex = nextJob % ctx->numJobs;
- jobDescription* const job = &ctx->jobs[nextJobIndex];
-
-
- job->src.size = srcSize;
- job->jobID = nextJob;
- if (last) job->lastJobPlusOne = nextJob + 1;
- {
- /* swap buffer */
- void* const copy = job->src.start;
- job->src.start = ctx->input.buffer.start;
- ctx->input.buffer.start = copy;
- }
- job->dictSize = ctx->lastDictSize;
-
- ctx->nextJobID++;
- /* if not on the last job, reuse data as dictionary in next job */
- if (!last) {
- size_t const oldDictSize = ctx->lastDictSize;
- memcpy(ctx->input.buffer.start, job->src.start + oldDictSize, srcSize);
- ctx->lastDictSize = srcSize;
- ctx->input.filled = srcSize;
- }
-
- /* signal job ready */
- pthread_mutex_lock(&ctx->jobReady_mutex.pMutex);
- ctx->jobReadyID++;
- pthread_cond_signal(&ctx->jobReady_cond.pCond);
- pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
-
- return 0;
-}
-
-static int performCompression(adaptCCtx* ctx, FILE* const srcFile, outputThreadArg* otArg)
-{
- /* early error check to exit */
- if (!ctx || !srcFile || !otArg) {
- return 1;
- }
-
- /* create output thread */
- {
- pthread_t out;
- if (pthread_create(&out, NULL, &outputThread, otArg)) {
- DISPLAY("Error: could not create output thread\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- else if (pthread_detach(out)) {
- DISPLAY("Error: could not detach output thread\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- }
-
- /* create compression thread */
- {
- pthread_t compression;
- if (pthread_create(&compression, NULL, &compressionThread, ctx)) {
- DISPLAY("Error: could not create compression thread\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- else if (pthread_detach(compression)) {
- DISPLAY("Error: could not detach compression thread\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- }
- {
- unsigned currJob = 0;
- /* creating jobs */
- for ( ; ; ) {
- size_t pos = 0;
- size_t const readBlockSize = 1 << 15;
- size_t remaining = FILE_CHUNK_SIZE;
- unsigned const nextJob = ctx->nextJobID;
- unsigned willWaitForCompress = 0;
- DEBUG(2, "starting creation of job %u\n", currJob);
-
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- if (nextJob - ctx->jobCompressedID >= ctx->numJobs) willWaitForCompress = 1;
- pthread_mutex_unlock(&ctx->jobCompressed_mutex.pMutex);
-
- pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
- if (willWaitForCompress) {
- /* creation thread is waiting, take measurement of completion */
- ctx->createWaitCompressionCompletion = ctx->compressionCompletion;
- DEBUG(2, "create thread waiting for nextJob: %u, createWaitCompressionCompletion %f\n", nextJob, ctx->createWaitCompressionCompletion);
- }
- else {
- ctx->createWaitCompressionCompletion = 1;
- }
- pthread_mutex_unlock(&ctx->compressionCompletion_mutex.pMutex);
-
- /* wait until the job has been compressed */
- pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
- while (nextJob - ctx->jobCompressedID >= ctx->numJobs && !ctx->threadError) {
- pthread_cond_wait(&ctx->jobCompressed_cond.pCond, &ctx->jobCompressed_mutex.pMutex);
- }
- pthread_mutex_unlock(&ctx->jobCompressed_mutex.pMutex);
-
- /* reset create completion */
- pthread_mutex_lock(&ctx->createCompletion_mutex.pMutex);
- ctx->createCompletion = 0;
- pthread_mutex_unlock(&ctx->createCompletion_mutex.pMutex);
-
- while (remaining != 0 && !feof(srcFile)) {
- size_t const ret = fread(ctx->input.buffer.start + ctx->input.filled + pos, 1, readBlockSize, srcFile);
- if (ret != readBlockSize && !feof(srcFile)) {
- /* error could not read correct number of bytes */
- DISPLAY("Error: problem occurred during read from src file\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- pos += ret;
- remaining -= ret;
- pthread_mutex_lock(&ctx->createCompletion_mutex.pMutex);
- ctx->createCompletion = 1 - (double)remaining/((size_t)FILE_CHUNK_SIZE);
- pthread_mutex_unlock(&ctx->createCompletion_mutex.pMutex);
- }
- if (remaining != 0 && !feof(srcFile)) {
- DISPLAY("Error: problem occurred during read from src file\n");
- signalErrorToThreads(ctx);
- return 1;
- }
- g_streamedSize += pos;
- /* reading was fine, now create the compression job */
- {
- int const last = feof(srcFile);
- int const error = createCompressionJob(ctx, pos, last);
- if (error != 0) {
- signalErrorToThreads(ctx);
- return error;
- }
- }
- DEBUG(2, "finished creating job %u\n", currJob);
- currJob++;
- if (feof(srcFile)) {
- break;
- }
- }
- }
- /* success -- created all jobs */
- return 0;
-}
-
-static fcResources createFileCompressionResources(const char* const srcFilename, const char* const dstFilenameOrNull)
-{
- fcResources fcr;
- unsigned const stdinUsed = !strcmp(srcFilename, stdinmark);
- FILE* const srcFile = stdinUsed ? stdin : fopen(srcFilename, "rb");
- const char* const outFilenameIntermediate = (stdinUsed && !dstFilenameOrNull) ? stdoutmark : dstFilenameOrNull;
- const char* outFilename = outFilenameIntermediate;
- char fileAndSuffix[MAX_PATH];
- size_t const numJobs = MAX_NUM_JOBS;
-
- memset(&fcr, 0, sizeof(fcr));
-
- if (!outFilenameIntermediate) {
- if (snprintf(fileAndSuffix, MAX_PATH, "%s.zst", srcFilename) + 1 > MAX_PATH) {
- DISPLAY("Error: output filename is too long\n");
- return fcr;
- }
- outFilename = fileAndSuffix;
- }
-
- {
- unsigned const stdoutUsed = !strcmp(outFilename, stdoutmark);
- FILE* const dstFile = stdoutUsed ? stdout : fopen(outFilename, "wb");
- fcr.otArg = malloc(sizeof(outputThreadArg));
- if (!fcr.otArg) {
- DISPLAY("Error: could not allocate space for output thread argument\n");
- return fcr;
- }
- fcr.otArg->dstFile = dstFile;
- }
- /* checking for errors */
- if (!fcr.otArg->dstFile || !srcFile) {
- DISPLAY("Error: some file(s) could not be opened\n");
- return fcr;
- }
-
- /* creating context */
- fcr.ctx = createCCtx(numJobs);
- fcr.otArg->ctx = fcr.ctx;
- fcr.srcFile = srcFile;
- return fcr;
-}
-
-static int freeFileCompressionResources(fcResources* fcr)
-{
- int ret = 0;
- waitUntilAllJobsCompleted(fcr->ctx);
- ret |= (fcr->srcFile != NULL) ? fclose(fcr->srcFile) : 0;
- ret |= (fcr->ctx != NULL) ? freeCCtx(fcr->ctx) : 0;
- if (fcr->otArg) {
- ret |= (fcr->otArg->dstFile != stdout) ? fclose(fcr->otArg->dstFile) : 0;
- free(fcr->otArg);
- /* no need to freeCCtx() on otArg->ctx because it should be the same context */
- }
- return ret;
-}
-
-static int compressFilename(const char* const srcFilename, const char* const dstFilenameOrNull)
-{
- int ret = 0;
- fcResources fcr = createFileCompressionResources(srcFilename, dstFilenameOrNull);
- g_streamedSize = 0;
- ret |= performCompression(fcr.ctx, fcr.srcFile, fcr.otArg);
- ret |= freeFileCompressionResources(&fcr);
- return ret;
-}
-
-static int compressFilenames(const char** filenameTable, unsigned numFiles, unsigned forceStdout)
-{
- int ret = 0;
- unsigned fileNum;
- for (fileNum=0; fileNum<numFiles; fileNum++) {
- const char* filename = filenameTable[fileNum];
- if (!forceStdout) {
- ret |= compressFilename(filename, NULL);
- }
- else {
- ret |= compressFilename(filename, stdoutmark);
- }
-
- }
- return ret;
-}
-
-/*! readU32FromChar() :
- @return : unsigned integer value read from input in `char` format
- allows and interprets K, KB, KiB, M, MB and MiB suffix.
- Will also modify `*stringPtr`, advancing it to position where it stopped reading.
- Note : function result can overflow if digit string > MAX_UINT */
-static unsigned readU32FromChar(const char** stringPtr)
-{
- unsigned result = 0;
- while ((**stringPtr >='0') && (**stringPtr <='9'))
- result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
- if ((**stringPtr=='K') || (**stringPtr=='M')) {
- result <<= 10;
- if (**stringPtr=='M') result <<= 10;
- (*stringPtr)++ ;
- if (**stringPtr=='i') (*stringPtr)++;
- if (**stringPtr=='B') (*stringPtr)++;
- }
- return result;
-}
-
-static void help(const char* progPath)
-{
- PRINT("Usage:\n");
- PRINT(" %s [options] [file(s)]\n", progPath);
- PRINT("\n");
- PRINT("Options:\n");
- PRINT(" -oFILE : specify the output file name\n");
- PRINT(" -i# : provide initial compression level -- default %d, must be in the range [L, U] where L and U are bound values (see below for defaults)\n", DEFAULT_COMPRESSION_LEVEL);
- PRINT(" -h : display help/information\n");
- PRINT(" -f : force the compression level to stay constant\n");
- PRINT(" -c : force write to stdout\n");
- PRINT(" -p : hide progress bar\n");
- PRINT(" -q : quiet mode -- do not show progress bar or other information\n");
- PRINT(" -l# : provide lower bound for compression level -- default 1\n");
- PRINT(" -u# : provide upper bound for compression level -- default %u\n", ZSTD_maxCLevel());
-}
-/* return 0 if successful, else return error */
-int main(int argCount, const char* argv[])
-{
- const char* outFilename = NULL;
- const char** filenameTable = (const char**)malloc(argCount*sizeof(const char*));
- unsigned filenameIdx = 0;
- unsigned forceStdout = 0;
- unsigned providedInitialCLevel = 0;
- int ret = 0;
- int argNum;
- filenameTable[0] = stdinmark;
- g_maxCLevel = ZSTD_maxCLevel();
-
- if (filenameTable == NULL) {
- DISPLAY("Error: could not allocate space for filename table.\n");
- return 1;
- }
-
- for (argNum=1; argNum<argCount; argNum++) {
- const char* argument = argv[argNum];
-
- /* output filename designated with "-o" */
- if (argument[0]=='-' && strlen(argument) > 1) {
- switch (argument[1]) {
- case 'o':
- argument += 2;
- outFilename = argument;
- break;
- case 'i':
- argument += 2;
- g_compressionLevel = readU32FromChar(&argument);
- providedInitialCLevel = 1;
- break;
- case 'h':
- help(argv[0]);
- goto _main_exit;
- case 'p':
- g_useProgressBar = 0;
- break;
- case 'c':
- forceStdout = 1;
- outFilename = stdoutmark;
- break;
- case 'f':
- g_forceCompressionLevel = 1;
- break;
- case 'q':
- g_useProgressBar = 0;
- g_displayLevel = 0;
- break;
- case 'l':
- argument += 2;
- g_minCLevel = readU32FromChar(&argument);
- break;
- case 'u':
- argument += 2;
- g_maxCLevel = readU32FromChar(&argument);
- break;
- default:
- DISPLAY("Error: invalid argument provided\n");
- ret = 1;
- goto _main_exit;
- }
- continue;
- }
-
- /* regular files to be compressed */
- filenameTable[filenameIdx++] = argument;
- }
-
- /* check initial, max, and min compression levels */
- {
- unsigned const minMaxInconsistent = g_minCLevel > g_maxCLevel;
- unsigned const initialNotInRange = g_minCLevel > g_compressionLevel || g_maxCLevel < g_compressionLevel;
- if (minMaxInconsistent || (initialNotInRange && providedInitialCLevel)) {
- DISPLAY("Error: provided compression level parameters are invalid\n");
- ret = 1;
- goto _main_exit;
- }
- else if (initialNotInRange) {
- g_compressionLevel = g_minCLevel;
- }
- }
-
- /* error checking with number of files */
- if (filenameIdx > 1 && (outFilename != NULL && strcmp(outFilename, stdoutmark))) {
- DISPLAY("Error: multiple input files provided, cannot use specified output file\n");
- ret = 1;
- goto _main_exit;
- }
-
- /* compress files */
- if (filenameIdx <= 1) {
- ret |= compressFilename(filenameTable[0], outFilename);
- }
- else {
- ret |= compressFilenames(filenameTable, filenameIdx, forceStdout);
- }
-_main_exit:
- free(filenameTable);
- return ret;
-}
diff --git a/contrib/adaptive-compression/datagencli.c b/contrib/adaptive-compression/datagencli.c
deleted file mode 100644
index 5f3c93140493..000000000000
--- a/contrib/adaptive-compression/datagencli.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-
-/*-************************************
-* Dependencies
-**************************************/
-#include "util.h" /* Compiler options */
-#include <stdio.h> /* fprintf, stderr */
-#include "datagen.h" /* RDG_generate */
-
-
-/*-************************************
-* Constants
-**************************************/
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define SIZE_DEFAULT ((64 KB) + 1)
-#define SEED_DEFAULT 0
-#define COMPRESSIBILITY_DEFAULT 50
-
-
-/*-************************************
-* Macros
-**************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static unsigned displayLevel = 2;
-
-
-/*-*******************************************************
-* Command line
-*********************************************************/
-static int usage(const char* programName)
-{
- DISPLAY( "Compressible data generator\n");
- DISPLAY( "Usage :\n");
- DISPLAY( " %s [args]\n", programName);
- DISPLAY( "\n");
- DISPLAY( "Arguments :\n");
- DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT);
- DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT);
- DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n",
- COMPRESSIBILITY_DEFAULT);
- DISPLAY( " -h : display help and exit\n");
- return 0;
-}
-
-
-int main(int argc, const char** argv)
-{
- unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
- double litProba = 0.0;
- U64 size = SIZE_DEFAULT;
- U32 seed = SEED_DEFAULT;
- const char* const programName = argv[0];
-
- int argNb;
- for(argNb=1; argNb<argc; argNb++) {
- const char* argument = argv[argNb];
-
- if(!argument) continue; /* Protection if argument empty */
-
- /* Handle commands. Aggregated commands are allowed */
- if (*argument=='-') {
- argument++;
- while (*argument!=0) {
- switch(*argument)
- {
- case 'h':
- return usage(programName);
- case 'g':
- argument++;
- size=0;
- while ((*argument>='0') && (*argument<='9'))
- size *= 10, size += *argument++ - '0';
- if (*argument=='K') { size <<= 10; argument++; }
- if (*argument=='M') { size <<= 20; argument++; }
- if (*argument=='G') { size <<= 30; argument++; }
- if (*argument=='B') { argument++; }
- break;
- case 's':
- argument++;
- seed=0;
- while ((*argument>='0') && (*argument<='9'))
- seed *= 10, seed += *argument++ - '0';
- break;
- case 'P':
- argument++;
- probaU32 = 0;
- while ((*argument>='0') && (*argument<='9'))
- probaU32 *= 10, probaU32 += *argument++ - '0';
- if (probaU32>100) probaU32 = 100;
- break;
- case 'L': /* hidden argument : Literal distribution probability */
- argument++;
- litProba=0.;
- while ((*argument>='0') && (*argument<='9'))
- litProba *= 10, litProba += *argument++ - '0';
- if (litProba>100.) litProba=100.;
- litProba /= 100.;
- break;
- case 'v':
- displayLevel = 4;
- argument++;
- break;
- default:
- return usage(programName);
- }
- } } } /* for(argNb=1; argNb<argc; argNb++) */
-
- DISPLAYLEVEL(4, "Compressible data Generator \n");
- if (probaU32!=COMPRESSIBILITY_DEFAULT)
- DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32);
- DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed);
-
- RDG_genStdout(size, (double)probaU32/100, litProba, seed);
- DISPLAYLEVEL(1, "\n");
-
- return 0;
-}
diff --git a/contrib/adaptive-compression/test-correctness.sh b/contrib/adaptive-compression/test-correctness.sh
deleted file mode 100755
index 3bea867b9cc7..000000000000
--- a/contrib/adaptive-compression/test-correctness.sh
+++ /dev/null
@@ -1,252 +0,0 @@
-echo "correctness tests -- general"
-./datagen -s1 -g1GB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s2 -g500MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s3 -g250MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s4 -g125MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s5 -g50MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s6 -g25MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s7 -g10MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s8 -g5MB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s9 -g500KB > tmp
-./adapt -otmp.zst tmp
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- streaming"
-./datagen -s10 -g1GB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s11 -g100MB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s12 -g10MB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s13 -g1MB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s14 -g100KB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s15 -g10KB > tmp
-cat tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- read limit"
-./datagen -s16 -g1GB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s17 -g100MB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s18 -g10MB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s19 -g1MB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s20 -g100KB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s21 -g10KB > tmp
-pv -L 50m -q tmp | ./adapt > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- write limit"
-./datagen -s22 -g1GB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s23 -g100MB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s24 -g10MB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s25 -g1MB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s26 -g100KB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s27 -g10KB > tmp
-pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- read and write limits"
-./datagen -s28 -g1GB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s29 -g100MB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s30 -g10MB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s31 -g1MB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s32 -g100KB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s33 -g10KB > tmp
-pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- forced compression level"
-./datagen -s34 -g1GB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s35 -g100MB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s36 -g10MB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s37 -g1MB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s38 -g100KB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-./datagen -s39 -g10KB > tmp
-./adapt tmp -otmp.zst -i11 -f
-zstd -d tmp.zst -o tmp2
-diff -s -q tmp tmp2
-rm tmp*
-
-echo -e "\ncorrectness tests -- window size test"
-./datagen -s39 -g1GB | pv -L 25m -q | ./adapt -i1 | pv -q > tmp.zst
-zstd -d tmp.zst
-rm tmp*
-
-echo -e "\ncorrectness tests -- testing bounds"
-./datagen -s40 -g1GB | pv -L 25m -q | ./adapt -i1 -u4 | pv -q > tmp.zst
-rm tmp*
-
-./datagen -s41 -g1GB | ./adapt -i14 -l4 > tmp.zst
-rm tmp*
-make clean
diff --git a/contrib/adaptive-compression/test-performance.sh b/contrib/adaptive-compression/test-performance.sh
deleted file mode 100755
index 958cb3cc8cba..000000000000
--- a/contrib/adaptive-compression/test-performance.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-echo "testing time -- no limits set"
-./datagen -s1 -g1GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-rm tmp*
-
-./datagen -s2 -g2GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-rm tmp*
-
-./datagen -s3 -g4GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-rm tmp*
-
-echo -e "\ntesting compression ratio -- no limits set"
-./datagen -s4 -g1GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-ls -l tmp1.zst tmp2.zst
-rm tmp*
-
-./datagen -s5 -g2GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-ls -l tmp1.zst tmp2.zst
-rm tmp*
-
-./datagen -s6 -g4GB > tmp
-time ./adapt -otmp1.zst tmp
-time zstd -1 -o tmp2.zst tmp
-ls -l tmp1.zst tmp2.zst
-rm tmp*
-
-echo e "\ntesting performance at various compression levels -- no limits set"
-./datagen -s7 -g1GB > tmp
-echo "adapt"
-time ./adapt -i5 -f tmp -otmp1.zst
-echo "zstdcli"
-time zstd -5 tmp -o tmp2.zst
-ls -l tmp1.zst tmp2.zst
-rm tmp*
-
-./datagen -s8 -g1GB > tmp
-echo "adapt"
-time ./adapt -i10 -f tmp -otmp1.zst
-echo "zstdcli"
-time zstd -10 tmp -o tmp2.zst
-ls -l tmp1.zst tmp2.zst
-rm tmp*
-
-./datagen -s9 -g1GB > tmp
-echo "adapt"
-time ./adapt -i15 -f tmp -otmp1.zst
-echo "zstdcli"
-time zstd -15 tmp -o tmp2.zst
-ls -l tmp1.zst tmp2.zst
-rm tmp*
diff --git a/contrib/gen_html/.gitignore b/contrib/gen_html/.gitignore
deleted file mode 100644
index 34461142872a..000000000000
--- a/contrib/gen_html/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# make artefact
-gen_html
-zstd_manual.html
diff --git a/contrib/pzstd/.gitignore b/contrib/pzstd/.gitignore
deleted file mode 100644
index 84e68fb07d50..000000000000
--- a/contrib/pzstd/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# compilation result
-pzstd
diff --git a/contrib/seekable_format/examples/.gitignore b/contrib/seekable_format/examples/.gitignore
deleted file mode 100644
index 0b83f5e11c6c..000000000000
--- a/contrib/seekable_format/examples/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-seekable_compression
-seekable_decompression
-seekable_decompression_mem
-parallel_processing
-parallel_compression
diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile
index c1d2c4cc42fa..704f867661a7 100644
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@@ -1,15 +1,33 @@
+# ################################################################
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+ZSTD ?= zstd # note: requires zstd installation on local system
+
+UNAME?= $(shell uname)
+ifeq ($(UNAME), SunOS)
+DIFF ?= gdiff
+else
+DIFF ?= diff
+endif
+
HARNESS_FILES=*.c
MULTITHREAD_LDFLAGS = -pthread
DEBUGFLAGS= -g -DZSTD_DEBUG=1
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
-CFLAGS ?= -O3
+CFLAGS ?= -O2
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
- -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
- -Wstrict-prototypes -Wundef \
+ -Wstrict-aliasing=1 -Wswitch-enum \
+ -Wredundant-decls -Wstrict-prototypes -Wundef \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
- -Wredundant-decls
+ -std=c99
CFLAGS += $(DEBUGFLAGS)
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
@@ -18,17 +36,27 @@ harness: $(HARNESS_FILES)
$(CC) $(FLAGS) $^ -o $@
clean:
- @$(RM) -f harness
- @$(RM) -rf harness.dSYM
+ @$(RM) harness
+ @$(RM) -rf harness.dSYM # MacOS specific
test: harness
- @zstd README.md -o tmp.zst
+ #
+ # Testing single-file decompression with educational decoder
+ #
+ @$(ZSTD) -f README.md -o tmp.zst
@./harness tmp.zst tmp
- @diff -s tmp README.md
- @$(RM) -f tmp*
- @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
- @zstd -D dictionary README.md -o tmp.zst
+ @$(DIFF) -s tmp README.md
+ @$(RM) tmp*
+ #
+	# Testing dictionary decompression with educational decoder
+ #
+	# note : files are presented multiple times for training, to reach the minimum threshold
+ @$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \
+ harness.c zstd_decompress.c zstd_decompress.h README.md \
+ harness.c zstd_decompress.c zstd_decompress.h README.md \
+ -o dictionary
+ @$(ZSTD) -f README.md -D dictionary -o tmp.zst
@./harness tmp.zst tmp dictionary
- @diff -s tmp README.md
- @$(RM) -f tmp* dictionary
- @make clean
+ @$(DIFF) -s tmp README.md
+ @$(RM) tmp* dictionary
+ @$(MAKE) clean
diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c
index 47882b168953..a704f6bdb29f 100644
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@@ -21,88 +21,90 @@ typedef unsigned char u8;
// Protect against allocating too much memory for output
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
-u8 *input;
-u8 *output;
-u8 *dict;
-
-size_t read_file(const char *path, u8 **ptr) {
- FILE *f = fopen(path, "rb");
+static size_t read_file(const char *path, u8 **ptr)
+{
+ FILE* const f = fopen(path, "rb");
if (!f) {
- fprintf(stderr, "failed to open file %s\n", path);
+ fprintf(stderr, "failed to open file %s \n", path);
exit(1);
}
fseek(f, 0L, SEEK_END);
- size_t size = ftell(f);
+ size_t const size = (size_t)ftell(f);
rewind(f);
*ptr = malloc(size);
if (!ptr) {
- fprintf(stderr, "failed to allocate memory to hold %s\n", path);
+ fprintf(stderr, "failed to allocate memory to hold %s \n", path);
exit(1);
}
- size_t pos = 0;
- while (!feof(f)) {
- size_t read = fread(&(*ptr)[pos], 1, size, f);
- if (ferror(f)) {
- fprintf(stderr, "error while reading file %s\n", path);
- exit(1);
- }
- pos += read;
+ size_t const read = fread(*ptr, 1, size, f);
+ if (read != size) { /* must read everything in one pass */
+ fprintf(stderr, "error while reading file %s \n", path);
+ exit(1);
}
fclose(f);
- return pos;
+ return read;
}
-void write_file(const char *path, const u8 *ptr, size_t size) {
- FILE *f = fopen(path, "wb");
+static void write_file(const char *path, const u8 *ptr, size_t size)
+{
+ FILE* const f = fopen(path, "wb");
+ if (!f) {
+ fprintf(stderr, "failed to open file %s \n", path);
+ exit(1);
+ }
size_t written = 0;
while (written < size) {
- written += fwrite(&ptr[written], 1, size, f);
+ written += fwrite(ptr+written, 1, size, f);
if (ferror(f)) {
fprintf(stderr, "error while writing file %s\n", path);
exit(1);
- }
- }
+ } }
fclose(f);
}
-int main(int argc, char **argv) {
+int main(int argc, char **argv)
+{
if (argc < 3) {
- fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
+ fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary] \n",
argv[0]);
return 1;
}
- size_t input_size = read_file(argv[1], &input);
+ u8* input;
+ size_t const input_size = read_file(argv[1], &input);
+
+ u8* dict = NULL;
size_t dict_size = 0;
if (argc >= 4) {
dict_size = read_file(argv[3], &dict);
}
- size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
- if (decompressed_size == (size_t)-1) {
- decompressed_size = MAX_COMPRESSION_RATIO * input_size;
+ size_t out_capacity = ZSTD_get_decompressed_size(input, input_size);
+ if (out_capacity == (size_t)-1) {
+ out_capacity = MAX_COMPRESSION_RATIO * input_size;
fprintf(stderr, "WARNING: Compressed data does not contain "
"decompressed size, going to assume the compression "
"ratio is at most %d (decompressed size of at most "
- "%zu)\n",
- MAX_COMPRESSION_RATIO, decompressed_size);
+ "%u) \n",
+ MAX_COMPRESSION_RATIO, (unsigned)out_capacity);
}
- if (decompressed_size > MAX_OUTPUT_SIZE) {
+ if (out_capacity > MAX_OUTPUT_SIZE) {
fprintf(stderr,
- "Required output size too large for this implementation\n");
+ "Required output size too large for this implementation \n");
return 1;
}
- output = malloc(decompressed_size);
+
+ u8* const output = malloc(out_capacity);
if (!output) {
- fprintf(stderr, "failed to allocate memory\n");
+ fprintf(stderr, "failed to allocate memory \n");
return 1;
}
@@ -110,16 +112,17 @@ int main(int argc, char **argv) {
if (dict) {
parse_dictionary(parsed_dict, dict, dict_size);
}
- size_t decompressed =
- ZSTD_decompress_with_dict(output, decompressed_size,
- input, input_size, parsed_dict);
+ size_t const decompressed_size =
+ ZSTD_decompress_with_dict(output, out_capacity,
+ input, input_size,
+ parsed_dict);
free_dictionary(parsed_dict);
- write_file(argv[2], output, decompressed);
+ write_file(argv[2], output, decompressed_size);
free(input);
free(output);
free(dict);
- input = output = dict = NULL;
+ return 0;
}
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index 8e231bbb5a98..64e1b8738d06 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
/* this decoder assumes decompression of a single frame */
decode_frame(&out, &in, parsed_dict);
- return out.ptr - (u8 *)dst;
+ return (size_t)(out.ptr - (u8 *)dst);
}
/******* FRAME DECODING ******************************************************/
@@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
static void decode_frame(ostream_t *const out, istream_t *const in,
const dictionary_t *const dict) {
- const u32 magic_number = IO_read_bits(in, 32);
+ const u32 magic_number = (u32)IO_read_bits(in, 32);
// Zstandard frame
//
// "Magic_Number
@@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const header,
// 3 Reserved_bit
// 2 Content_Checksum_flag
// 1-0 Dictionary_ID_flag"
- const u8 descriptor = IO_read_bits(in, 8);
+ const u8 descriptor = (u8)IO_read_bits(in, 8);
// decode frame header descriptor into flags
const u8 frame_content_size_flag = descriptor >> 6;
@@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const header,
//
// Bit numbers 7-3 2-0
// Field name Exponent Mantissa"
- u8 window_descriptor = IO_read_bits(in, 8);
+ u8 window_descriptor = (u8)IO_read_bits(in, 8);
u8 exponent = window_descriptor >> 3;
u8 mantissa = window_descriptor & 7;
@@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const header,
const int bytes_array[] = {0, 1, 2, 4};
const int bytes = bytes_array[dictionary_id_flag];
- header->dictionary_id = IO_read_bits(in, bytes * 8);
+ header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
} else {
header->dictionary_id = 0;
}
@@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
//
// The next 2 bits represent the Block_Type, while the remaining 21 bits
// represent the Block_Size. Format is little-endian."
- last_block = IO_read_bits(in, 1);
- const int block_type = IO_read_bits(in, 2);
+ last_block = (int)IO_read_bits(in, 1);
+ const int block_type = (int)IO_read_bits(in, 2);
const size_t block_len = IO_read_bits(in, 21);
switch (block_type) {
@@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
// types"
//
// size_format takes between 1 and 2 bits
- int block_type = IO_read_bits(in, 2);
- int size_format = IO_read_bits(in, 2);
+ int block_type = (int)IO_read_bits(in, 2);
+ int size_format = (int)IO_read_bits(in, 2);
if (block_type <= 1) {
// Raw or RLE literals block
@@ -833,6 +833,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
// bits (0-1023)."
num_streams = 1;
// Fall through as it has the same size format
+ /* fallthrough */
case 1:
// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
// (0-1023)."
@@ -855,8 +856,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
// Impossible
IMPOSSIBLE();
}
- if (regenerated_size > MAX_LITERALS_SIZE ||
- compressed_size >= regenerated_size) {
+ if (regenerated_size > MAX_LITERALS_SIZE) {
CORRUPTION();
}
@@ -1005,7 +1005,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
+ 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
@@ -1021,7 +1021,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
/// Offset decoding is simpler so we just need a maximum code value
-static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
static void decompress_sequences(frame_context_t *const ctx,
istream_t *const in,
@@ -1132,7 +1132,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
const int padding = 8 - highest_set_bit(src[len - 1]);
// The offset starts at the end because FSE streams are read backwards
- i64 bit_offset = len * 8 - padding;
+ i64 bit_offset = (i64)(len * 8 - (size_t)padding);
// "The bitstream starts with initial state values, each using the required
// number of bits in their respective accuracy, decoded previously from
@@ -1409,7 +1409,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
// get decompressed size from ZSTD frame header
{
- const u32 magic_number = IO_read_bits(&in, 32);
+ const u32 magic_number = (u32)IO_read_bits(&in, 32);
if (magic_number == 0xFD2FB528U) {
// ZSTD frame
@@ -1418,7 +1418,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
if (header.frame_content_size == 0 && !header.single_segment_flag) {
// Content size not provided, we can't tell
- return -1;
+ return (size_t)-1;
}
return header.frame_content_size;
@@ -1529,7 +1529,7 @@ void free_dictionary(dictionary_t *const dict) {
/******* END DICTIONARY PARSING ***********************************************/
/******* IO STREAM OPERATIONS *************************************************/
-#define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
+
/// Reads `num` bits from a bitstream, and updates the internal offset
static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
if (num_bits > 64 || num_bits <= 0) {
@@ -1608,7 +1608,7 @@ static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) {
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
const u8 *const ptr = in->ptr;
in->ptr += len;
@@ -1634,7 +1634,7 @@ static inline void IO_advance_input(istream_t *const in, size_t len) {
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
in->ptr += len;
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h
index a01fde331fb8..74b18533850a 100644
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -7,6 +7,8 @@
* in the COPYING file in the root directory of this source tree).
*/
+#include <stddef.h> /* size_t */
+
/******* EXPOSED TYPES ********************************************************/
/*
* Contains the parsed contents of a dictionary
@@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
* Return a valid dictionary_t pointer for use with dictionary initialization
* or decompression
*/
-dictionary_t* create_dictionary();
+dictionary_t* create_dictionary(void);
/*
* Parse a provided dictionary blob for use in decompression
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 7d02426a50a0..90ac0fe9bcd5 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version
-0.3.2 (17/07/19)
+0.3.4 (16/08/19)
Introduction
@@ -358,6 +358,7 @@ It may be followed by an optional `Content_Checksum`
__`Block_Type`__
The next 2 bits represent the `Block_Type`.
+`Block_Type` influences the meaning of `Block_Size`.
There are 4 block types :
| Value | 0 | 1 | 2 | 3 |
@@ -384,9 +385,12 @@ There are 4 block types :
__`Block_Size`__
The upper 21 bits of `Block_Header` represent the `Block_Size`.
-`Block_Size` is the size of the block excluding the header.
-A block can contain any number of bytes (even zero), up to
-`Block_Maximum_Decompressed_Size`, which is the smallest of:
+When `Block_Type` is `Compressed_Block` or `Raw_Block`,
+`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.
+When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
+and `Block_Size` represents the number of times this byte must be repeated.
+A block can contain and decompress into any number of bytes (even zero),
+up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
- Window_Size
- 128 KB
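
As a hedged illustration of the wording above, the 3-byte little-endian `Block_Header` splits into its fields as in the following sketch; the struct and function names are illustrative and not part of the format itself:

```c
#include <stdint.h>

/* Illustrative sketch only: decode a 3-byte little-endian Block_Header.
 * bit 0 = Last_Block, bits 1-2 = Block_Type, bits 3-23 = Block_Size. */
typedef struct {
    int last_block;       /* 1 if this is the frame's last block */
    int block_type;       /* 0:Raw_Block, 1:RLE_Block, 2:Compressed_Block, 3:Reserved */
    uint32_t block_size;  /* content size; for RLE_Block, the repeat count */
} block_header_t;

static block_header_t parse_block_header(const uint8_t hdr[3])
{
    uint32_t const raw = (uint32_t)hdr[0]
                       | ((uint32_t)hdr[1] << 8)
                       | ((uint32_t)hdr[2] << 16);
    block_header_t bh;
    bh.last_block = (int)(raw & 1);
    bh.block_type = (int)((raw >> 1) & 3);
    bh.block_size = raw >> 3;
    return bh;
}
```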
@@ -1103,18 +1107,18 @@ It follows the following build rule :
The table has a size of `Table_Size = 1 << Accuracy_Log`.
Each cell describes the symbol decoded,
-and instructions to get the next state.
+and instructions to get the next state (`Number_of_Bits` and `Baseline`).
Symbols are scanned in their natural order for "less than 1" probabilities.
Symbols with this probability are being attributed a single cell,
starting from the end of the table and retreating.
These symbols define a full state reset, reading `Accuracy_Log` bits.
-All remaining symbols are allocated in their natural order.
-Starting from symbol `0` and table position `0`,
+Then, all remaining symbols, sorted in natural order, are allocated cells.
+Starting from symbol `0` (if it exists), and table position `0`,
each symbol gets allocated as many cells as its probability.
 Cell allocation is spread, not linear :
-each successor position follow this rule :
+each successor position follows this rule :
```
position += (tableSize>>1) + (tableSize>>3) + 3;
@@ -1126,40 +1130,41 @@ A position is skipped if already occupied by a "less than 1" probability symbol.
each position in the table, switching to the next symbol when enough
states have been allocated to the current one.
-The result is a list of state values.
-Each state will decode the current symbol.
+The process guarantees that the table is entirely filled.
+Each cell corresponds to a state value, which contains the symbol being decoded.
-To get the `Number_of_Bits` and `Baseline` required for next state,
-it's first necessary to sort all states in their natural order.
-The lower states will need 1 more bit than higher ones.
+To add the `Number_of_Bits` and `Baseline` required to retrieve next state,
+it's first necessary to sort all occurrences of each symbol in state order.
+Lower states will need 1 more bit than higher ones.
The process is repeated for each symbol.
__Example__ :
-Presuming a symbol has a probability of 5.
-It receives 5 state values. States are sorted in natural order.
+Presuming a symbol has a probability of 5,
+it receives 5 cells, corresponding to 5 state values.
+These state values are then sorted in natural order.
-Next power of 2 is 8.
-Space of probabilities is divided into 8 equal parts.
-Presuming the `Accuracy_Log` is 7, it defines 128 states.
+Next power of 2 after 5 is 8.
+Space of probabilities must be divided into 8 equal parts.
+Presuming the `Accuracy_Log` is 7, it defines a space of 128 states.
Divided by 8, each share is 16 large.
-In order to reach 8, 8-5=3 lowest states will count "double",
-doubling the number of shares (32 in width),
-requiring one more bit in the process.
+In order to reach 8 shares, 8-5=3 lowest states will count "double",
+doubling their shares (32 in width), hence requiring one more bit.
Baseline is assigned starting from the higher states using fewer bits,
-and proceeding naturally, then resuming at the first state,
-each takes its allocated width from Baseline.
-
-| state order | 0 | 1 | 2 | 3 | 4 |
-| ---------------- | ----- | ----- | ------ | ---- | ----- |
-| width | 32 | 32 | 32 | 16 | 16 |
-| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
-| range number | 2 | 4 | 6 | 0 | 1 |
-| `Baseline` | 32 | 64 | 96 | 0 | 16 |
-| range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
-
-The next state is determined from current state
+increasing at each state, then resuming at the first state,
+each state takes its allocated width from Baseline.
+
+| state value | 1 | 39 | 77 | 84 | 122 |
+| state order | 0 | 1 | 2 | 3 | 4 |
+| ---------------- | ----- | ----- | ------ | ---- | ------ |
+| width | 32 | 32 | 32 | 16 | 16 |
+| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
+| range number | 2 | 4 | 6 | 0 | 1 |
+| `Baseline` | 32 | 64 | 96 | 0 | 16 |
+| range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
+
+During decoding, the next state value is determined from current state value,
by reading the required `Number_of_Bits`, and adding the specified `Baseline`.
See [Appendix A] for the results of this process applied to the default distributions.
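
For readers who prefer code, here is a hedged sketch of the cell-spreading pass described above, loosely following the reference implementation's table-building loop; names are illustrative, and the `Number_of_Bits`/`Baseline` assignment is a separate step not shown:

```c
#include <stddef.h>

/* Sketch of symbol placement only. normalizedCounter[s] holds the normalized
 * probability of symbol s; -1 denotes a "less than 1" probability. */
static void spread_symbols(const short* normalizedCounter, unsigned maxSymbol,
                           unsigned tableLog, unsigned char* symbolTable)
{
    size_t const tableSize = (size_t)1 << tableLog;
    size_t const step = (tableSize >> 1) + (tableSize >> 3) + 3;
    size_t highThreshold = tableSize - 1;
    size_t position = 0;
    unsigned s;

    /* "less than 1" probability symbols: one cell each, from the end of the table */
    for (s = 0; s <= maxSymbol; s++) {
        if (normalizedCounter[s] == -1)
            symbolTable[highThreshold--] = (unsigned char)s;
    }

    /* remaining symbols, in natural order, each get `probability` cells */
    for (s = 0; s <= maxSymbol; s++) {
        int i;
        for (i = 0; i < normalizedCounter[s]; i++) {
            symbolTable[position] = (unsigned char)s;
            position = (position + step) & (tableSize - 1);
            while (position > highThreshold)     /* skip cells already reserved above */
                position = (position + step) & (tableSize - 1);
        }
    }
    /* the walk is guaranteed to end back at position 0 with the table full */
}
```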
@@ -1653,6 +1658,8 @@ or at least provide a meaningful error code explaining for which reason it canno
Version changes
---------------
+- 0.3.4 : clarifications for FSE decoding table
+- 0.3.3 : clarifications for field Block_Size
- 0.3.2 : remove additional block size restriction on compressed blocks
- 0.3.1 : minor clarification regarding offset history update rules
- 0.3.0 : minor edits to match RFC8478
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 6d8e74fe5274..43c5555b8ca8 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.2 Manual</title>
+<title>zstd 1.4.4 Manual</title>
</head>
<body>
-<h1>zstd 1.4.2 Manual</h1>
+<h1>zstd 1.4.4 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
@@ -27,10 +27,16 @@
<li><a href="#Chapter17">Advanced compression functions</a></li>
<li><a href="#Chapter18">Advanced decompression functions</a></li>
<li><a href="#Chapter19">Advanced streaming functions</a></li>
-<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
-<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
-<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
-<li><a href="#Chapter23">Block level API</a></li>
+<li><a href="#Chapter20">! ZSTD_initCStream_usingDict() :</a></li>
+<li><a href="#Chapter21">! ZSTD_initCStream_advanced() :</a></li>
+<li><a href="#Chapter22">! ZSTD_initCStream_usingCDict() :</a></li>
+<li><a href="#Chapter23">! ZSTD_initCStream_usingCDict_advanced() :</a></li>
+<li><a href="#Chapter24">This function is deprecated, and is equivalent to:</a></li>
+<li><a href="#Chapter25">This function is deprecated, and is equivalent to:</a></li>
+<li><a href="#Chapter26">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter27">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter28">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter29">Block level API</a></li>
</ol>
<hr>
<a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -157,9 +163,13 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
-</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx
- The function will compress at requested compression level,
- ignoring any other parameter
+</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ Important : in order to behave similarly to `ZSTD_compress()`,
+ this function compresses at requested compression level,
+ __ignoring any other parameter__ .
+ If any advanced parameter was set using the advanced API,
+ they will all be reset. Only `compressionLevel` remains.
+
</p></pre><BR>
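
A minimal usage sketch for this prototype, assuming only the stable `<zstd.h>` API (variable names and the payload are illustrative):

```c
#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

int main(void)
{
    const char src[] = "an example payload, repeated payload, repeated payload";
    size_t const bound = ZSTD_compressBound(sizeof(src));
    void* const dst = malloc(bound);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();   /* reuse across many calls */
    if (dst == NULL || cctx == NULL) return 1;

    /* compresses at level 3 only; any previously set advanced parameters are reset */
    size_t const cSize = ZSTD_compressCCtx(cctx, dst, bound, src, sizeof(src), 3);
    if (ZSTD_isError(cSize)) {
        fprintf(stderr, "error: %s\n", ZSTD_getErrorName(cSize));
        return 1;
    }
    printf("compressed %zu -> %zu bytes\n", sizeof(src), cSize);

    ZSTD_freeCCtx(cctx);
    free(dst);
    return 0;
}
```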
<h3>Decompression context</h3><pre> When decompressing many times,
@@ -199,18 +209,26 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</b>/* compression parameters<b>
* Note: When compressing with a ZSTD_CDict these parameters are superseded
- * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
- * for more info (superseded-by-cdict). */
- ZSTD_c_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
+ * by the parameters used to construct the ZSTD_CDict.
+ * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+ ZSTD_c_compressionLevel=100, </b>/* Set compression parameters according to pre-defined cLevel table.<b>
+ * Note that exact compression parameters are dynamically determined,
+ * depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
- * Note 2 : setting a level sets all default values of other compression parameters */
+ * Note 2 : setting a level resets all other compression parameters to default */
+ </b>/* Advanced compression parameters :<b>
+ * It's possible to pin down compression parameters to some specific values.
+ * In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
+ * This will set a memory budget for streaming decompression,
+ * with larger values requiring more memory
+ * and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
- * requires explicitly allowing such window size at decompression stage if using streaming. */
+ * requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, </b>/* Size of the initial probe table, as a power of 2.<b>
* Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
@@ -221,13 +239,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
- * This parameter is useless when using "fast" strategy.
+ * This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, </b>/* Number of search attempts, as a power of 2.<b>
* More attempts result in better and slower compression.
- * This parameter is useless when using "fast" and "dFast" strategies.
+ * This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, </b>/* Minimum size of searched matches.<b>
* Note that Zstandard can still find matches of smaller size,
@@ -282,7 +300,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_c_contentSizeFlag=200, </b>/* Content size will be written into frame header _whenever known_ (default:1)<b>
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
- * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+ * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, </b>/* A 32-bits checksum of content is written at end of frame (default:0) */<b>
ZSTD_c_dictIDFlag=202, </b>/* When applicable, dictionary's ID is written into frame header (default:1) */<b>
@@ -301,7 +319,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
- * The minimum size is automatically and transparently enforced */
+ * The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, </b>/* Control the overlap size, as a fraction of window size.<b>
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
@@ -324,6 +342,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
+ * ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -334,6 +353,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
+ ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
</b></pre><BR>
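
As a hedged sketch of how these parameters are typically pinned before a `ZSTD_compress2()` call (the chosen values and helper name are illustrative only):

```c
#include <zstd.h>

static size_t compress_with_pinned_params(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize)
{
    /* set the level first, since setting a level resets other compression parameters */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    /* pinned values are no longer selected dynamically by the compressor */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 23);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
    /* each setter returns an error code; check with ZSTD_isError() in real code */
    return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
}
```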
<pre><b>typedef struct {
@@ -672,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
-</b><p> When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+</b><p> When compressing multiple messages or blocks using the same dictionary,
+ it's recommended to digest the dictionary only once, since it's a costly operation.
+ ZSTD_createCDict() will create a state from digesting a dictionary.
+ The resulting state can be used for future compression operations with very limited startup cost.
ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data.
+ @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ in which case the only thing that it transports is the @compressionLevel.
+ This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.
</p></pre><BR>
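
A hedged sketch of the digest-once pattern described above (error handling kept minimal; the helper name is illustrative):

```c
#include <zstd.h>

static size_t compress_with_cdict(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const void* dictBuffer, size_t dictSize)
{
    /* digest the dictionary once; dictBuffer may be released afterwards */
    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3);
    if (cdict == NULL) return (size_t)-1;   /* treat as failure in this sketch */

    size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                  src, srcSize, cdict);
    ZSTD_freeCDict(cdict);   /* in real use, keep the CDict and share it across threads */
    return cSize;            /* check with ZSTD_isError() */
}
```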
<pre><b>size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -794,7 +819,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
It's a CPU consuming operation, with non-negligible impact on latency.
If there is a need to use the same prefix multiple times, consider loadDictionary instead.
- Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
+ Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation.
</p></pre><BR>
@@ -838,7 +863,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
Prefix buffer must remain unmodified up to the end of frame,
reached when ZSTD_decompressStream() returns 0.
- Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
A full dictionary is more costly, as it requires building tables.
@@ -865,6 +890,24 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
<BR></pre>
<pre><b>typedef struct {
+ unsigned int matchPos; </b>/* Match pos in dst */<b>
+ </b>/* If seqDef.offset > 3, then this is seqDef.offset - 3<b>
+ * If seqDef.offset < 3, then this is the corresponding repeat offset
+ * But if seqDef.offset < 3 and litLength == 0, this is the
+ * repeat offset before the corresponding repeat offset
+ * And if seqDef.offset == 3 and litLength == 0, this is the
+ * most recent repeat offset - 1
+ */
+ unsigned int offset;
+ unsigned int litLength; </b>/* Literal length */<b>
+ unsigned int matchLength; </b>/* Match length */<b>
+ </b>/* 0 when seq not rep and seqDef.offset otherwise<b>
+ * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+ */
+ unsigned int rep;
+} ZSTD_Sequence;
+</b></pre><BR>
+<pre><b>typedef struct {
unsigned windowLog; </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b>
unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b>
@@ -893,21 +936,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
</b></pre><BR>
<pre><b>typedef enum {
ZSTD_dlm_byCopy = 0, </b>/**< Copy dictionary content internally */<b>
- ZSTD_dlm_byRef = 1, </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
+ ZSTD_dlm_byRef = 1 </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
} ZSTD_dictLoadMethod_e;
</b></pre><BR>
<pre><b>typedef enum {
- </b>/* Opened question : should we have a format ZSTD_f_auto ?<b>
- * Today, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats become supported,
- * on the compression side, it would mean "default format".
- * On the decompression side, it would mean "automatic format detection",
- * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
- * Since meaning is a little different, another option could be to define different enums for compression and decompression.
- * This question could be kept for later, when there are actually multiple formats to support,
- * but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, </b>/* zstd frame format, specified in zstd_compression_format.md (default) */<b>
- ZSTD_f_zstd1_magicless = 1, </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b>
+ ZSTD_f_zstd1_magicless = 1 </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b>
* Useful to save 4 bytes per generated frame.
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
@@ -918,7 +952,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
- * Zstd currently supports the use of a CDict in two ways:
+ * Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
@@ -934,6 +968,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
+ * - The CDict's tables are not used at all, and instead we use the working
+ * context alone to reload the dictionary and use params based on the source
+ * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+ * This method is effective when the dictionary sizes are very small relative
+ * to the input size, and the input size is fairly large to begin with.
+ *
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
@@ -942,6 +982,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
ZSTD_dictDefaultAttach = 0, </b>/* Use the default heuristic. */<b>
ZSTD_dictForceAttach = 1, </b>/* Never copy the dictionary. */<b>
ZSTD_dictForceCopy = 2, </b>/* Always copy the dictionary. */<b>
+ ZSTD_dictForceLoad = 3 </b>/* Always reload the dictionary */<b>
} ZSTD_dictAttachPref_e;
</b></pre><BR>
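
If experimentation shows the heuristic choosing poorly, the override can look roughly like the following sketch; it assumes the experimental alias `ZSTD_c_forceAttachDict` (exposed through `ZSTD_STATIC_LINKING_ONLY`) is available, and the helper name is illustrative:

```c
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_dictAttachPref_e and ZSTD_c_forceAttachDict are experimental */
#include <zstd.h>

static void prefer_attach(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_CCtx_refCDict(cctx, cdict);   /* reuse the digested dictionary */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
}
```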
<pre><b>typedef enum {
@@ -950,7 +991,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* levels will be compressed. */
ZSTD_lcm_huffman = 1, </b>/**< Always attempt Huffman compression. Uncompressed literals will still be<b>
* emitted if Huffman compression is not profitable. */
- ZSTD_lcm_uncompressed = 2, </b>/**< Always emit uncompressed literals. */<b>
+ ZSTD_lcm_uncompressed = 2 </b>/**< Always emit uncompressed literals. */<b>
} ZSTD_literalCompressionMode_e;
</b></pre><BR>
<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
@@ -999,20 +1040,38 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
or an error code (if srcSize is too small)
</p></pre><BR>
+<pre><b>size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+</b><p> Extract sequences from the sequence store
+ zc can be used to insert custom compression params.
+ This function invokes ZSTD_compress2
+ @return : number of sequences extracted
+
+</p></pre><BR>
+
<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
size_t ZSTD_estimateDCtxSize(void);
-</b><p> These functions make it possible to estimate memory usage
- of a future {D,C}Ctx, before its creation.
- ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
- It will also consider src size to be arbitrarily "large", which is worst case.
- If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- Note : CCtx size estimation is only correct for single-threaded compression.
+</b><p> These functions make it possible to estimate memory usage of a future
+ {D,C}Ctx, before its creation.
+
+ ZSTD_estimateCCtxSize() will provide a budget large enough for any
+ compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
+ this estimate does not include space for a window buffer, so this estimate
+ is guaranteed to be enough for single-shot compressions, but not streaming
+ compressions. It will however assume the input may be arbitrarily large,
+ which is the worst case. If srcSize is known to always be small,
+ ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
+ ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
+ ZSTD_getCParams() to create cParams from compressionLevel.
+ ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
+ ZSTD_CCtxParams_setParameter().
+
+ Note: only single-threaded compression is supported. This function will
+ return an error code if ZSTD_c_nbWorkers is >= 1.
</p></pre><BR>
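
A hedged sketch of using the estimate with a statically allocated context; `ZSTD_initStaticCCtx()` is experimental API behind `ZSTD_STATIC_LINKING_ONLY`, and the helper name is illustrative:

```c
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <stdlib.h>

/* Budget a single-shot compression context for any level up to maxLevel.
 * The estimate excludes the streaming window buffer (see ZSTD_estimateCStreamSize*). */
static ZSTD_CCtx* make_static_cctx(int maxLevel, void** workspaceOut)
{
    size_t const budget = ZSTD_estimateCCtxSize(maxLevel);
    void* const workspace = malloc(budget);
    if (workspace == NULL) return NULL;
    *workspaceOut = workspace;
    /* returns NULL if the workspace is too small or misaligned */
    return ZSTD_initStaticCCtx(workspace, budget);
}
```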
<pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel);
@@ -1085,7 +1144,8 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
</b><p> Create a digested dictionary for compression
Dictionary content is just referenced, not duplicated.
As a consequence, `dictBuffer` **must** outlive CDict,
- and its content must remain unmodified throughout the lifetime of CDict.
+ and its content must remain unmodified throughout the lifetime of CDict.
+ note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef
</p></pre><BR>
<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
@@ -1116,7 +1176,9 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params);
-</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure)
+</b><p> Note : this function is now DEPRECATED.
+ It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x
</p></pre><BR>
<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -1124,7 +1186,9 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fParams);
-</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
+</b><p> Note : this function is now REDUNDANT.
+ It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ This prototype will be marked as deprecated and generate compilation warning in some future version
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
@@ -1301,53 +1365,67 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
-</b>/**! ZSTD_initCStream_usingDict() :<b>
- * This function is deprecated, and is equivalent to:
- * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
- * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
- *
- * Creates of an internal CDict (incompatible with static CCtx), except if
- * dict == NULL or dictSize < 8, in which case no dict is used.
- * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
- * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- */
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
-</b>/**! ZSTD_initCStream_advanced() :<b>
- * This function is deprecated, and is approximately equivalent to:
- * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
- * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
- *
- * pledgedSrcSize must be correct. If srcSize is not known at init time, use
- * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
- */
-size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
- ZSTD_parameters params, unsigned long long pledgedSrcSize);
-</b>/**! ZSTD_initCStream_usingCDict() :<b>
- * This function is deprecated, and equivalent to:
- * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_refCDict(zcs, cdict);
- *
- * note : cdict will just be referenced, and must outlive compression session
- */
-size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
-</b>/**! ZSTD_initCStream_usingCDict_advanced() :<b>
- * This function is deprecated, and is approximately equivalent to:
- * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
- * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- * ZSTD_CCtx_refCDict(zcs, cdict);
- *
- * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
- * pledgedSrcSize must be correct. If srcSize is not known at init time, use
- * value ZSTD_CONTENTSIZE_UNKNOWN.
- */
-size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
+size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ int compressionLevel,
+ unsigned long long pledgedSrcSize);
</pre></b><BR>
+<a name="Chapter20"></a><h2>! ZSTD_initCStream_usingDict() :</h2><pre> This function is deprecated, and is equivalent to:
+ ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+
+ Creates an internal CDict (incompatible with static CCtx), except if
+ dict == NULL or dictSize < 8, in which case no dict is used.
+ Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
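
A hedged sketch of the replacement sequence above, driving a single pass of `ZSTD_compressStream2()`; buffers are assumed to be provided by the caller, and a real loop would keep calling until the return value is 0:

```c
#include <zstd.h>

static size_t stream_compress_with_dict(ZSTD_CCtx* zcs,
                                        const void* dict, size_t dictSize,
                                        int compressionLevel,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
{
    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
    ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
    ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);   /* no dict if NULL or dictSize < 8 */

    ZSTD_inBuffer  input  = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };
    size_t const remaining = ZSTD_compressStream2(zcs, &output, &input, ZSTD_e_end);
    return ZSTD_isError(remaining) ? remaining : output.pos;
}
```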
+
+<a name="Chapter21"></a><h2>! ZSTD_initCStream_advanced() :</h2><pre> This function is deprecated, and is approximately equivalent to:
+ ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ for ((param, value) : params) {
+ ZSTD_CCtx_setParameter(zcs, param, value);
+ }
+ ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+
+ dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ pledgedSrcSize must be correct.
+ If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
+
+<a name="Chapter22"></a><h2>! ZSTD_initCStream_usingCDict() :</h2><pre> This function is deprecated, and equivalent to:
+ ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ ZSTD_CCtx_refCDict(zcs, cdict);
+
+ note : cdict will just be referenced, and must outlive compression session
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
+
+<a name="Chapter23"></a><h2>! ZSTD_initCStream_usingCDict_advanced() :</h2><pre> This function is DEPRECATED, and is approximately equivalent to:
+ ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ for ((fParam, value) : fParams) {
+ ZSTD_CCtx_setParameter(zcs, fParam, value);
+ }
+ ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ ZSTD_CCtx_refCDict(zcs, cdict);
+
+ same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ value ZSTD_CONTENTSIZE_UNKNOWN.
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
+
<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
</b><p> This function is deprecated, and is equivalent to:
ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -1361,6 +1439,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
@return : 0, or an error code (which can be tested using ZSTD_isError())
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
</p></pre><BR>
@@ -1395,34 +1474,35 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
-</b>/**<b>
- * This function is deprecated, and is equivalent to:
- *
- * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- * ZSTD_DCtx_refDDict(zds, ddict);
- *
- * note : ddict is referenced, it must outlive decompression session
- */
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
-</b>/**<b>
- * This function is deprecated, and is equivalent to:
- *
- * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- *
- * re-use decompression parameters from previous init; saves dictionary loading
- */
-size_t ZSTD_resetDStream(ZSTD_DStream* zds);
</pre></b><BR>
-<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+<a name="Chapter24"></a><h2>This function is deprecated, and is equivalent to:</h2><pre>
+ ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ ZSTD_DCtx_refDDict(zds, ddict);
+
+ note : ddict is referenced, it must outlive decompression session
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
+
+<a name="Chapter25"></a><h2>This function is deprecated, and is equivalent to:</h2><pre>
+ ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+
+ re-use decompression parameters from previous init; saves dictionary loading
+ Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+
+<BR></pre>
+
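On the decompression side the same migration collapses to two calls; a sketch, with the helper name being illustrative:

    /* Replacement for ZSTD_initDStream_usingDDict() / ZSTD_resetDStream(). */
    static size_t begin_decompression(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
    {
        size_t const err = ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
        if (ZSTD_isError(err)) return err;
        /* ddict is only referenced and must outlive the session;
         * pass NULL to return to no-dictionary mode */
        return ZSTD_DCtx_refDDict(zds, ddict);
    }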
+<a name="Chapter26"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
But it's also a complex one, with several restrictions, documented below.
Prefer normal streaming API for an easier experience.
<BR></pre>
-<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+<a name="Chapter27"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -1458,7 +1538,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
</pre></b><BR>
-<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+<a name="Chapter28"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
A ZSTD_DCtx object can be re-used multiple times.
@@ -1554,10 +1634,10 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
</b></pre><BR>
-<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
+<a name="Chapter29"></a><h2>Block level API</h2><pre></pre>
-<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
- User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+<pre><b></b><p> Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+ But users will have to keep track of the metadata needed to regenerate the data, such as compressed and content sizes.
A few rules to respect :
- Compressing and decompressing require a context structure
@@ -1568,12 +1648,14 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
+ copyCCtx() and copyDCtx() can be used too
- Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+ If input is larger than a block size, it's necessary to split input data into multiple blocks
- + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
- Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
- In which case, nothing is produced into `dst` !
- + User must test for such outcome and deal directly with uncompressed data
- + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+ + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+ - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+ ===> In which case, nothing is produced into `dst` !
+ + User __must__ test for such outcome and deal directly with uncompressed data
+ + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+ Doing so would desynchronize the statistics history, leading to potential data corruption.
+ + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+ In case of multiple successive blocks, should some of them be uncompressed,
decoder must be informed of their existence in order to follow proper history.
Use ZSTD_insertBlock() for such a case.
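A hedged sketch of the incompressible-block rule above: a hypothetical wrapper that stores the block raw when ZSTD_compressBlock() returns 0, leaving it to the caller to record which blocks are raw and to feed them to ZSTD_insertBlock() on the decompression side. It assumes the cctx was already initialized with a ZSTD_compressBegin*() variant and that dstCapacity >= srcSize:

    #define ZSTD_STATIC_LINKING_ONLY   /* block-level API lives in the experimental section */
    #include <zstd.h>
    #include <string.h>

    static size_t compress_one_block(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize, int* isRaw)
    {
        size_t const cSize = ZSTD_compressBlock(cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize)) return cSize;
        if (cSize == 0) {                  /* block judged not compressible */
            memcpy(dst, src, srcSize);     /* store it uncompressed instead */
            *isRaw = 1;
            return srcSize;
        }
        *isRaw = 0;
        return cSize;
    }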
diff --git a/examples/.gitignore b/examples/.gitignore
deleted file mode 100644
index d682cae38a8d..000000000000
--- a/examples/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-#build
-simple_compression
-simple_decompression
-multiple_simple_compression
-dictionary_compression
-dictionary_decompression
-streaming_compression
-streaming_decompression
-multiple_streaming_compression
-streaming_memory_usage
-
-#test artefact
-tmp*
-test*
-*.zst
diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c
index d1353a684a62..d0b04895f018 100644
--- a/examples/streaming_compression.c
+++ b/examples/streaming_compression.c
@@ -44,8 +44,8 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve
* and writes all output produced to the output file.
*/
size_t const toRead = buffInSize;
- size_t read;
- while ((read = fread_orDie(buffIn, toRead, fin))) {
+ for (;;) {
+ size_t read = fread_orDie(buffIn, toRead, fin);
/* Select the flush mode.
* If the read may not be finished (read == toRead) we use
* ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end.
@@ -76,6 +76,10 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve
} while (!finished);
CHECK(input.pos == input.size,
"Impossible: zstd only returns 0 when the input is completely consumed!");
+
+ if (lastChunk) {
+ break;
+ }
}
ZSTD_freeCCtx(cctx);
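The hunks above only show fragments of the reworked read loop; a condensed sketch of the resulting control flow (variable names follow the example file, CHECK/CHECK_ZSTD error checks elided):

    for (;;) {
        size_t const read = fread_orDie(buffIn, toRead, fin);
        int const lastChunk = (read < toRead);
        ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
        ZSTD_inBuffer input = { buffIn, read, 0 };
        int finished;
        do {
            ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
            size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
            fwrite_orDie(buffOut, output.pos, fout);
            finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
        } while (!finished);
        if (lastChunk) break;   /* a short (possibly zero-byte) final read ends the loop */
    }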
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index bcd861b756c1..d26b45b34c74 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -34,7 +34,10 @@ static void decompressFile_orDie(const char* fname)
*/
size_t const toRead = buffInSize;
size_t read;
+ size_t lastRet = 0;
+ int isEmpty = 1;
while ( (read = fread_orDie(buffIn, toRead, fin)) ) {
+ isEmpty = 0;
ZSTD_inBuffer input = { buffIn, read, 0 };
/* Given a valid frame, zstd won't consume the last byte of the frame
* until it has flushed all of the decompressed data of the frame.
@@ -53,9 +56,24 @@ static void decompressFile_orDie(const char* fname)
size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
CHECK_ZSTD(ret);
fwrite_orDie(buffOut, output.pos, fout);
+ lastRet = ret;
}
}
+ if (isEmpty) {
+ fprintf(stderr, "input is empty\n");
+ exit(1);
+ }
+
+ if (lastRet != 0) {
+ /* The last return value from ZSTD_decompressStream did not end on a
+ * frame, but we reached the end of the file! We assume this is an
+ * error, and the input was truncated.
+ */
+ fprintf(stderr, "EOF before end of stream: %zu\n", lastRet);
+ exit(1);
+ }
+
ZSTD_freeDCtx(dctx);
fclose_orDie(fin);
fclose_orDie(fout);
diff --git a/lib/.gitignore b/lib/.gitignore
deleted file mode 100644
index 4cd50ac61e52..000000000000
--- a/lib/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# make install artefact
-libzstd.pc
-libzstd-nomt
diff --git a/lib/Makefile b/lib/Makefile
index 87a396c53ebd..273ceb90490d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -244,8 +244,6 @@ libzstd.pc:
libzstd.pc: libzstd.pc.in
@echo creating pkgconfig
@sed -e 's|@PREFIX@|$(PREFIX)|' \
- -e 's|@LIBDIR@|$(LIBDIR)|' \
- -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
-e 's|@VERSION@|$(VERSION)|' \
$< >$@
diff --git a/lib/README.md b/lib/README.md
index 792729b1f9ba..0062c0d63e04 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
Both conditions are automatically applied when invoking `make lib-mt` target.
When linking a POSIX program with a multithreaded version of `libzstd`,
-note that it's necessary to request the `-pthread` flag during link stage.
+note that it's necessary to pass the `-pthread` flag during the link stage.
Multithreading capabilities are exposed
-via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
+via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
#### API
@@ -112,6 +112,17 @@ The file structure is designed to make this selection manually achievable for an
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
the shared library, which is now hidden by default.
+- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
+ which can detect at runtime the presence of BMI2 instructions, and use them only if present.
+ These instructions contribute to better performance, notably on the decoder side.
+ By default, this feature is automatically enabled on detecting
+ the right instruction set (x64) and compiler (clang or gcc >= 5).
+ It's naturally disabled on other CPUs,
+ or when the BMI2 instruction set is _required_ by the compiler command line
+ (in this case, only the BMI2 code path is generated).
+ Setting this macro will either force to generate the BMI2 dispatcher (1)
+ or prevent it (0). It overrides automatic detection.
+
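For illustration only, and not the actual libzstd implementation: the kind of runtime dispatch this macro controls looks roughly like the sketch below, where a generic body and a BMI2-targeted clone coexist and a CPU-feature probe selects between them. All names here are hypothetical; the probe uses the GCC/Clang builtin:

    #include <stddef.h>

    /* hypothetical generic body */
    static size_t body_generic(const void* src, size_t n) { (void)src; return n; }
    /* hypothetical BMI2-targeted clone (same logic, compiled with BMI2 enabled) */
    __attribute__((target("bmi2")))
    static size_t body_bmi2(const void* src, size_t n) { (void)src; return n; }

    static size_t body_dispatch(const void* src, size_t n)
    {
    #if DYNAMIC_BMI2
        if (__builtin_cpu_supports("bmi2")) return body_bmi2(src, n);  /* fast path if detected */
    #endif
        return body_generic(src, n);
    }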
#### Windows : using MinGW+MSYS to create DLL
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index d955bd677b5b..1c294b80d13f 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -57,6 +57,8 @@ extern "C" {
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+# include <intrinsics.h>
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@@ -162,7 +164,9 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
+# elif defined(__ICCARM__) /* IAR Intrinsic */
+ return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
@@ -240,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
- assert(bitC->ptr <= bitC->endPtr);
bitC->bitPos &= 7;
bitC->bitContainer >>= nbBytes*8;
}
@@ -256,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index 87bf51ae8c89..1877a0c1d9be 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -23,7 +23,7 @@
# define INLINE_KEYWORD
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@@ -61,11 +61,18 @@
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+# define UNUSED_ATTR __attribute__((unused))
+#else
+# define UNUSED_ATTR
+#endif
+
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
-# ifdef __GNUC__
+# if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
@@ -76,7 +83,7 @@
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
@@ -127,9 +134,14 @@
} \
}
-/* vectorization */
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__clang__) && defined(__GNUC__)
-# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
#else
# define DONT_VECTORIZE
#endif
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 811c670bddc3..a7553e3721c4 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
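A two-line illustration of the static-allocation use mentioned in the comment above (the source size is arbitrary, and fse.h is the library-internal header):

    #define MY_SRC_SIZE 4096
    static unsigned char fseDstBuffer[FSE_COMPRESSBOUND(MY_SRC_SIZE)];  /* always large enough */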
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index 72bbead5beea..4f0737898209 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -52,7 +52,9 @@
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* check and forward error code */
+#ifndef CHECK_F
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+#endif
/* **************************************************************
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 5da248756ffd..530d30c8f758 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -47,6 +47,79 @@ extern "C" {
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+# if __has_feature(memory_sanitizer)
+# define MEMORY_SANITIZER 1
+# endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+ This is a legacy interface that does not update origin information. Use
+ __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+ memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+# if __has_feature(address_sanitizer)
+# define ADDRESS_SANITIZER 1
+# endif
+#elif defined(__SANITIZE_ADDRESS__)
+# define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
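A hedged sketch of how these declarations are typically used: poison a slice of a larger allocation while it is logically unused, and unpoison it before regular code touches it again. The function name, buffer, and sizes are illustrative:

    #include <stddef.h>

    #if defined (ADDRESS_SANITIZER)
    static void quarantine_region(void* buf, size_t size, int inUse)
    {
        if (!inUse) {
            __asan_poison_memory_region(buf, size);    /* any access now traps under ASAN */
        } else {
            __asan_unpoison_memory_region(buf, size);  /* region is addressable again */
        }
    }
    #endif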
/*-**************************************************************
* Basic Types
@@ -102,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 7a8294543280..f575935076cf 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
- (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
- (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
- (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+ {
+ int error = 0;
+ error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+ error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+ error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+ if (error) { POOL_free(ctx); return NULL; }
+ }
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
diff --git a/lib/common/threading.c b/lib/common/threading.c
index f3d4fa841843..482664bd9ada 100644
--- a/lib/common/threading.c
+++ b/lib/common/threading.c
@@ -14,6 +14,8 @@
* This file will hold wrapper for systems, which do not support pthreads
*/
+#include "threading.h"
+
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useless_symbol;
@@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
-#include "threading.h"
/* === Implementation === */
@@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}
#endif /* ZSTD_MULTITHREAD */
+
+#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
+
+#include <stdlib.h>
+
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
+{
+ *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+ if (!*mutex)
+ return 1;
+ return pthread_mutex_init(*mutex, attr);
+}
+
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
+{
+ if (!*mutex)
+ return 0;
+ {
+ int const ret = pthread_mutex_destroy(*mutex);
+ free(*mutex);
+ return ret;
+ }
+}
+
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
+{
+ *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+ if (!*cond)
+ return 1;
+ return pthread_cond_init(*cond, attr);
+}
+
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
+{
+ if (!*cond)
+ return 0;
+ {
+ int const ret = pthread_cond_destroy(*cond);
+ free(*cond);
+ return ret;
+ }
+}
+
+#endif
diff --git a/lib/common/threading.h b/lib/common/threading.h
index d806c89d01c9..3193ca7db86c 100644
--- a/lib/common/threading.h
+++ b/lib/common/threading.h
@@ -13,6 +13,8 @@
#ifndef THREADING_H_938743
#define THREADING_H_938743
+#include "debug.h"
+
#if defined (__cplusplus)
extern "C" {
#endif
@@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
*/
-#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
+#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
+#if DEBUGLEVEL < 1
+
#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
@@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+
+#endif
+
#else /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index 30599aaae41f..99d245962185 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -53,7 +53,8 @@
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
+ defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
@@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
# define INLINE_KEYWORD
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@@ -206,7 +207,12 @@ static U64 XXH_read64(const void* memPtr)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
+#if defined(__ICCARM__)
+# include <intrinsics.h>
+# define XXH_rotl32(x,r) __ROR(x,(32 - r))
+#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 81b16eac2ea6..dcdcbdb81cd7 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -56,9 +56,9 @@ extern "C" {
/**
* Return the specified error if the condition evaluates to true.
*
- * In debug modes, prints additional information. In order to do that
- * (particularly, printing the conditional that failed), this can't just wrap
- * RETURN_ERROR().
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
typedef enum {
ZSTD_no_overlap,
- ZSTD_overlap_src_before_dst,
+ ZSTD_overlap_src_before_dst
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
- * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ * Custom version of memcpy(), can over-read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ * The src buffer must be before the dst buffer.
+ */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
- assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
- if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
- do
- COPY8(op, ip)
- while (op < oend);
- }
- else {
- if ((length & 8) == 0)
- COPY8(op, ip);
- do {
+ assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+ if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+ /* Handle short offset copies. */
+ do {
+ COPY8(op, ip)
+ } while (op < oend);
+ } else {
+ assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+ /* Separate out the first two COPY16() calls because the copy length is
+ * almost certain to be short, so the branches have different
+ * probabilities.
+ * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+ * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+ */
COPY16(op, ip);
- }
- while (op < oend);
- }
-}
-
-/*! ZSTD_wildcopy_16min() :
- * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
-{
- ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
-
- assert(length >= 8);
- assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
- if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
- do
- COPY8(op, ip)
- while (op < oend);
- }
- else {
- if ((length & 8) == 0)
- COPY8(op, ip);
- do {
COPY16(op, ip);
- }
- while (op < oend);
+ if (op >= oend) return;
+ do {
+ COPY16(op, ip);
+ COPY16(op, ip);
+ }
+ while (op < oend);
}
}
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = (BYTE*)dstEnd;
- do
- COPY8(op, ip)
- while (op < oend);
-}
-
/*-*******************************************
* Private declarations
@@ -323,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
- return 31 - __builtin_clz(val);
+ return __builtin_clz (val) ^ 31;
+# elif defined(__ICCARM__) /* IAR Intrinsic */
+ return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
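A small sketch of the ZSTD_wildcopy() contract documented above, for internal callers only (BYTE and the enum come from zstd_internal.h; the helper name is illustrative): the destination must keep WILDCOPY_OVERLENGTH spare bytes past `length`, because the copy may intentionally run long.

    /* caller guarantees: dst has length + WILDCOPY_OVERLENGTH writable bytes,
     * and src/dst are at least WILDCOPY_VECLEN bytes apart (ZSTD_no_overlap) */
    static void copy_literals_example(BYTE* dst, const BYTE* src, size_t length)
    {
        ZSTD_wildcopy(dst, src, (ptrdiff_t)length, ZSTD_no_overlap);
    }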
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 8ad15e1cc486..35346b92cb1a 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -42,15 +42,15 @@ size_t ZSTD_compressBound(size_t srcSize) {
* Context memory management
***************************************/
struct ZSTD_CDict_s {
- void* dictBuffer;
const void* dictContent;
size_t dictContentSize;
- void* workspace;
- size_t workspaceSize;
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+ ZSTD_cwksp workspace;
ZSTD_matchState_t matchState;
ZSTD_compressedBlockState_t cBlockState;
ZSTD_customMem customMem;
U32 dictID;
+ int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -84,23 +84,26 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
- ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace;
+ ZSTD_cwksp ws;
+ ZSTD_CCtx* cctx;
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
- memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+
+ cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
+ if (cctx == NULL) {
+ return NULL;
+ }
+ memset(cctx, 0, sizeof(ZSTD_CCtx));
+ ZSTD_cwksp_move(&cctx->workspace, &ws);
cctx->staticSize = workspaceSize;
- cctx->workSpace = (void*)(cctx+1);
- cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
- if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
- assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
- cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
- cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
- {
- void* const ptr = cctx->blockState.nextCBlock + 1;
- cctx->entropyWorkspace = (U32*)ptr;
- }
+ if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+ cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+ cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+ cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(
+ &cctx->workspace, HUF_WORKSPACE_SIZE);
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
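A hedged caller-side sketch of the static-CCtx path reworked above, sizing the workspace with the estimation API instead of a hard-coded constant; the helper name and ownership convention are illustrative:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdlib.h>

    static ZSTD_CCtx* make_static_cctx(int level, void** workspaceOut)
    {
        size_t const wkspSize = ZSTD_estimateCCtxSize(level);
        void* const wksp = malloc(wkspSize);                 /* malloc memory is 8-byte aligned */
        ZSTD_CCtx* const cctx = wksp ? ZSTD_initStaticCCtx(wksp, wkspSize) : NULL;
        if (cctx == NULL) { free(wksp); return NULL; }
        *workspaceOut = wksp;   /* caller frees it; the static CCtx has no free function */
        return cctx;
    }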
@@ -128,11 +131,11 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
assert(cctx != NULL);
assert(cctx->staticSize == 0);
- ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
+ ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
@@ -140,8 +143,13 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx");
- ZSTD_freeCCtxContent(cctx);
- ZSTD_free(cctx, cctx->customMem);
+ {
+ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+ ZSTD_freeCCtxContent(cctx);
+ if (!cctxInWorkspace) {
+ ZSTD_free(cctx, cctx->customMem);
+ }
+ }
return 0;
}
@@ -160,7 +168,9 @@ static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
- return sizeof(*cctx) + cctx->workSpaceSize
+ /* cctx may be in the workspace */
+ return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+ + ZSTD_cwksp_sizeof(&cctx->workspace)
+ ZSTD_sizeof_localDict(cctx->localDict)
+ ZSTD_sizeof_mtctx(cctx);
}
@@ -229,23 +239,23 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
RETURN_ERROR_IF(!cctxParams, GENERIC);
FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
memset(cctxParams, 0, sizeof(*cctxParams));
+ assert(!ZSTD_checkCParams(params.cParams));
cctxParams->cParams = params.cParams;
cctxParams->fParams = params.fParams;
cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
- assert(!ZSTD_checkCParams(params.cParams));
return 0;
}
/* ZSTD_assignParamsToCCtxParams() :
* params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
- ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
+ const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
- ZSTD_CCtx_params ret = cctxParams;
+ ZSTD_CCtx_params ret = *cctxParams;
+ assert(!ZSTD_checkCParams(params.cParams));
ret.cParams = params.cParams;
ret.fParams = params.fParams;
ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
- assert(!ZSTD_checkCParams(params.cParams));
return ret;
}
@@ -378,7 +388,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_c_forceAttachDict:
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
bounds.lowerBound = ZSTD_dictDefaultAttach;
- bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */
+ bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
return bounds;
case ZSTD_c_literalCompressionMode:
@@ -392,6 +402,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
return bounds;
+ case ZSTD_c_srcSizeHint:
+ bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+ bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+ return bounds;
+
default:
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
return boundError;
@@ -448,6 +463,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode:
case ZSTD_c_targetCBlockSize:
+ case ZSTD_c_srcSizeHint:
default:
return 0;
}
@@ -494,6 +510,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_ldmMinMatch:
case ZSTD_c_ldmBucketSizeLog:
case ZSTD_c_targetCBlockSize:
+ case ZSTD_c_srcSizeHint:
break;
default: RETURN_ERROR(parameter_unsupported);
@@ -517,33 +534,33 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
if (value) { /* 0 : does not change current level */
CCtxParams->compressionLevel = value;
}
- if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
+ if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
return 0; /* return type (size_t) cannot represent negative values */
}
case ZSTD_c_windowLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_windowLog, value);
- CCtxParams->cParams.windowLog = value;
+ CCtxParams->cParams.windowLog = (U32)value;
return CCtxParams->cParams.windowLog;
case ZSTD_c_hashLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_hashLog, value);
- CCtxParams->cParams.hashLog = value;
+ CCtxParams->cParams.hashLog = (U32)value;
return CCtxParams->cParams.hashLog;
case ZSTD_c_chainLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_chainLog, value);
- CCtxParams->cParams.chainLog = value;
+ CCtxParams->cParams.chainLog = (U32)value;
return CCtxParams->cParams.chainLog;
case ZSTD_c_searchLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_searchLog, value);
- CCtxParams->cParams.searchLog = value;
- return value;
+ CCtxParams->cParams.searchLog = (U32)value;
+ return (size_t)value;
case ZSTD_c_minMatch :
if (value!=0) /* 0 => use default */
@@ -674,6 +691,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->targetCBlockSize = value;
return CCtxParams->targetCBlockSize;
+ case ZSTD_c_srcSizeHint :
+ if (value!=0) /* 0 ==> default */
+ BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+ CCtxParams->srcSizeHint = value;
+ return CCtxParams->srcSizeHint;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
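A short sketch of how the new parameter is meant to be used from the public API; the hint value is illustrative, and the ZSTD_c_srcSizeHint symbol lives in the experimental section of zstd.h:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static void hint_typical_input_size(ZSTD_CCtx* cctx)
    {
        /* guides cParams selection when the exact srcSize is not pledged */
        (void)ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, 64 * 1024);
    }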
@@ -779,6 +802,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_targetCBlockSize :
*value = (int)CCtxParams->targetCBlockSize;
break;
+ case ZSTD_c_srcSizeHint :
+ *value = (int)CCtxParams->srcSizeHint;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
@@ -1029,7 +1055,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
- ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+ ZSTD_compressionParameters cParams;
+ if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+ srcSizeHint = CCtxParams->srcSizeHint;
+ }
+ cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
@@ -1049,10 +1079,19 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
- + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+ size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+ /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+ * surrounded by redzones in ASAN. */
+ size_t const tableSpace = chainSize * sizeof(U32)
+ + hSize * sizeof(U32)
+ + h3Size * sizeof(U32);
+ size_t const optPotentialSpace =
+ ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
: 0;
@@ -1069,20 +1108,23 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
- size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
- size_t const entropySpace = HUF_WORKSPACE_SIZE;
- size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
+ size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+ size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+ size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
- size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
+ size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));
size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
matchStateSize + ldmSpace + ldmSeqSpace;
+ size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));
- DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
- DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
- return sizeof(ZSTD_CCtx) + neededSpace;
+ DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace);
+ DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+ return cctxSpace + neededSpace;
}
}
@@ -1118,7 +1160,8 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
- size_t const streamingSize = inBuffSize + outBuffSize;
+ size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
+ + ZSTD_cwksp_alloc_size(outBuffSize);
return CCtxSize + streamingSize;
}
@@ -1186,17 +1229,6 @@ size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
-
-
-static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
- ZSTD_compressionParameters cParams2)
-{
- return (cParams1.hashLog == cParams2.hashLog)
- & (cParams1.chainLog == cParams2.chainLog)
- & (cParams1.strategy == cParams2.strategy) /* opt parser space */
- & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */
-}
-
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
ZSTD_compressionParameters cParams2)
{
@@ -1211,71 +1243,6 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
assert(cParams1.strategy == cParams2.strategy);
}
-/** The parameters are equivalent if ldm is not enabled in both sets or
- * all the parameters are equivalent. */
-static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
- ldmParams_t ldmParams2)
-{
- return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
- (ldmParams1.enableLdm == ldmParams2.enableLdm &&
- ldmParams1.hashLog == ldmParams2.hashLog &&
- ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
- ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
- ldmParams1.hashRateLog == ldmParams2.hashRateLog);
-}
-
-typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
-
-/* ZSTD_sufficientBuff() :
- * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
- * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
-static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
- size_t maxNbLit1,
- ZSTD_buffered_policy_e buffPol2,
- ZSTD_compressionParameters cParams2,
- U64 pledgedSrcSize)
-{
- size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
- size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
- size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
- size_t const maxNbLit2 = blockSize2;
- size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
- DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
- (U32)neededBufferSize2, (U32)bufferSize1);
- DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
- (U32)maxNbSeq2, (U32)maxNbSeq1);
- DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
- (U32)maxNbLit2, (U32)maxNbLit1);
- return (maxNbLit2 <= maxNbLit1)
- & (maxNbSeq2 <= maxNbSeq1)
- & (neededBufferSize2 <= bufferSize1);
-}
-
-/** Equivalence for resetCCtx purposes */
-static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
- ZSTD_CCtx_params params2,
- size_t buffSize1,
- size_t maxNbSeq1, size_t maxNbLit1,
- ZSTD_buffered_policy_e buffPol2,
- U64 pledgedSrcSize)
-{
- DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
- if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
- DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
- return 0;
- }
- if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
- DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
- return 0;
- }
- if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
- params2.cParams, pledgedSrcSize)) {
- DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
- return 0;
- }
- return 1;
-}
-
static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
int i;
@@ -1301,87 +1268,104 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
ms->dictMatchState = NULL;
}
-/*! ZSTD_continueCCtx() :
- * reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
-{
- size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");
+/**
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+ ZSTDb_not_buffered,
+ ZSTDb_buffered
+} ZSTD_buffered_policy_e;
- cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
- cctx->appliedParams = params;
- cctx->blockState.matchState.cParams = params.cParams;
- cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
- cctx->consumedSrcSize = 0;
- cctx->producedCSize = 0;
- if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
- cctx->appliedParams.fParams.contentSizeFlag = 0;
- DEBUGLOG(4, "pledged content size : %u ; flag : %u",
- (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
- cctx->stage = ZSTDcs_init;
- cctx->dictID = 0;
- if (params.ldmParams.enableLdm)
- ZSTD_window_clear(&cctx->ldmState.window);
- ZSTD_referenceExternalSequences(cctx, NULL, 0);
- ZSTD_invalidateMatchState(&cctx->blockState.matchState);
- ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
- XXH64_reset(&cctx->xxhState, 0);
- return 0;
-}
+/**
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+ ZSTDcrp_makeClean,
+ ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+/**
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+ ZSTDirp_continue,
+ ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
-typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
+typedef enum {
+ ZSTD_resetTarget_CDict,
+ ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
-static void*
+static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
- void* ptr,
+ ZSTD_cwksp* ws,
const ZSTD_compressionParameters* cParams,
- ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
+ const ZSTD_compResetPolicy_e crp,
+ const ZSTD_indexResetPolicy_e forceResetIndex,
+ const ZSTD_resetTarget_e forWho)
{
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-
- assert(((size_t)ptr & 3) == 0);
+ size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+ DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+ if (forceResetIndex == ZSTDirp_reset) {
+ memset(&ms->window, 0, sizeof(ms->window));
+ ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
+ ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
+ ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
+ ZSTD_cwksp_mark_tables_dirty(ws);
+ }
ms->hashLog3 = hashLog3;
- memset(&ms->window, 0, sizeof(ms->window));
- ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
- ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
- ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
+
ZSTD_invalidateMatchState(ms);
+ assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+ ZSTD_cwksp_clear_tables(ws);
+
+ DEBUGLOG(5, "reserving table space");
+ /* table Space */
+ ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+ ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+ ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+ RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+ "failed a workspace allocation in ZSTD_reset_matchState");
+
+ DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+ if (crp!=ZSTDcrp_leaveDirty) {
+ /* reset tables only */
+ ZSTD_cwksp_clean_tables(ws);
+ }
+
/* opt parser space */
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
- ms->opt.litFreq = (unsigned*)ptr;
- ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
- ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
- ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
- ptr = ms->opt.offCodeFreq + (MaxOff+1);
- ms->opt.matchTable = (ZSTD_match_t*)ptr;
- ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
- ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
- ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
+ ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+ ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+ ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+ ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
- /* table Space */
- DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
- ms->hashTable = (U32*)(ptr);
- ms->chainTable = ms->hashTable + hSize;
- ms->hashTable3 = ms->chainTable + chainSize;
- ptr = ms->hashTable3 + h3Size;
-
ms->cParams = *cParams;
- assert(((size_t)ptr & 3) == 0);
- return ptr;
+ RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+ "failed a workspace allocation in ZSTD_reset_matchState");
+
+ return 0;
}
/* ZSTD_indexTooCloseToMax() :
@@ -1397,13 +1381,6 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
-#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
-#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large
- * during at least this number of times,
- * context's memory usage is considered wasteful,
- * because it's sized to handle a worst case scenario which rarely happens.
- * In which case, resize it down to free some memory */
-
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
@@ -1412,30 +1389,12 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
+ ZSTD_cwksp* const ws = &zc->workspace;
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
(U32)pledgedSrcSize, params.cParams.windowLog);
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
- if (crp == ZSTDcrp_continue) {
- if (ZSTD_equivalentParams(zc->appliedParams, params,
- zc->inBuffSize,
- zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
- zbuff, pledgedSrcSize) ) {
- DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
- zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
- if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
- DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
- zc->appliedParams.cParams.windowLog, zc->blockSize);
- if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
- /* prefer a reset, faster than a rescale */
- ZSTD_reset_matchState(&zc->blockState.matchState,
- zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
- &params.cParams,
- crp, ZSTD_resetTarget_CCtx);
- }
- return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
- } } }
- DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
+ zc->isFirstBlock = 1;
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
@@ -1449,58 +1408,74 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
- size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
+ size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
- void* ptr; /* used to partition workSpace */
- /* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const entropySpace = HUF_WORKSPACE_SIZE;
- size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
- size_t const bufferSpace = buffInSize + buffOutSize;
+ ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue;
+
+ if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+ needsIndexReset = ZSTDirp_reset;
+ }
+
+ ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+ /* Check if workspace is large enough, alloc a new one if needed */
+ { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+ size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+ size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+ size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
- size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
+ size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
- size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
- ldmSeqSpace + matchStateSize + tokenSpace +
- bufferSpace;
+ size_t const neededSpace =
+ cctxSpace +
+ entropySpace +
+ blockStateSpace +
+ ldmSpace +
+ ldmSeqSpace +
+ matchStateSize +
+ tokenSpace +
+ bufferSpace;
- int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
- int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
- int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
- zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
+ int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+ int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
- if (workSpaceTooSmall || workSpaceWasteful) {
- DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
- zc->workSpaceSize >> 10,
+ if (workspaceTooSmall || workspaceWasteful) {
+ DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+ ZSTD_cwksp_sizeof(ws) >> 10,
neededSpace >> 10);
RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
- zc->workSpaceSize = 0;
- ZSTD_free(zc->workSpace, zc->customMem);
- zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
- RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation);
- zc->workSpaceSize = neededSpace;
- zc->workSpaceOversizedDuration = 0;
+ needsIndexReset = ZSTDirp_reset;
+ ZSTD_cwksp_free(ws, zc->customMem);
+ FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem));
+
+ DEBUGLOG(5, "reserving object space");
/* Statically sized space.
* entropyWorkspace never moves,
* though prev/next block swap places */
- assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
- assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
- zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
- zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
- ptr = zc->blockState.nextCBlock + 1;
- zc->entropyWorkspace = (U32*)ptr;
+ assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+ zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+ RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+ zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+ RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+ zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
+ RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
} }
+ ZSTD_cwksp_clear(ws);
+
/* init params */
zc->appliedParams = params;
zc->blockState.matchState.cParams = params.cParams;
@@ -1519,58 +1494,58 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
- ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
- zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
- &params.cParams,
- crp, ZSTD_resetTarget_CCtx);
-
- /* ldm hash table */
- /* initialize bucketOffsets table later for pointer alignment */
- if (params.ldmParams.enableLdm) {
- size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
- memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->ldmState.hashTable = (ldmEntry_t*)ptr;
- ptr = zc->ldmState.hashTable + ldmHSize;
- zc->ldmSequences = (rawSeq*)ptr;
- ptr = zc->ldmSequences + maxNbLdmSeq;
- zc->maxNbLdmSequences = maxNbLdmSeq;
-
- memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
- }
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
-
- /* sequences storage */
- zc->seqStore.maxNbSeq = maxNbSeq;
- zc->seqStore.sequencesStart = (seqDef*)ptr;
- ptr = zc->seqStore.sequencesStart + maxNbSeq;
- zc->seqStore.llCode = (BYTE*) ptr;
- zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
- zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
- zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
+ zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;
- ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
+
+ /* buffers */
+ zc->inBuffSize = buffInSize;
+ zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+ zc->outBuffSize = buffOutSize;
+ zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
/* ldm bucketOffsets table */
if (params.ldmParams.enableLdm) {
+ /* TODO: avoid memset? */
size_t const ldmBucketSize =
((size_t)1) << (params.ldmParams.hashLog -
params.ldmParams.bucketSizeLog);
- memset(ptr, 0, ldmBucketSize);
- zc->ldmState.bucketOffsets = (BYTE*)ptr;
- ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
- ZSTD_window_clear(&zc->ldmState.window);
+ zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
+ memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
}
+
+ /* sequences storage */
ZSTD_referenceExternalSequences(zc, NULL, 0);
+ zc->seqStore.maxNbSeq = maxNbSeq;
+ zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &zc->blockState.matchState,
+ ws,
+ &params.cParams,
+ crp,
+ needsIndexReset,
+ ZSTD_resetTarget_CCtx));
- /* buffers */
- zc->inBuffSize = buffInSize;
- zc->inBuff = (char*)ptr;
- zc->outBuffSize = buffOutSize;
- zc->outBuff = zc->inBuff + buffInSize;
+ /* ldm hash table */
+ if (params.ldmParams.enableLdm) {
+ /* TODO: avoid memset? */
+ size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+ memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
+ ZSTD_window_clear(&zc->ldmState.window);
+ }
+
+ DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
return 0;
}
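/* [editor's illustration -- not part of the patch]
 * A minimal sketch of the reservation discipline the new ZSTD_cwksp imposes in
 * ZSTD_resetCCtx_internal above: long-lived objects are reserved first and
 * survive resets, then plain byte buffers, then aligned arrays and match-state
 * tables; ZSTD_cwksp_clear() recycles everything except the objects. The sizes
 * below are illustrative assumptions only, not values used by zstd. */
static void ZSTD_cwksp_reservation_sketch_editorOnly(void* mem, size_t memSize)
{
    ZSTD_cwksp ws;
    ZSTD_cwksp_init(&ws, mem, memSize);
    {   /* phase 1: long-lived objects (survive ZSTD_cwksp_clear) */
        void* const entropy = ZSTD_cwksp_reserve_object(&ws, HUF_WORKSPACE_SIZE);
        /* phase 2: plain byte buffers */
        BYTE* const litBuf = ZSTD_cwksp_reserve_buffer(&ws, 128 * 1024);
        /* phase 3: aligned arrays (match-state tables are reserved similarly
         * inside ZSTD_reset_matchState) */
        U32* const someTable = (U32*)ZSTD_cwksp_reserve_aligned(&ws, (1u << 17) * sizeof(U32));
        (void)entropy; (void)litBuf; (void)someTable;
    }
    ZSTD_cwksp_clear(&ws);  /* buffers and aligned space are handed out again on the next reset */
}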
@@ -1604,15 +1579,15 @@ static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
};
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
U64 pledgedSrcSize)
{
size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
return ( pledgedSrcSize <= cutoff
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
- || params.attachDictPref == ZSTD_dictForceAttach )
- && params.attachDictPref != ZSTD_dictForceCopy
- && !params.forceWindow; /* dictMatchState isn't correctly
+ || params->attachDictPref == ZSTD_dictForceAttach )
+ && params->attachDictPref != ZSTD_dictForceCopy
+ && !params->forceWindow; /* dictMatchState isn't correctly
* handled in _enforceMaxDist */
}
@@ -1630,8 +1605,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
* has its own tables. */
params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
params.cParams.windowLog = windowLog;
- ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_continue, zbuff);
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_makeClean, zbuff));
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
}
@@ -1679,30 +1654,36 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
/* Copy only compression parameters related to tables. */
params.cParams = *cdict_cParams;
params.cParams.windowLog = windowLog;
- ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_noMemset, zbuff);
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_leaveDirty, zbuff));
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
}
+ ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
/* copy tables */
{ size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
- size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
- assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
- assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
- memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+
+ memcpy(cctx->blockState.matchState.hashTable,
+ cdict->matchState.hashTable,
+ hSize * sizeof(U32));
+ memcpy(cctx->blockState.matchState.chainTable,
+ cdict->matchState.chainTable,
+ chainSize * sizeof(U32));
}
/* Zero the hashTable3, since the cdict never fills it */
- { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
+ { int const h3log = cctx->blockState.matchState.hashLog3;
+ size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
assert(cdict->matchState.hashLog3 == 0);
memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
}
+ ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
/* copy dictionary offsets */
{ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
@@ -1724,7 +1705,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
* in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
@@ -1734,10 +1715,10 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
return ZSTD_resetCCtx_byAttachingCDict(
- cctx, cdict, params, pledgedSrcSize, zbuff);
+ cctx, cdict, *params, pledgedSrcSize, zbuff);
} else {
return ZSTD_resetCCtx_byCopyingCDict(
- cctx, cdict, params, pledgedSrcSize, zbuff);
+ cctx, cdict, *params, pledgedSrcSize, zbuff);
}
}
@@ -1763,7 +1744,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
params.cParams = srcCCtx->appliedParams.cParams;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
- ZSTDcrp_noMemset, zbuff);
+ ZSTDcrp_leaveDirty, zbuff);
assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
@@ -1771,16 +1752,27 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
}
+ ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
/* copy tables */
{ size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
- size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
- memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+ int const h3log = srcCCtx->blockState.matchState.hashLog3;
+ size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+ memcpy(dstCCtx->blockState.matchState.hashTable,
+ srcCCtx->blockState.matchState.hashTable,
+ hSize * sizeof(U32));
+ memcpy(dstCCtx->blockState.matchState.chainTable,
+ srcCCtx->blockState.matchState.chainTable,
+ chainSize * sizeof(U32));
+ memcpy(dstCCtx->blockState.matchState.hashTable3,
+ srcCCtx->blockState.matchState.hashTable3,
+ h3Size * sizeof(U32));
}
+ ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
/* copy dictionary offsets */
{
const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
@@ -1831,6 +1823,20 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
int rowNb;
assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
assert(size < (1U<<31)); /* can be casted to int */
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the table re-use logic is sound, and that we don't
+ * access table space that we haven't cleaned, we re-"poison" the table
+ * space every time we mark it dirty.
+ *
+ * This function, however, is intended to operate on those dirty tables and
+ * re-clean them. So, when this function is used correctly, we can unpoison
+ * the memory it operated on. This introduces a blind spot, though: if we
+ * now try to operate on __actually__ poisoned memory, we will not
+ * detect that. */
+ __msan_unpoison(table, size * sizeof(U32));
+#endif
+
for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
int column;
for (column=0; column<ZSTD_ROWSIZE; column++) {
@@ -1938,7 +1944,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWkspSize,
const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
@@ -1955,7 +1961,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
BYTE* seqHead;
BYTE* lastNCount = NULL;
@@ -1964,14 +1970,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
- size_t const litSize = seqStorePtr->lit - literals;
+ size_t const litSize = (size_t)(seqStorePtr->lit - literals);
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy,
ZSTD_disableLiteralsCompression(cctxParams),
op, dstCapacity,
literals, litSize,
- workspace, wkspSize,
+ entropyWorkspace, entropyWkspSize,
bmi2);
FORWARD_IF_ERROR(cSize);
assert(cSize <= dstCapacity);
@@ -1981,17 +1987,22 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall);
- if (nbSeq < 0x7F)
+ if (nbSeq < 128) {
*op++ = (BYTE)nbSeq;
- else if (nbSeq < LONGNBSEQ)
- op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
- else
- op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+ } else if (nbSeq < LONGNBSEQ) {
+ op[0] = (BYTE)((nbSeq>>8) + 0x80);
+ op[1] = (BYTE)nbSeq;
+ op+=2;
+ } else {
+ op[0]=0xFF;
+ MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+ op+=3;
+ }
assert(op <= oend);
if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */
memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
- return op - ostart;
+ return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
@@ -2002,7 +2013,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ unsigned max = MaxLL;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
@@ -2012,10 +2023,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
- count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
- prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
- workspace, wkspSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+ count, max, llCodeTable, nbSeq,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ prevEntropy->fse.litlengthCTable,
+ sizeof(prevEntropy->fse.litlengthCTable),
+ entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize);
if (LLtype == set_compressed)
lastNCount = op;
@@ -2024,7 +2039,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
} }
/* build CTable for Offsets */
{ unsigned max = MaxOff;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(
+ count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
@@ -2035,10 +2051,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
- count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
- workspace, wkspSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+ count, max, ofCodeTable, nbSeq,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ prevEntropy->fse.offcodeCTable,
+ sizeof(prevEntropy->fse.offcodeCTable),
+ entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize);
if (Offtype == set_compressed)
lastNCount = op;
@@ -2047,7 +2067,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
} }
/* build CTable for MatchLengths */
{ unsigned max = MaxML;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(
+ count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
@@ -2056,10 +2077,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
- count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
- prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
- workspace, wkspSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+ count, max, mlCodeTable, nbSeq,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ prevEntropy->fse.matchlengthCTable,
+ sizeof(prevEntropy->fse.matchlengthCTable),
+ entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize);
if (MLtype == set_compressed)
lastNCount = op;
@@ -2070,7 +2095,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
{ size_t const bitstreamSize = ZSTD_encodeSequences(
- op, oend - op,
+ op, (size_t)(oend - op),
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
@@ -2097,7 +2122,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
}
DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
- return op - ostart;
+ return (size_t)(op - ostart);
}
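/* [editor's note -- not part of the patch]
 * Worked example of the sequence-count header written above
 * (LONGNBSEQ == 0x7F00 in zstd_compress_internal.h):
 *   nbSeq = 100   -> 1 byte : 0x64
 *   nbSeq = 300   -> 2 bytes: 0x81 0x2C      ((300>>8)+0x80, 300 & 0xFF)
 *   nbSeq = 32768 -> 3 bytes: 0xFF 0x00 0x01 (0xFF, then LE16(32768 - 0x7F00))
 */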
MEM_STATIC size_t
@@ -2107,13 +2132,13 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
size_t srcSize,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWkspSize,
int bmi2)
{
size_t const cSize = ZSTD_compressSequences_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
dst, dstCapacity,
- workspace, wkspSize, bmi2);
+ entropyWorkspace, entropyWkspSize, bmi2);
if (cSize == 0) return 0;
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
@@ -2264,19 +2289,113 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
return ZSTDbss_compress;
}
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+ const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+ const seqDef* seqs = seqStore->sequencesStart;
+ size_t seqsSize = seqStore->sequences - seqs;
+
+ ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+ size_t i; size_t position; int repIdx;
+
+ assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+ for (i = 0, position = 0; i < seqsSize; ++i) {
+ outSeqs[i].offset = seqs[i].offset;
+ outSeqs[i].litLength = seqs[i].litLength;
+ outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
+
+ if (i == seqStore->longLengthPos) {
+ if (seqStore->longLengthID == 1) {
+ outSeqs[i].litLength += 0x10000;
+ } else if (seqStore->longLengthID == 2) {
+ outSeqs[i].matchLength += 0x10000;
+ }
+ }
+
+ if (outSeqs[i].offset <= ZSTD_REP_NUM) {
+ outSeqs[i].rep = outSeqs[i].offset;
+ repIdx = (unsigned int)i - outSeqs[i].offset;
+
+ if (outSeqs[i].litLength == 0) {
+ if (outSeqs[i].offset < 3) {
+ --repIdx;
+ } else {
+ repIdx = (unsigned int)i - 1;
+ }
+ ++outSeqs[i].rep;
+ }
+ assert(repIdx >= -3);
+ outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
+ if (outSeqs[i].rep == 4) {
+ --outSeqs[i].offset;
+ }
+ } else {
+ outSeqs[i].offset -= ZSTD_REP_NUM;
+ }
+
+ position += outSeqs[i].litLength;
+ outSeqs[i].matchPos = (unsigned int)position;
+ position += outSeqs[i].matchLength;
+ }
+ zc->seqCollector.seqIndex += seqsSize;
+}
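/* [editor's note -- not part of the patch]
 * In the seqStore, seqDef.offset values <= ZSTD_REP_NUM (3) are repeat-offset
 * codes; ZSTD_copyBlockSequences resolves them above against an earlier output
 * sequence (repIdx), or against repStartValue[] near the start of a frame.
 * Larger values carry the real offset biased by ZSTD_REP_NUM, hence the
 * `-= ZSTD_REP_NUM` in the else branch. matchPos accumulates
 * litLength + matchLength from zero at each call, so it is the match position
 * within the current block, not within the whole source.
 */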
+
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize)
+{
+ const size_t dstCapacity = ZSTD_compressBound(srcSize);
+ void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
+ SeqCollector seqCollector;
+
+ RETURN_ERROR_IF(dst == NULL, memory_allocation);
+
+ seqCollector.collectSequences = 1;
+ seqCollector.seqStart = outSeqs;
+ seqCollector.seqIndex = 0;
+ seqCollector.maxSequences = outSeqsSize;
+ zc->seqCollector = seqCollector;
+
+ ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+ ZSTD_free(dst, ZSTD_defaultCMem);
+ return zc->seqCollector.seqIndex;
+}
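/* [editor's illustration -- not part of the patch]
 * A hedged usage sketch for the new ZSTD_getSequences() entry point defined
 * above. The output capacity (one sequence per 3 input bytes, plus one) is an
 * assumption chosen for the sketch, not a bound documented by zstd. */
static void ZSTD_getSequences_usage_sketch_editorOnly(const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const maxSeqs = srcSize / 3 + 1;   /* assumed capacity */
    ZSTD_Sequence* const seqs =
        (ZSTD_Sequence*)ZSTD_malloc(maxSeqs * sizeof(ZSTD_Sequence), ZSTD_defaultCMem);
    if (cctx != NULL && seqs != NULL) {
        size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, maxSeqs, src, srcSize);
        if (!ZSTD_isError(nbSeqs)) {
            size_t i;
            for (i = 0; i < nbSeqs; i++) {
                /* inspect seqs[i].litLength, .matchLength, .offset, .matchPos, .rep */
            }
        }
    }
    ZSTD_free(seqs, ZSTD_defaultCMem);
    ZSTD_freeCCtx(cctx);
}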
+
+/* Returns true if the given block is an RLE block */
+static int ZSTD_isRLE(const BYTE *ip, size_t length) {
+ size_t i;
+ if (length < 2) return 1;
+ for (i = 1; i < length; ++i) {
+ if (ip[0] != ip[i]) return 0;
+ }
+ return 1;
+}
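/* [editor's note -- not part of the patch]
 * ZSTD_isRLE() reports whether every byte of the block equals the first one,
 * e.g. "aaaa" -> 1, "aaab" -> 0, and lengths 0 and 1 -> 1 by convention.
 * When it fires (and the block is not the first block of a frame), the block
 * is emitted below as a single byte under a bt_rle block header instead of a
 * compressed block.
 */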
+
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+ const void* src, size_t srcSize, U32 frame)
{
+ /* This is the upper bound for the length of an RLE block.
+ * It isn't the actual upper bound; finding the real threshold
+ * needs further investigation.
+ */
+ const U32 rleMaxLength = 25;
size_t cSize;
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
- (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
+ (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+ (unsigned)zc->blockState.matchState.nextToUpdate);
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss);
if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
}
+ if (zc->seqCollector.collectSequences) {
+ ZSTD_copyBlockSequences(zc);
+ return 0;
+ }
+
/* encode sequences and literals */
cSize = ZSTD_compressSequences(&zc->seqStore,
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
@@ -2286,8 +2405,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->bmi2);
+ if (frame &&
+ /* We don't want to emit our first block as an RLE block even if it qualifies,
+ * because doing so would cause the decoder (CLI only) to throw a
+ * "should consume all input" error. This is only an issue for zstd <= v1.4.3.
+ */
+ !zc->isFirstBlock &&
+ cSize < rleMaxLength &&
+ ZSTD_isRLE(ip, srcSize))
+ {
+ cSize = 1;
+ op[0] = ip[0];
+ }
+
out:
- if (!ZSTD_isError(cSize) && cSize != 0) {
+ if (!ZSTD_isError(cSize) && cSize > 1) {
/* confirm repcodes and entropy tables when emitting a compressed block */
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
@@ -2304,7 +2436,11 @@ out:
}
-static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
+ ZSTD_CCtx_params const* params,
+ void const* ip,
+ void const* iend)
{
if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
U32 const maxDist = (U32)1 << params->cParams.windowLog;
@@ -2313,7 +2449,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+ ZSTD_cwksp_mark_tables_dirty(ws);
ZSTD_reduceIndex(ms, params, correction);
+ ZSTD_cwksp_mark_tables_clean(ws);
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
else ms->nextToUpdate -= correction;
/* invalidate dictionaries on overflow correction */
@@ -2322,7 +2460,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
}
}
-
/*! ZSTD_compress_frameChunk() :
* Compress a chunk of data into one or multiple blocks.
* All blocks will be terminated, all input will be consumed.
@@ -2356,7 +2493,8 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
"not enough space to store compressed block");
if (remaining < blockSize) blockSize = remaining;
- ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
+ ZSTD_overflowCorrectIfNeeded(
+ ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
@@ -2364,15 +2502,16 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
- ip, blockSize);
+ ip, blockSize, 1 /* frame */);
FORWARD_IF_ERROR(cSize);
-
if (cSize == 0) { /* block is not compressible */
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
FORWARD_IF_ERROR(cSize);
} else {
- U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
- MEM_writeLE24(op, cBlockHeader24);
+ const U32 cBlockHeader = cSize == 1 ?
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+ lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+ MEM_writeLE24(op, cBlockHeader);
cSize += ZSTD_blockHeaderSize;
}
@@ -2382,6 +2521,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
op += cSize;
assert(dstCapacity >= cSize);
dstCapacity -= cSize;
+ cctx->isFirstBlock = 0;
DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
(unsigned)cSize);
} }
@@ -2392,25 +2532,25 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
- ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID)
+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{ BYTE* const op = (BYTE*)dst;
U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
- U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
- U32 const checksumFlag = params.fParams.checksumFlag>0;
- U32 const windowSize = (U32)1 << params.cParams.windowLog;
- U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
- BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
- U32 const fcsCode = params.fParams.contentSizeFlag ?
+ U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
+ U32 const checksumFlag = params->fParams.checksumFlag>0;
+ U32 const windowSize = (U32)1 << params->cParams.windowLog;
+ U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+ BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+ U32 const fcsCode = params->fParams.contentSizeFlag ?
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
size_t pos=0;
- assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+ assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
- !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+ !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
- if (params.format == ZSTD_f_zstd1) {
+ if (params->format == ZSTD_f_zstd1) {
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
pos = 4;
}
@@ -2476,7 +2616,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
"missing init (ZSTD_compressBegin)");
if (frame && (cctx->stage==ZSTDcs_init)) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
FORWARD_IF_ERROR(fhSize);
assert(fhSize <= dstCapacity);
@@ -2496,13 +2636,15 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
if (!frame) {
/* overflow check and correction for block mode */
- ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
+ ZSTD_overflowCorrectIfNeeded(
+ ms, &cctx->workspace, &cctx->appliedParams,
+ src, (BYTE const*)src + srcSize);
}
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
{ size_t const cSize = frame ?
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
- ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
+ ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
FORWARD_IF_ERROR(cSize);
cctx->consumedSrcSize += srcSize;
cctx->producedCSize += (cSize + fhSize);
@@ -2538,8 +2680,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
- size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
- RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);
+ DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+ { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+ RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); }
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
@@ -2548,6 +2691,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
* @return : 0, or an error code
*/
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
ZSTD_CCtx_params const* params,
const void* src, size_t srcSize,
ZSTD_dictTableLoadMethod_e dtlm)
@@ -2564,11 +2708,11 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
if (srcSize <= HASH_READ_SIZE) return 0;
while (iend - ip > HASH_READ_SIZE) {
- size_t const remaining = iend - ip;
+ size_t const remaining = (size_t)(iend - ip);
size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
const BYTE* const ichunk = ip + chunk;
- ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
+ ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
switch(params->cParams.strategy)
{
@@ -2627,10 +2771,11 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
/*! ZSTD_loadZstdDictionary() :
* @return : dictID, or an error code
* assumptions : magic number supposed already checked
- * dictSize supposed > 8
+ * dictSize supposed >= 8
*/
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
ZSTD_CCtx_params const* params,
const void* dict, size_t dictSize,
ZSTD_dictTableLoadMethod_e dtlm,
@@ -2643,7 +2788,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
size_t dictID;
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
- assert(dictSize > 8);
+ assert(dictSize >= 8);
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
dictPtr += 4; /* skip magic number */
@@ -2726,7 +2871,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
- FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
+ FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+ ms, ws, params, dictPtr, dictContentSize, dtlm));
return dictID;
}
}
@@ -2736,6 +2882,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
const ZSTD_CCtx_params* params,
const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
@@ -2743,27 +2890,35 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
void* workspace)
{
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
- if ((dict==NULL) || (dictSize<=8)) return 0;
+ if ((dict==NULL) || (dictSize<8)) {
+ RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
+ return 0;
+ }
ZSTD_reset_compressedBlockState(bs);
/* dict restricted modes */
if (dictContentType == ZSTD_dct_rawContent)
- return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+ return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
- return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+ return ZSTD_loadDictionaryContent(
+ ms, ws, params, dict, dictSize, dtlm);
}
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
assert(0); /* impossible */
}
/* dict as full zstd dictionary */
- return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
+ return ZSTD_loadZstdDictionary(
+ bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
+
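/* [editor's note -- not part of the patch]
 * Worked example of the cutoffs above, as applied in ZSTD_compressBegin_internal()
 * below: with a 10 KB dictionary and a known pledgedSrcSize of 64 KB,
 * 64 KB < 128 KB, so the CDict's pre-built tables are used as-is. With a
 * pledgedSrcSize of 1 MB, 1 MB >= 128 KB and 1 MB >= 6 * 10 KB, so (unless the
 * recorded level is 0 or the source size is unknown) the dictionary content is
 * re-loaded instead, with parameters tuned for the larger source.
 */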
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2771,23 +2926,34 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, U64 pledgedSrcSize,
+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
- DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
+ DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
/* params are supposed to be fully validated at this point */
- assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
-
- if (cdict && cdict->dictContentSize>0) {
+ if ( (cdict)
+ && (cdict->dictContentSize > 0)
+ && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+ || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+ || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+ || cdict->compressionLevel == 0)
+ && (params->attachDictPref != ZSTD_dictForceLoad) ) {
return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
}
- FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_continue, zbuff) );
- { size_t const dictID = ZSTD_compress_insertDictionary(
- cctx->blockState.prevCBlock, &cctx->blockState.matchState,
- &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
+ FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+ ZSTDcrp_makeClean, zbuff) );
+ { size_t const dictID = cdict ?
+ ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize,
+ dictContentType, dtlm, cctx->entropyWorkspace)
+ : ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &cctx->workspace, params, dict, dictSize,
+ dictContentType, dtlm, cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
@@ -2800,12 +2966,12 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog);
+ DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
/* compression parameters verification and optimization */
- FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
+ FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) );
return ZSTD_compressBegin_internal(cctx,
dict, dictSize, dictContentType, dtlm,
cdict,
@@ -2820,21 +2986,21 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
return ZSTD_compressBegin_advanced_internal(cctx,
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
NULL /*cdict*/,
- cctxParams, pledgedSrcSize);
+ &cctxParams, pledgedSrcSize);
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
- cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+ &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
@@ -2857,7 +3023,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
/* special case : empty frame */
if (cctx->stage == ZSTDcs_init) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
FORWARD_IF_ERROR(fhSize);
dstCapacity -= fhSize;
op += fhSize;
@@ -2918,13 +3084,13 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
ZSTD_parameters params)
{
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
DEBUGLOG(4, "ZSTD_compress_internal");
return ZSTD_compress_advanced_internal(cctx,
dst, dstCapacity,
src, srcSize,
dict, dictSize,
- cctxParams);
+ &cctxParams);
}
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
@@ -2948,7 +3114,7 @@ size_t ZSTD_compress_advanced_internal(
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_CCtx_params params)
+ const ZSTD_CCtx_params* params)
{
DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
@@ -2964,9 +3130,9 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
- ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
assert(params.fParams.contentSizeFlag == 1);
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
}
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3001,8 +3167,11 @@ size_t ZSTD_estimateCDictSize_advanced(
ZSTD_dictLoadMethod_e dictLoadMethod)
{
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
- return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
- + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+ return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+ + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+ + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
@@ -3015,7 +3184,9 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
- return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
+ /* cdict may be in the workspace */
+ return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+ + ZSTD_cwksp_sizeof(&cdict->workspace);
}
static size_t ZSTD_initCDict_internal(
@@ -3029,28 +3200,29 @@ static size_t ZSTD_initCDict_internal(
assert(!ZSTD_checkCParams(cParams));
cdict->matchState.cParams = cParams;
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
- cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
- void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
- cdict->dictBuffer = internalBuffer;
- cdict->dictContent = internalBuffer;
+ void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
RETURN_ERROR_IF(!internalBuffer, memory_allocation);
+ cdict->dictContent = internalBuffer;
memcpy(internalBuffer, dictBuffer, dictSize);
}
cdict->dictContentSize = dictSize;
+ cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
/* Reset the state to no dictionary */
ZSTD_reset_compressedBlockState(&cdict->cBlockState);
- { void* const end = ZSTD_reset_matchState(&cdict->matchState,
- (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
- &cParams,
- ZSTDcrp_continue, ZSTD_resetTarget_CDict);
- assert(end == (char*)cdict->workspace + cdict->workspaceSize);
- (void)end;
- }
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &cdict->matchState,
+ &cdict->workspace,
+ &cParams,
+ ZSTDcrp_makeClean,
+ ZSTDirp_reset,
+ ZSTD_resetTarget_CDict));
/* (Maybe) load the dictionary
- * Skips loading the dictionary if it is <= 8 bytes.
+ * Skips loading the dictionary if it is < 8 bytes.
*/
{ ZSTD_CCtx_params params;
memset(&params, 0, sizeof(params));
@@ -3058,9 +3230,9 @@ static size_t ZSTD_initCDict_internal(
params.fParams.contentSizeFlag = 1;
params.cParams = cParams;
{ size_t const dictID = ZSTD_compress_insertDictionary(
- &cdict->cBlockState, &cdict->matchState, &params,
- cdict->dictContent, cdict->dictContentSize,
- dictContentType, ZSTD_dtlm_full, cdict->workspace);
+ &cdict->cBlockState, &cdict->matchState, &cdict->workspace,
+ &params, cdict->dictContent, cdict->dictContentSize,
+ dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= (size_t)(U32)-1);
cdict->dictID = (U32)dictID;
@@ -3078,18 +3250,29 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
- { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
- size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+ { size_t const workspaceSize =
+ ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+ ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
void* const workspace = ZSTD_malloc(workspaceSize, customMem);
+ ZSTD_cwksp ws;
+ ZSTD_CDict* cdict;
- if (!cdict || !workspace) {
- ZSTD_free(cdict, customMem);
+ if (!workspace) {
ZSTD_free(workspace, customMem);
return NULL;
}
+
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+
+ cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+ assert(cdict != NULL);
+ ZSTD_cwksp_move(&cdict->workspace, &ws);
cdict->customMem = customMem;
- cdict->workspace = workspace;
- cdict->workspaceSize = workspaceSize;
+ cdict->compressionLevel = 0; /* signals advanced API usage */
+
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize,
dictLoadMethod, dictContentType,
@@ -3105,9 +3288,12 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
- return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dct_auto,
- cParams, ZSTD_defaultCMem);
+ ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
+ cParams, ZSTD_defaultCMem);
+ if (cdict)
+ cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+ return cdict;
}
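/* [editor's illustration -- not part of the patch]
 * Typical use of the digested-dictionary API touched above; the level (3) is
 * arbitrary for the sketch. Because ZSTD_createCDict() now records the
 * compression level, a later compression can re-derive parameters from the
 * actual source size when that is more appropriate than the CDict's own. */
static size_t CDict_usage_sketch_editorOnly(void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize,
                                            const void* dict, size_t dictSize)
{
    size_t cSize = ERROR(GENERIC);
    ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3 /* level */);
    ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
    if (cdict != NULL && cctx != NULL)
        cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
    ZSTD_freeCCtx(cctx);
    ZSTD_freeCDict(cdict);
    return cSize;   /* compressed size, or an error code (check with ZSTD_isError) */
}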
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
@@ -3122,9 +3308,11 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = cdict->customMem;
- ZSTD_free(cdict->workspace, cMem);
- ZSTD_free(cdict->dictBuffer, cMem);
- ZSTD_free(cdict, cMem);
+ int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+ ZSTD_cwksp_free(&cdict->workspace, cMem);
+ if (!cdictInWorkspace) {
+ ZSTD_free(cdict, cMem);
+ }
return 0;
}
}
@@ -3150,28 +3338,30 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
ZSTD_compressionParameters cParams)
{
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
- size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
- + HUF_WORKSPACE_SIZE + matchStateSize;
- ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
- void* ptr;
+ size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+ + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+ + matchStateSize;
+ ZSTD_CDict* cdict;
+
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
+
+ {
+ ZSTD_cwksp ws;
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+ cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+ if (cdict == NULL) return NULL;
+ ZSTD_cwksp_move(&cdict->workspace, &ws);
+ }
+
DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
(unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
if (workspaceSize < neededSize) return NULL;
- if (dictLoadMethod == ZSTD_dlm_byCopy) {
- memcpy(cdict+1, dict, dictSize);
- dict = cdict+1;
- ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
- } else {
- ptr = cdict+1;
- }
- cdict->workspace = ptr;
- cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
-
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
- ZSTD_dlm_byRef, dictContentType,
+ dictLoadMethod, dictContentType,
cParams) ))
return NULL;
@@ -3193,7 +3383,15 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
{ ZSTD_CCtx_params params = cctx->requestedParams;
- params.cParams = ZSTD_getCParamsFromCDict(cdict);
+ params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+ || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+ || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+ || cdict->compressionLevel == 0 )
+ && (params.attachDictPref != ZSTD_dictForceLoad) ?
+ ZSTD_getCParamsFromCDict(cdict)
+ : ZSTD_getCParams(cdict->compressionLevel,
+ pledgedSrcSize,
+ cdict->dictContentSize);
/* Increase window log to fit the entire dictionary and source if the
* source size is known. Limit the increase to 19, which is the
* window log for compression level 1 with the largest source size.
@@ -3207,7 +3405,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
return ZSTD_compressBegin_internal(cctx,
NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
cdict,
- params, pledgedSrcSize,
+ &params, pledgedSrcSize,
ZSTDb_not_buffered);
}
}
@@ -3298,7 +3496,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, dictContentType, ZSTD_dtlm_fast,
cdict,
- params, pledgedSrcSize,
+ &params, pledgedSrcSize,
ZSTDb_buffered) );
cctx->inToCompress = 0;
@@ -3332,13 +3530,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
* Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
+ const ZSTD_CCtx_params* params,
+ unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_initCStream_internal");
FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
- assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
- zcs->requestedParams = params;
+ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+ zcs->requestedParams = *params;
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
if (dict) {
FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
@@ -3377,7 +3576,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
/* ZSTD_initCStream_advanced() :
* pledgedSrcSize must be exact.
* if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pss)
@@ -3391,7 +3590,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
- zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
+ zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params);
FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
return 0;
}
@@ -3641,7 +3840,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
if (cctx->mtctx == NULL) {
DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
params.nbWorkers);
- cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
+ cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
}
/* mt compression */
@@ -3769,8 +3968,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
- { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
- { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
+ { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
+ { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
{ 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
@@ -3794,8 +3993,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
- { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */
- { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */
+ { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
+ { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
@@ -3821,8 +4020,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
- { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */
- { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */
+ { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
+ { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
@@ -3847,7 +4046,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
- { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */
+ { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
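The init paths shown above (ZSTD_initCStream_internal, ZSTD_initCStream_advanced) now take ZSTD_CCtx_params by const pointer and simply forward to the parameter-setting API. A minimal sketch of the equivalent public 1.4.4 streaming flow is given below; it is an illustration, not part of the imported patch, and the helper name is invented for the example.

#include <stddef.h>
#include <zstd.h>   /* stable API: ZSTD_CCtx_setParameter, ZSTD_compressStream2, ... */

/* Sketch only: one-shot streaming compression driven through the parameter API,
 * equivalent to the deprecated ZSTD_initCStream_advanced() path above. */
static size_t sketch_stream_compress(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict, size_t dictSize,
                                     int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    size_t ret;
    if (cctx == NULL) return (size_t)-1;   /* generic error code; ZSTD_isError() reports it */

    ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
    if (!ZSTD_isError(ret)) ret = ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
    if (!ZSTD_isError(ret) && dict != NULL) ret = ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);
    if (!ZSTD_isError(ret)) {
        /* ZSTD_e_end flushes everything and writes the frame epilogue in one call,
         * provided dstCapacity is large enough (e.g. ZSTD_compressBound(srcSize)). */
        size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
        ret = ZSTD_isError(remaining) ? remaining
            : (remaining != 0 ? (size_t)-1 /* dst too small */ : out.pos);
    }
    ZSTD_freeCCtx(cctx);
    return ret;   /* compressed size on success, error code otherwise */
}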
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 10b59d3907a6..14036f873f53 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -19,6 +19,7 @@
* Dependencies
***************************************/
#include "zstd_internal.h"
+#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
@@ -134,9 +135,15 @@ typedef struct {
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
- U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
+ U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
+ * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+ * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+ * When dict referential is copied into active context (i.e. not attached),
+ * loadedDictEnd == dictSize, since referential starts from zero.
+ */
U32 nextToUpdate; /* index from which to continue table update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
@@ -186,6 +193,13 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
+typedef struct {
+ int collectSequences;
+ ZSTD_Sequence* seqStart;
+ size_t seqIndex;
+ size_t maxSequences;
+} SeqCollector;
+
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
@@ -197,6 +211,9 @@ struct ZSTD_CCtx_params_s {
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
+ int srcSizeHint; /* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -222,9 +239,7 @@ struct ZSTD_CCtx_s {
ZSTD_CCtx_params appliedParams;
U32 dictID;
- int workSpaceOversizedDuration;
- void* workSpace;
- size_t workSpaceSize;
+ ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
@@ -232,6 +247,8 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
+ SeqCollector seqCollector;
+ int isFirstBlock;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@@ -331,26 +348,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
return (srcSize >> minlog) + 2;
}
+/*! ZSTD_safecopyLiterals() :
+ * memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ * large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+ assert(iend > ilimit_w);
+ if (ip <= ilimit_w) {
+ ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+ op += ilimit_w - ip;
+ ip = ilimit_w;
+ }
+ while (ip < iend) *op++ = *ip++;
+}
+
/*! ZSTD_storeSeq() :
- * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* `mlBase` : matchLength - MINMATCH
+ * Allowed to overread literals up to litLimit.
*/
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
+ BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+ BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+ pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
+ assert(literals + litLength <= litLimit);
+ if (litEnd <= litLimit_w) {
+ /* Common case we can use wildcopy.
+ * First copy 16 bytes, because literals are likely short.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(seqStorePtr->lit, literals);
+ if (litLength > 16) {
+ ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+ }
+ } else {
+ ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+ }
seqStorePtr->lit += litLength;
/* literal Length */
@@ -362,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
- seqStorePtr->sequences[0].offset = offsetCode + 1;
+ seqStorePtr->sequences[0].offset = offCode + 1;
/* match Length */
if (mlBase>0xFFFF) {
@@ -763,24 +811,37 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
- * when input progresses beyond window size. */
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ * loadedDictEnd uses same referential as window->base
+ * maxDist is the window size */
MEM_STATIC void
-ZSTD_checkDictValidity(ZSTD_window_t* window,
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
- U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
- U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
- DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
- (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
-
- if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
- /* On reaching window size, dictionaries are invalidated */
- if (loadedDictEndPtr) *loadedDictEndPtr = 0;
- if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
- }
+ assert(loadedDictEndPtr != NULL);
+ assert(dictMatchStatePtr != NULL);
+ { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+ U32 const loadedDictEnd = *loadedDictEndPtr;
+ DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+ assert(blockEndIdx >= loadedDictEnd);
+
+ if (blockEndIdx > loadedDictEnd + maxDist) {
+ /* On reaching window size, dictionaries are invalidated.
+ * For simplification, if window size is reached anywhere within next block,
+ * the dictionary is invalidated for the full block.
+ */
+ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+ *loadedDictEndPtr = 0;
+ *dictMatchStatePtr = NULL;
+ } else {
+ if (*loadedDictEndPtr != 0) {
+ DEBUGLOG(6, "dictionary considered valid for current block");
+ } } }
}
/**
@@ -822,6 +883,17 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+ U32 const maxDistance = 1U << windowLog;
+ U32 const lowestValid = ms->window.lowLimit;
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+ return matchLowest;
+}
+
+
/* debug functions */
#if (DEBUGLEVEL>=2)
@@ -880,7 +952,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+ const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
@@ -895,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
@@ -904,7 +976,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_CCtx_params params);
+ const ZSTD_CCtx_params* params);
/* ZSTD_writeLastEmptyBlock() :
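The revised ZSTD_storeSeq() contract above (literals may be over-read by up to WILDCOPY_OVERLENGTH bytes, but never past litLimit) is why every call site in the files that follow now passes iend as an extra argument. A hedged sketch of a caller, mirroring those call sites; the helper name is illustrative and not part of the patch.

/* Illustration only: recording one regular match and one repcode match with the
 * 1.4.4 signature. `anchor` marks the first unstored literal, `ip` the match
 * position, `iend` the end of the input block (the literal over-read limit). */
static void sketch_store_match(seqStore_t* seqStore,
                               const BYTE* anchor, const BYTE* ip, const BYTE* iend,
                               U32 offset, size_t matchLength, size_t repLength)
{
    /* regular match: offCode = offset + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repcodes) */
    ZSTD_storeSeq(seqStore, (size_t)(ip - anchor), anchor, iend,
                  offset + ZSTD_REP_MOVE, matchLength - MINMATCH);
    /* repcode match with no preceding literals: litLength = 0, offCode = 0 selects rep[0] */
    ZSTD_storeSeq(seqStore, 0 /*litLen*/, ip + matchLength, iend, 0 /*offCode*/,
                  repLength - MINMATCH);
}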
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index eb3e5a44bc0c..6c133311821e 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
@@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
- cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
- : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+ cLitSize = singleStream ?
+ HUF_compress1X_repeat(
+ ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+ 255, 11, entropyWorkspace, entropyWorkspaceSize,
+ (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+ HUF_compress4X_repeat(
+ ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+ 255, 11, entropyWorkspace, entropyWorkspaceSize,
+ (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index 7adbecc0bebb..97273d7cfdb5 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c
index 3c3deae08cb1..0ff7a26823b0 100644
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
- void* workspace, size_t workspaceSize)
+ void* entropyWorkspace, size_t entropyWorkspaceSize)
{
BYTE* op = (BYTE*)dst;
const BYTE* const oend = op + dstCapacity;
@@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
- FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
+ FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
@@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
- FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
+ FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
return NCountSize;
}
}
diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h
index f5234d94c81d..57e8e367b093 100644
--- a/lib/compress/zstd_compress_sequences.h
+++ b/lib/compress/zstd_compress_sequences.h
@@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
- void* workspace, size_t workspaceSize);
+ void* entropyWorkspace, size_t entropyWorkspaceSize);
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
new file mode 100644
index 000000000000..fc9765bd3f37
--- /dev/null
+++ b/lib/compress/zstd_cwksp.h
@@ -0,0 +1,535 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "zstd_internal.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Constants
+***************************************/
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when the workspace is continuously too large
+ * for at least this number of consecutive times,
+ * the context's memory usage is considered wasteful,
+ * because it is sized for a worst-case scenario that rarely happens.
+ * In that case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+* Structures
+***************************************/
+typedef enum {
+ ZSTD_cwksp_alloc_objects,
+ ZSTD_cwksp_alloc_buffers,
+ ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/**
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ * - The static objects need to be cleared once and can then be trivially
+ * reused for each compression.
+ *
+ * - Various buffers don't need to be initialized at all--they are always
+ * written into before they're read.
+ *
+ * - The matchstate tables have a unique requirement that they don't need
+ * their memory to be totally cleared, but they do need the memory to have
+ * some bound, i.e., a guarantee that all values in the memory they've been
+ * allocated are less than some maximum value (which is the starting value
+ * for the indices that they will then use for compression). When this
+ * guarantee is provided to them, they can use the memory without any setup
+ * work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ * compressions without having to perform any expensive reallocation or
+ * reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ * multiple compressions **even when the compression parameters change** and
+ * we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [ ... workspace ... ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ * so that literally everything fits in a single buffer. Note: if present,
+ * this must be the first object in the workspace, since ZSTD_free{CCtx,
+ * CDict}() rely on a pointer comparison to see whether one or two frees are
+ * required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ * nonetheless "dynamically" allocated in the workspace so that we can
+ * control how they're initialized separately from the broader ZSTD_CCtx.
+ * Examples:
+ * - Entropy Workspace
+ * - 2 x ZSTD_compressedBlockState_t
+ * - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ * chain tables, binary trees) that all respect a common format: they are
+ * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ * Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ * alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ * any alignment or initialization before they're used. This means they can
+ * be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+ void* workspace;
+ void* workspaceEnd;
+
+ void* objectEnd;
+ void* tableEnd;
+ void* tableValidEnd;
+ void* allocStart;
+
+ int allocFailed;
+ int workspaceOversizedDuration;
+ ZSTD_cwksp_alloc_phase_e phase;
+} ZSTD_cwksp;
+
+/*-*************************************
+* Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+ (void)ws;
+ assert(ws->workspace <= ws->objectEnd);
+ assert(ws->objectEnd <= ws->tableEnd);
+ assert(ws->objectEnd <= ws->tableValidEnd);
+ assert(ws->tableEnd <= ws->allocStart);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/**
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+ size_t const mask = align - 1;
+ assert((align & mask) == 0);
+ return (size + mask) & ~mask;
+}
+
+/**
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#else
+ return size;
+#endif
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+ ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+ assert(phase >= ws->phase);
+ if (phase > ws->phase) {
+ if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+ phase >= ZSTD_cwksp_alloc_buffers) {
+ ws->tableValidEnd = ws->objectEnd;
+ }
+ if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+ phase >= ZSTD_cwksp_alloc_aligned) {
+ /* If unaligned allocations down from a too-large top have left us
+ * unaligned, we need to realign our alloc ptr. Technically, this
+ * can consume space that is unaccounted for in the neededSpace
+ * calculation. However, I believe this can only happen when the
+ * workspace is too large, and specifically when it is too large
+ * by a larger margin than the space that will be consumed. */
+ /* TODO: cleaner, compiler warning friendly way to do this??? */
+ ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+ if (ws->allocStart < ws->tableValidEnd) {
+ ws->tableValidEnd = ws->allocStart;
+ }
+ }
+ ws->phase = phase;
+ }
+}
+
+/**
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/**
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+ ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+ void* alloc;
+ void* bottom = ws->tableEnd;
+ ZSTD_cwksp_internal_advance_phase(ws, phase);
+ alloc = (BYTE *)ws->allocStart - bytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* over-reserve space */
+ alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+ DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+ alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ assert(alloc >= bottom);
+ if (alloc < bottom) {
+ DEBUGLOG(4, "cwksp: alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ if (alloc < ws->tableValidEnd) {
+ ws->tableValidEnd = alloc;
+ }
+ ws->allocStart = alloc;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+ * either side. */
+ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+/**
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+ return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/**
+ * Reserves and returns memory sized and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+ assert((bytes & (sizeof(U32)-1)) == 0);
+ return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/**
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+ void* alloc = ws->tableEnd;
+ void* end = (BYTE *)alloc + bytes;
+ void* top = ws->allocStart;
+
+ DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+ alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+ assert((bytes & (sizeof(U32)-1)) == 0);
+ ZSTD_cwksp_internal_advance_phase(ws, phase);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ assert(end <= top);
+ if (end > top) {
+ DEBUGLOG(4, "cwksp: table alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ ws->tableEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+/**
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+ size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+ void* alloc = ws->objectEnd;
+ void* end = (BYTE*)alloc + roundedBytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* over-reserve space */
+ end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+ DEBUGLOG(5,
+ "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+ alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+ assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+ assert((bytes & (sizeof(void*)-1)) == 0);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ /* we must be in the first phase, no advance is possible */
+ if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+ DEBUGLOG(4, "cwksp: object alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ ws->objectEnd = end;
+ ws->tableEnd = end;
+ ws->tableValidEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+ * either side. */
+ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the table re-use logic is sound, and that we don't
+ * access table space that we haven't cleaned, we re-"poison" the table
+ * space every time we mark it dirty. */
+ {
+ size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+ assert(__msan_test_shadow(ws->objectEnd, size) == -1);
+ __msan_poison(ws->objectEnd, size);
+ }
+#endif
+
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ ws->tableValidEnd = ws->objectEnd;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ if (ws->tableValidEnd < ws->tableEnd) {
+ ws->tableValidEnd = ws->tableEnd;
+ }
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ if (ws->tableValidEnd < ws->tableEnd) {
+ memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+ }
+ ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/**
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: clearing tables!");
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ {
+ size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+ __asan_poison_memory_region(ws->objectEnd, size);
+ }
+#endif
+
+ ws->tableEnd = ws->objectEnd;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: clearing!");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the context re-use logic is sound, and that we don't
+ * access stuff that this compression hasn't initialized, we re-"poison"
+ * the workspace (or at least the non-static, non-table parts of it)
+ * every time we start a new compression. */
+ {
+ size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
+ __msan_poison(ws->tableValidEnd, size);
+ }
+#endif
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ {
+ size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
+ __asan_poison_memory_region(ws->objectEnd, size);
+ }
+#endif
+
+ ws->tableEnd = ws->objectEnd;
+ ws->allocStart = ws->workspaceEnd;
+ ws->allocFailed = 0;
+ if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+ ws->phase = ZSTD_cwksp_alloc_buffers;
+ }
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+ DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+ assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+ ws->workspace = start;
+ ws->workspaceEnd = (BYTE*)start + size;
+ ws->objectEnd = ws->workspace;
+ ws->tableValidEnd = ws->objectEnd;
+ ws->phase = ZSTD_cwksp_alloc_objects;
+ ZSTD_cwksp_clear(ws);
+ ws->workspaceOversizedDuration = 0;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+ void* workspace = ZSTD_malloc(size, customMem);
+ DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+ RETURN_ERROR_IF(workspace == NULL, memory_allocation);
+ ZSTD_cwksp_init(ws, workspace, size);
+ return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+ void *ptr = ws->workspace;
+ DEBUGLOG(4, "cwksp: freeing workspace");
+ memset(ws, 0, sizeof(ZSTD_cwksp));
+ ZSTD_free(ptr, customMem);
+}
+
+/**
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+ *dst = *src;
+ memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+ return ws->allocFailed;
+}
+
+/*-*************************************
+* Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_check_available(
+ ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+ && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+ ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+ ws->workspaceOversizedDuration++;
+ } else {
+ ws->workspaceOversizedDuration = 0;
+ }
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CWKSP_H */
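Taken together, the functions above give ZSTD_CCtx a single arena in place of the old workSpace/workSpaceSize pair. Below is a minimal lifecycle sketch using only the API defined in this header; the sizes, the helper name, and the custom allocator value are placeholders for illustration, not library code.

/* Sketch: create a self-contained workspace and carve it up in the mandatory
 * phase order: objects -> buffers -> aligned -> tables. */
static size_t sketch_cwksp_lifecycle(void)
{
    ZSTD_customMem const cmem = { NULL, NULL, NULL };   /* default malloc/free */
    ZSTD_cwksp ws;
    FORWARD_IF_ERROR( ZSTD_cwksp_create(&ws, 1 << 20, cmem) );  /* 1 MB arena */

    {   /* 1. Objects: fixed-size structures, sizeof(void*)-aligned */
        void* const entropy = ZSTD_cwksp_reserve_object(&ws, ZSTD_cwksp_align(512, sizeof(void*)));
        /* 2. Buffers: no alignment or initialization requirements */
        BYTE* const litBuffer = ZSTD_cwksp_reserve_buffer(&ws, 128 * 1024);
        /* 3. Aligned: U32-aligned, but not index-bounded like tables */
        void* const optSpace = ZSTD_cwksp_reserve_aligned(&ws, 1024 * sizeof(U32));
        /* 4. Tables: U32 arrays whose values stay bounded, so they can be reused
         *    across compressions without memset() (see mark_tables_dirty/clean above) */
        U32* const hashTable = (U32*)ZSTD_cwksp_reserve_table(&ws, (1 << 17) * sizeof(U32));
        (void)entropy; (void)litBuffer; (void)optSpace; (void)hashTable;
    }
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(&ws), memory_allocation);

    ZSTD_cwksp_clear(&ws);        /* invalidate buffers/aligned/tables, keep objects */
    ZSTD_cwksp_free(&ws, cmem);   /* release the arena */
    return 0;
}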
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index 5957255d9041..a661a48534da 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -65,6 +65,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
@@ -147,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -156,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -246,7 +247,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@@ -277,7 +278,7 @@ _match_stored:
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -296,7 +297,7 @@ _match_stored:
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -369,9 +370,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
- const U32 maxDistance = 1U << cParams->windowLog;
- const U32 lowestValid = ms->window.lowLimit;
- const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
@@ -412,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -423,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -448,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -480,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index a05b8a47f14e..6dbefee6b7fc 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -8,7 +8,7 @@
* You may select, at your option, one of the above-listed licenses.
*/
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
@@ -43,8 +43,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
}
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls)
@@ -71,10 +71,10 @@ size_t ZSTD_compressBlock_fast_generic(
U32 offsetSaved = 0;
/* init */
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
- {
- U32 const maxRep = (U32)(ip0 - prefixStart);
+ { U32 const maxRep = (U32)(ip0 - prefixStart);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
@@ -117,8 +117,7 @@ size_t ZSTD_compressBlock_fast_generic(
match0 = match1;
goto _offset;
}
- {
- size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+ { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
assert(step >= 2);
ip0 += step;
ip1 += step;
@@ -137,7 +136,7 @@ _offset: /* Requires: ip0, match0 */
_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
- ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
@@ -149,16 +148,15 @@ _match: /* Requires: ip0, match0, offcode */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
- while ( (ip0 <= ilimit)
- && ( (offset_2>0)
- & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
+ while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
+ && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ip1 = ip0 + 1;
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
}
@@ -178,8 +176,7 @@ size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls)
{
@@ -239,6 +236,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
@@ -263,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -283,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@@ -298,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@@ -323,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@@ -346,8 +344,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState != NULL);
switch(mls)
{
@@ -379,9 +376,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
- const U32 maxDistance = 1U << cParams->windowLog;
- const U32 validLow = ms->window.lowLimit;
- const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 dictLimit = ms->window.dictLimit;
@@ -392,6 +387,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
+
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
@@ -406,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const U32 repIndex = current + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- size_t mLength;
hashTable[h] = current; /* update hash table */
assert(offset_1 <= current +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
- const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+ size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+ ip += rLength;
+ anchor = ip;
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@@ -423,21 +421,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
- { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
- const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
- U32 offset;
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+ { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+ const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+ U32 const offset = current - matchIndex;
+ size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
- offset = current - matchIndex;
- offset_2 = offset_1;
- offset_1 = offset;
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ offset_2 = offset_1; offset_1 = offset; /* update offset history */
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ip += mLength;
+ anchor = ip;
} }
- /* found a match : store it */
- ip += mLength;
- anchor = ip;
-
if (ip <= ilimit) {
/* Fill Table */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@@ -446,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@@ -474,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
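Both fast-family files above (and the lazy/btree code below) now take their search floor from ZSTD_getLowestMatchIndex() instead of repeating the inline `endIndex - maxDistance` clamp. A stand-alone arithmetic sketch of what that helper computes follows; the function and the numbers are illustrative only.

/* Mirrors the logic of ZSTD_getLowestMatchIndex(): with no valid dictionary the
 * floor is clamped to the window; with a valid dictionary (loadedDictEnd != 0)
 * the floor stays at lowestValid so dictionary content remains reachable until
 * ZSTD_window_enforceMaxDist()/ZSTD_checkDictValidity() clears loadedDictEnd. */
static unsigned sketch_lowest_match_index(unsigned current, unsigned lowestValid,
                                          unsigned loadedDictEnd, unsigned windowLog)
{
    unsigned const maxDistance  = 1u << windowLog;
    unsigned const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
    return (loadedDictEnd != 0) ? lowestValid : withinWindow;
    /* e.g. windowLog=17 (maxDistance=131072), current=200000, lowestValid=0:
     *      no dictionary   -> 200000 - 131072 = 68928
     *      dictionary valid-> 0 (dictionary indices still searchable) */
}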
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 94d906c01f1c..9ad7e03b54ca 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -242,9 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
- U32 const maxDistance = 1U << cParams->windowLog;
- U32 const windowValid = ms->window.lowLimit;
- U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@@ -497,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
- const U32 lowValid = ms->window.lowLimit;
- const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
+ const U32 lowestValid = ms->window.lowLimit;
+ const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
@@ -619,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
/* *******************************
* Common parser - lazy strategy
*********************************/
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_generic(
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth,
+ const searchMethod_e searchMethod, const U32 depth,
ZSTD_dictMode_e const dictMode)
{
const BYTE* const istart = (const BYTE*)src;
@@ -640,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+ : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
+ : ZSTD_HcFindBestMatch_selectMLS);
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -806,7 +810,7 @@ size_t ZSTD_compressBlock_lazy_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -824,7 +828,7 @@ _storeSequence:
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue;
@@ -839,7 +843,7 @@ _storeSequence:
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -850,7 +854,7 @@ _storeSequence:
rep[1] = offset_2 ? offset_2 : savedOffset;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -858,56 +862,56 @@ size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
@@ -916,7 +920,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+ const searchMethod_e searchMethod, const U32 depth)
{
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
@@ -934,7 +938,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
U32 offset_1 = rep[0], offset_2 = rep[1];
@@ -1047,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -1062,7 +1066,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -1075,7 +1079,7 @@ _storeSequence:
rep[1] = offset_2;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -1083,7 +1087,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
size_t ZSTD_compressBlock_lazy_extDict(
@@ -1091,7 +1095,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1099,7 +1103,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1107,5 +1111,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
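
The hunks above replace the bare `U32 searchMethod` flag of the lazy matchers with a small `searchMethod_e` enum, so each call site now states whether it wants the hash-chain or the binary-tree search. A minimal standalone sketch of the same dispatch pattern; the function names and return values below are illustrative, not the library's internals:

    #include <stddef.h>
    #include <stdio.h>

    typedef enum { search_hashChain, search_binaryTree } searchMethod_e;

    /* Illustrative stand-ins for the two matcher families. */
    static size_t findBestMatch_hashChain(const void* ip)  { (void)ip; return 4; }
    static size_t findBestMatch_binaryTree(const void* ip) { (void)ip; return 8; }

    typedef size_t (*searchMax_f)(const void* ip);

    static searchMax_f selectSearch(searchMethod_e m)
    {
        return (m == search_binaryTree) ? findBestMatch_binaryTree
                                        : findBestMatch_hashChain;
    }

    int main(void)
    {
        searchMax_f const searchMax = selectSearch(search_binaryTree);
        printf("match length: %zu\n", searchMax(NULL));
        return 0;
    }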
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 3dcf86e6e8a3..c3312ad3e3d2 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
- size_t const ldmBucketSize =
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
}
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[i] = rep[i-1];
rep[0] = sequence.offset;
/* Store the sequence */
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
ip += sequence.matchLength;
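
ZSTD_ldm_getTableSize() now sizes the two LDM tables through ZSTD_cwksp_alloc_size(), so the estimate can account for any per-allocation overhead the new workspace allocator adds (redzones in sanitizer builds, for instance; in normal builds it is a pass-through). A hedged sketch of the shape of that computation, with a made-up overhead constant and 8 bytes assumed per LDM entry:

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical per-allocation overhead; the real rule lives in zstd_cwksp.h
     * and is nonzero only in sanitizer builds. */
    #define WKSP_ALLOC_OVERHEAD 0

    static size_t wksp_alloc_size(size_t size)
    {
        return size + WKSP_ALLOC_OVERHEAD;
    }

    /* Mirrors the shape of the new ZSTD_ldm_getTableSize() computation. */
    static size_t ldm_table_size(unsigned hashLog, unsigned bucketSizeLog, size_t entrySize)
    {
        size_t const hSize      = (size_t)1 << hashLog;
        size_t const bucketLog  = bucketSizeLog < hashLog ? bucketSizeLog : hashLog;
        size_t const bucketSize = (size_t)1 << (hashLog - bucketLog);
        return wksp_alloc_size(bucketSize) + wksp_alloc_size(hSize * entrySize);
    }

    int main(void)
    {
        printf("%zu\n", ldm_table_size(20, 3, 8));  /* 131072 + 8388608 = 8519680 */
        return 0;
    }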
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index e32e542e021a..2e50fca6ff53 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -552,7 +552,6 @@ U32 ZSTD_insertBtAndGetAllMatches (
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
- U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
@@ -569,8 +568,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
- U32 const windowValid = ms->window.lowLimit;
- U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
@@ -674,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
while (nbCompares-- && (matchIndex >= matchLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match;
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(current > matchIndex);
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
match = base + matchIndex;
+ if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
} else {
match = dictBase + matchIndex;
+ assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* prepare for match[matchLength] */
+ match = base + matchIndex; /* prepare for match[matchLength] read */
}
if (matchLength > bestLength) {
@@ -1098,7 +1098,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
- ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
anchor += advance;
ip = anchor;
} }
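
The open-coded windowLow computation in ZSTD_insertBtAndGetAllMatches() is replaced by the shared ZSTD_getLowestMatchIndex() helper, which clamps the lowest usable match index so matches never reach further back than 1 << windowLog (the real helper additionally keeps a loaded dictionary fully reachable). The clamping rule, sketched with plain integers and illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    /* Lowest usable match index: either the window's validity limit, or
     * current - maxDistance, whichever is nearer to current. */
    static uint32_t lowestMatchIndex(uint32_t current, uint32_t windowValid, unsigned windowLog)
    {
        uint32_t const maxDistance = 1u << windowLog;
        return (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
    }

    int main(void)
    {
        printf("%u\n", lowestMatchIndex(5u << 20, 100, 20)); /* clamped to current - 1 MiB */
        printf("%u\n", lowestMatchIndex(1000, 100, 20));     /* validity limit still applies */
        return 0;
    }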
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 9e537b884853..bc3062b5309b 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */
if (job->cdict) {
- size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
NULL, /*cdict*/
- jobParams, pledgedSrcSize);
+ &jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} }
@@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
unsigned jobID;
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+ /* Copy the mutex/cond out */
+ ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+ ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
- mtctx->jobs[jobID].dstBuff = g_nullBuffer;
- mtctx->jobs[jobID].cSize = 0;
+
+ /* Clear the job description, but keep the mutex/cond */
+ memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+ mtctx->jobs[jobID].job_mutex = mutex;
+ mtctx->jobs[jobID].job_cond = cond;
}
- memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1;
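
ZSTDMT_releaseAllJobResources() used to memset the whole job table, which also wiped each job's mutex and condition variable; the new loop saves those two members, clears the descriptor, and restores them so the synchronization objects stay initialized for reuse. The pattern, reduced to a standalone sketch with a hypothetical job struct and POSIX threads standing in for ZSTD_pthread_*:

    #include <pthread.h>
    #include <string.h>

    /* Hypothetical job descriptor: only the fields needed to show the pattern. */
    typedef struct {
        pthread_mutex_t job_mutex;
        pthread_cond_t  job_cond;
        void*  dstBuff;
        size_t cSize;
    } job_t;

    void resetJobKeepSync(job_t* job)
    {
        /* Copy the initialized synchronization objects out ... */
        pthread_mutex_t const mutex = job->job_mutex;
        pthread_cond_t  const cond  = job->job_cond;
        /* ... clear the whole job description ... */
        memset(job, 0, sizeof(*job));
        /* ... and put them back, ready for the next job on this slot. */
        job->job_mutex = mutex;
        job->job_cond  = cond;
    }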
@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* Sets parameters relevant to the compression job,
* initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
{
- ZSTD_CCtx_params jobParams = params;
+ ZSTD_CCtx_params jobParams = *params;
/* Clear parameters related to multithreading */
jobParams.forceWindow = 0;
jobParams.nbWorkers = 0;
@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{
unsigned jobLog;
- if (params.ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
- jobLog = MAX(21, params.cParams.chainLog + 4);
+ jobLog = MAX(21, params->cParams.chainLog + 4);
} else {
- jobLog = MAX(20, params.cParams.windowLog + 2);
+ jobLog = MAX(20, params->cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
return ovlog;
}
-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
{
- int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
- int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+ int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8);
- if (params.ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead.
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
- ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+ ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
- DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+ DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
}
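
The overlap computation above works from an "overlap reciprocal log": overlapRLog = 9 - overlapLog, and in the non-LDM path the overlap is windowSize >> overlapRLog. For example, with windowLog = 27 and an explicit overlapLog = 6, overlapRLog = 3 and the overlap is 2^(27-3) = 16 MiB. A small arithmetic sketch of that path (the real code first resolves a default overlapLog from the strategy via ZSTDMT_overlapLog()):

    #include <stddef.h>
    #include <stdio.h>

    /* Non-LDM path of the overlap-size rule, with illustrative parameters. */
    static size_t overlapSize(int overlapLog, unsigned windowLog)
    {
        int const overlapRLog = 9 - overlapLog;
        int const ovLog = (overlapRLog >= 8) ? 0 : (int)windowLog - overlapRLog;
        return (ovLog == 0) ? 0 : (size_t)1 << ovLog;
    }

    int main(void)
    {
        printf("%zu\n", overlapSize(6, 27));  /* 16777216 bytes = 16 MiB */
        printf("%zu\n", overlapSize(1, 27));  /* overlapRLog = 8 -> no overlap */
        return 0;
    }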
static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
{
assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params)
{
- ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
- size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
- unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
}
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
assert(singleThreadParams.nbWorkers == 0);
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict,
- singleThreadParams, pledgedSrcSize);
+ &singleThreadParams, pledgedSrcSize);
}
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
mtctx->cdict = cdict;
}
- mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetSectionSize = params.jobSize;
if (mtctx->targetSectionSize == 0) {
- mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
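
A recurring change in this file is that ZSTDMT_initJobCCtxParams(), ZSTDMT_computeTargetJobLog(), ZSTDMT_computeOverlapSize() and ZSTDMT_computeNbJobs() now take `const ZSTD_CCtx_params*` rather than a by-value struct, avoiding a copy of a sizeable parameter block on every call and making accidental mutation of the caller's parameters harder. The shape of the change, with a hypothetical params struct:

    /* Hypothetical parameter block standing in for ZSTD_CCtx_params. */
    typedef struct {
        int compressionLevel;
        int nbWorkers;
        int jobSize;
        int forceWindow;
    } params_t;

    /* Before: params_t f(params_t params);          copied on every call   */
    /* After : params_t f(const params_t* params);   read through a pointer */
    params_t initJobParams(const params_t* params)
    {
        params_t jobParams = *params;   /* one explicit copy, only where needed */
        jobParams.forceWindow = 0;      /* clear multithreading-only fields */
        jobParams.nbWorkers = 0;
        jobParams.jobSize = 0;
        return jobParams;
    }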
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 3f8bd2973207..bb2d0a96bc1e 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -61,7 +61,9 @@
* Error Management
****************************************************************/
#define HUF_isError ERR_isError
+#ifndef CHECK_F
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif
/* **************************************************************
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index cf99a137725a..dd4591b7be9a 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{
- size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
- ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
- ZSTD_FRAMEHEADERSIZE_PREFIX;
- ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+ size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
/* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
return startingInputLength;
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+ while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -574,9 +571,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
}
/** ZSTD_insertBlock() :
- insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
{
+ DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
ZSTD_checkContinuity(dctx, blockStart);
dctx->previousDstEnd = (const char*)blockStart + blockSize;
return blockSize;
@@ -628,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
/* check */
RETURN_ERROR_IF(
- remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
+ remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
srcSize_wrong);
/* Frame Header */
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+ ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
srcSize_wrong);
@@ -713,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
dictSize = ZSTD_DDict_dictSize(ddict);
}
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+ while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
@@ -1097,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
- RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
+ RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
dictionary_corrupted);
entropy->rep[i] = rep;
} }
@@ -1266,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
{
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
ZSTD_clearDict(dctx);
- if (dict && dictSize >= 8) {
+ if (dict && dictSize != 0) {
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
dctx->ddict = dctx->ddictLocal;
@@ -1299,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
/* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ return ZSTD_startingInputLength(zds->format);
}
/* note : this variant can't fail */
@@ -1323,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ return ZSTD_startingInputLength(dctx->format);
}
/* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ return ZSTD_startingInputLength(dctx->format);
}
@@ -1563,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput;
}
input->pos = input->size;
- return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
+ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
}
assert(ip != NULL);
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 24f4859c56c9..767e5f9a0b08 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
{
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
{ const BYTE* const istart = (const BYTE*) src;
@@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
switch(litEncType)
{
case set_repeat:
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
/* fall-through */
@@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
/* 2 - 2 - 18 - 18 */
lhSize = 5;
litSize = (lhc >> 4) & 0x3FFFF;
- litCSize = (lhc >> 22) + (istart[4] << 10);
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
break;
}
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
symbolNext[s] = 1;
} else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
- symbolNext[s] = normalizedCounter[s];
+ assert(normalizedCounter[s]>=0);
+ symbolNext[s] = (U16)normalizedCounter[s];
} } }
memcpy(dt, &DTableH, sizeof(DTableH));
}
@@ -570,38 +573,118 @@ typedef struct {
size_t pos;
} seqState_t;
+/*! ZSTD_overlapCopy8() :
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ * Precondition: *ip <= *op
+ * Postcondition: *op - *ip >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+ assert(*ip <= *op);
+ if (offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[offset];
+ (*op)[0] = (*ip)[0];
+ (*op)[1] = (*ip)[1];
+ (*op)[2] = (*ip)[2];
+ (*op)[3] = (*ip)[3];
+ *ip += dec32table[offset];
+ ZSTD_copy4(*op+4, *ip);
+ *ip -= sub2;
+ } else {
+ ZSTD_copy8(*op, *ip);
+ }
+ *ip += 8;
+ *op += 8;
+ assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
+ * should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ * The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+ ptrdiff_t const diff = op - ip;
+ BYTE* const oend = op + length;
-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- * since in most cases it needs at least 8 bytes to look for a match.
- * but it's allowed by the specification. */
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+ if (length < 8) {
+ /* Handle short lengths. */
+ while (op < oend) *op++ = *ip++;
+ return;
+ }
+ if (ovtype == ZSTD_overlap_src_before_dst) {
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+ assert(length >= 8);
+ ZSTD_overlapCopy8(&op, &ip, diff);
+ assert(op - ip >= 8);
+ assert(op <= oend);
+ }
+
+ if (oend <= oend_w) {
+ /* No risk of overwrite. */
+ ZSTD_wildcopy(op, ip, length, ovtype);
+ return;
+ }
+ if (op <= oend_w) {
+ /* Wildcopy until we get close to the end. */
+ assert(oend > oend_w);
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+ ip += oend_w - op;
+ op = oend_w;
+ }
+ /* Handle the leftovers. */
+ while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+ /* bounds checks */
+ assert(oLitEnd < oMatchEnd);
+ RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
/* copy literals */
- while (op < oLitEnd) *op++ = *(*litPtr)++;
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+ op = oLitEnd;
+ *litPtr = iLitEnd;
/* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base)) {
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
- match = dictEnd - (base-match);
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+ match = dictEnd - (prefixStart-match);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
return sequenceLength;
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
memmove(oLitEnd, match, length1);
op = oLitEnd + length1;
sequence.matchLength -= length1;
- match = base;
+ match = prefixStart;
} }
- while (op < oMatchEnd) *op++ = *match++;
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
return sequenceLength;
}
-
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@@ -631,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
- /* copy Literals */
- if (sequence.litLength > 8)
- ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- else
- ZSTD_copy8(op, *litPtr);
+ /* Errors and uncommon cases handled here. */
+ assert(oLitEnd < oMatchEnd);
+ if (iLitEnd > litLimit || oMatchEnd > oend_w)
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+ /* Copy Literals:
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+ * We likely don't need the full 32-byte wildcopy.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(op, (*litPtr));
+ if (sequence.litLength > 16) {
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+ }
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
- /* copy Match */
+ /* Copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix -> go into extDict */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -659,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
} }
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
+ /* Match within prefix of 1 or more bytes */
+ assert(op <= oMatchEnd);
+ assert(oMatchEnd <= oend_w);
+ assert(match >= prefixStart);
+ assert(sequence.matchLength >= 1);
+
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+ * without overlap checking.
+ */
+ if (sequence.offset >= WILDCOPY_VECLEN) {
+ /* We bet on a full wildcopy for matches, since we expect matches to be
+ * longer than literals (in general). In silesia, ~10% of matches are longer
+ * than 16 bytes.
+ */
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+ return sequenceLength;
}
- return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = sequence.match;
+ assert(sequence.offset < WILDCOPY_VECLEN);
- /* check */
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
- /* copy Literals */
- if (sequence.litLength > 8)
- ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- else
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
+ /* Copy 8 bytes and spread the offset to be >= 8. */
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
- /* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
- } }
- assert(op <= oend_w);
- assert(sequence.matchLength >= MINMATCH);
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
+ if (sequence.matchLength > 8) {
+ assert(op < oMatchEnd);
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
}
return sequenceLength;
}
@@ -1095,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1106,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
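
The rewritten match copy centralizes small-offset handling in ZSTD_overlapCopy8(): for offset < 8 it copies 4 bytes, advances the source by dec32table[offset], copies 4 more, then steps the source back by dec64table[offset]; afterwards the distance between op and the source is at least 8, so a plain wildcopy is safe. A standalone sketch of that spreading step, with the tables copied from the hunk above, memcpy standing in for ZSTD_copy4/ZSTD_copy8, and illustrative buffer contents:

    #include <stdio.h>
    #include <string.h>

    static void overlapCopy8(unsigned char** op, const unsigned char** ip, size_t offset)
    {
        if (offset < 8) {
            static const unsigned dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
            static const int      dec64table[] = { 8, 8, 8, 7, 8, 9, 10, 11 }; /* subtracted */
            int const sub2 = dec64table[offset];
            (*op)[0] = (*ip)[0];
            (*op)[1] = (*ip)[1];
            (*op)[2] = (*ip)[2];
            (*op)[3] = (*ip)[3];
            *ip += dec32table[offset];
            memcpy(*op + 4, *ip, 4);
            *ip -= sub2;
        } else {
            memcpy(*op, *ip, 8);
        }
        *ip += 8;
        *op += 8;
    }

    int main(void)
    {
        unsigned char buf[32] = "abcdefgh";      /* 8 bytes already decoded */
        unsigned char* op = buf + 8;             /* match output starts here */
        const unsigned char* ip = buf + 7;       /* offset 1: repeat the last byte */
        overlapCopy8(&op, &ip, 1);
        printf("%s\n", (const char*)buf);        /* abcdefghhhhhhhhh */
        printf("op - ip = %td (now >= 8, wildcopy-safe)\n", op - ip);
        return 0;
    }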
diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h
index a93115da4a1c..04183eab98f6 100644
--- a/lib/deprecated/zbuff.h
+++ b/lib/deprecated/zbuff.h
@@ -36,16 +36,17 @@ extern "C" {
*****************************************************************/
/* Deprecation warnings */
/* Should these warnings be a problem,
- it is generally possible to disable them,
- typically with -Wno-deprecated-declarations for gcc
- or _CRT_SECURE_NO_WARNINGS in Visual.
- Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
+ * it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
+ */
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
-# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
+# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 621996759b6a..2e129dd9183a 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
"compared to the source size %u! "
"size(source)/size(dictionary) = %f, but it should be >= "
"10! This may lead to a subpar dictionary! We recommend "
- "training on sources at least 10x, and up to 100x the "
- "size of the dictionary!\n", (U32)maxDictSize,
+ "training on sources at least 10x, and preferably 100x "
+ "the size of the dictionary! \n", (U32)maxDictSize,
(U32)nbDmers, ratio);
}
@@ -919,13 +919,12 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
}
}
/* Save the dictionary, parameters, and size */
- if (!dict) {
- return;
+ if (dict) {
+ memcpy(best->dict, dict, dictSize);
+ best->dictSize = dictSize;
+ best->parameters = parameters;
+ best->compressedSize = compressedSize;
}
- memcpy(best->dict, dict, dictSize);
- best->dictSize = dictSize;
- best->parameters = parameters;
- best->compressedSize = compressedSize;
}
if (liveJobs == 0) {
ZSTD_pthread_cond_broadcast(&best->cond);
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index ee21ee1a9d34..4a263d822d85 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U;
unsigned acc = prime1;
- size_t p=0;;
+ size_t p=0;
for (p=0; p<length; p++) {
acc *= prime2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index ae8cba2a3fa2..8112527f058b 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -346,7 +346,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
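
Every legacy highbit32() variant switches from `31 - __builtin_clz(val)` to `__builtin_clz(val) ^ 31`. For val != 0 the two are identical, since clz(val) lies in [0, 31] and for x in that range 31 - x equals 31 ^ x, but the xor form typically folds better with the bsr-based clz lowering on x86. A quick check, assuming a GCC/Clang toolchain for the builtin:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned v;
        for (v = 1; v != 0; v <<= 1) {   /* every single-bit 32-bit value */
            unsigned const a = 31 - (unsigned)__builtin_clz(v);
            unsigned const b = (unsigned)__builtin_clz(v) ^ 31;
            assert(a == b);
        }
        printf("highbit32(0x%x) = %u\n", 0x1000u, __builtin_clz(0x1000u) ^ 31);  /* 12 */
        return 0;
    }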
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 793df6024bbd..c8783799b9c8 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -353,7 +353,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index 7a0e7c9b69fb..162bd63024d7 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -356,7 +356,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@@ -2530,6 +2530,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 645a6e313c05..4dec308168ef 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -627,7 +627,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
@@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
{
/* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src;
+ size_t litCSize;
+
+ if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
/* Decode literals sub-block */
- size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+ litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize;
srcSize -= litCSize;
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index e347b00dcf2e..570e0ff86ee8 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -756,7 +756,7 @@ MEM_STATIC unsigned BITv05_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index f907a3a71226..2a08e8dee842 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -860,7 +860,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index a83ddc9a68b0..a2eeff808e0e 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -530,7 +530,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
index 1d07b91f25ab..e7880be475d0 100644
--- a/lib/libzstd.pc.in
+++ b/lib/libzstd.pc.in
@@ -3,8 +3,9 @@
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
prefix=@PREFIX@
-libdir=@LIBDIR@
-includedir=@INCLUDEDIR@
+exec_prefix=${prefix}
+includedir=${prefix}/include
+libdir=${exec_prefix}/lib
Name: zstd
Description: fast lossless compression algorithm library
diff --git a/lib/zstd.h b/lib/zstd.h
index 4a1f81610c21..72080ea87e84 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -15,6 +15,7 @@ extern "C" {
#define ZSTD_H_235446
/* ====== Dependency ======*/
+#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
@@ -71,7 +72,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
-#define ZSTD_VERSION_RELEASE 2
+#define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@@ -196,9 +197,13 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
/*! ZSTD_compressCCtx() :
- * Same as ZSTD_compress(), using an explicit ZSTD_CCtx
- * The function will compress at requested compression level,
- * ignoring any other parameter */
+ * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ * Important : in order to behave similarly to `ZSTD_compress()`,
+ * this function compresses at requested compression level,
+ * __ignoring any other parameter__ .
+ * If any advanced parameter was set using the advanced API,
+ * they will all be reset. Only `compressionLevel` remains.
+ */
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -233,7 +238,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
* using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()
+ * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
*
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
*
@@ -261,18 +266,26 @@ typedef enum {
/* compression parameters
* Note: When compressing with a ZSTD_CDict these parameters are superseded
- * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
- * for more info (superseded-by-cdict). */
- ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
+ * by the parameters used to construct the ZSTD_CDict.
+ * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+ ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+ * Note that exact compression parameters are dynamically determined,
+ * depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
- * Note 2 : setting a level sets all default values of other compression parameters */
+ * Note 2 : setting a level resets all other compression parameters to default */
+ /* Advanced compression parameters :
+ * It's possible to pin down compression parameters to some specific values.
+ * In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
+ * This will set a memory budget for streaming decompression,
+ * with larger values requiring more memory
+ * and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
- * requires explicitly allowing such window size at decompression stage if using streaming. */
+ * requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
* Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
@@ -283,13 +296,13 @@ typedef enum {
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
- * This parameter is useless when using "fast" strategy.
+ * This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
- * This parameter is useless when using "fast" and "dFast" strategies.
+ * This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, /* Minimum size of searched matches.
* Note that Zstandard can still find matches of smaller size,
@@ -344,7 +357,7 @@ typedef enum {
ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
- * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+ * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
@@ -363,7 +376,7 @@ typedef enum {
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
- * The minimum size is automatically and transparently enforced */
+ * The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
@@ -386,6 +399,7 @@ typedef enum {
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
+ * ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -396,6 +410,7 @@ typedef enum {
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
+ ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
typedef struct {
@@ -793,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
typedef struct ZSTD_CDict_s ZSTD_CDict;
/*! ZSTD_createCDict() :
- * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+ * When compressing multiple messages or blocks using the same dictionary,
+ * it's recommended to digest the dictionary only once, since it's a costly operation.
+ * ZSTD_createCDict() will create a state from digesting a dictionary.
+ * The resulting state can be used for future compression operations with very limited startup cost.
* ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ * in which case the only thing that it transports is the @compressionLevel.
+ * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ * which expects a ZSTD_CDict parameter for any data, including data without a known dictionary. */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
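A minimal usage sketch of the pattern described above, digesting the dictionary once and reusing it (helper name is illustrative; error handling elided):

    #include <zstd.h>

    /* Digest the dictionary once, then reuse the CDict for many (typically small) inputs. */
    static size_t compress_with_cdict(const void* dict, size_t dictSize,
                                      const void* src, size_t srcSize,
                                      void* dst, size_t dstCapacity)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3 /* compression level */);
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);   /* in real usage, keep the CDict alive and share it across threads */
        return cSize;            /* may be an error code; test with ZSTD_isError() */
    }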
@@ -925,7 +945,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* It's a CPU consuming operation, with non-negligible impact on latency.
* If there is a need to use the same prefix multiple times, consider loadDictionary instead.
- * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
+ * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
* Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
const void* prefix, size_t prefixSize);
@@ -969,7 +989,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
* Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
* Prefix buffer must remain unmodified up to the end of frame,
* reached when ZSTD_decompressStream() returns 0.
- * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
* A full dictionary is more costly, as it requires building tables.
@@ -1014,8 +1034,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* Some of them might be removed in the future (especially when redundant with existing stable functions)
* ***************************************************************************************/
-#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */
-#define ZSTD_FRAMEHEADERSIZE_MIN 6
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
#define ZSTD_SKIPPABLEHEADERSIZE 8
@@ -1063,6 +1083,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@@ -1073,6 +1095,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
+ unsigned int matchPos; /* Match pos in dst */
+ /* If seqDef.offset > 3, then this is seqDef.offset - 3
+ * If seqDef.offset < 3, then this is the corresponding repeat offset
+ * But if seqDef.offset < 3 and litLength == 0, this is the
+ * repeat offset before the corresponding repeat offset
+ * And if seqDef.offset == 3 and litLength == 0, this is the
+ * most recent repeat offset - 1
+ */
+ unsigned int offset;
+ unsigned int litLength; /* Literal length */
+ unsigned int matchLength; /* Match length */
+ /* 0 when seq not rep and seqDef.offset otherwise
+ * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+ */
+ unsigned int rep;
+} ZSTD_Sequence;
+
+typedef struct {
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
unsigned hashLog; /**< dispatch table : larger == faster, more memory */
@@ -1101,21 +1141,12 @@ typedef enum {
typedef enum {
ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
- ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
} ZSTD_dictLoadMethod_e;
typedef enum {
- /* Opened question : should we have a format ZSTD_f_auto ?
- * Today, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats become supported,
- * on the compression side, it would mean "default format".
- * On the decompression side, it would mean "automatic format detection",
- * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
- * Since meaning is a little different, another option could be to define different enums for compression and decompression.
- * This question could be kept for later, when there are actually multiple formats to support,
- * but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
- ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number.
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
* Useful to save 4 bytes per generated frame.
* The decoder cannot recognise this format automatically; it must be instructed to expect it.
} ZSTD_format_e;
@@ -1126,7 +1157,7 @@ typedef enum {
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
- * Zstd currently supports the use of a CDict in two ways:
+ * Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
@@ -1142,6 +1173,12 @@ typedef enum {
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
+ * - The CDict's tables are not used at all, and instead we use the working
+ * context alone to reload the dictionary and use params based on the source
+ * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+ * This method is effective when the dictionary sizes are very small relative
+ * to the input size, and the input size is fairly large to begin with.
+ *
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
@@ -1150,6 +1187,7 @@ typedef enum {
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
+ ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
} ZSTD_dictAttachPref_e;
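Overriding the heuristic goes through the experimental parameter ZSTD_c_forceAttachDict; a hedged sketch, assuming ZSTD_STATIC_LINKING_ONLY is acceptable (helper name illustrative):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_forceAttachDict is experimental */
    #include <zstd.h>

    static void force_dict_copy_sketch(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
    {
        /* always copy the CDict's tables into the working context,
         * e.g. when large inputs repeatedly reuse the same dictionary */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceCopy);
        ZSTD_CCtx_refCDict(cctx, cdict);
    }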
typedef enum {
@@ -1158,7 +1196,7 @@ typedef enum {
* levels will be compressed. */
ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
* emitted if Huffman compression is not profitable. */
- ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */
+ ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
@@ -1210,20 +1248,38 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+/*! ZSTD_getSequences() :
+ * Extract sequences from the sequence store
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ * @return : number of sequences extracted
+ */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+
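A minimal sketch of calling it, assuming a generous one-sequence-per-byte capacity for outSeqs (helper name illustrative; error handling elided):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_getSequences() and ZSTD_Sequence are experimental */
    #include <zstd.h>
    #include <stdlib.h>

    static void dump_sequences_sketch(const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
        size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
        size_t n;
        for (n = 0; n < nbSeqs; n++) {
            /* each entry : seqs[n].litLength literals, then a match of seqs[n].matchLength bytes */
        }
        free(seqs);
        ZSTD_freeCCtx(cctx);
    }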
/***************************************
* Memory management
***************************************/
/*! ZSTD_estimate*() :
- * These functions make it possible to estimate memory usage
- * of a future {D,C}Ctx, before its creation.
- * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
- * It will also consider src size to be arbitrarily "large", which is worst case.
- * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- * Note : CCtx size estimation is only correct for single-threaded compression. */
+ * These functions make it possible to estimate memory usage of a future
+ * {D,C}Ctx, before its creation.
+ *
+ * ZSTD_estimateCCtxSize() will provide a budget large enough for any
+ * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
+ * this estimate does not include space for a window buffer, so this estimate
+ * is guaranteed to be enough for single-shot compressions, but not streaming
+ * compressions. It will however assume the input may be arbitrarily large,
+ * which is the worst case. If srcSize is known to always be small,
+ * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
+ * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
+ * ZSTD_getCParams() to create cParams from compressionLevel.
+ * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
+ * ZSTD_CCtxParams_setParameter().
+ *
+ * Note: only single-threaded compression is supported. This function will
+ * return an error code if ZSTD_c_nbWorkers is >= 1. */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
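As a sketch of how such an estimate pairs with static allocation (ZSTD_initStaticCCtx() lives in the experimental section; helper name illustrative, error handling elided):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_initStaticCCtx() */
    #include <zstd.h>
    #include <stdlib.h>

    static size_t compress_in_budgeted_workspace(int level,
                                                 const void* src, size_t srcSize,
                                                 void* dst, size_t dstCapacity)
    {
        size_t const wkspSize = ZSTD_estimateCCtxSize(level);   /* single-shot budget, any srcSize */
        void* const wksp = malloc(wkspSize);                    /* stands in for any 8-byte aligned buffer */
        ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
        size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, level);
        free(wksp);   /* a static cctx is not freed with ZSTD_freeCCtx() */
        return cSize;
    }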
@@ -1334,7 +1390,8 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
* Create a digested dictionary for compression
* Dictionary content is just referenced, not duplicated.
* As a consequence, `dictBuffer` **must** outlive CDict,
- * and its content must remain unmodified throughout the lifetime of CDict. */
+ * and its content must remain unmodified throughout the lifetime of CDict.
+ * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getCParams() :
@@ -1361,7 +1418,9 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_compress_advanced() :
- * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */
+ * Note : this function is now DEPRECATED.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -1369,7 +1428,9 @@ ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
ZSTD_parameters params);
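A minimal sketch of the suggested migration, replacing the ZSTD_parameters structure with individual setters (parameter values are arbitrary examples; error handling elided):

    #include <zstd.h>

    static size_t compress_advanced_replacement(ZSTD_CCtx* cctx,
                                                void* dst, size_t dstCapacity,
                                                const void* src, size_t srcSize,
                                                const void* dict, size_t dictSize)
    {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 25);    /* example of a fine-tuned cParam */
        ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);        /* same role as the old dict argument */
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }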
/*! ZSTD_compress_usingCDict_advanced() :
- * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
+ * Note : this function is now REDUNDANT.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ * This prototype will be marked as deprecated and generate compilation warning in some future version */
ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -1441,6 +1502,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if the guess considerably underestimates it. */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
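A short sketch of setting the hint, assuming ZSTD_STATIC_LINKING_ONLY is defined since the parameter is experimental (helper name illustrative):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_srcSizeHint is experimental */
    #include <zstd.h>
    #include <limits.h>

    static void set_src_size_hint_sketch(ZSTD_CCtx* cctx, size_t expectedSrcSize)
    {
        /* unlike ZSTD_CCtx_setPledgedSrcSize(), the hint does not have to be exact;
         * it only guides parameter selection, and must fit into an int */
        if (expectedSrcSize > INT_MAX) expectedSrcSize = INT_MAX;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize);
    }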
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@@ -1613,8 +1680,13 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ int compressionLevel,
+ unsigned long long pledgedSrcSize);
+
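For reference, a minimal sketch of the equivalent modern streaming pass with ZSTD_compressStream2(), assuming the whole input sits in one buffer and dstCapacity is at least ZSTD_compressBound(srcSize):

    #include <zstd.h>

    static size_t stream_compress_once_sketch(ZSTD_CCtx* cctx, int level,
                                              const void* src, size_t srcSize,
                                              void* dst, size_t dstCapacity)
    {
        ZSTD_inBuffer  input  = { src, srcSize, 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t remaining;
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
        ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);   /* content size gets written into the frame header */
        do {
            remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
        } while (remaining != 0 && !ZSTD_isError(remaining));
        return ZSTD_isError(remaining) ? remaining : output.pos;   /* compressed size, or an error code */
    }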
/**! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -1623,42 +1695,66 @@ ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLe
*
* Creates an internal CDict (incompatible with static CCtx), except if
* dict == NULL or dictSize < 8, in which case no dict is used.
- * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
* it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ int compressionLevel);
+
/**! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
+ * // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ * for ((param, value) : params) {
+ * ZSTD_CCtx_setParameter(zcs, param, value);
+ * }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
- * pledgedSrcSize must be correct. If srcSize is not known at init time, use
- * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
- ZSTD_parameters params, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params,
+ unsigned long long pledgedSrcSize);
+
/**! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
/**! ZSTD_initCStream_usingCDict_advanced() :
- * This function is deprecated, and is approximately equivalent to:
+ * This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
+ * // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ * for ((fParam, value) : fParams) {
+ * ZSTD_CCtx_setParameter(zcs, fParam, value);
+ * }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+ const ZSTD_CDict* cdict,
+ ZSTD_frameParameters fParams,
+ unsigned long long pledgedSrcSize);
/*! ZSTD_resetCStream() :
* This function is deprecated, and is equivalent to:
@@ -1673,6 +1769,7 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
* but it will change to mean "empty" in a future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError())
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
@@ -1718,8 +1815,10 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
/**
* This function is deprecated, and is equivalent to:
*
@@ -1727,14 +1826,17 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
* ZSTD_DCtx_refDDict(zds, ddict);
*
* note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
/**
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
* re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
@@ -1908,8 +2010,8 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
/*!
Block functions produce and decode raw zstd blocks, without frame metadata.
- Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
- User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+ Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+ But users will have to keep track of the metadata needed to regenerate the data, such as compressed and content sizes.
A few rules to respect :
- Compressing and decompressing require a context structure
@@ -1920,12 +2022,14 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+ copyCCtx() and copyDCtx() can be used too
- Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+ If input is larger than a block size, it's necessary to split input data into multiple blocks
- + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
- Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
- In which case, nothing is produced into `dst` !
- + User must test for such outcome and deal directly with uncompressed data
- + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+ + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+ - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+ ===> In which case, nothing is produced into `dst` !
+ + User __must__ test for such an outcome and deal directly with uncompressed data
+ + A block cannot be declared incompressible if ZSTD_compressBlock() returned a non-zero value.
+ Doing so would mess up the statistics history, leading to potential data corruption.
+ + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+ In case of multiple successive blocks, should some of them be uncompressed,
decoder must be informed of their existence in order to follow proper history.
Use ZSTD_insertBlock() for such a case.
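A hedged sketch of the round trip these rules imply, assuming cctx/dctx were already initialized with ZSTD_compressBegin()/ZSTD_decompressBegin(), srcSize fits within one block, and the buffers are large enough (helper name illustrative):

    #define ZSTD_STATIC_LINKING_ONLY   /* block-level functions are experimental */
    #include <zstd.h>
    #include <string.h>

    static size_t roundtrip_one_block_sketch(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
                                             const void* src, size_t srcSize,
                                             void* cBuf, size_t cCap,
                                             void* rBuf, size_t rCap)
    {
        size_t const cSize = ZSTD_compressBlock(cctx, cBuf, cCap, src, srcSize);
        if (ZSTD_isError(cSize)) return cSize;
        if (cSize == 0) {
            /* not compressible : caller stores the block raw ... */
            memcpy(rBuf, src, srcSize);
            /* ... and informs the decoder, so that history stays consistent */
            ZSTD_insertBlock(dctx, rBuf, srcSize);
            return srcSize;
        }
        return ZSTD_decompressBlock(dctx, rBuf, rCap, cBuf, cSize);
    }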
diff --git a/programs/.gitignore b/programs/.gitignore
deleted file mode 100644
index 0a8e18fbb2f8..000000000000
--- a/programs/.gitignore
+++ /dev/null
@@ -1,37 +0,0 @@
-# local binary (Makefile)
-zstd
-zstd32
-zstd4
-zstd-compress
-zstd-decompress
-zstd-frugal
-zstd-small
-zstd-nolegacy
-
-# Object files
-*.o
-*.ko
-default.profraw
-have_zlib
-
-# Executables
-*.exe
-*.out
-*.app
-
-# Default result files
-dictionary
-grillResults.txt
-_*
-tmp*
-*.zst
-result
-out
-
-# fuzzer
-afl
-
-# Misc files
-*.bat
-!windres/generate_res.bat
-dirTest*
diff --git a/programs/README.md b/programs/README.md
index c3a5590d6de5..7668d49a2073 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -173,10 +173,13 @@ Benchmark arguments :
```
#### Restricted usage of Environment Variables
-Using environment variables to set compression/decompression parameters has security implications. Therefore,
-we intentionally restrict its usage. Currently, only `ZSTD_CLEVEL` is supported for setting compression level.
+Using environment variables to set parameters has security implications.
+Therefore, this avenue is intentionally restricted.
+Only `ZSTD_CLEVEL` is supported currently, for setting compression level.
+`ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
-Note that command line options will override corresponding environment variable settings.
+`ZSTD_CLEVEL` just replaces the default compression level (`3`).
+It can be overridden by corresponding command line arguments.
#### Long distance matching mode
The long distance matching mode, enabled with `--long`, is designed to improve
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 263dc08887f7..7439677c7f3e 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -88,7 +88,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#endif
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
-#define EXM_THROW_INT(errorNum, ...) { \
+#define RETURN_ERROR_INT(errorNum, ...) { \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", errorNum); \
DISPLAYLEVEL(1, __VA_ARGS__); \
@@ -401,9 +401,9 @@ BMK_benchMemAdvancedNoAlloc(
BMK_initCCtxArgs cctxprep;
BMK_initDCtxArgs dctxprep;
- cbp.benchFn = local_defaultCompress;
+ cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */
cbp.benchPayload = cctx;
- cbp.initFn = local_initCCtx;
+ cbp.initFn = local_initCCtx; /* BMK_initCCtx */
cbp.initPayload = &cctxprep;
cbp.errorFn = ZSTD_isError;
cbp.blockCount = nbBlocks;
@@ -534,8 +534,8 @@ BMK_benchMemAdvancedNoAlloc(
if (u==srcSize-1) { /* should never happen */
DISPLAY("no difference detected\n");
}
- }
- }
+ } /* for (u=0; u<srcSize; u++) */
+ } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
} /* CRC Checking */
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
@@ -754,8 +754,7 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
size_t pos = 0, totalSize = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
- FILE* f;
- U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
+ U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); /* last file may be shortened */
if (UTIL_isDirectory(fileNamesTable[n])) {
DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]);
fileSizes[n] = 0;
@@ -766,20 +765,20 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
fileSizes[n] = 0;
continue;
}
- f = fopen(fileNamesTable[n], "rb");
- if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]);
- DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
- if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
- { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
- if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
- pos += readSize;
- }
- fileSizes[n] = (size_t)fileSize;
- totalSize += (size_t)fileSize;
- fclose(f);
- }
+ { FILE* const f = fopen(fileNamesTable[n], "rb");
+ if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
+ DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
+ if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
+ { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+ if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
+ pos += readSize;
+ }
+ fileSizes[n] = (size_t)fileSize;
+ totalSize += (size_t)fileSize;
+ fclose(f);
+ } }
- if (totalSize == 0) EXM_THROW_INT(12, "no data to bench");
+ if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench");
return 0;
}
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index 2c7627713ee4..ef7d9fb11145 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -205,7 +205,6 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
-
#endif /* BENCH_ZSTD_H_3242387 */
#if defined (__cplusplus)
diff --git a/programs/datagen.c b/programs/datagen.c
index 026b9a1855da..ead9b2d2415a 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -55,17 +55,18 @@ static U32 RDG_rand(U32* src)
return rand32 >> 5;
}
+typedef U32 fixedPoint_24_8;
-static void RDG_fillLiteralDistrib(BYTE* ldt, double ld)
+static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld)
{
BYTE const firstChar = (ld<=0.0) ? 0 : '(';
BYTE const lastChar = (ld<=0.0) ? 255 : '}';
BYTE character = (ld<=0.0) ? 0 : '0';
U32 u;
- if (ld<=0.0) ld = 0.0;
+ if (ld<=0) ld = 0;
for (u=0; u<LTSIZE; ) {
- U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1;
+ U32 const weight = (((LTSIZE - u) * ld) >> 8) + 1;
U32 const end = MIN ( u + weight , LTSIZE);
while (u < end) ldt[u++] = character;
character++;
@@ -92,7 +93,8 @@ static U32 RDG_randLength(U32* seedPtr)
return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
}
-static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, U32* seedPtr)
+static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize,
+ double matchProba, const BYTE* ldt, U32* seedPtr)
{
BYTE* const buffPtr = (BYTE*)buffer;
U32 const matchProba32 = (U32)(32768 * matchProba);
@@ -128,13 +130,13 @@ static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, doubl
U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
size_t match = pos - offset;
- while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */
+ while (pos < d) { buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ }
prevOffset = offset;
} else {
/* Literal (noise) */
U32 const length = RDG_randLength(seedPtr);
U32 const d = (U32) MIN(pos + length, buffSize);
- while (pos < d) buffPtr[pos++] = RDG_genChar(seedPtr, ldt);
+ while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); }
} }
}
@@ -145,7 +147,7 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba
BYTE ldt[LTSIZE];
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
if (litProba<=0.0) litProba = matchProba / 4.5;
- RDG_fillLiteralDistrib(ldt, litProba);
+ RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32);
}
@@ -163,7 +165,7 @@ void RDG_genStdout(unsigned long long size, double matchProba, double litProba,
if (buff==NULL) { perror("datagen"); exit(1); }
if (litProba<=0.0) litProba = matchProba / 4.5;
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
- RDG_fillLiteralDistrib(ldt, litProba);
+ RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
SET_BINARY_MODE(stdout);
/* Generate initial dict */
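For clarity, the 24.8 fixed-point convention used above stores the literal probability scaled by 256, with a small epsilon so that values meant to be exact integers do not truncate down; a tiny illustrative sketch (the typedef mirrors the one added in datagen.c):

    typedef unsigned int fixedPoint_24_8;   /* 24 integer bits . 8 fractional bits */

    /* e.g. 0.30 (30% literals) -> 0.30 * 256 + 0.001 = 76.801 -> 76 -> 76/256 = 0.296875 */
    static fixedPoint_24_8 toFixedPoint_24_8(double proba)
    {
        return (fixedPoint_24_8)(proba * 256 + 0.001);
    }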
diff --git a/programs/dibio.c b/programs/dibio.c
index 12eb32680859..ea4bb4bf1f30 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -201,7 +201,7 @@ static void DiB_fillNoise(void* buffer, size_t length)
unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U;
unsigned acc = prime1;
- size_t p=0;;
+ size_t p=0;
for (p=0; p<length; p++) {
acc *= prime2;
diff --git a/programs/fileio.c b/programs/fileio.c
index 569a410c1a24..9833767282ee 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -30,6 +30,7 @@
#include <string.h> /* strcmp, strlen */
#include <assert.h>
#include <errno.h> /* errno */
+#include <limits.h> /* INT_MAX */
#include <signal.h>
#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
@@ -283,9 +284,10 @@ void FIO_addAbortHandler()
/*-*************************************
-* Parameters: Typedefs
+* Parameters: FIO_prefs_t
***************************************/
+/* typedef'd to FIO_prefs_t within fileio.h */
struct FIO_prefs_s {
/* Algorithm preferences */
@@ -304,7 +306,10 @@ struct FIO_prefs_s {
int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
+ size_t streamSrcSize;
size_t targetCBlockSize;
+ int srcSizeHint;
+ int testMode;
ZSTD_literalCompressionMode_e literalCompressionMode;
/* IO preferences */
@@ -314,6 +319,8 @@ struct FIO_prefs_s {
/* Computation resources preferences */
unsigned memLimit;
int nbWorkers;
+
+ int excludeCompressedFiles;
};
@@ -349,8 +356,12 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
+ ret->streamSrcSize = 0;
ret->targetCBlockSize = 0;
+ ret->srcSizeHint = 0;
+ ret->testMode = 0;
ret->literalCompressionMode = ZSTD_lcm_auto;
+ ret->excludeCompressedFiles = 0;
return ret;
}
@@ -394,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
prefs->nbWorkers = nbWorkers;
}
+void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
+
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
if (blockSize && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
@@ -418,10 +431,22 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable;
}
+void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
+ prefs->streamSrcSize = streamSrcSize;
+}
+
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize;
}
+void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
+ prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
+}
+
+void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
+ prefs->testMode = (testMode!=0);
+}
+
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode) {
@@ -500,7 +525,11 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
return NULL;
}
- if (!UTIL_isRegularFile(srcFileName)) {
+ if (!UTIL_isRegularFile(srcFileName)
+#ifndef _MSC_VER
+ && !UTIL_isFIFO(srcFileName)
+#endif /* _MSC_VER */
+ ) {
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
srcFileName);
return NULL;
@@ -516,8 +545,12 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
/** FIO_openDstFile() :
* condition : `dstFileName` must be non-NULL.
* @result : FILE* to `dstFileName`, or NULL if it fails */
-static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName, const char* dstFileName)
+static FILE*
+FIO_openDstFile(FIO_prefs_t* const prefs,
+ const char* srcFileName, const char* dstFileName)
{
+ if (prefs->testMode) return NULL; /* do not open file in test mode */
+
assert(dstFileName != NULL);
if (!strcmp (dstFileName, stdoutmark)) {
DISPLAYLEVEL(4,"Using stdout for output \n");
@@ -542,10 +575,14 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName,
if (UTIL_isRegularFile(dstFileName)) {
/* Check if destination file already exists */
FILE* const fCheck = fopen( dstFileName, "rb" );
+#if !defined(_WIN32)
+ /* this test does not work on Windows :
+ * `NUL` and `nul` are detected as regular files */
if (!strcmp(dstFileName, nulmark)) {
EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
dstFileName);
}
+#endif
if (fCheck != NULL) { /* dst file exists, authorization prompt */
fclose(fCheck);
if (!prefs->overwrite) {
@@ -572,7 +609,7 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName,
{ FILE* const f = fopen( dstFileName, "wb" );
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
- } else {
+ } else if(srcFileName != NULL && strcmp (srcFileName, stdinmark)) {
chmod(dstFileName, 00600);
}
return f;
@@ -615,6 +652,96 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
return (size_t)fileSize;
}
+
+
+/* FIO_checkFilenameCollisions() :
+ * Checks for and warns if there are any files that would have the same output path
+ */
+int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
+ const char **filenameTableSorted, *c, *prevElem, *filename;
+ unsigned u;
+
+ #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+ c = "\\";
+ #else
+ c = "/";
+ #endif
+
+ filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
+ if (!filenameTableSorted) {
+ DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
+ return 1;
+ }
+
+ for (u = 0; u < nbFiles; ++u) {
+ filename = strrchr(filenameTable[u], c[0]);
+ if (filename == NULL) {
+ filenameTableSorted[u] = filenameTable[u];
+ } else {
+ filenameTableSorted[u] = filename+1;
+ }
+ }
+
+ qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
+ prevElem = filenameTableSorted[0];
+ for (u = 1; u < nbFiles; ++u) {
+ if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
+ DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
+ }
+ prevElem = filenameTableSorted[u];
+ }
+
+ free((void*)filenameTableSorted);
+ return 0;
+}
+
+static const char*
+extractFilename(const char* path, char separator)
+{
+ const char* search = strrchr(path, separator);
+ if (search == NULL) return path;
+ return search+1;
+}
+
+/* FIO_createFilename_fromOutDir() :
+ * Takes a source file name and specified output directory, and
+ * allocates memory for and returns a pointer to final path.
+ * This function never returns an error (it may abort() in case of a problem)
+ */
+static char*
+FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
+{
+ const char* filenameStart;
+ char separator;
+ char* result;
+
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+ separator = '\\';
+#else
+ separator = '/';
+#endif
+
+ filenameStart = extractFilename(path, separator);
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+ filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
+#endif
+
+ result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1);
+ if (!result) {
+ EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
+ }
+
+ memcpy(result, outDirName, strlen(outDirName));
+ if (outDirName[strlen(outDirName)-1] == separator) {
+ memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart));
+ } else {
+ memcpy(result + strlen(outDirName), &separator, 1);
+ memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart));
+ }
+
+ return result;
+}
+
#ifndef ZSTD_NOCOMPRESS
/* **********************************************************************
@@ -633,7 +760,6 @@ typedef struct {
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, int cLevel,
- U64 srcSize,
ZSTD_compressionParameters comprParams) {
cRess_t ress;
memset(&ress, 0, sizeof(ress));
@@ -667,6 +793,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* max compressed block size */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
+ /* source size hint */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
@@ -698,10 +826,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
/* dictionary */
- CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
- CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */
-
free(dictBuffer);
}
@@ -718,13 +843,12 @@ static void FIO_freeCResources(cRess_t ress)
#ifdef ZSTD_GZCOMPRESS
static unsigned long long
-FIO_compressGzFrame(cRess_t* ress,
+FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, U64* readsize)
{
unsigned long long inFileSize = 0, outFileSize = 0;
z_stream strm;
- int ret;
if (compressionLevel > Z_BEST_COMPRESSION)
compressionLevel = Z_BEST_COMPRESSION;
@@ -733,11 +857,12 @@ FIO_compressGzFrame(cRess_t* ress,
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
- ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
+ { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
15 /* maxWindowLogSize */ + 16 /* gzip only */,
8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
- if (ret != Z_OK)
- EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
+ if (ret != Z_OK) {
+ EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
+ } }
strm.next_in = 0;
strm.avail_in = 0;
@@ -745,6 +870,7 @@ FIO_compressGzFrame(cRess_t* ress,
strm.avail_out = (uInt)ress->dstBufferSize;
while (1) {
+ int ret;
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) break;
@@ -755,32 +881,31 @@ FIO_compressGzFrame(cRess_t* ress,
ret = deflate(&strm, Z_NO_FLUSH);
if (ret != Z_OK)
EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
- { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
- if (decompBytes) {
- if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
- EXM_THROW(73, "Write error : cannot write to output file");
- outFileSize += decompBytes;
+ { size_t const cSize = ress->dstBufferSize - strm.avail_out;
+ if (cSize) {
+ if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
+ EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
+ outFileSize += cSize;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
- }
- }
- if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
- DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
+ } }
+ if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
- else
- DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
+ } else {
+ DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/inFileSize*100);
- }
+ } }
while (1) {
- ret = deflate(&strm, Z_FINISH);
- { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
- if (decompBytes) {
- if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
- EXM_THROW(75, "Write error : %s", strerror(errno));
- outFileSize += decompBytes;
+ int const ret = deflate(&strm, Z_FINISH);
+ { size_t const cSize = ress->dstBufferSize - strm.avail_out;
+ if (cSize) {
+ if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
+ EXM_THROW(75, "Write error : %s ", strerror(errno));
+ outFileSize += cSize;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
} }
@@ -789,11 +914,11 @@ FIO_compressGzFrame(cRess_t* ress,
EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
}
- ret = deflateEnd(&strm);
- if (ret != Z_OK)
- EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
+ { int const ret = deflateEnd(&strm);
+ if (ret != Z_OK) {
+ EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
+ } }
*readsize = inFileSize;
-
return outFileSize;
}
#endif
@@ -816,14 +941,14 @@ FIO_compressLzmaFrame(cRess_t* ress,
if (plain_lzma) {
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, compressionLevel))
- EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName);
+ EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
if (ret != LZMA_OK)
- EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
+ EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
} else {
ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
if (ret != LZMA_OK)
- EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
+ EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
}
strm.next_in = 0;
@@ -843,11 +968,11 @@ FIO_compressLzmaFrame(cRess_t* ress,
ret = lzma_code(&strm, action);
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
- EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
+ EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
{ size_t const compBytes = ress->dstBufferSize - strm.avail_out;
if (compBytes) {
if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
- EXM_THROW(73, "Write error : %s", strerror(errno));
+ EXM_THROW(85, "Write error : %s", strerror(errno));
outFileSize += compBytes;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
@@ -1003,6 +1128,9 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
/* init */
if (fileSize != UTIL_FILESIZE_UNKNOWN) {
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
+ } else if (prefs->streamSrcSize > 0) {
+ /* unknown source size; use the declared stream size */
+ CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
}
(void)srcFileName;
@@ -1262,9 +1390,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
int result;
stat_t statbuf;
int transfer_permissions = 0;
-
assert(ress.srcFile != NULL);
-
if (ress.dstFile == NULL) {
closeDstFile = 1;
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
@@ -1308,6 +1434,21 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
return result;
}
+/* List used to compare file extensions (used with --exclude-compressed flag)
+* Different from the suffixList, and should only apply to ZSTD compress operations
+*/
+static const char *compressedFileExtensions[] = {
+ ZSTD_EXTENSION,
+ TZSTD_EXTENSION,
+ GZ_EXTENSION,
+ TGZ_EXTENSION,
+ LZMA_EXTENSION,
+ XZ_EXTENSION,
+ TXZ_EXTENSION,
+ LZ4_EXTENSION,
+ TLZ4_EXTENSION,
+ NULL
+};
/*! FIO_compressFilename_srcFile() :
* @return : 0 : compression completed correctly,
@@ -1334,6 +1475,15 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
return 1;
}
+ /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
+ * YES => ZSTD will skip compression of the file and will return 0.
+ * NO => ZSTD will proceed with the compress operation.
+ */
+ if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
+ DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
+ return 0;
+ }
+
ress.srcFile = FIO_openSrcFile(srcFileName);
if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
@@ -1355,16 +1505,11 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
return result;
}
-
-int FIO_compressFilename(FIO_prefs_t* const prefs,
- const char* dstFileName, const char* srcFileName,
- const char* dictFileName, int compressionLevel,
- ZSTD_compressionParameters comprParams)
+int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
+ const char* srcFileName, const char* dictFileName,
+ int compressionLevel, ZSTD_compressionParameters comprParams)
{
- U64 const fileSize = UTIL_getFileSize(srcFileName);
- U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
-
- cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
+ cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1372,57 +1517,65 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
return result;
}
-
/* FIO_determineCompressedName() :
* create a destination filename for compressed srcFileName.
* @return a pointer to it.
* This function never returns an error (it may abort() in case of a problem)
*/
static const char*
-FIO_determineCompressedName(const char* srcFileName, const char* suffix)
+FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
+ char* outDirFilename = NULL;
+ size_t sfnSize = strlen(srcFileName);
+ size_t const srcSuffixLen = strlen(suffix);
+ if (outDirName) {
+ outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
+ sfnSize = strlen(outDirFilename);
+ assert(outDirFilename != NULL);
+ }
- size_t const sfnSize = strlen(srcFileName);
- size_t const suffixSize = strlen(suffix);
-
- if (dfnbCapacity <= sfnSize+suffixSize+1) {
+ if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
/* resize buffer for dstName */
free(dstFileNameBuffer);
- dfnbCapacity = sfnSize + suffixSize + 30;
+ dfnbCapacity = sfnSize + srcSuffixLen + 30;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (!dstFileNameBuffer) {
EXM_THROW(30, "zstd: %s", strerror(errno));
- } }
+ }
+ }
assert(dstFileNameBuffer != NULL);
- memcpy(dstFileNameBuffer, srcFileName, sfnSize);
- memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
+ if (outDirFilename) {
+ memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
+ free(outDirFilename);
+ } else {
+ memcpy(dstFileNameBuffer, srcFileName, sfnSize);
+ }
+ memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
return dstFileNameBuffer;
}
/* FIO_compressMultipleFilenames() :
* compress nbFiles files
- * into one destination (outFileName)
- * or into one file each (outFileName == NULL, but suffix != NULL).
+ * into either one destination (outFileName),
+ * or into one file each (outFileName == NULL, but suffix != NULL),
+ * or into a destination folder (specified with -O)
*/
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
const char** inFileNamesTable, unsigned nbFiles,
+ const char* outDirName,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
int error = 0;
- U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
- U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
- U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
- cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
+ cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
/* init */
assert(outFileName != NULL || suffix != NULL);
-
if (outFileName != NULL) { /* output into a single destination (stdout typically) */
ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
if (ress.dstFile == NULL) { /* could not open outFileName */
@@ -1440,9 +1593,12 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
unsigned u;
for (u=0; u<nbFiles; u++) {
const char* const srcFileName = inFileNamesTable[u];
- const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix); /* cannot fail */
+ const char* const dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
- } }
+ }
+ if (outDirName)
+ FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles);
+ }
FIO_freeCResources(ress);
return error;
@@ -1504,7 +1660,11 @@ static void FIO_freeDResources(dRess_t ress)
/** FIO_fwriteSparse() :
* @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */
-static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
+static unsigned
+FIO_fwriteSparse(const FIO_prefs_t* const prefs,
+ FILE* file,
+ const void* buffer, size_t bufferSize,
+ unsigned storedSkips)
{
const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
size_t bufferSizeT = bufferSize / sizeof(size_t);
@@ -1512,10 +1672,12 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
const size_t* ptrT = bufferT;
static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */
+ if (prefs->testMode) return 0; /* do not output anything in test mode */
+
if (!prefs->sparseFileSupport) { /* normal write */
size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
if (sizeCheck != bufferSize)
- EXM_THROW(70, "Write error : %s (cannot write decoded block)",
+ EXM_THROW(70, "Write error : cannot write decoded block : %s",
strerror(errno));
return 0;
}
@@ -1524,7 +1686,7 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
if (storedSkips > 1 GB) {
int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR);
if (seekResult != 0)
- EXM_THROW(71, "1 GB skip error (sparse file support)");
+ EXM_THROW(91, "1 GB skip error (sparse file support)");
storedSkips -= 1 GB;
}
@@ -1540,13 +1702,14 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
if (nb0T != seg0SizeT) { /* not all 0s */
int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
- if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse");
+ if (seekResult) EXM_THROW(92, "Sparse skip error ; try --no-sparse");
storedSkips = 0;
seg0SizeT -= nb0T;
ptrT += nb0T;
{ size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file);
if (sizeCheck != seg0SizeT)
- EXM_THROW(73, "Write error : cannot write decoded block");
+ EXM_THROW(93, "Write error : cannot write decoded block : %s",
+ strerror(errno));
} }
ptrT += seg0SizeT;
}
@@ -1563,19 +1726,21 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
if (restPtr != restEnd) {
int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
if (seekResult)
- EXM_THROW(74, "Sparse skip error ; try --no-sparse");
+ EXM_THROW(94, "Sparse skip error ; try --no-sparse");
storedSkips = 0;
{ size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
if (sizeCheck != (size_t)(restEnd - restPtr))
- EXM_THROW(75, "Write error : cannot write decoded end of block");
+ EXM_THROW(95, "Write error : cannot write decoded end of block : %s",
+ strerror(errno));
} } } }
return storedSkips;
}
static void
-FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
+FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
{
+ if (prefs->testMode) assert(storedSkips == 0);
if (storedSkips>0) {
assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */
(void)prefs; /* assert can be disabled, in which case prefs becomes unused */
@@ -1585,14 +1750,14 @@ FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
* so that skipped ones get implicitly translated as zero by FS */
{ const char lastZeroByte[1] = { 0 };
if (fwrite(lastZeroByte, 1, 1, file) != 1)
- EXM_THROW(69, "Write error : cannot write last zero");
+ EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
} }
}
/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
@return : 0 (no error) */
-static int FIO_passThrough(FIO_prefs_t* const prefs,
+static int FIO_passThrough(const FIO_prefs_t* const prefs,
FILE* foutput, FILE* finput,
void* buffer, size_t bufferSize,
size_t alreadyLoaded)
@@ -1604,7 +1769,7 @@ static int FIO_passThrough(FIO_prefs_t* const prefs,
/* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
{ size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
if (sizeCheck != alreadyLoaded) {
- DISPLAYLEVEL(1, "Pass-through write error \n");
+ DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
return 1;
} }
@@ -1632,7 +1797,10 @@ static unsigned FIO_highbit64(unsigned long long v)
/* FIO_zstdErrorHelp() :
* detailed error message when requested window size is too large */
-static void FIO_zstdErrorHelp(FIO_prefs_t* const prefs, dRess_t* ress, size_t err, char const* srcFileName)
+static void
+FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
+ const dRess_t* ress,
+ size_t err, const char* srcFileName)
{
ZSTD_frameHeader header;
@@ -1664,12 +1832,10 @@ static void FIO_zstdErrorHelp(FIO_prefs_t* const prefs, dRess_t* ress, size_t er
* @return : size of decoded zstd frame, or an error code
*/
#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
-static unsigned long long FIO_decompressZstdFrame(
- FIO_prefs_t* const prefs,
- dRess_t* ress,
- FILE* finput,
- const char* srcFileName,
- U64 alreadyDecoded)
+static unsigned long long
+FIO_decompressZstdFrame(const FIO_prefs_t* const prefs,
+ dRess_t* ress, FILE* finput,
+ const char* srcFileName, U64 alreadyDecoded)
{
U64 frameSize = 0;
U32 storedSkips = 0;
@@ -1711,11 +1877,6 @@ static unsigned long long FIO_decompressZstdFrame(
}
if (readSizeHint == 0) break; /* end of frame */
- if (inBuff.size != inBuff.pos) {
- DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n",
- srcFileName);
- return FIO_ERROR_FRAME_DECODING;
- }
/* Fill input buffer */
{ size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */
@@ -1738,13 +1899,16 @@ static unsigned long long FIO_decompressZstdFrame(
#ifdef ZSTD_GZDECOMPRESS
-static unsigned long long FIO_decompressGzFrame(dRess_t* ress,
- FILE* srcFile, const char* srcFileName)
+static unsigned long long
+FIO_decompressGzFrame(const FIO_prefs_t* const prefs,
+ dRess_t* ress, FILE* srcFile,
+ const char* srcFileName)
{
unsigned long long outFileSize = 0;
z_stream strm;
int flush = Z_NO_FLUSH;
int decodingError = 0;
+ unsigned storedSkips = 0;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
@@ -1779,10 +1943,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress,
}
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
- if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
- DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
- decodingError = 1; break;
- }
+ storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decompBytes, storedSkips);
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
@@ -1799,19 +1960,24 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress,
DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
decodingError = 1;
}
+ FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
-static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma)
+static unsigned long long
+FIO_decompressLzmaFrame(const FIO_prefs_t* const prefs,
+ dRess_t* ress, FILE* srcFile,
+ const char* srcFileName, int plain_lzma)
{
unsigned long long outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret initRet;
int decodingError = 0;
+ unsigned storedSkips = 0;
strm.next_in = 0;
strm.avail_in = 0;
@@ -1854,10 +2020,7 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
}
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
- if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
- DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
- decodingError = 1; break;
- }
+ storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decompBytes, storedSkips);
outFileSize += decompBytes;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
@@ -1869,19 +2032,23 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
lzma_end(&strm);
+ FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZ4DECOMPRESS
-static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
- FILE* srcFile, const char* srcFileName)
+static unsigned long long
+FIO_decompressLz4Frame(const FIO_prefs_t* const prefs,
+ dRess_t* ress, FILE* srcFile,
+ const char* srcFileName)
{
unsigned long long filesize = 0;
LZ4F_errorCode_t nextToLoad;
LZ4F_decompressionContext_t dCtx;
LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
int decodingError = 0;
+ unsigned storedSkips = 0;
if (LZ4F_isError(errorCode)) {
DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
@@ -1925,10 +2092,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
/* Write Block */
if (decodedBytes) {
- if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) {
- DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
- decodingError = 1; nextToLoad = 0; break;
- }
+ storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decodedBytes, storedSkips);
filesize += decodedBytes;
DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
}
@@ -1949,6 +2113,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
LZ4F_freeDecompressionContext(dCtx);
ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
+ FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
}
@@ -1962,8 +2127,9 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
* @return : 0 : OK
* 1 : error
*/
-static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* srcFile,
- const char* dstFileName, const char* srcFileName)
+static int FIO_decompressFrames(const FIO_prefs_t* const prefs,
+ dRess_t ress, FILE* srcFile,
+ const char* dstFileName, const char* srcFileName)
{
unsigned readSomething = 0;
unsigned long long filesize = 0;
@@ -1995,7 +2161,7 @@ static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* sr
filesize += frameSize;
} else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
#ifdef ZSTD_GZDECOMPRESS
- unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
+ unsigned long long const frameSize = FIO_decompressGzFrame(prefs, &ress, srcFile, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
@@ -2005,7 +2171,7 @@ static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* sr
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
#ifdef ZSTD_LZMADECOMPRESS
- unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
+ unsigned long long const frameSize = FIO_decompressLzmaFrame(prefs, &ress, srcFile, srcFileName, buf[0] != 0xFD);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
@@ -2014,7 +2180,7 @@ static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* sr
#endif
} else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
#ifdef ZSTD_LZ4DECOMPRESS
- unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
+ unsigned long long const frameSize = FIO_decompressLz4Frame(prefs, &ress, srcFile, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
@@ -2054,11 +2220,11 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs,
int transfer_permissions = 0;
int releaseDstFile = 0;
- if (ress.dstFile == NULL) {
+ if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
releaseDstFile = 1;
ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
- if (ress.dstFile==0) return 1;
+ if (ress.dstFile==NULL) return 1;
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
@@ -2071,7 +2237,6 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs,
transfer_permissions = 1;
}
-
result = FIO_decompressFrames(prefs, ress, srcFile, dstFileName, srcFileName);
if (releaseDstFile) {
@@ -2155,70 +2320,119 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
return decodingError;
}
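FIO_decompressFrames() (above) dispatches on the first bytes of each frame: zstd and lz4 frames are recognized by their 32-bit little-endian magic numbers, gzip by 0x1F 0x8B, xz by 0xFD 0x37, and headerless lzma by 0x5D 0x00. A self-contained sketch of that sniffing, not part of the imported patch (the enum and helper name are made up; the constants are the well-known magics):
```
#include <stdint.h>
#include <stddef.h>

typedef enum { FMT_ZSTD, FMT_GZIP, FMT_XZ, FMT_LZMA, FMT_LZ4, FMT_UNKNOWN } frame_fmt;

static frame_fmt sniff_format(const unsigned char* buf, size_t len)
{
    if (len < 4) return FMT_UNKNOWN;
    {   /* endian-independent little-endian read, like MEM_readLE32 */
        uint32_t const le32 = (uint32_t)buf[0] | ((uint32_t)buf[1] << 8)
                            | ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
        if (le32 == 0xFD2FB528U) return FMT_ZSTD;      /* ZSTD_MAGICNUMBER */
        if (le32 == 0x184D2204U) return FMT_LZ4;       /* LZ4_MAGICNUMBER  */
    }
    if (buf[0] == 31   && buf[1] == 139)  return FMT_GZIP;   /* gz magic */
    if (buf[0] == 0xFD && buf[1] == 0x37) return FMT_XZ;     /* xz magic */
    if (buf[0] == 0x5D && buf[1] == 0x00) return FMT_LZMA;   /* lzma header, no magic */
    return FMT_UNKNOWN;
}
```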
+static const char *suffixList[] = {
+ ZSTD_EXTENSION,
+ TZSTD_EXTENSION,
+#ifdef ZSTD_GZDECOMPRESS
+ GZ_EXTENSION,
+ TGZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+ LZMA_EXTENSION,
+ XZ_EXTENSION,
+ TXZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+ LZ4_EXTENSION,
+ TLZ4_EXTENSION,
+#endif
+ NULL
+};
+
+static const char *suffixListStr =
+ ZSTD_EXTENSION "/" TZSTD_EXTENSION
+#ifdef ZSTD_GZDECOMPRESS
+ "/" GZ_EXTENSION "/" TGZ_EXTENSION
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+ "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+ "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
+#endif
+;
/* FIO_determineDstName() :
* create a destination filename from a srcFileName.
* @return a pointer to it.
* @return == NULL if there is an error */
static const char*
-FIO_determineDstName(const char* srcFileName)
+FIO_determineDstName(const char* srcFileName, const char* outDirName)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
-
- size_t const sfnSize = strlen(srcFileName);
- size_t suffixSize;
- const char* const suffixPtr = strrchr(srcFileName, '.');
- if (suffixPtr == NULL) {
- DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
- srcFileName);
+ size_t dstFileNameEndPos;
+ char* outDirFilename = NULL;
+ const char* dstSuffix = "";
+ size_t dstSuffixLen = 0;
+
+ size_t sfnSize = strlen(srcFileName);
+
+ size_t srcSuffixLen;
+ const char* const srcSuffix = strrchr(srcFileName, '.');
+ if (srcSuffix == NULL) {
+ DISPLAYLEVEL(1,
+ "zstd: %s: unknown suffix (%s expected). "
+ "Can't derive the output file name. "
+ "Specify it with -o dstFileName. Ignoring.\n",
+ srcFileName, suffixListStr);
return NULL;
}
- suffixSize = strlen(suffixPtr);
-
- /* check suffix is authorized */
- if (sfnSize <= suffixSize
- || ( strcmp(suffixPtr, ZSTD_EXTENSION)
- #ifdef ZSTD_GZDECOMPRESS
- && strcmp(suffixPtr, GZ_EXTENSION)
- #endif
- #ifdef ZSTD_LZMADECOMPRESS
- && strcmp(suffixPtr, XZ_EXTENSION)
- && strcmp(suffixPtr, LZMA_EXTENSION)
- #endif
- #ifdef ZSTD_LZ4DECOMPRESS
- && strcmp(suffixPtr, LZ4_EXTENSION)
- #endif
- ) ) {
- const char* suffixlist = ZSTD_EXTENSION
- #ifdef ZSTD_GZDECOMPRESS
- "/" GZ_EXTENSION
- #endif
- #ifdef ZSTD_LZMADECOMPRESS
- "/" XZ_EXTENSION "/" LZMA_EXTENSION
- #endif
- #ifdef ZSTD_LZ4DECOMPRESS
- "/" LZ4_EXTENSION
- #endif
- ;
- DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
- srcFileName, suffixlist);
- return NULL;
+ srcSuffixLen = strlen(srcSuffix);
+
+ {
+ const char** matchedSuffixPtr;
+ for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
+ if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
+ break;
+ }
+ }
+
+ /* check suffix is authorized */
+ if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
+ DISPLAYLEVEL(1,
+ "zstd: %s: unknown suffix (%s expected). "
+ "Can't derive the output file name. "
+ "Specify it with -o dstFileName. Ignoring.\n",
+ srcFileName, suffixListStr);
+ return NULL;
+ }
+
+ if ((*matchedSuffixPtr)[1] == 't') {
+ dstSuffix = ".tar";
+ dstSuffixLen = strlen(dstSuffix);
+ }
}
- /* allocate enough space to write dstFilename into it */
- if (dfnbCapacity+suffixSize <= sfnSize+1) {
+ if (outDirName) {
+ outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
+ assert(outDirFilename != NULL);
+ sfnSize = strlen(outDirFilename);
+ }
+
+ if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
+ /* allocate enough space to write dstFilename into it */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + 20;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (dstFileNameBuffer==NULL)
- EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
+ EXM_THROW(74, "%s : not enough memory for dstFileName",
+ strerror(errno));
}
/* return dst name == src name truncated from suffix */
assert(dstFileNameBuffer != NULL);
- memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
- dstFileNameBuffer[sfnSize-suffixSize] = '\0';
+ dstFileNameEndPos = sfnSize - srcSuffixLen;
+ if (outDirFilename) {
+ memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
+ free(outDirFilename);
+ } else {
+ memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
+ }
+
+ /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
+ * extension on decompression. Also writes terminating null. */
+ strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
return dstFileNameBuffer;
/* note : dstFileNameBuffer memory is not going to be free */
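FIO_determineDstName() (above) now recognizes the short tarball extensions and, when one matches, emits a ".tar" output name; unknown suffixes are rejected with a hint to use -o. A condensed, self-contained sketch of that mapping, not part of the imported patch (the file names in main() are purely illustrative):
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char* derive_dst_name(const char* src)
{
    static const char* suffixes[] = { ".zst", ".tzst", ".gz", ".tgz", ".xz",
                                      ".txz", ".lzma", ".lz4", ".tlz4", NULL };
    const char* const dot = strrchr(src, '.');
    const char** s;
    if (dot == NULL) return NULL;
    for (s = suffixes; *s != NULL; s++)
        if (strcmp(*s, dot) == 0) break;
    if (*s == NULL) return NULL;                      /* unknown suffix */
    {   const char* const tail = ((*s)[1] == 't') ? ".tar" : "";  /* .tzst/.tgz/.txz/.tlz4 */
        size_t const stemLen = (size_t)(dot - src);
        char* const dst = (char*)malloc(stemLen + strlen(tail) + 1);
        if (dst == NULL) return NULL;
        memcpy(dst, src, stemLen);
        strcpy(dst + stemLen, tail);                  /* also writes the '\0' */
        return dst;
    }
}

int main(void)
{
    const char* in[] = { "notes.zst", "backup.tzst", "image.tgz", "plain.txt" };
    size_t i;
    for (i = 0; i < sizeof(in)/sizeof(in[0]); i++) {
        char* const out = derive_dst_name(in[i]);
        printf("%-12s -> %s\n", in[i], out ? out : "(unknown suffix)");
        free(out);
    }
    return 0;
}
```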
@@ -2227,8 +2441,8 @@ FIO_determineDstName(const char* srcFileName)
int
FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
- const char* srcNamesTable[], unsigned nbFiles,
- const char* outFileName,
+ const char** srcNamesTable, unsigned nbFiles,
+ const char* outDirName, const char* outFileName,
const char* dictFileName)
{
int error = 0;
@@ -2236,30 +2450,32 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
if (outFileName) {
unsigned u;
- ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
- if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", outFileName);
+ if (!prefs->testMode) {
+ ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
+ if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
+ }
for (u=0; u<nbFiles; u++)
error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[u]);
- if (fclose(ress.dstFile))
+ if ((!prefs->testMode) && (fclose(ress.dstFile)))
EXM_THROW(72, "Write error : %s : cannot properly close output file",
strerror(errno));
} else {
unsigned u;
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[u];
- const char* const dstFileName = FIO_determineDstName(srcFileName);
+ const char* const dstFileName = FIO_determineDstName(srcFileName, outDirName);
if (dstFileName == NULL) { error=1; continue; }
error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
}
+ if (outDirName)
+ FIO_checkFilenameCollisions(srcNamesTable, nbFiles);
}
FIO_freeDResources(ress);
return error;
}
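With --output-dir-flat, the destination name is derived from the source file's basename placed into the requested directory (via FIO_createFilename_fromOutDir(), which is not shown in this hunk); that is why FIO_checkFilenameCollisions() warns when two inputs collapse onto the same output. A rough sketch of the flattening, not part of the imported patch and only assuming the helper joins directory and basename (paths below are illustrative):
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char* join_outdir(const char* srcFileName, const char* outDirName)
{
    const char* const slash = strrchr(srcFileName, '/');
    const char* const base = slash ? slash + 1 : srcFileName;   /* drop leading directories */
    size_t const n = strlen(outDirName) + 1 + strlen(base) + 1;
    char* const out = (char*)malloc(n);
    if (out == NULL) return NULL;
    snprintf(out, n, "%s/%s", outDirName, base);
    return out;
}

int main(void)
{
    /* two different inputs collapsing onto one output name shows why the
     * collision check exists in this mode */
    char* const a = join_outdir("dirA/data.zst", "out");
    char* const b = join_outdir("dirB/data.zst", "out");
    printf("%s\n%s\n", a, b);   /* both print "out/data.zst" */
    free(a); free(b);
    return 0;
}
```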
-
-
/* **************************************************************************
* .zst file info (--list command)
***************************************************************************/
@@ -2298,7 +2514,7 @@ FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
for ( ; ; ) {
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
- if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN) {
+ if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
if ( feof(srcFile)
&& (numBytesRead == 0)
&& (info->compressedSize > 0)
diff --git a/programs/fileio.h b/programs/fileio.h
index 311f8c0e1f08..a7da089f67da 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -26,15 +26,27 @@ extern "C" {
#define stdinmark "/*stdin*\\"
#define stdoutmark "/*stdout*\\"
#ifdef _WIN32
-# define nulmark "nul"
+# define nulmark "NUL"
#else
# define nulmark "/dev/null"
#endif
+
+/**
+ * We test whether the extension we found starts with 't', and if so, we append
+ * ".tar" to the end of the output name.
+ */
#define LZMA_EXTENSION ".lzma"
#define XZ_EXTENSION ".xz"
+#define TXZ_EXTENSION ".txz"
+
#define GZ_EXTENSION ".gz"
+#define TGZ_EXTENSION ".tgz"
+
#define ZSTD_EXTENSION ".zst"
+#define TZSTD_EXTENSION ".tzst"
+
#define LZ4_EXTENSION ".lz4"
+#define TLZ4_EXTENSION ".tlz4"
/*-*************************************
@@ -71,25 +83,30 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
+void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
+void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
+void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);
void FIO_setNoProgress(unsigned noProgress);
void FIO_setNotificationLevel(int level);
+void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
/*-*************************************
* Single File functions
***************************************/
/** FIO_compressFilename() :
- @return : 0 == ok; 1 == pb with src file. */
+ * @return : 0 == ok; 1 == pb with src file. */
int FIO_compressFilename (FIO_prefs_t* const prefs,
- const char* outfilename, const char* infilename, const char* dictFileName,
- int compressionLevel, ZSTD_compressionParameters comprParams);
+ const char* outfilename, const char* infilename,
+ const char* dictFileName, int compressionLevel,
+ ZSTD_compressionParameters comprParams);
/** FIO_decompressFilename() :
- @return : 0 == ok; 1 == pb with src file. */
+ * @return : 0 == ok; 1 == pb with src file. */
int FIO_decompressFilename (FIO_prefs_t* const prefs,
const char* outfilename, const char* infilename, const char* dictFileName);
@@ -100,20 +117,28 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis
* Multiple File functions
***************************************/
/** FIO_compressMultipleFilenames() :
- @return : nb of missing files */
+ * @return : nb of missing files */
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
- const char** srcNamesTable, unsigned nbFiles,
+ const char** inFileNamesTable, unsigned nbFiles,
+ const char* outDirName,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams);
/** FIO_decompressMultipleFilenames() :
- @return : nb of missing or skipped files */
+ * @return : nb of missing or skipped files */
int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
const char** srcNamesTable, unsigned nbFiles,
+ const char* outDirName,
const char* outFileName,
const char* dictFileName);
+/* FIO_checkFilenameCollisions() :
+ * Checks for and warns if there are any files that would have the same output path
+ */
+int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles);
+
+
/*-*************************************
* Advanced stuff (should actually be hosted elsewhere)
diff --git a/programs/platform.h b/programs/platform.h
index 38ded872743e..5934e59cf12d 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -92,7 +92,7 @@ extern "C" {
# if defined(__linux__) || defined(__linux)
# ifndef _POSIX_C_SOURCE
-# define _POSIX_C_SOURCE 200112L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
+# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
# endif
# endif
# include <unistd.h> /* declares _POSIX_VERSION */
diff --git a/programs/timefn.h b/programs/timefn.h
index d1ddd31b1c00..2db3765b9308 100644
--- a/programs/timefn.h
+++ b/programs/timefn.h
@@ -19,12 +19,6 @@ extern "C" {
/*-****************************************
* Dependencies
******************************************/
-#include <sys/types.h> /* utime */
-#if defined(_MSC_VER)
-# include <sys/utime.h> /* utime */
-#else
-# include <utime.h> /* utime */
-#endif
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
diff --git a/programs/util.c b/programs/util.c
index fb77d1783928..5d15450d2e13 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -20,6 +20,9 @@ extern "C" {
#include <errno.h>
#include <assert.h>
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
+#include <direct.h> /* needed for _mkdir in windows */
+#endif
int UTIL_fileExist(const char* filename)
{
@@ -54,14 +57,26 @@ int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
int UTIL_setFileStat(const char *filename, stat_t *statbuf)
{
int res = 0;
- struct utimbuf timebuf;
if (!UTIL_isRegularFile(filename))
return -1;
- timebuf.actime = time(NULL);
- timebuf.modtime = statbuf->st_mtime;
- res += utime(filename, &timebuf); /* set access and modification times */
+ /* set access and modification times */
+#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L)
+ {
+ struct utimbuf timebuf;
+ timebuf.actime = time(NULL);
+ timebuf.modtime = statbuf->st_mtime;
+ res += utime(filename, &timebuf);
+ }
+#else
+ {
+ /* (atime, mtime) */
+ struct timespec timebuf[2] = { {0, UTIME_NOW} };
+ timebuf[1] = statbuf->st_mtim;
+ res += utimensat(AT_FDCWD, filename, timebuf, 0);
+ }
+#endif
#if !defined(_WIN32)
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
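UTIL_setFileStat() now uses utimensat() when PLATFORM_POSIX_VERSION >= 200809L (matching the _POSIX_C_SOURCE bump to 200809L in platform.h above), preserving the source's modification time at nanosecond resolution instead of utime()'s 1-second granularity. A standalone sketch of that path, not part of the imported patch (function and path names are placeholders):
```
#define _POSIX_C_SOURCE 200809L
#include <sys/types.h>
#include <sys/stat.h>   /* stat, utimensat, UTIME_NOW */
#include <fcntl.h>      /* AT_FDCWD */
#include <stdio.h>      /* perror */

/* copy srcPath's modification time onto dstPath, keeping nanoseconds */
static int copy_mtime(const char* srcPath, const char* dstPath)
{
    struct stat st;
    struct timespec times[2];
    if (stat(srcPath, &st) != 0) { perror("stat"); return -1; }
    times[0].tv_sec  = 0;
    times[0].tv_nsec = UTIME_NOW;   /* access time: just "now" */
    times[1]         = st.st_mtim;  /* modification time, ns precision */
    if (utimensat(AT_FDCWD, dstPath, times, 0) != 0) { perror("utimensat"); return -1; }
    return 0;
}
```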
@@ -87,23 +102,45 @@ U32 UTIL_isDirectory(const char* infilename)
return 0;
}
-int UTIL_isSameFile(const char* file1, const char* file2)
+int UTIL_compareStr(const void *p1, const void *p2) {
+ return strcmp(* (char * const *) p1, * (char * const *) p2);
+}
+
+int UTIL_isSameFile(const char* fName1, const char* fName2)
{
-#if defined(_MSC_VER)
+ assert(fName1 != NULL); assert(fName2 != NULL);
+#if defined(_MSC_VER) || defined(_WIN32)
/* note : Visual does not support file identification by inode.
+ * inode does not work on Windows, even with a posix layer, like msys2.
* The following work-around is limited to detecting exact name repetition only,
* aka `filename` is considered different from `subdir/../filename` */
- return !strcmp(file1, file2);
+ return !strcmp(fName1, fName2);
#else
- stat_t file1Stat;
- stat_t file2Stat;
- return UTIL_getFileStat(file1, &file1Stat)
- && UTIL_getFileStat(file2, &file2Stat)
- && (file1Stat.st_dev == file2Stat.st_dev)
- && (file1Stat.st_ino == file2Stat.st_ino);
+ { stat_t file1Stat;
+ stat_t file2Stat;
+ return UTIL_getFileStat(fName1, &file1Stat)
+ && UTIL_getFileStat(fName2, &file2Stat)
+ && (file1Stat.st_dev == file2Stat.st_dev)
+ && (file1Stat.st_ino == file2Stat.st_ino);
+ }
#endif
}
+#ifndef _MSC_VER
+/* Using this to distinguish named pipes */
+U32 UTIL_isFIFO(const char* infilename)
+{
+/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
+#if PLATFORM_POSIX_VERSION >= 200112L
+ stat_t statbuf;
+ int r = UTIL_getFileStat(infilename, &statbuf);
+ if (!r && S_ISFIFO(statbuf.st_mode)) return 1;
+#endif
+ (void)infilename;
+ return 0;
+}
+#endif
+
U32 UTIL_isLink(const char* infilename)
{
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
@@ -221,19 +258,20 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
DIR *dir;
struct dirent *entry;
char* path;
- int dirLength, fnameLength, pathLength, nbFiles = 0;
+ size_t dirLength, fnameLength, pathLength;
+ int nbFiles = 0;
if (!(dir = opendir(dirName))) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
return 0;
}
- dirLength = (int)strlen(dirName);
+ dirLength = strlen(dirName);
errno = 0;
while ((entry = readdir(dir)) != NULL) {
if (strcmp (entry->d_name, "..") == 0 ||
strcmp (entry->d_name, ".") == 0) continue;
- fnameLength = (int)strlen(entry->d_name);
+ fnameLength = strlen(entry->d_name);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { closedir(dir); return 0; }
memcpy(path, dirName, dirLength);
@@ -255,7 +293,8 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
} else {
if (*bufStart + *pos + pathLength >= *bufEnd) {
ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
- *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
+ assert(newListSize >= 0);
+ *bufStart = (char*)UTIL_realloc(*bufStart, (size_t)newListSize);
*bufEnd = *bufStart + newListSize;
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
}
@@ -289,6 +328,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
#endif /* #ifdef _WIN32 */
+int UTIL_isCompressedFile(const char *inputName, const char *extensionList[])
+{
+ const char* ext = UTIL_getFileExtension(inputName);
+ while(*extensionList!=NULL)
+ {
+ const int isCompressedExtension = strcmp(ext,*extensionList);
+ if(isCompressedExtension==0)
+ return 1;
+ ++extensionList;
+ }
+ return 0;
+}
+
+/* Utility function to get the file extension from a file name */
+const char* UTIL_getFileExtension(const char* infilename)
+{
+ const char* extension = strrchr(infilename, '.');
+ if(!extension || extension==infilename) return "";
+ return extension;
+}
+
/*
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
@@ -304,7 +364,6 @@ UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
unsigned i, nbFiles;
char* buf = (char*)malloc(LIST_SIZE_INCREASE);
char* bufend = buf + LIST_SIZE_INCREASE;
- const char** fileTable;
if (!buf) return NULL;
@@ -313,36 +372,37 @@ UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
size_t const len = strlen(inputNames[i]);
if (buf + pos + len >= bufend) {
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
- buf = (char*)UTIL_realloc(buf, newListSize);
+ assert(newListSize >= 0);
+ buf = (char*)UTIL_realloc(buf, (size_t)newListSize);
bufend = buf + newListSize;
if (!buf) return NULL;
}
if (buf + pos + len < bufend) {
- memcpy(buf+pos, inputNames[i], len+1); /* with final \0 */
+ memcpy(buf+pos, inputNames[i], len+1); /* including final \0 */
pos += len + 1;
nbFiles++;
}
} else {
- nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
+ nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} }
if (nbFiles == 0) { free(buf); return NULL; }
- fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
- if (!fileTable) { free(buf); return NULL; }
-
- for (i=0, pos=0; i<nbFiles; i++) {
- fileTable[i] = buf + pos;
- pos += strlen(fileTable[i]) + 1;
- }
+ { const char** const fileTable = (const char**)malloc((nbFiles + 1) * sizeof(*fileTable));
+ if (!fileTable) { free(buf); return NULL; }
- if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
+ for (i = 0, pos = 0; i < nbFiles; i++) {
+ fileTable[i] = buf + pos;
+ if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
+ pos += strlen(fileTable[i]) + 1;
+ }
- *allocatedBuffer = buf;
- *allocatedNamesNb = nbFiles;
+ *allocatedBuffer = buf;
+ *allocatedNamesNb = nbFiles;
- return fileTable;
+ return fileTable;
+ }
}
@@ -375,8 +435,13 @@ int UTIL_countPhysicalCores(void)
DWORD returnLength = 0;
size_t byteOffset = 0;
- glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
- "GetLogicalProcessorInformation");
+#if defined(_MSC_VER)
+/* Visual Studio does not like the following cast */
+# pragma warning( disable : 4054 ) /* conversion from function ptr to data ptr */
+# pragma warning( disable : 4055 ) /* conversion from data ptr to function ptr */
+#endif
+ glpi = (LPFN_GLPI)(void*)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
+ "GetLogicalProcessorInformation");
if (glpi == NULL) {
goto failed;
@@ -494,7 +559,7 @@ int UTIL_countPhysicalCores(void)
if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
if (strncmp(buff, "siblings", 8) == 0) {
const char* const sep = strchr(buff, ':');
- if (*sep == '\0') {
+ if (sep == NULL || *sep == '\0') {
/* formatting was broken? */
goto failed;
}
@@ -503,7 +568,7 @@ int UTIL_countPhysicalCores(void)
}
if (strncmp(buff, "cpu cores", 9) == 0) {
const char* const sep = strchr(buff, ':');
- if (*sep == '\0') {
+ if (sep == NULL || *sep == '\0') {
/* formatting was broken? */
goto failed;
}
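The two hunks above harden UTIL_countPhysicalCores() against malformed /proc/cpuinfo lines: strchr() can return NULL, so both the pointer and the character it points to are checked before use. A tiny self-contained illustration of that pattern, not part of the imported patch:
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* parse a "cpu cores\t: N" line; return N, or -1 on malformed input */
static int parse_cpu_cores(const char* line)
{
    if (strncmp(line, "cpu cores", 9) != 0) return -1;
    {   const char* const sep = strchr(line, ':');
        if (sep == NULL || *sep == '\0') return -1;   /* formatting was broken? */
        return atoi(sep + 1);
    }
}

int main(void)
{
    printf("%d\n", parse_cpu_cores("cpu cores\t: 4"));    /* prints 4  */
    printf("%d\n", parse_cpu_cores("cpu cores broken"));  /* prints -1 */
    return 0;
}
```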
diff --git a/programs/util.h b/programs/util.h
index d6e5bb550ec7..1f524f2934ad 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -25,17 +25,21 @@ extern "C" {
#include <stdio.h> /* fprintf */
#include <sys/types.h> /* stat, utime */
#include <sys/stat.h> /* stat, chmod */
-#if defined(_MSC_VER)
+#if defined(_WIN32)
# include <sys/utime.h> /* utime */
# include <io.h> /* _chmod */
#else
# include <unistd.h> /* chown, stat */
+#if PLATFORM_POSIX_VERSION < 200809L
# include <utime.h> /* utime */
+#else
+# include <fcntl.h> /* AT_FDCWD */
+# include <sys/stat.h> /* utimensat */
+#endif
#endif
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include "mem.h" /* U32, U64 */
-
/*-************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
@@ -129,7 +133,13 @@ int UTIL_setFileStat(const char* filename, stat_t* statbuf);
U32 UTIL_isDirectory(const char* infilename);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
int UTIL_isSameFile(const char* file1, const char* file2);
+int UTIL_compareStr(const void *p1, const void *p2);
+int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
+const char* UTIL_getFileExtension(const char* infilename);
+#ifndef _MSC_VER
+U32 UTIL_isFIFO(const char* infilename);
+#endif
U32 UTIL_isLink(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
U64 UTIL_getFileSize(const char* infilename);
diff --git a/programs/zstd.1 b/programs/zstd.1
index b25d353763e0..fef0e76e081a 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
.
-.TH "ZSTD" "1" "July 2019" "zstd 1.4.2" "User Commands"
+.TH "ZSTD" "1" "October 2019" "zstd 1.4.4" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -127,6 +127,14 @@ Does not spawn a thread for compression, use a single thread for both I/O and co
\fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
.
.TP
+\fB\-\-stream\-size=#\fR
+Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
+.
+.TP
+\fB\-\-size\-hint=#\fR
+When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
+.
+.TP
\fB\-\-rsyncable\fR
\fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
.
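The new \-\-stream\-size and \-\-size\-hint options documented above map onto libzstd's compression-context parameters: an exact pledged source size versus a best-effort hint. A hedged sketch of the corresponding library calls, not part of the imported patch (ZSTD_CCtx_setPledgedSrcSize() is stable API; ZSTD_c_srcSizeHint is an experimental parameter in this release and therefore needs ZSTD_STATIC_LINKING_ONLY and static linking):
```
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_srcSizeHint is experimental in v1.4.4 */
#include <zstd.h>

/* --stream-size=# : the exact size is pledged, written into the frame header,
 * and must match the number of bytes actually streamed in. */
static int set_exact_stream_size(ZSTD_CCtx* cctx, unsigned long long exactSize)
{
    size_t const err = ZSTD_CCtx_setPledgedSrcSize(cctx, exactSize);
    return ZSTD_isError(err) ? -1 : 0;
}

/* --size-hint=# : only steers parameter selection; a wrong guess costs some
 * compression ratio but is not an error. */
static int set_size_hint(ZSTD_CCtx* cctx, int approxSize)
{
    size_t const err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, approxSize);
    return ZSTD_isError(err) ? -1 : 0;
}
```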
@@ -167,6 +175,10 @@ keep source file(s) after successful compression or decompression\. This is the
operate recursively on directories
.
.TP
+\fB\-\-output\-dir\-flat[=dir]\fR
+resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
+.
+.TP
\fB\-\-format=FORMAT\fR
compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
.
@@ -198,6 +210,9 @@ add integrity check computed from uncompressed data (default: enabled)
\fB\-\-\fR
All arguments after \fB\-\-\fR are treated as files
.
+.SS "Restricted usage of Environment Variables"
+Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR is supported currently, for setting compression level\. \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\. It can be overridden by corresponding command line arguments\.
+.
.SH "DICTIONARY BUILDER"
\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
.
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 3ab2667a0483..e3daa4c87ac7 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -144,6 +144,18 @@ the last one takes effect.
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2).
+* `--stream-size=#` :
+ Sets the pledged source size of input coming from a stream. This value must be exact, as it
+ will be included in the produced frame header. Incorrect stream sizes will cause an error.
+ This information will be used to better optimize compression parameters, resulting in
+ better and potentially faster compression, especially for smaller source sizes.
+* `--size-hint=#`:
+ When handling input from a stream, `zstd` must guess how large the source size
+ will be when optimizing compression parameters. If the stream size is relatively
+ small, this guess may be a poor one, resulting in a higher compression ratio than
+ expected. This feature allows for controlling the guess when needed.
+ Exact guesses result in better compression ratios. Overestimates result in slightly
+ degraded compression ratios, while underestimates may result in significant degradation.
* `--rsyncable` :
`zstd` will periodically synchronize the compression state to make the
compressed file more rsync-friendly. There is a negligible impact to
@@ -179,6 +191,13 @@ the last one takes effect.
This is the default behavior.
* `-r`:
operate recursively on directories
+* `--output-dir-flat[=dir]`:
+ resulting files are stored into target `dir` directory,
+ instead of same directory as origin file.
+ Be aware that this command can introduce name collision issues,
+ if multiple files, from different directories, end up having the same name.
+ Collision resolution ensures first file with a given name will be present in `dir`,
+ while in combination with `-f`, the last file will be present instead.
* `--format=FORMAT`:
compress and decompress in other formats. If compiled with
support, zstd can compress to or decompress from other compression algorithm
@@ -202,6 +221,16 @@ the last one takes effect.
* `--`:
All arguments after `--` are treated as files
+### Restricted usage of Environment Variables
+
+Using environment variables to set parameters has security implications.
+Therefore, this avenue is intentionally restricted.
+Only `ZSTD_CLEVEL` is supported currently, for setting compression level.
+`ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
+If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
+`ZSTD_CLEVEL` just replaces the default compression level (`3`).
+It can be overridden by corresponding command line arguments.
+
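For reference, the environment-variable handling described above boils down to: read `ZSTD_CLEVEL`, accept a plain integer as the new default level, and warn-and-ignore anything else. A simplified sketch, not part of the imported patch (the real init_cLevel() parses digits manually, handles a leading sign, and guards against overflow):
```
#include <stdio.h>
#include <stdlib.h>

static int clevel_from_env(int defaultLevel)
{
    const char* const env = getenv("ZSTD_CLEVEL");
    char* end = NULL;
    long level;
    if (env == NULL || *env == '\0') return defaultLevel;
    level = strtol(env, &end, 10);
    if (end == env || *end != '\0') {
        fprintf(stderr, "Ignoring ZSTD_CLEVEL=%s: not a valid integer value\n", env);
        return defaultLevel;
    }
    return (int)level;
}

int main(void)
{
    printf("compression level: %d\n", clevel_from_env(3));   /* 3 is zstd's default */
    return 0;
}
```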
DICTIONARY BUILDER
------------------
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index de286cdf283e..cd6b40bc089f 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -136,16 +136,19 @@ static int usage_advanced(const char* programName)
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
DISPLAY( " -l : print information about zstd compressed files \n");
+ DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n");
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
- DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
+ DISPLAY( "--fast[=#]: switch to very fast compression levels (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+ DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
+ DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
DISPLAY( " -B# : select size of each job (default: 0==automatic) \n");
- DISPLAY( " --rsyncable : compress using a rsync-friendly method (-B sets block size) \n");
+ DISPLAY( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n");
#endif
DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
DISPLAY( "--[no-]check : integrity check (default: enabled) \n");
@@ -153,6 +156,7 @@ static int usage_advanced(const char* programName)
#endif
#ifdef UTIL_HAS_CREATEFILELIST
DISPLAY( " -r : operate recursively on directories \n");
+ DISPLAY( "--output-dir-flat[=directory]: all resulting files stored into `directory`. \n");
#endif
DISPLAY( "--format=zstd : compress files to the .zst format (default) \n");
#ifdef ZSTD_GZCOMPRESS
@@ -284,7 +288,7 @@ static unsigned readU32FromChar(const char** stringPtr) {
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
@@ -427,8 +431,8 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)
{
for ( ; ;) {
- if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
- if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+ if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = (int)readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+ if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = (int)readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
DISPLAYLEVEL(4, "invalid compression parameter \n");
return 0;
}
@@ -523,7 +527,7 @@ static int init_cLevel(void) {
DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large\n", ENV_CLEVEL, env);
return ZSTDCLI_CLEVEL_DEFAULT;
} else if (*ptr == 0) {
- return sign * absLevel;
+ return sign * (int)absLevel;
}
}
@@ -560,6 +564,7 @@ int main(int argCount, const char* argv[])
adaptMax = MAXCLEVEL,
rsyncable = 0,
nextArgumentIsOutFileName = 0,
+ nextArgumentIsOutDirName = 0,
nextArgumentIsMaxDict = 0,
nextArgumentIsDictID = 0,
nextArgumentsAreFiles = 0,
@@ -580,15 +585,18 @@ int main(int argCount, const char* argv[])
int cLevelLast = -1000000000;
unsigned recursive = 0;
unsigned memLimit = 0;
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */
+ const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
unsigned filenameIdx = 0;
const char* programName = argv[0];
const char* outFileName = NULL;
+ const char* outDirName = NULL;
const char* dictFileName = NULL;
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0;
+ size_t streamSrcSize = 0;
size_t targetCBlockSize = 0;
+ size_t srcSizeHint = 0;
int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST
@@ -682,6 +690,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
+ if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
@@ -700,7 +709,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
-
+ if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--train-cover")) {
@@ -737,7 +746,7 @@ int main(int argCount, const char* argv[])
continue;
}
#endif
- if (longCommandWArg(&argument, "--threads=")) { nbWorkers = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
@@ -745,7 +754,10 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+ if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
@@ -797,7 +809,7 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NOCOMPRESS
/* compression Level */
if ((*argument>='0') && (*argument<='9')) {
- dictCLevel = cLevel = readU32FromChar(&argument);
+ dictCLevel = cLevel = (int)readU32FromChar(&argument);
continue;
}
#endif
@@ -869,7 +881,7 @@ int main(int argCount, const char* argv[])
case 'e':
/* compression Level */
argument++;
- cLevelLast = readU32FromChar(&argument);
+ cLevelLast = (int)readU32FromChar(&argument);
break;
/* Modify Nb Iterations (benchmark only) */
@@ -895,7 +907,7 @@ int main(int argCount, const char* argv[])
/* nb of threads (hidden option) */
case 'T':
argument++;
- nbWorkers = readU32FromChar(&argument);
+ nbWorkers = (int)readU32FromChar(&argument);
break;
/* Dictionary Selection level */
@@ -959,6 +971,13 @@ int main(int argCount, const char* argv[])
continue;
}
+ if (nextArgumentIsOutDirName) {
+ nextArgumentIsOutDirName = 0;
+ lastCommand = 0;
+ outDirName = argument;
+ continue;
+ }
+
/* add filename to list */
filenameTable[filenameIdx++] = argument;
}
@@ -986,7 +1005,11 @@ int main(int argCount, const char* argv[])
if (!followLinks) {
unsigned u;
for (u=0, fileNamesNb=0; u<filenameIdx; u++) {
- if (UTIL_isLink(filenameTable[u])) {
+ if (UTIL_isLink(filenameTable[u])
+#ifndef _MSC_VER
+ && !UTIL_isFIFO(filenameTable[u])
+#endif /* _MSC_VER */
+ ) {
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenameTable[u]);
} else {
filenameTable[fileNamesNb++] = filenameTable[u];
@@ -1025,16 +1048,16 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NOBENCH
benchParams.blockSize = blockSize;
benchParams.nbWorkers = nbWorkers;
- benchParams.realTime = setRealTimePrio;
+ benchParams.realTime = (unsigned)setRealTimePrio;
benchParams.nbSeconds = bench_nbSeconds;
benchParams.ldmFlag = ldmFlag;
- benchParams.ldmMinMatch = g_ldmMinMatch;
- benchParams.ldmHashLog = g_ldmHashLog;
+ benchParams.ldmMinMatch = (int)g_ldmMinMatch;
+ benchParams.ldmHashLog = (int)g_ldmHashLog;
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
- benchParams.ldmBucketSizeLog = g_ldmBucketSizeLog;
+ benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
}
if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) {
- benchParams.ldmHashRateLog = g_ldmHashRateLog;
+ benchParams.ldmHashRateLog = (int)g_ldmHashRateLog;
}
benchParams.literalCompressionMode = literalCompressionMode;
@@ -1075,16 +1098,16 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NODICT
ZDICT_params_t zParams;
zParams.compressionLevel = dictCLevel;
- zParams.notificationLevel = g_displayLevel;
+ zParams.notificationLevel = (unsigned)g_displayLevel;
zParams.dictID = dictID;
if (dict == cover) {
int const optimize = !coverParams.k || !coverParams.d;
- coverParams.nbThreads = nbWorkers;
+ coverParams.nbThreads = (unsigned)nbWorkers;
coverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
} else if (dict == fastCover) {
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
- fastCoverParams.nbThreads = nbWorkers;
+ fastCoverParams.nbThreads = (unsigned)nbWorkers;
fastCoverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
} else {
@@ -1103,7 +1126,7 @@ int main(int argCount, const char* argv[])
}
#ifndef ZSTD_NODECOMPRESS
- if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */
+ if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */
#endif
/* No input filename ==> use stdin and stdout */
@@ -1139,18 +1162,20 @@ int main(int argCount, const char* argv[])
if (operation==zom_compress) {
#ifndef ZSTD_NOCOMPRESS
FIO_setNbWorkers(prefs, nbWorkers);
- FIO_setBlockSize(prefs, (U32)blockSize);
- if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, g_overlapLog);
- FIO_setLdmFlag(prefs, ldmFlag);
- FIO_setLdmHashLog(prefs, g_ldmHashLog);
- FIO_setLdmMinMatch(prefs, g_ldmMinMatch);
- if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, g_ldmBucketSizeLog);
- if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, g_ldmHashRateLog);
- FIO_setAdaptiveMode(prefs, adapt);
+ FIO_setBlockSize(prefs, (int)blockSize);
+ if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
+ FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
+ FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
+ FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch);
+ if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
+ if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
+ FIO_setAdaptiveMode(prefs, (unsigned)adapt);
FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable);
+ FIO_setStreamSrcSize(prefs, streamSrcSize);
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
+ FIO_setSrcSizeHint(prefs, srcSizeHint);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
@@ -1158,9 +1183,9 @@ int main(int argCount, const char* argv[])
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
else
- operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
+ operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
- (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
+ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
#endif
} else { /* decompression or test */
@@ -1176,7 +1201,7 @@ int main(int argCount, const char* argv[])
if (filenameIdx==1 && outFileName)
operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName);
else
- operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, dictFileName);
+ operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName);
#else
DISPLAY("Decompression not supported \n");
#endif
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index 0bc6ed7f5838..b97f8cabcc00 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@@ -1,5 +1,5 @@
.
-.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.2" "User Commands"
+.TH "ZSTDGREP" "1" "October 2019" "zstd 1.4.4" "User Commands"
.
.SH "NAME"
\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
diff --git a/programs/zstdless.1 b/programs/zstdless.1
index 73e9504406a4..1ecc8bdc531c 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@@ -1,5 +1,5 @@
.
-.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.2" "User Commands"
+.TH "ZSTDLESS" "1" "October 2019" "zstd 1.4.4" "User Commands"
.
.SH "NAME"
\fBzstdless\fR \- view zstandard\-compressed files
diff --git a/tests/.gitignore b/tests/.gitignore
deleted file mode 100644
index 4edf6ce13838..000000000000
--- a/tests/.gitignore
+++ /dev/null
@@ -1,68 +0,0 @@
-# local binary (Makefile)
-fullbench
-fullbench32
-fullbench-lib
-fuzzer
-fuzzer32
-fuzzer-dll
-zbufftest
-zbufftest32
-zbufftest-dll
-zstreamtest
-zstreamtest32
-zstreamtest_asan
-zstreamtest_tsan
-zstreamtest-dll
-datagen
-paramgrill
-paramgrill32
-roundTripCrash
-longmatch
-symbols
-legacy
-decodecorpus
-pool
-poolTests
-invalidDictionaries
-checkTag
-zcat
-zstdcat
-tm
-
-# Tmp test directory
-zstdtest
-speedTest
-versionsTest
-namespaceTest
-
-# Local script
-startSpeedTest
-speedTest.pid
-
-# Object files
-*.o
-*.ko
-
-# Executables
-*.exe
-*.out
-*.app
-
-# Default result files
-dictionary
-grillResults.txt
-_*
-tmp*
-*.zst
-*.gz
-!gzip/hufts-segv.gz
-result
-out
-*.zstd
-
-# fuzzer
-afl
-
-# Misc files
-*.bat
-dirTest*
diff --git a/tests/Makefile b/tests/Makefile
index bd2f909769cb..3917a7cf8ac5 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -160,19 +160,13 @@ fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
-zbufftest : CPPFLAGS += -I$(ZSTDDIR)/deprecated
-zbufftest : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings
-zbufftest : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
- $(CC) $(FLAGS) $^ -o $@$(EXT)
-
-zbufftest32 : CPPFLAGS += -I$(ZSTDDIR)/deprecated
-zbufftest32 : CFLAGS += -Wno-deprecated-declarations -m32
-zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
+zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated
+zbufftest zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings
+zbufftest32 : CFLAGS += -m32
+zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zbufftest-dll : zstd-dll
-zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated
-zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings
zbufftest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
@@ -250,7 +244,8 @@ clean:
$(MAKE) -C $(ZSTDDIR) clean
$(MAKE) -C $(PRGDIR) clean
@$(RM) -fR $(TESTARTEFACT)
- @$(RM) -f core *.o tmp* *.tmp result* *.gcda dictionary *.zst \
+ @$(RM) -rf tmp* # some test directories are named tmp*
+ @$(RM) core *.o *.tmp result* *.gcda dictionary *.zst \
$(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
fullbench$(EXT) fullbench32$(EXT) \
fullbench-lib$(EXT) fullbench-dll$(EXT) \
@@ -264,7 +259,7 @@ clean:
#----------------------------------------------------------------------------------
-#make valgrindTest is validated only for Linux, macOS, BSD, Hurd and Solaris targets
+# valgrind tests are validated only for some posix platforms
#----------------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
HOST_OS = POSIX
@@ -286,7 +281,7 @@ valgrindTest: zstd datagen fuzzer fullbench
endif
-ifneq (,$(filter MSYS%,$(shell uname)))
+ifneq (,$(filter MINGW% MSYS%,$(shell uname)))
HOST_OS = MSYS
endif
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index dbc27bc90041..91873ba46031 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -758,8 +758,8 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex);
}
/* use libzstd sequence handling */
- ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
- matchLen - MINMATCH);
+ ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
+ offsetCode, matchLen - MINMATCH);
literalsSize -= literalLen;
excessMatch -= (matchLen - MIN_SEQ_LEN);
diff --git a/tests/fullbench.c b/tests/fullbench.c
index f750ee0d78f6..be49b1428ff5 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -45,7 +45,6 @@
#define NBLOOPS 6
#define TIMELOOP_S 2
-#define KNUTH 2654435761U
#define MAX_MEM (1984 MB)
#define DEFAULT_CLEVEL 1
@@ -450,7 +449,7 @@ static int benchMem(unsigned benchNb,
case 31: /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */
{ size_t frameHeaderSize;
g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
- frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
+ frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1));
CONTROL(!ZSTD_isError(frameHeaderSize));
/* check block is compressible, hence contains a literals section */
{ blockProperties_t bp;
@@ -471,10 +470,10 @@ static int benchMem(unsigned benchNb,
const BYTE* ip = dstBuff;
const BYTE* iend;
{ size_t const cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
- CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX);
+ CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1));
}
/* Skip frame Header */
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1));
CONTROL(!ZSTD_isError(frameHeaderSize));
ip += frameHeaderSize;
}
@@ -722,7 +721,7 @@ static int usage_advanced(const char* exename)
DISPLAY( "\nAdvanced options :\n");
DISPLAY( " -b# : test only function # \n");
DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL);
- DISPLAY( " --zstd : custom parameter selection. Format same as zstdcli \n");
+ DISPLAY( "--zstd= : custom parameter selection. Format same as zstdcli \n");
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault);
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 8bf16b1fb0e4..f66dadef041c 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -40,8 +40,8 @@ FUZZ_LDFLAGS := -pthread $(LDFLAGS)
FUZZ_ARFLAGS := $(ARFLAGS)
FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)
-FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h
-FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c
+FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h
+FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c fuzz_data_producer.c
ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c
@@ -73,7 +73,8 @@ FUZZ_TARGETS := \
dictionary_round_trip \
dictionary_decompress \
zstd_frame_info \
- simple_compress
+ simple_compress \
+ dictionary_loader
all: $(FUZZ_TARGETS)
@@ -110,18 +111,12 @@ simple_compress: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_compress.o
zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@
+dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_loader.o
+ $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_loader.o $(LIB_FUZZING_ENGINE) -o $@
+
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
-# Install libfuzzer (not usable for MSAN testing)
-# Provided for convenience. To use this library run make libFuzzer and
-# set LDFLAGS=-L.
-.PHONY: libFuzzer
-libFuzzer:
- @$(RM) -rf Fuzzer
- @git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
- @cd Fuzzer && ./build.sh
-
corpora/%_seed_corpus.zip:
@mkdir -p corpora
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
index 9e0bb259a9e7..71afa40631a1 100644
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
`--cc`, `--cflags`, etc.
The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
`--lib-fuzzing-engine`, the default is `libregression.a`.
+Alternatively, you can use Clang's built in fuzzing engine with
+`--enable-fuzzer`.
It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
coverage instrumentation `--enable-coverage`.
It sets sane defaults which can be overridden with flags `--debug`,
@@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
## LibFuzzer
```
-# Build libfuzzer if necessary
-make libFuzzer
# Build the fuzz targets
-./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++
+./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
# OR equivalently
-CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
# Run the fuzzer
-./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4
+./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4>
```
where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
### MSAN
-Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library
-and libFuzzer with MSAN.
+Fuzzing with `libFuzzer` and `MSAN` is as easy as:
+
+```
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
+./fuzz.py libfuzzer TARGET <libfuzzer args>
+```
+
`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
the extra parameters only for MSAN.
@@ -85,7 +90,7 @@ CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan
## Regression Testing
-The regression rest supports the `all` target to run all the fuzzers in one
+The regression test supports the `all` target to run all the fuzzers in one
command.
```
diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c
index 3cccc32f4e52..a904b44624d9 100644
--- a/tests/fuzz/block_decompress.c
+++ b/tests/fuzz/block_decompress.c
@@ -28,8 +28,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX;
- FUZZ_seed(&src, &size);
-
/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
free(rBuf);
diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c
index 64ca5fc40f9c..89f060a6eb38 100644
--- a/tests/fuzz/block_round_trip.c
+++ b/tests/fuzz/block_round_trip.c
@@ -20,21 +20,20 @@
#include <string.h>
#include "fuzz_helpers.h"
#include "zstd.h"
-
-static const int kMaxClevel = 19;
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
static void* cBuf = NULL;
static void* rBuf = NULL;
static size_t bufSize = 0;
-static uint32_t seed;
static size_t roundTripTest(void *result, size_t resultCapacity,
void *compressed, size_t compressedCapacity,
- const void *src, size_t srcSize)
+ const void *src, size_t srcSize,
+ int cLevel)
{
- int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0);
size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize);
FUZZ_ZASSERT(ret);
@@ -52,12 +51,16 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
- size_t neededBufSize;
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
+ int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
- seed = FUZZ_seed(&src, &size);
- neededBufSize = size;
+ size_t neededBufSize = size;
if (size > ZSTD_BLOCKSIZE_MAX)
- return 0;
+ size = ZSTD_BLOCKSIZE_MAX;
/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize || !cBuf || !rBuf) {
@@ -79,11 +82,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t const result =
- roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size);
+ roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size,
+ cLevel);
FUZZ_ZASSERT(result);
FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
}
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
index e900054f5a6f..9cc69fa37866 100644
--- a/tests/fuzz/dictionary_decompress.c
+++ b/tests/fuzz/dictionary_decompress.c
@@ -18,33 +18,37 @@
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
static ZSTD_DCtx *dctx = NULL;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
- uint32_t seed = FUZZ_seed(&src, &size);
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
FUZZ_dict_t dict;
ZSTD_DDict* ddict = NULL;
- int i;
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
- dict = FUZZ_train(src, size, &seed);
- if (FUZZ_rand32(&seed, 0, 1) == 0) {
+ dict = FUZZ_train(src, size, producer);
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
ddict = ZSTD_createDDict(dict.buff, dict.size);
FUZZ_ASSERT(ddict);
} else {
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict.buff, dict.size,
- (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
- (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
+ (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
+ (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2)));
}
- /* Run it 10 times over 10 output sizes. Reuse the context and dict. */
- for (i = 0; i < 10; ++i) {
- size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
+
+ {
+ size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
if (ddict) {
@@ -55,6 +59,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
free(rBuf);
}
free(dict.buff);
+ FUZZ_dataProducer_free(producer);
ZSTD_freeDDict(ddict);
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/dictionary_loader.c b/tests/fuzz/dictionary_loader.c
new file mode 100644
index 000000000000..cb34f5d22d9a
--- /dev/null
+++ b/tests/fuzz/dictionary_loader.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target makes sure that whenever a compression dictionary can be
+ * loaded, the data can be round tripped.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
+
+/**
+ * Compresses the data and returns the compressed size or an error.
+ */
+static size_t compress(void* compressed, size_t compressedCapacity,
+ void const* source, size_t sourceSize,
+ void const* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
+{
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
+ cctx, dict, dictSize, dictLoadMethod, dictContentType));
+ size_t const compressedSize = ZSTD_compress2(
+ cctx, compressed, compressedCapacity, source, sourceSize);
+ ZSTD_freeCCtx(cctx);
+ return compressedSize;
+}
+
+static size_t decompress(void* result, size_t resultCapacity,
+ void const* compressed, size_t compressedSize,
+ void const* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
+{
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+ dctx, dict, dictSize, dictLoadMethod, dictContentType));
+ size_t const resultSize = ZSTD_decompressDCtx(
+ dctx, result, resultCapacity, compressed, compressedSize);
+ FUZZ_ZASSERT(resultSize);
+ ZSTD_freeDCtx(dctx);
+ return resultSize;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    ZSTD_dictLoadMethod_e const dlm =
+        FUZZ_dataProducer_uint32Range(producer, 0, 1);
+ ZSTD_dictContentType_e const dct =
+ FUZZ_dataProducer_uint32Range(producer, 0, 2);
+ size = FUZZ_dataProducer_remainingBytes(producer);
+
+ DEBUGLOG(2, "Dict load method %d", dlm);
+ DEBUGLOG(2, "Dict content type %d", dct);
+ DEBUGLOG(2, "Dict size %u", (unsigned)size);
+
+ void* const rBuf = malloc(size);
+ FUZZ_ASSERT(rBuf);
+ size_t const cBufSize = ZSTD_compressBound(size);
+ void* const cBuf = malloc(cBufSize);
+ FUZZ_ASSERT(cBuf);
+
+ size_t const cSize =
+ compress(cBuf, cBufSize, src, size, src, size, dlm, dct);
+ /* compression failing is okay */
+ if (ZSTD_isError(cSize)) {
+ FUZZ_ASSERT_MSG(dct != ZSTD_dct_rawContent, "Raw must always succeed!");
+ goto out;
+ }
+ size_t const rSize =
+ decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct);
+ FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
+ FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+
+out:
+ free(cBuf);
+ free(rBuf);
+ FUZZ_dataProducer_free(producer);
+ return 0;
+}
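
For reference, the `dictionary_loader` target above checks that any dictionary the library accepts can also round-trip data. The following is a minimal standalone sketch (not part of the patch) of the same check outside the fuzzing harness: it loads a buffer as a raw-content dictionary and verifies the round trip. The file name and build line are assumptions, and error handling is reduced to the bare minimum.

```c
/*
 * Hypothetical standalone reproducer, not part of the patch.
 * Assumed build line (paths are assumptions):
 *   cc -Ilib repro.c lib/libzstd.a -o repro
 */
#define ZSTD_STATIC_LINKING_ONLY   /* for *_loadDictionary_advanced and ZSTD_dct_rawContent */
#include <zstd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char data[] = "any buffer can double as a raw-content dictionary";
    size_t const size = sizeof(data);
    size_t const cCap = ZSTD_compressBound(size);
    void* const cBuf = malloc(cCap);
    void* const rBuf = malloc(size);
    if (!cBuf || !rBuf) return 1;

    /* Compress with the buffer itself loaded as a raw-content dictionary. */
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_loadDictionary_advanced(cctx, data, size,
                                      ZSTD_dlm_byRef, ZSTD_dct_rawContent);
    size_t const cSize = ZSTD_compress2(cctx, cBuf, cCap, data, size);
    ZSTD_freeCCtx(cctx);
    if (ZSTD_isError(cSize)) { printf("compression failed\n"); return 1; }

    /* Decompress with the same dictionary and check the round trip. */
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    ZSTD_DCtx_loadDictionary_advanced(dctx, data, size,
                                      ZSTD_dlm_byRef, ZSTD_dct_rawContent);
    size_t const rSize = ZSTD_decompressDCtx(dctx, rBuf, size, cBuf, cSize);
    ZSTD_freeDCtx(dctx);

    printf("round trip %s\n",
           (rSize == size && !memcmp(data, rBuf, size)) ? "ok" : "FAILED");
    free(cBuf); free(rBuf);
    return 0;
}
```

With `ZSTD_dct_rawContent` the dictionary load is expected to always succeed, which is exactly the invariant the fuzz target asserts when compression fails.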
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
index e28c65c98f06..9411b50a74eb 100644
--- a/tests/fuzz/dictionary_round_trip.c
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -19,22 +19,21 @@
#include <string.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
-
-static const int kMaxClevel = 19;
+#include "fuzz_data_producer.h"
static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
-static uint32_t seed;
static size_t roundTripTest(void *result, size_t resultCapacity,
void *compressed, size_t compressedCapacity,
- const void *src, size_t srcSize)
+ const void *src, size_t srcSize,
+ FUZZ_dataProducer_t *producer)
{
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
- FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed);
+ FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer);
size_t cSize;
- if ((FUZZ_rand(&seed) & 15) == 0) {
- int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 15) == 0) {
+ int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
cSize = ZSTD_compress_usingDict(cctx,
compressed, compressedCapacity,
@@ -42,20 +41,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
dict.buff, dict.size,
cLevel);
} else {
- dictContentType = FUZZ_rand32(&seed, 0, 2);
- FUZZ_setRandomParameters(cctx, srcSize, &seed);
+ dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2);
+ FUZZ_setRandomParameters(cctx, srcSize, producer);
/* Disable checksum so we can use sizes smaller than compress bound. */
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
cctx, dict.buff, dict.size,
- (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+ (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
dictContentType));
cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
}
FUZZ_ZASSERT(cSize);
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict.buff, dict.size,
- (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+ (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
dictContentType));
{
size_t const ret = ZSTD_decompressDCtx(
@@ -67,17 +66,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
size_t const rBufSize = size;
void* rBuf = malloc(rBufSize);
size_t cBufSize = ZSTD_compressBound(size);
- void* cBuf;
-
- seed = FUZZ_seed(&src, &size);
+ void *cBuf;
/* Half of the time fuzz with a 1 byte smaller output size.
* This will still succeed because we force the checksum to be disabled,
* giving us 4 bytes of overhead.
*/
- cBufSize -= FUZZ_rand32(&seed, 0, 1);
+ cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1);
cBuf = malloc(cBufSize);
if (!cctx) {
@@ -91,13 +93,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t const result =
- roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+ roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer);
FUZZ_ZASSERT(result);
FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
}
free(rBuf);
free(cBuf);
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
index 8850025b0fd2..6d53aa6d5c7f 100644
--- a/tests/fuzz/fuzz.h
+++ b/tests/fuzz/fuzz.h
@@ -17,12 +17,6 @@
* test code paths which are only executed when contexts are reused.
* WARNING: Makes reproducing crashes much harder.
* Default: Not defined.
- * @param FUZZ_RNG_SEED_SIZE:
- * The number of bytes of the source to look at when constructing a seed
- * for the deterministic RNG. These bytes are discarded before passing
- * the data to zstd functions. Every fuzzer initializes the RNG exactly
- * once before doing anything else, even if it is unused.
- * Default: 4.
* @param DEBUGLEVEL:
* This is a parameter for the zstd library. Defining `DEBUGLEVEL=1`
* enables assert() statements in the zstd library. Higher levels enable
@@ -42,10 +36,6 @@
#ifndef FUZZ_H
#define FUZZ_H
-#ifndef FUZZ_RNG_SEED_SIZE
-# define FUZZ_RNG_SEED_SIZE 4
-#endif
-
#include <stddef.h>
#include <stdint.h>
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index d993209a07ce..87f115afde99 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -24,21 +24,40 @@ def abs_join(a, *p):
return os.path.abspath(os.path.join(a, *p))
+class InputType(object):
+ RAW_DATA = 1
+ COMPRESSED_DATA = 2
+ DICTIONARY_DATA = 3
+
+
+class FrameType(object):
+ ZSTD = 1
+ BLOCK = 2
+
+
+class TargetInfo(object):
+ def __init__(self, input_type, frame_type=FrameType.ZSTD):
+ self.input_type = input_type
+ self.frame_type = frame_type
+
+
# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
-TARGETS = [
- 'simple_round_trip',
- 'stream_round_trip',
- 'block_round_trip',
- 'simple_decompress',
- 'stream_decompress',
- 'block_decompress',
- 'dictionary_round_trip',
- 'dictionary_decompress',
- 'zstd_frame_info',
- 'simple_compress',
-]
+TARGET_INFO = {
+ 'simple_round_trip': TargetInfo(InputType.RAW_DATA),
+ 'stream_round_trip': TargetInfo(InputType.RAW_DATA),
+ 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
+ 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+ 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+ 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
+ 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
+ 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+ 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
+ 'simple_compress': TargetInfo(InputType.RAW_DATA),
+ 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
+}
+TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4
@@ -56,6 +75,7 @@ LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
abs_join(FUZZ_DIR, '..', 'decodecorpus'))
+ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
@@ -67,7 +87,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
def create(r):
d = os.path.abspath(r)
if not os.path.isdir(d):
- os.mkdir(d)
+ os.makedirs(d)
return d
@@ -158,7 +178,7 @@ def compiler_version(cc, cxx):
assert(b'clang' in cxx_version_bytes)
compiler = 'clang'
elif b'gcc' in cc_version_bytes:
- assert(b'gcc' in cxx_version_bytes)
+ assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
compiler = 'gcc'
if compiler is not None:
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
@@ -643,7 +663,7 @@ def gen_parser(args):
parser.add_argument(
'--max-size-log',
type=int,
- default=13,
+ default=18,
help='Maximum sample size to generate')
parser.add_argument(
'--seed',
@@ -657,6 +677,11 @@ def gen_parser(args):
help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
DECODECORPUS))
parser.add_argument(
+ '--zstd',
+ type=str,
+ default=ZSTD,
+ help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
+ parser.add_argument(
'--fuzz-rng-seed-size',
type=int,
default=4,
@@ -690,46 +715,66 @@ def gen(args):
return 1
seed = create(args.seed)
- with tmpdir() as compressed:
- with tmpdir() as decompressed:
- cmd = [
- args.decodecorpus,
- '-n{}'.format(args.number),
- '-p{}/'.format(compressed),
- '-o{}'.format(decompressed),
+ with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict:
+ info = TARGET_INFO[args.TARGET]
+
+ if info.input_type == InputType.DICTIONARY_DATA:
+ number = max(args.number, 1000)
+ else:
+ number = args.number
+ cmd = [
+ args.decodecorpus,
+            '-n{}'.format(number),
+ '-p{}/'.format(compressed),
+ '-o{}'.format(decompressed),
+ ]
+
+ if info.frame_type == FrameType.BLOCK:
+ cmd += [
+ '--gen-blocks',
+ '--max-block-size-log={}'.format(min(args.max_size_log, 17))
]
+ else:
+ cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
- if 'block_' in args.TARGET:
- cmd += [
- '--gen-blocks',
- '--max-block-size-log={}'.format(args.max_size_log)
+ print(' '.join(cmd))
+ subprocess.check_call(cmd)
+
+ if info.input_type == InputType.RAW_DATA:
+ print('using decompressed data in {}'.format(decompressed))
+ samples = decompressed
+ elif info.input_type == InputType.COMPRESSED_DATA:
+ print('using compressed data in {}'.format(compressed))
+ samples = compressed
+ else:
+ assert info.input_type == InputType.DICTIONARY_DATA
+ print('making dictionary data from {}'.format(decompressed))
+ samples = dict
+ min_dict_size_log = 9
+ max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
+ for dict_size_log in range(min_dict_size_log, max_dict_size_log):
+ dict_size = 1 << dict_size_log
+ cmd = [
+ args.zstd,
+ '--train',
+ '-r', decompressed,
+ '--maxdict={}'.format(dict_size),
+ '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size))
]
- else:
- cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
-
- print(' '.join(cmd))
- subprocess.check_call(cmd)
-
- if '_round_trip' in args.TARGET:
- print('using decompressed data in {}'.format(decompressed))
- samples = decompressed
- elif '_decompress' in args.TARGET:
- print('using compressed data in {}'.format(compressed))
- samples = compressed
-
- # Copy the samples over and prepend the RNG seeds
- for name in os.listdir(samples):
- samplename = abs_join(samples, name)
- outname = abs_join(seed, name)
- rng_seed = os.urandom(args.fuzz_rng_seed_size)
- with open(samplename, 'rb') as sample:
- with open(outname, 'wb') as out:
- out.write(rng_seed)
- CHUNK_SIZE = 131072
+ print(' '.join(cmd))
+ subprocess.check_call(cmd)
+
+    # Copy the samples over
+ for name in os.listdir(samples):
+ samplename = abs_join(samples, name)
+ outname = abs_join(seed, name)
+ with open(samplename, 'rb') as sample:
+ with open(outname, 'wb') as out:
+ CHUNK_SIZE = 131072
+ chunk = sample.read(CHUNK_SIZE)
+ while len(chunk) > 0:
+ out.write(chunk)
chunk = sample.read(CHUNK_SIZE)
- while len(chunk) > 0:
- out.write(chunk)
- chunk = sample.read(CHUNK_SIZE)
return 0
diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c
new file mode 100644
index 000000000000..b465337eac3b
--- /dev/null
+++ b/tests/fuzz/fuzz_data_producer.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#include "fuzz_data_producer.h"
+
+struct FUZZ_dataProducer_s{
+ const uint8_t *data;
+ size_t size;
+};
+
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) {
+ FUZZ_dataProducer_t *producer = malloc(sizeof(FUZZ_dataProducer_t));
+
+ FUZZ_ASSERT(producer != NULL);
+
+ producer->data = data;
+ producer->size = size;
+ return producer;
+}
+
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); }
+
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+ uint32_t max) {
+ FUZZ_ASSERT(min <= max);
+
+ uint32_t range = max - min;
+ uint32_t rolling = range;
+ uint32_t result = 0;
+
+ while (rolling > 0 && producer->size > 0) {
+ uint8_t next = *(producer->data + producer->size - 1);
+ producer->size -= 1;
+ result = (result << 8) | next;
+ rolling >>= 8;
+ }
+
+ if (range == 0xffffffff) {
+ return result;
+ }
+
+ return min + result % (range + 1);
+}
+
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) {
+ return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff);
+}
+
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+ int32_t min, int32_t max)
+{
+ FUZZ_ASSERT(min <= max);
+
+ if (min < 0)
+ return (int)FUZZ_dataProducer_uint32Range(producer, 0, max - min) + min;
+
+ return FUZZ_dataProducer_uint32Range(producer, min, max);
+}
+
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
+ return producer->size;
+}
+
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize)
+{
+ newSize = newSize > producer->size ? producer->size : newSize;
+
+ size_t remaining = producer->size - newSize;
+ producer->data = producer->data + remaining;
+ producer->size = newSize;
+ return remaining;
+}
+
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer)
+{
+ size_t producerSliceSize = FUZZ_dataProducer_uint32Range(
+ producer, 0, producer->size);
+ return FUZZ_dataProducer_contract(producer, producerSliceSize);
+}
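
To make the consumption order concrete: the producer draws its bytes from the end of the input, so the front of the buffer stays untouched for the target to (de)compress. The sketch below is an illustration only, not part of the patch; it assumes it is compiled inside tests/fuzz with the same include paths the fuzz targets use, and walks through two draws against a fixed four-byte input.

```c
/*
 * Hypothetical demonstration, not part of the patch. Assumes it is compiled
 * inside tests/fuzz with the same include paths the fuzz targets use.
 */
#include <stdio.h>
#include "fuzz_data_producer.h"

int main(void)
{
    const uint8_t input[] = { 0x01, 0x02, 0x03, 0xAB };
    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(input, sizeof(input));

    /* A [0,255] range needs one byte: the LAST byte 0xAB is consumed -> 171. */
    uint32_t const a = FUZZ_dataProducer_uint32Range(producer, 0, 255);

    /* A [1,20] range also needs one byte: 0x03 is consumed -> 1 + (3 % 20) = 4. */
    uint32_t const b = FUZZ_dataProducer_uint32Range(producer, 1, 20);

    /* The untouched prefix { 0x01, 0x02 } is what a target would (de)compress. */
    size_t const payload = FUZZ_dataProducer_remainingBytes(producer);

    /* Prints: a=171 b=4 payload=2 */
    printf("a=%u b=%u payload=%zu\n", (unsigned)a, (unsigned)b, payload);

    FUZZ_dataProducer_free(producer);
    return 0;
}
```

Once the tail is exhausted the loop above never executes, so every further draw returns `min`, the fallback documented in `fuzz_data_producer.h` below.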
diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h
new file mode 100644
index 000000000000..f2b609677d7c
--- /dev/null
+++ b/tests/fuzz/fuzz_data_producer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * Helper APIs for generating random data from an input data stream.
+ * The producer reads bytes from the end of the input and appends them together
+ * to generate a random number in the requested range. If it runs out of input
+ * data, it will keep returning the same value (min) over and over again.
+ *
+ */
+
+#ifndef FUZZ_DATA_PRODUCER_H
+#define FUZZ_DATA_PRODUCER_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fuzz_helpers.h"
+
+/* Struct used for maintaining the state of the data */
+typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t;
+
+/* Returns a data producer state struct. Use for producer initialization. */
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size);
+
+/* Frees the data producer */
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer);
+
+/* Returns value between [min, max] */
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+ uint32_t max);
+
+/* Returns a uint32 value */
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer);
+
+/* Returns a signed value between [min, max] */
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+ int32_t min, int32_t max);
+
+/* Returns the size of the remaining bytes of data in the producer */
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);
+
+/* Restricts the producer to only the last newSize bytes of data.
+If newSize > current data size, nothing happens. Returns the number of bytes
+the producer won't use anymore, after contracting. */
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize);
+
+/* Restricts the producer to use only the last X bytes of data, where X is
+ a random number in the interval [0, data_size]. Returns the size of the
+ remaining data the producer won't use anymore (the prefix). */
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer);
+#endif // FUZZ_DATA_PRODUCER_H
diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h
index 0cf79d0d7ce9..3de917fd1263 100644
--- a/tests/fuzz/fuzz_helpers.h
+++ b/tests/fuzz/fuzz_helpers.h
@@ -14,6 +14,7 @@
#ifndef FUZZ_HELPERS_H
#define FUZZ_HELPERS_H
+#include "debug.h"
#include "fuzz.h"
#include "xxhash.h"
#include "zstd.h"
@@ -54,37 +55,6 @@ extern "C" {
#define FUZZ_STATIC static
#endif
-/**
- * Deterministically constructs a seed based on the fuzz input.
- * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input.
- */
-FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) {
- uint8_t const *data = *src;
- size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size);
- *size -= toHash;
- *src += toHash;
- return XXH32(data, toHash, 0);
-}
-
-#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
-
-FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) {
- static const uint32_t prime1 = 2654435761U;
- static const uint32_t prime2 = 2246822519U;
- uint32_t rand32 = *state;
- rand32 *= prime1;
- rand32 += prime2;
- rand32 = FUZZ_rotl32(rand32, 13);
- *state = rand32;
- return rand32 >> 5;
-}
-
-/* Returns a random numer in the range [min, max]. */
-FUZZ_STATIC uint32_t FUZZ_rand32(uint32_t *state, uint32_t min, uint32_t max) {
- uint32_t random = FUZZ_rand(state);
- return min + (random % (max - min + 1));
-}
-
#ifdef __cplusplus
}
#endif
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 658c685f4f89..e3ebcd5ced74 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -36,6 +36,7 @@ int main(int argc, char const **argv) {
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
for (i = 0; i < numFiles; ++i) {
char const *fileName = files[i];
+ DEBUGLOG(3, "Running %s", fileName);
size_t const fileSize = UTIL_getFileSize(fileName);
size_t readSize;
FILE *file;
diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c
index aaed4035750c..487be3a354ce 100644
--- a/tests/fuzz/simple_compress.c
+++ b/tests/fuzz/simple_compress.c
@@ -18,28 +18,33 @@
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd.h"
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
static ZSTD_CCtx *cctx = NULL;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
- uint32_t seed = FUZZ_seed(&src, &size);
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
size_t const maxSize = ZSTD_compressBound(size);
- int i;
+ size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, maxSize);
+
+ int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+
if (!cctx) {
cctx = ZSTD_createCCtx();
FUZZ_ASSERT(cctx);
}
- /* Run it 10 times over 10 output sizes. Reuse the context. */
- for (i = 0; i < 10; ++i) {
- int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */
- size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize);
- void* rBuf = malloc(bufSize);
- FUZZ_ASSERT(rBuf);
- ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level);
- free(rBuf);
- }
+ void *rBuf = malloc(bufSize);
+ FUZZ_ASSERT(rBuf);
+ ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel);
+ free(rBuf);
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
#endif
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
index af3f302bb092..6182746a1d16 100644
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@@ -17,26 +17,30 @@
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd.h"
+#include "fuzz_data_producer.h"
static ZSTD_DCtx *dctx = NULL;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
- uint32_t seed = FUZZ_seed(&src, &size);
- int i;
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
- /* Run it 10 times over 10 output sizes. Reuse the context. */
- for (i = 0; i < 10; ++i) {
- size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
- void* rBuf = malloc(bufSize);
- FUZZ_ASSERT(rBuf);
- ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
- free(rBuf);
- }
+
+ size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
+ void *rBuf = malloc(bufSize);
+ FUZZ_ASSERT(rBuf);
+
+ ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+ free(rBuf);
+
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index 7e3b6609822f..2d1d0598a14b 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -20,23 +20,23 @@
#include <string.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
-
-static const int kMaxClevel = 19;
+#include "fuzz_data_producer.h"
static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
-static uint32_t seed;
static size_t roundTripTest(void *result, size_t resultCapacity,
void *compressed, size_t compressedCapacity,
- const void *src, size_t srcSize)
+ const void *src, size_t srcSize,
+ FUZZ_dataProducer_t *producer)
{
size_t cSize;
- if (FUZZ_rand(&seed) & 1) {
- FUZZ_setRandomParameters(cctx, srcSize, &seed);
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+ FUZZ_setRandomParameters(cctx, srcSize, producer);
cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
} else {
- int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+ int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+
cSize = ZSTD_compressCCtx(
cctx, compressed, compressedCapacity, src, srcSize, cLevel);
}
@@ -51,12 +51,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
size_t cBufSize = ZSTD_compressBound(size);
void* cBuf;
- seed = FUZZ_seed(&src, &size);
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
/* Half of the time fuzz with a 1 byte smaller output size.
* This will still succeed because we don't use a dictionary, so the dictID
* field is empty, giving us 4 bytes of overhead.
*/
- cBufSize -= FUZZ_rand32(&seed, 0, 1);
+ cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1);
+
cBuf = malloc(cBufSize);
FUZZ_ASSERT(cBuf && rBuf);
@@ -72,13 +77,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t const result =
- roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+ roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer);
FUZZ_ZASSERT(result);
FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
}
free(rBuf);
free(cBuf);
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index 68e120d7ef6d..c71cc9d3ec6f 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -19,6 +19,7 @@
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd.h"
+#include "fuzz_data_producer.h"
static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX;
@@ -26,22 +27,23 @@ static ZSTD_DStream *dstream = NULL;
static void* buf = NULL;
uint32_t seed;
-static ZSTD_outBuffer makeOutBuffer(void)
+static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer)
{
ZSTD_outBuffer buffer = { buf, 0, 0 };
- buffer.size = (FUZZ_rand(&seed) % kBufSize) + 1;
+ buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, kBufSize));
FUZZ_ASSERT(buffer.size <= kBufSize);
return buffer;
}
-static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size,
+ FUZZ_dataProducer_t *producer)
{
ZSTD_inBuffer buffer = { *src, 0, 0 };
FUZZ_ASSERT(*size > 0);
- buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+ buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size));
FUZZ_ASSERT(buffer.size <= *size);
*src += buffer.size;
*size -= buffer.size;
@@ -51,13 +53,16 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
- seed = FUZZ_seed(&src, &size);
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
/* Allocate all buffers and contexts if not already allocated */
if (!buf) {
buf = malloc(kBufSize);
- FUZZ_ASSERT(buf);
- }
+ FUZZ_ASSERT(buf);
+ }
if (!dstream) {
dstream = ZSTD_createDStream();
@@ -67,9 +72,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
}
while (size > 0) {
- ZSTD_inBuffer in = makeInBuffer(&src, &size);
+ ZSTD_inBuffer in = makeInBuffer(&src, &size, producer);
while (in.pos != in.size) {
- ZSTD_outBuffer out = makeOutBuffer();
+ ZSTD_outBuffer out = makeOutBuffer(producer);
size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
if (ZSTD_isError(rc)) goto error;
}
@@ -79,5 +84,6 @@ error:
#ifndef STATEFUL_FUZZING
ZSTD_freeDStream(dstream); dstream = NULL;
#endif
+ FUZZ_dataProducer_free(producer);
return 0;
}
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
index d13c2dbe7e00..703b11713364 100644
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@@ -20,31 +20,33 @@
#include <string.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
static uint8_t* cBuf = NULL;
static uint8_t* rBuf = NULL;
static size_t bufSize = 0;
-static uint32_t seed;
-static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity)
+static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity,
+ FUZZ_dataProducer_t *producer)
{
ZSTD_outBuffer buffer = { dst, 0, 0 };
FUZZ_ASSERT(capacity > 0);
- buffer.size = (FUZZ_rand(&seed) % capacity) + 1;
+ buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity));
FUZZ_ASSERT(buffer.size <= capacity);
return buffer;
}
-static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size,
+ FUZZ_dataProducer_t *producer)
{
ZSTD_inBuffer buffer = { *src, 0, 0 };
FUZZ_ASSERT(*size > 0);
- buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+ buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size));
FUZZ_ASSERT(buffer.size <= *size);
*src += buffer.size;
*size -= buffer.size;
@@ -53,23 +55,24 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
}
static size_t compress(uint8_t *dst, size_t capacity,
- const uint8_t *src, size_t srcSize)
+ const uint8_t *src, size_t srcSize,
+ FUZZ_dataProducer_t *producer)
{
size_t dstSize = 0;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
- FUZZ_setRandomParameters(cctx, srcSize, &seed);
+ FUZZ_setRandomParameters(cctx, srcSize, producer);
while (srcSize > 0) {
- ZSTD_inBuffer in = makeInBuffer(&src, &srcSize);
+ ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer);
/* Mode controls the action. If mode == -1 we pick a new mode */
int mode = -1;
while (in.pos < in.size || mode != -1) {
- ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+ ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer);
/* Previous action finished, pick a new mode. */
- if (mode == -1) mode = FUZZ_rand(&seed) % 10;
+ if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9);
switch (mode) {
- case 0: /* fall-though */
- case 1: /* fall-though */
+ case 0: /* fall-through */
+ case 1: /* fall-through */
case 2: {
size_t const ret =
ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush);
@@ -85,9 +88,9 @@ static size_t compress(uint8_t *dst, size_t capacity,
/* Reset the compressor when the frame is finished */
if (ret == 0) {
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
- if ((FUZZ_rand(&seed) & 7) == 0) {
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) {
size_t const remaining = in.size - in.pos;
- FUZZ_setRandomParameters(cctx, remaining, &seed);
+ FUZZ_setRandomParameters(cctx, remaining, producer);
}
mode = -1;
}
@@ -107,7 +110,7 @@ static size_t compress(uint8_t *dst, size_t capacity,
}
for (;;) {
ZSTD_inBuffer in = {NULL, 0, 0};
- ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+ ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer);
size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
FUZZ_ZASSERT(ret);
@@ -124,8 +127,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t neededBufSize;
- seed = FUZZ_seed(&src, &size);
- neededBufSize = ZSTD_compressBound(size) * 2;
+ /* Give a random portion of src data to the producer, to use for
+ parameter generation. The rest will be used for (de)compression */
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
+ neededBufSize = ZSTD_compressBound(size) * 15;
/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
@@ -146,7 +153,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
}
{
- size_t const cSize = compress(cBuf, neededBufSize, src, size);
+ size_t const cSize = compress(cBuf, neededBufSize, src, size, producer);
size_t const rSize =
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
FUZZ_ZASSERT(rSize);
@@ -154,6 +161,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
}
+ FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c
index 7512d5f493b4..359cf128f5b8 100644
--- a/tests/fuzz/zstd_frame_info.c
+++ b/tests/fuzz/zstd_frame_info.c
@@ -21,10 +21,6 @@
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
ZSTD_frameHeader zfh;
- /* Consume the seed to be compatible with the corpora of other decompression
- * fuzzers.
- */
- FUZZ_seed(&src, &size);
/* You can fuzz any helper functions here that are fast, and take zstd
* compressed data as input. E.g. don't expect the input to be a dictionary,
* so don't fuzz ZSTD_getDictID_fromDict().
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 9dff2895a9c0..90bf1a156787 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -17,53 +17,56 @@
#include "zstd.h"
#include "zdict.h"
+const int kMinClevel = -3;
+const int kMaxClevel = 19;
+
static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
{
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value));
}
static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
- unsigned max, uint32_t *state) {
- unsigned const value = FUZZ_rand32(state, min, max);
+ unsigned max, FUZZ_dataProducer_t *producer) {
+ unsigned const value = FUZZ_dataProducer_uint32Range(producer, min, max);
set(cctx, param, value);
}
-ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state)
+ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
{
/* Select compression parameters */
ZSTD_compressionParameters cParams;
- cParams.windowLog = FUZZ_rand32(state, ZSTD_WINDOWLOG_MIN, 15);
- cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15);
- cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16);
- cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9);
- cParams.minMatch = FUZZ_rand32(state, ZSTD_MINMATCH_MIN,
+ cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
+ cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
+ cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);
+ cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
+ cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN,
ZSTD_MINMATCH_MAX);
- cParams.targetLength = FUZZ_rand32(state, 0, 512);
- cParams.strategy = FUZZ_rand32(state, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
+ cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);
+ cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
return ZSTD_adjustCParams(cParams, srcSize, 0);
}
-ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state)
+ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
{
/* Select frame parameters */
ZSTD_frameParameters fParams;
- fParams.contentSizeFlag = FUZZ_rand32(state, 0, 1);
- fParams.checksumFlag = FUZZ_rand32(state, 0, 1);
- fParams.noDictIDFlag = FUZZ_rand32(state, 0, 1);
+ fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
+ fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
+ fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
return fParams;
}
-ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state)
+ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
{
ZSTD_parameters params;
- params.cParams = FUZZ_randomCParams(srcSize, state);
- params.fParams = FUZZ_randomFParams(state);
+ params.cParams = FUZZ_randomCParams(srcSize, producer);
+ params.fParams = FUZZ_randomFParams(producer);
return params;
}
-void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
+void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
{
- ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, state);
+ ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer);
set(cctx, ZSTD_c_windowLog, cParams.windowLog);
set(cctx, ZSTD_c_hashLog, cParams.hashLog);
set(cctx, ZSTD_c_chainLog, cParams.chainLog);
@@ -72,27 +75,30 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
set(cctx, ZSTD_c_targetLength, cParams.targetLength);
set(cctx, ZSTD_c_strategy, cParams.strategy);
/* Select frame parameters */
- setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state);
- setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state);
- setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state);
+ setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
+ setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
+ setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
/* Select long distance matching parameters */
- setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state);
- setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
+ setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer);
+ setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
- ZSTD_LDM_MINMATCH_MAX, state);
+ ZSTD_LDM_MINMATCH_MAX, producer);
setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
- state);
+ producer);
setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
- ZSTD_LDM_HASHRATELOG_MAX, state);
+ ZSTD_LDM_HASHRATELOG_MAX, producer);
/* Set misc parameters */
- setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state);
- setRand(cctx, ZSTD_c_rsyncable, 0, 1, state);
- setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
- setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
- setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
+ setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
+ setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
+ setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
+ setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
+ setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
+ setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
+ }
}
-FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
{
size_t const dictSize = MAX(srcSize / 8, 1024);
size_t const totalSampleSize = dictSize * 11;
@@ -107,7 +113,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
for (sample = 0; sample < nbSamples; ++sample) {
size_t const remaining = totalSampleSize - pos;
- size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1);
+ size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
size_t const limit = MIN(srcSize - offset, remaining);
size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
memcpy(samples + pos, src + offset, toCopy);
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h
index 457e6e995f0d..2210bcaf8f54 100644
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -17,17 +17,21 @@
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
+#include "fuzz_data_producer.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
-void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state);
+extern const int kMinClevel;
+extern const int kMaxClevel;
-ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state);
-ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state);
-ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state);
+void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer);
+
+ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer);
+ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer);
+ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer);
typedef struct {
void* buff;
@@ -38,8 +42,7 @@ typedef struct {
* NOTE: Don't use this to train production dictionaries, it is only optimized
* for speed, and doesn't care about dictionary quality.
*/
-FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state);
-
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer);
#ifdef __cplusplus
}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index f42de9ed55c3..88f3b83f834c 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -123,9 +123,10 @@ static U32 FUZ_highbit32(U32 v32)
exit(1); \
} }
-#define CHECK_V(var, fn) size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
-#define CHECK(fn) { CHECK_V(err, fn); }
-#define CHECKPLUS(var, fn, more) { CHECK_V(var, fn); more; }
+#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); goto _output_error; }
+#define CHECK_NEWV(var, fn) size_t const CHECK_VAR(var, fn)
+#define CHECK(fn) { CHECK_NEWV(err, fn); }
+#define CHECKPLUS(var, fn, more) { CHECK_NEWV(var, fn); more; }
#define CHECK_OP(op, lhs, rhs) { \
if (!((lhs) op (rhs))) { \
@@ -231,7 +232,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
/* advanced MT API test */
if (part <= 3)
- { unsigned nbThreads;
+ { int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@@ -242,7 +243,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) );
CHECK_Z( ZSTD_compress2(cctx, outBuffer, outSize, inBuffer, inSize) );
ZSTD_freeCCtx(cctx);
- DISPLAYLEVEL(3, "compress_generic,-T%u,end level %i : ",
+ DISPLAYLEVEL(3, "compress_generic,-T%i,end level %i : ",
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
@@ -303,11 +304,33 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
#endif
+static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
+{
+ size_t i;
+ size_t j;
+ for(i = 0; i < seqsSize - 1; ++i) {
+ assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size);
+ assert(src + seqs[i].litLength + seqs[i].matchLength < src + size);
+
+ memcpy(dst, src, seqs[i].litLength);
+ dst += seqs[i].litLength;
+ src += seqs[i].litLength;
+ size -= seqs[i].litLength;
+
+ for (j = 0; j < seqs[i].matchLength; ++j)
+ dst[j] = dst[j - seqs[i].offset];
+ dst += seqs[i].matchLength;
+ src += seqs[i].matchLength;
+ size -= seqs[i].matchLength;
+ }
+ memcpy(dst, src, size);
+}
+
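
`FUZ_decodeSequences()` re-expands a sequence list the way an LZ decoder would: copy `litLength` literal bytes, replay a match of `matchLength` bytes from `offset` bytes back in the already-produced output, and finally flush the trailing literals. Below is a minimal sketch of that expansion rule applied to a single hand-built sequence; it is an illustration only, not part of the patch, and assumes the experimental `ZSTD_Sequence` type is exposed under `ZSTD_STATIC_LINKING_ONLY`, as it is for fuzzer.c.

```c
/*
 * Illustration only, not part of the patch. Assumes the experimental
 * ZSTD_Sequence type is exposed under ZSTD_STATIC_LINKING_ONLY.
 */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* One sequence: 4 literal bytes, then a 4-byte match reaching 4 bytes back. */
    ZSTD_Sequence seq = { 0 };
    const char lits[] = "abcd";
    char dst[16];
    unsigned j;

    seq.litLength = 4;
    seq.matchLength = 4;
    seq.offset = 4;

    memcpy(dst, lits, seq.litLength);                     /* copy the literals */
    for (j = 0; j < seq.matchLength; ++j)                 /* replay the match  */
        dst[seq.litLength + j] = dst[seq.litLength + j - seq.offset];
    dst[seq.litLength + seq.matchLength] = '\0';

    printf("%s\n", dst);   /* prints "abcdabcd" */
    return 0;
}
```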
/*=============================================
* Unit tests
=============================================*/
-static int basicUnitTests(U32 seed, double compressibility)
+static int basicUnitTests(U32 const seed, double compressibility)
{
size_t const CNBuffSize = 5 MB;
void* const CNBuffer = malloc(CNBuffSize);
@@ -345,10 +368,9 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
if (cctx==NULL) goto _output_error;
- CHECKPLUS(r, ZSTD_compressCCtx(cctx,
+ CHECK_VAR(cSize, ZSTD_compressCCtx(cctx,
compressedBuffer, compressedBufferSize,
- CNBuffer, CNBuffSize, 1),
- cSize=r );
+ CNBuffer, CNBuffSize, 1) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : size of cctx for level 1 : ", testNb++);
@@ -401,7 +423,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t u;
for (u=0; u<CNBuffSize; u++) {
- if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;;
+ if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
@@ -450,12 +472,11 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++);
{ /* create compressed buffer with content size missing */
- ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
- CHECKPLUS(r, ZSTD_compress2(cctx,
+ CHECK_VAR(cSize, ZSTD_compress2(cctx,
compressedBuffer, compressedBufferSize,
- CNBuffer, CNBuffSize),
- cSize=r );
+ CNBuffer, CNBuffSize) );
ZSTD_freeCCtx(cctx);
}
{ /* ensure frame content size is missing */
@@ -742,10 +763,9 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : simple compression test with static CCtx : ", testNb++);
- CHECKPLUS(r, ZSTD_compressCCtx(staticCCtx,
- compressedBuffer, compressedBufferSize,
- CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL),
- cSize=r );
+ CHECK_VAR(cSize, ZSTD_compressCCtx(staticCCtx,
+ compressedBuffer, compressedBufferSize,
+ CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n",
(unsigned)cSize, (double)cSize/CNBuffSize*100);
@@ -760,7 +780,7 @@ static int basicUnitTests(U32 seed, double compressibility)
{ size_t u;
for (u=0; u<CNBuffSize; u++) {
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u])
- goto _output_error;;
+ goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
@@ -777,7 +797,7 @@ static int basicUnitTests(U32 seed, double compressibility)
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
+ DISPLAYLEVEL(3, "test%3i : init static CStream with dictionary (should fail) : ", testNb++);
{ size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1);
if (!ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@@ -810,20 +830,19 @@ static int basicUnitTests(U32 seed, double compressibility)
/* ZSTDMT simple MT compression test */
DISPLAYLEVEL(3, "test%3i : create ZSTDMT CCtx : ", testNb++);
- { ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
+ { ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2);
if (mtctx==NULL) {
- DISPLAY("mtctx : mot enough memory, aborting \n");
+ DISPLAY("mtctx : not enough memory, aborting \n");
testResult = 1;
goto _end;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize);
- CHECKPLUS(r, ZSTDMT_compressCCtx(mtctx,
+ CHECK_VAR(cSize, ZSTDMT_compressCCtx(mtctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
- 1),
- cSize=r );
+ 1) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : decompressed size test : ", testNb++);
@@ -842,7 +861,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t u;
for (u=0; u<CNBuffSize; u++) {
- if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;;
+ if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
@@ -850,11 +869,10 @@ static int basicUnitTests(U32 seed, double compressibility)
{ ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0);
params.fParams.checksumFlag = 1;
params.fParams.contentSizeFlag = 1;
- CHECKPLUS(r, ZSTDMT_compress_advanced(mtctx,
+ CHECK_VAR(cSize, ZSTDMT_compress_advanced(mtctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
- NULL, params, 3 /*overlapRLog*/),
- cSize=r );
+ NULL, params, 3 /*overlapRLog*/) );
}
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
@@ -928,10 +946,10 @@ static int basicUnitTests(U32 seed, double compressibility)
/* only use the first half so we don't push against size limit of compressedBuffer */
size_t const segSize = (CNBuffSize / 2) / segs;
for (i = 0; i < segs; i++) {
- CHECK_V(r, ZSTD_compress(
- (BYTE *)compressedBuffer + off, CNBuffSize - off,
- (BYTE *)CNBuffer + segSize * i,
- segSize, 5));
+ CHECK_NEWV(r, ZSTD_compress(
+ (BYTE*)compressedBuffer + off, CNBuffSize - off,
+ (BYTE*)CNBuffer + segSize * (size_t)i, segSize,
+ 5) );
off += r;
if (i == segs/2) {
/* insert skippable frame */
@@ -956,7 +974,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress multiple frames : ", testNb++);
- { CHECK_V(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize));
+ { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize));
if (r != CNBuffSize / 2) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@@ -983,8 +1001,9 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : compress with flat dictionary : ", testNb++);
cSize = 0;
- CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, compressedBuffer, compressedBufferSize,
- (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
+ CHECKPLUS(r, ZSTD_compressEnd(ctxOrig,
+ compressedBuffer, compressedBufferSize,
+ (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
cSize += r);
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
@@ -999,8 +1018,9 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : compress with duplicated context : ", testNb++);
{ size_t const cSizeOrig = cSize;
cSize = 0;
- CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, compressedBufferSize,
- (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
+ CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated,
+ compressedBuffer, compressedBufferSize,
+ (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
cSize += r);
if (cSize != cSizeOrig) goto _output_error; /* should be identical ==> same size */
}
@@ -1024,7 +1044,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : decompress with static DDict : ", testNb++);
{ size_t const ddictBufferSize = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
- void* ddictBuffer = malloc(ddictBufferSize);
+ void* const ddictBuffer = malloc(ddictBufferSize);
if (ddictBuffer == NULL) goto _output_error;
{ const ZSTD_DDict* const ddict = ZSTD_initStaticDDict(ddictBuffer, ddictBufferSize, CNBuffer, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
@@ -1042,15 +1062,66 @@ static int basicUnitTests(U32 seed, double compressibility)
}
CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
- CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
- (const char*)CNBuffer + dictSize, testSize),
- cSize = r);
+ CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
+ (const char*)CNBuffer + dictSize, testSize) );
{ ZSTD_frameHeader zfh;
if (ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize)) goto _output_error;
if ((zfh.frameContentSize != testSize) && (zfh.frameContentSize != 0)) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
+ if ((int)(compressibility * 100 + 0.1) == FUZ_compressibility_default) { /* test only valid with known input */
+ size_t const flatdictSize = 22 KB;
+ size_t const contentSize = 9 KB;
+ const void* const dict = (const char*)CNBuffer;
+ const void* const contentStart = (const char*)dict + flatdictSize;
+ size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
+ 3770, 3770, 3770, 3750, 3750,
+ 3740, 3670, 3670, 3660, 3660,
+ 3660, 3660, 3660, 3660, 3660,
+ 3660, 3660, 3660 };
+ size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940,
+ 2950, 2950, 2920, 2900, 2890,
+ 2910, 2910, 2910, 2770, 2760,
+ 2750, 2750, 2750, 2750, 2750,
+ 2750, 2750, 2750 };
+ int l = 1;
+ int const maxLevel = ZSTD_maxCLevel();
+
+ DISPLAYLEVEL(3, "test%3i : flat-dictionary efficiency test : \n", testNb++);
+ assert(maxLevel == 22);
+ RDG_genBuffer(CNBuffer, flatdictSize + contentSize, compressibility, 0., seed);
+ DISPLAYLEVEL(4, "content hash : %016llx; dict hash : %016llx \n", XXH64(contentStart, contentSize, 0), XXH64(dict, flatdictSize, 0));
+
+ for ( ; l <= maxLevel; l++) {
+ size_t const nodict_cSize = ZSTD_compress(compressedBuffer, compressedBufferSize,
+ contentStart, contentSize, l);
+ if (nodict_cSize > target_nodict_cSize[l]) {
+ DISPLAYLEVEL(1, "error : compression at level %i worse than expected (%u > %u) \n",
+ l, (unsigned)nodict_cSize, (unsigned)target_nodict_cSize[l]);
+ goto _output_error;
+ }
+ DISPLAYLEVEL(4, "level %i : max expected %u >= reached %u \n",
+ l, (unsigned)target_nodict_cSize[l], (unsigned)nodict_cSize);
+ }
+ for ( l=1 ; l <= maxLevel; l++) {
+ size_t const wdict_cSize = ZSTD_compress_usingDict(ctxOrig,
+ compressedBuffer, compressedBufferSize,
+ contentStart, contentSize,
+ dict, flatdictSize,
+ l);
+ if (wdict_cSize > target_wdict_cSize[l]) {
+ DISPLAYLEVEL(1, "error : compression with dictionary at level %i worse than expected (%u > %u) \n",
+ l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]);
+ goto _output_error;
+ }
+ DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n",
+ l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
+ }
+
+ DISPLAYLEVEL(4, "compression efficiency tests OK \n");
+ }
+
ZSTD_freeCCtx(ctxOrig);
ZSTD_freeCCtx(ctxDuplicated);
ZSTD_freeDCtx(dctx);
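
For reference, the flat-dictionary efficiency test added above measures ZSTD_compress() against ZSTD_compress_usingDict() with the leading part of the buffer reused as a raw (flat) dictionary. A minimal standalone sketch of that call pattern follows; the buffer sizes, level, and placeholder content are illustrative only, not taken from the test:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zstd.h>

    int main(void)
    {
        /* first dictSize bytes act as a flat dictionary,
         * the next contentSize bytes are the payload to compress */
        size_t const dictSize = 16 * 1024;
        size_t const contentSize = 8 * 1024;
        char* const buffer = malloc(dictSize + contentSize);
        size_t const bound = ZSTD_compressBound(contentSize);
        void* const dst = malloc(bound);
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (!buffer || !dst || !cctx) return 1;
        memset(buffer, 'A', dictSize + contentSize);   /* highly compressible placeholder data */

        {   size_t const plain = ZSTD_compress(dst, bound,
                                        buffer + dictSize, contentSize, 3);
            size_t const withDict = ZSTD_compress_usingDict(cctx, dst, bound,
                                        buffer + dictSize, contentSize,
                                        buffer, dictSize, 3);
            if (ZSTD_isError(plain) || ZSTD_isError(withDict)) return 1;
            printf("no dict : %zu bytes, flat dict : %zu bytes \n", plain, withDict);
        }
        ZSTD_freeCCtx(cctx);
        free(dst); free(buffer);
        return 0;
    }
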
@@ -1316,7 +1387,7 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a good dictionary : ", testNb++);
+ DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dct_fullDict on a good dictionary : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem);
if (cdict==NULL) goto _output_error;
@@ -1324,7 +1395,7 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a rawContent (must fail) : ", testNb++);
+ DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dct_fullDict on a rawContent (must fail) : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem);
if (cdict!=NULL) goto _output_error;
@@ -1332,7 +1403,7 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_auto should fail : ", testNb++);
+ DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
@@ -1346,7 +1417,7 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_rawContent should pass : ", testNb++);
+ DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
@@ -1598,6 +1669,7 @@ static int basicUnitTests(U32 seed, double compressibility)
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
+ U32 seed32 = seed;
ZDICT_cover_params_t params;
U32 dictID;
@@ -1610,8 +1682,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
- params.d = 1 + (FUZ_rand(&seed) % 16);
- params.k = params.d + (FUZ_rand(&seed) % 256);
+ params.d = 1 + (FUZ_rand(&seed32) % 16);
+ params.k = params.d + (FUZ_rand(&seed32) % 256);
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples,
params);
@@ -1814,6 +1886,36 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "streaming OK : regenerated %u bytes \n", (unsigned)out.pos);
}
+ /* basic block compression */
+ DISPLAYLEVEL(3, "test%3i : empty magic-less format test : ", testNb++);
+ CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
+ { ZSTD_inBuffer in = { CNBuffer, 0, 0 };
+ ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(0), 0 };
+ size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
+ if (result != 0) goto _output_error;
+ if (in.pos != in.size) goto _output_error;
+ cSize = out.pos;
+ }
+ DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)0, (unsigned)cSize);
+
+ DISPLAYLEVEL(3, "test%3i : decompress of empty magic-less frame : ", testNb++);
+ ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+ CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
+ /* one shot */
+ { size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+ if (result != 0) goto _output_error;
+ DISPLAYLEVEL(3, "one-shot OK, ");
+ }
+ /* streaming */
+ { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 };
+ ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 };
+ size_t const result = ZSTD_decompressStream(dctx, &out, &in);
+ if (result != 0) goto _output_error;
+ if (in.pos != in.size) goto _output_error;
+ if (out.pos != 0) goto _output_error;
+ DISPLAYLEVEL(3, "streaming OK : regenerated %u bytes \n", (unsigned)out.pos);
+ }
+
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
}
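
The two tests above exercise the magic-less frame format: the frame is written without its 4-byte magic number, and the decoder must be configured to expect that. A minimal round-trip sketch, assuming zstd.h is included with ZSTD_STATIC_LINKING_ONLY defined (ZSTD_c_format and ZSTD_d_format are experimental parameters):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_format / ZSTD_d_format are experimental */
    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>

    int main(void)
    {
        const char src[] = "magic-less frame example";
        char compressed[128];
        char decoded[128];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        if (!cctx || !dctx) return 1;

        /* write the frame without its magic number */
        if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless))) return 1;
        {   size_t const cSize = ZSTD_compress2(cctx, compressed, sizeof(compressed), src, sizeof(src));
            if (ZSTD_isError(cSize)) return 1;

            /* the decoder must be told to expect a magic-less frame */
            if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless))) return 1;
            {   size_t const dSize = ZSTD_decompressDCtx(dctx, decoded, sizeof(decoded), compressed, cSize);
                if (ZSTD_isError(dSize) || dSize != sizeof(src)) return 1;
                if (memcmp(src, decoded, dSize)) return 1;
            }
            printf("round trip OK (%zu -> %zu bytes) \n", sizeof(src), cSize);
        }
        ZSTD_freeCCtx(cctx);
        ZSTD_freeDCtx(dctx);
        return 0;
    }
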
@@ -1830,13 +1932,12 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : Block compression test : ", testNb++);
CHECK( ZSTD_compressBegin(cctx, 5) );
CHECK( ZSTD_getBlockSize(cctx) >= blockSize);
- cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize);
- if (ZSTD_isError(cSize)) goto _output_error;
+ CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Block decompression test : ", testNb++);
CHECK( ZSTD_decompressBegin(dctx) );
- { CHECK_V(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+ { CHECK_NEWV(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
if (r != blockSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@@ -1851,30 +1952,36 @@ static int basicUnitTests(U32 seed, double compressibility)
assert(blockCSize != 0);
if (ZSTD_isError(blockCSize)) goto _output_error;
compressed += blockCSize;
- }
- }
+ } }
DISPLAYLEVEL(3, "OK \n");
/* dictionary block compression */
DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++);
CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
- cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
- if (ZSTD_isError(cSize)) goto _output_error;
- cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize);
- if (ZSTD_isError(cSize2)) goto _output_error;
- memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* fake non-compressed block */
- cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize),
- (char*)CNBuffer+dictSize+2*blockSize, blockSize);
- if (ZSTD_isError(cSize2)) goto _output_error;
+ CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize));
+ RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */
+ { CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */
+ assert(r == 0); /* non-compressible block */ }
+ memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* send non-compressed block (without header) */
+ CHECK_VAR(cSize2, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize),
+ (char*)CNBuffer+dictSize+2*blockSize, blockSize));
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Dictionary Block decompression test : ", testNb++);
CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
- { CHECK_V( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
- if (r != blockSize) goto _output_error; }
+ { CHECK_NEWV( r, ZSTD_decompressBlock(dctx, decodedBuffer, blockSize, compressedBuffer, cSize) );
+ if (r != blockSize) {
+ DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
+ goto _output_error;
+ } }
+ memcpy((char*)decodedBuffer+blockSize, (char*)compressedBuffer+cSize, blockSize);
ZSTD_insertBlock(dctx, (char*)decodedBuffer+blockSize, blockSize); /* insert non-compressed block into dctx history */
- { CHECK_V( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, CNBuffSize, (char*)compressedBuffer+cSize+blockSize, cSize2) );
- if (r != blockSize) goto _output_error; }
+ { CHECK_NEWV( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, blockSize, (char*)compressedBuffer+cSize+blockSize, cSize2) );
+ if (r != blockSize) {
+ DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() and after insertBlock() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
+ goto _output_error;
+ } }
+    assert(!memcmp((char*)CNBuffer+dictSize, decodedBuffer, blockSize*3));   /* ensure regenerated content is identical to origin */
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++);
@@ -1900,21 +2007,59 @@ static int basicUnitTests(U32 seed, double compressibility)
sampleSize += 96 KB;
cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
if (ZSTD_isError(cSize)) goto _output_error;
- { CHECK_V(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
+ { CHECK_NEWV(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
if (regenSize!=sampleSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
}
+ DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
+ {
+ size_t srcSize = 100 KB;
+ BYTE* src = (BYTE*)CNBuffer;
+ BYTE* decoded = (BYTE*)compressedBuffer;
+
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
+ size_t seqsSize;
+
+ if (seqs == NULL) goto _output_error;
+ assert(cctx != NULL);
+
+ /* Populate src with random data */
+ RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
+
+ /* get the sequences */
+ seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
+
+ /* "decode" and compare the sequences */
+ FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
+ assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
+
+ ZSTD_freeCCtx(cctx);
+ free(seqs);
+ }
+
+ /* Multiple blocks of zeros test */
+ #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
+ DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
+ memset(CNBuffer, 0, LONGZEROSLENGTH);
+ CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) );
+ DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100);
+
+ DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH);
+ { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) );
+ if (r != LONGZEROSLENGTH) goto _output_error; }
+ DISPLAYLEVEL(3, "OK \n");
+
/* All zeroes test (test bug #137) */
#define ZEROESLENGTH 100
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
memset(CNBuffer, 0, ZEROESLENGTH);
- { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
- cSize = r; }
+ CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/ZEROESLENGTH*100);
DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
- { CHECK_V(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
+ { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
if (r != ZEROESLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@@ -1962,13 +2107,12 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++);
- { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
- CNBuffer, _3BYTESTESTLENGTH, 19) );
- cSize = r; }
+ CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
+ CNBuffer, _3BYTESTESTLENGTH, 19) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/_3BYTESTESTLENGTH*100);
DISPLAYLEVEL(3, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
- { CHECK_V(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
+ { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
if (r != _3BYTESTESTLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@@ -2084,6 +2228,79 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : table cleanliness through index reduction : ", testNb++);
+ {
+ int cLevel;
+ size_t approxIndex = 0;
+ size_t maxIndex = ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)); /* ZSTD_CURRENT_MAX from zstd_compress_internal.h */
+
+ /* Provision enough space in a static context so that we can do all
+ * this without ever reallocating, which would reset the indices. */
+ size_t const staticCCtxSize = ZSTD_estimateCStreamSize(22);
+ void* const staticCCtxBuffer = malloc(staticCCtxSize);
+ ZSTD_CCtx* cctx = ZSTD_initStaticCCtx(staticCCtxBuffer, staticCCtxSize);
+
+ /* bump the indices so the following compressions happen at high
+ * indices. */
+ {
+ ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
+ ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500));
+ while (approxIndex <= (maxIndex / 4) * 3) {
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+ approxIndex += in.pos;
+ CHECK(in.pos == in.size);
+ in.pos = 0;
+ out.pos = 0;
+ }
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+ }
+
+ /* spew a bunch of stuff into the table area */
+ for (cLevel = 1; cLevel <= 22; cLevel++) {
+ ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / cLevel, 0 };
+ ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+ approxIndex += in.pos;
+ }
+
+ /* now crank the indices so we overflow */
+ {
+ ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
+ ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500));
+ while (approxIndex <= maxIndex) {
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+ approxIndex += in.pos;
+ CHECK(in.pos == in.size);
+ in.pos = 0;
+ out.pos = 0;
+ }
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+ }
+
+ /* do a bunch of compressions again in low indices and ensure we don't
+ * hit untracked invalid indices */
+ for (cLevel = 1; cLevel <= 22; cLevel++) {
+ ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / cLevel, 0 };
+ ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+ CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+ approxIndex += in.pos;
+ }
+
+ ZSTD_freeCCtx(cctx);
+ free(staticCCtxBuffer);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
_end:
free(CNBuffer);
free(compressedBuffer);
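
The index-reduction test above repeatedly drives one CCtx until its internal indices approach ZSTD_CURRENT_MAX, then checks that fresh compressions at low indices still behave. The streaming primitive it loops on is ZSTD_compressStream2() with ZSTD_e_flush per round and a final ZSTD_e_end; a minimal sketch of that pattern in isolation (input content and sizes are placeholders):

    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>

    int main(void)
    {
        char src[4096];
        char dst[8192];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return 1;
        memset(src, 'z', sizeof(src));

        /* feed the same input several times : ZSTD_e_flush emits a complete
         * flushed block each round and keeps advancing the internal indices */
        {   int round;
            for (round = 0; round < 4; round++) {
                ZSTD_inBuffer in = { src, sizeof(src), 0 };
                ZSTD_outBuffer out = { dst, sizeof(dst), 0 };
                size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush);
                if (ZSTD_isError(remaining)) return 1;
                if (in.pos != in.size) return 1;   /* dst is sized large enough here */
                printf("round %d : flushed %zu bytes \n", round, out.pos);
            }
        }
        /* close the frame */
        {   ZSTD_inBuffer in = { src, 0, 0 };
            ZSTD_outBuffer out = { dst, sizeof(dst), 0 };
            size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
            if (ZSTD_isError(remaining) || remaining != 0) return 1;
            printf("frame closed, epilogue %zu bytes \n", out.pos);
        }
        ZSTD_freeCCtx(cctx);
        return 0;
    }

The test itself works in a static CCtx sized by ZSTD_estimateCStreamSize(22) so that nothing is ever reallocated; the sketch uses an ordinary heap CCtx for brevity.
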
diff --git a/tests/files/huffman-compressed-larger b/tests/golden-compression/huffman-compressed-larger
index f594f1ae9816..f594f1ae9816 100644
--- a/tests/files/huffman-compressed-larger
+++ b/tests/golden-compression/huffman-compressed-larger
Binary files differ
diff --git a/tests/golden-decompression/rle-first-block.zst b/tests/golden-decompression/rle-first-block.zst
new file mode 100644
index 000000000000..fd067edd74ef
--- /dev/null
+++ b/tests/golden-decompression/rle-first-block.zst
Binary files differ
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 69387321f92f..c09e0c21cc2b 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -64,11 +64,12 @@ PRGDIR="$SCRIPT_DIR/../programs"
TESTDIR="$SCRIPT_DIR/../tests"
UNAME=$(uname)
-isTerminal=false
+detectedTerminal=false
if [ -t 0 ] && [ -t 1 ]
then
- isTerminal=true
+ detectedTerminal=true
fi
+isTerminal=${isTerminal:-$detectedTerminal}
isWindows=false
INTOVOID="/dev/null"
@@ -108,7 +109,6 @@ else
fi
-
println "\n===> simple tests "
./datagen > tmp
@@ -212,8 +212,39 @@ $ZSTD tmp -c --no-compress-literals -19 | $ZSTD -t
$ZSTD tmp -c --compress-literals -1 | $ZSTD -t
$ZSTD tmp -c --compress-literals --fast=1 | $ZSTD -t
$ZSTD tmp -c --compress-literals -19 | $ZSTD -t
-$ZSTD -b --fast=1 -i1e1 tmp --compress-literals
-$ZSTD -b --fast=1 -i1e1 tmp --no-compress-literals
+$ZSTD -b --fast=1 -i0e1 tmp --compress-literals
+$ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
+
+println "test: --exclude-compressed flag"
+rm -rf precompressedFilterTestDir
+mkdir -p precompressedFilterTestDir
+./datagen $size > precompressedFilterTestDir/input.5
+./datagen $size > precompressedFilterTestDir/input.6
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+sleep 5
+./datagen $size > precompressedFilterTestDir/input.7
+./datagen $size > precompressedFilterTestDir/input.8
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+test ! -f precompressedFilterTestDir/input.5.zst.zst
+test ! -f precompressedFilterTestDir/input.6.zst.zst
+file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
+file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
+if [[ $file2timestamp -ge $file1timestamp ]]; then
+ println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
+else
+ println "Test is not successful"
+fi
+#File Extension check.
+./datagen $size > precompressedFilterTestDir/input.zstbar
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+#ZSTD should compress input.zstbar
+test -f precompressedFilterTestDir/input.zstbar.zst
+#Check without the --exclude-compressed flag
+$ZSTD --long --rm -r precompressedFilterTestDir
+#Files should get compressed again without the --exclude-compressed flag.
+test -f precompressedFilterTestDir/input.5.zst.zst
+test -f precompressedFilterTestDir/input.6.zst.zst
+println "Test completed"
println "test : file removal"
$ZSTD -f --rm tmp
@@ -240,8 +271,13 @@ rm tmp # erase source file
touch tmp.zst # create destination file
$ZSTD -f tmp && die "attempt to compress a non existing file"
test -f tmp.zst # destination file should still be present
-rm tmp*
+rm -rf tmp* # may also erase tmp* directory from previous failed run
+println "\n===> decompression only tests "
+head -c 1048576 /dev/zero > tmp
+$ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
+$DIFF -s tmp1 tmp
+rm tmp*
println "test : compress multiple files"
println hello > tmp1
@@ -265,6 +301,28 @@ if [ "$?" -eq 139 ]; then
fi
rm tmp*
+println "test : compress multiple files into an output directory, --output-dir-flat"
+println henlo > tmp1
+mkdir tmpInputTestDir
+mkdir tmpInputTestDir/we
+mkdir tmpInputTestDir/we/must
+mkdir tmpInputTestDir/we/must/go
+mkdir tmpInputTestDir/we/must/go/deeper
+println cool > tmpInputTestDir/we/must/go/deeper/tmp2
+mkdir tmpOutDir
+$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir
+test -f tmpOutDir/tmp1.zst
+test -f tmpOutDir/tmp2.zst
+println "test : decompress multiple files into an output directory, --output-dir-flat"
+mkdir tmpOutDirDecomp
+$ZSTD tmpOutDir -r -d --output-dir-flat tmpOutDirDecomp
+test -f tmpOutDirDecomp/tmp2
+test -f tmpOutDirDecomp/tmp1
+rm -f tmpOutDirDecomp/*
+$ZSTD tmpOutDir -r -d --output-dir-flat=tmpOutDirDecomp
+test -f tmpOutDirDecomp/tmp2
+test -f tmpOutDirDecomp/tmp1
+rm -rf tmp*
println "\n===> Advanced compression parameters "
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
@@ -408,6 +466,52 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
+println "\n===> stream-size mode"
+
+./datagen -g11000 > tmp
+println "test : basic file compression vs sized streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
+if [ "$stream_size" -gt "$file_size" ]; then
+ die "hinted compression larger than expected"
+fi
+println "test : sized streaming compression and decompression"
+cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : incorrect stream size"
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+
+
+println "\n===> size-hint mode"
+
+./datagen -g11000 > tmp
+./datagen -g11000 > tmp2
+./datagen > tmpDict
+println "test : basic file compression vs hinted streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
+if [ "$stream_size" -ge "$file_size" ]; then
+ die "hinted compression larger than expected"
+fi
+println "test : hinted streaming compression and decompression"
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : hinted streaming compression with dictionary"
+cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
+println "test : multiple file compression with hints and dictionary"
+$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2
+$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp
+$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
+cmp tmp.zst tmp1_.zst || die "first file's output differs"
+cmp tmp2.zst tmp2_.zst || die "second file's output differs"
+println "test : incorrect hinted stream sizes"
+cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t # slightly too high
+cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low
+cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high
+cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low
+
println "\n===> dictionary tests "
@@ -480,6 +584,15 @@ $ZSTD -o tmpDict --train "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f tmpDict
$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f dictionary
+println "- Test dictionary training fails"
+echo "000000000000000000000000000000000" > tmpz
+$ZSTD --train tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros"
+if [ -n "$hasMT" ]
+then
+ $ZSTD --train -T0 tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros"
+ println "- Create dictionary with multithreading enabled"
+ $ZSTD --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
+fi
rm tmp* dictionary
@@ -500,30 +613,27 @@ println "- Create dictionary with short dictID"
$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
println "- Create dictionaries with shrink-dict flag enabled"
-$ZSTD --train-fastcover=steps=256,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict
-$ZSTD --train-fastcover=steps=256,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict1
-$ZSTD --train-fastcover=steps=256,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict2
+$ZSTD --train-fastcover=steps=1,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict
+$ZSTD --train-fastcover=steps=1,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict1
+$ZSTD --train-fastcover=steps=1,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict2
println "- Create dictionary with size limit"
-$ZSTD --train-fastcover=steps=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K
-println "- Compare size of dictionary from 90% training samples with 80% training samples"
-$ZSTD --train-fastcover=split=90 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
-$ZSTD --train-fastcover=split=80 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=steps=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K
println "- Create dictionary using all samples for both training and testing"
-$ZSTD --train-fastcover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=k=56,d=8,split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Create dictionary using f=16"
-$ZSTD --train-fastcover=f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
-$ZSTD --train-fastcover=accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15"
+$ZSTD --train-fastcover=k=56,d=8,f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=k=56,d=8,accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15"
println "- Create dictionary using accel=2"
-$ZSTD --train-fastcover=accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=k=56,d=8,accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Create dictionary using accel=10"
-$ZSTD --train-fastcover=accel=10 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=k=56,d=8,accel=10 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Create dictionary with multithreading"
$ZSTD --train-fastcover -T4 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Test -o before --train-fastcover"
rm -f tmpDict dictionary
-$ZSTD -o tmpDict --train-fastcover "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD -o tmpDict --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f tmpDict
-$ZSTD --train-fastcover "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f dictionary
rm tmp* dictionary
@@ -583,8 +693,8 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !"
println "\n===> golden files tests "
-$ZSTD -t -r "$TESTDIR/files"
-$ZSTD -c -r "$TESTDIR/files" | $ZSTD -t
+$ZSTD -t -r "$TESTDIR/golden-compression"
+$ZSTD -c -r "$TESTDIR/golden-compression" | $ZSTD -t
println "\n===> benchmark mode tests "
@@ -597,10 +707,10 @@ $ZSTD -i0b0e3 tmp1
println "bench negative level"
$ZSTD -bi0 --fast tmp1
println "with recursive and quiet modes"
-$ZSTD -rqi1b1e2 tmp1
+$ZSTD -rqi0b1e2 tmp1
println "benchmark decompression only"
$ZSTD -f tmp1
-$ZSTD -b -d -i1 tmp1.zst
+$ZSTD -b -d -i0 tmp1.zst
println "\n===> zstd compatibility tests "
@@ -766,6 +876,46 @@ if [ $LZ4MODE -ne 1 ]; then
grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
+println "\n===> tar extension tests "
+
+rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4
+
+./datagen > tmp
+tar cf tmp.tar tmp
+$ZSTD tmp.tar -o tmp.tzst
+rm tmp.tar
+$ZSTD -d tmp.tzst
+[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
+rm -f tmp.tar tmp.tzst
+
+if [ $GZIPMODE -eq 1 ]; then
+ tar czf tmp.tgz tmp
+ $ZSTD -d tmp.tgz
+ [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!"
+ rm -f tmp.tar tmp.tgz
+fi
+
+if [ $LZMAMODE -eq 1 ]; then
+ tar c tmp | $ZSTD --format=xz > tmp.txz
+ $ZSTD -d tmp.txz
+ [ -e tmp.tar ] || die ".txz failed to decompress to .tar!"
+ rm -f tmp.tar tmp.txz
+fi
+
+if [ $LZ4MODE -eq 1 ]; then
+ tar c tmp | $ZSTD --format=lz4 > tmp.tlz4
+ $ZSTD -d tmp.tlz4
+ [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!"
+ rm -f tmp.tar tmp.tlz4
+fi
+
+touch tmp.t tmp.tz tmp.tzs
+! $ZSTD -d tmp.t
+! $ZSTD -d tmp.tz
+! $ZSTD -d tmp.tzs
+
+exit
+
println "\n===> zstd round-trip tests "
roundTripTest
@@ -1020,4 +1170,18 @@ test -f dictionary
rm -f tmp* dictionary
+if [ "$isWindows" = false ] ; then
+
+println "\n===> zstd fifo named pipe test "
+head -c 10 /dev/zero > tmp_original
+mkfifo named_pipe
+head -c 10 /dev/zero > named_pipe &
+$ZSTD named_pipe -o tmp_compressed
+$ZSTD -d -o tmp_decompressed tmp_compressed
+$DIFF -s tmp_original tmp_decompressed
+rm -rf tmp*
+rm -rf named_pipe
+
+fi
+
rm -f tmp*
diff --git a/tests/poolTests.c b/tests/poolTests.c
index 26d57fb5c867..02ec62af9f37 100644
--- a/tests/poolTests.c
+++ b/tests/poolTests.c
@@ -46,7 +46,7 @@ static int testOrder(size_t numThreads, size_t queueSize)
POOL_ctx* const ctx = POOL_create(numThreads, queueSize);
ASSERT_TRUE(ctx);
data.i = 0;
- (void)ZSTD_pthread_mutex_init(&data.mutex, NULL);
+ ASSERT_FALSE(ZSTD_pthread_mutex_init(&data.mutex, NULL));
{ size_t i;
for (i = 0; i < 16; ++i) {
POOL_add(ctx, &fn, &data);
diff --git a/tests/regression/method.c b/tests/regression/method.c
index 1e84021c3ef3..b74f548196fd 100644
--- a/tests/regression/method.c
+++ b/tests/regression/method.c
@@ -444,6 +444,8 @@ static int init_cstream(
ZSTD_parameters const params = config_get_zstd_params(config, 0, 0);
ZSTD_CDict* dict = NULL;
if (cdict) {
+ if (!config->use_dictionary)
+ return 1;
*cdict = ZSTD_createCDict_advanced(
state->dictionary.data,
state->dictionary.size,
@@ -459,14 +461,18 @@ static int init_cstream(
} else {
zret = ZSTD_initCStream_advanced(
zcs,
- state->dictionary.data,
- state->dictionary.size,
+ config->use_dictionary ? state->dictionary.data : NULL,
+ config->use_dictionary ? state->dictionary.size : 0,
params,
ZSTD_CONTENTSIZE_UNKNOWN);
}
} else {
int const level = config_get_level(config);
+ if (level == CONFIG_NO_LEVEL)
+ return 1;
if (cdict) {
+ if (!config->use_dictionary)
+ return 1;
*cdict = ZSTD_createCDict(
state->dictionary.data,
state->dictionary.size,
@@ -477,7 +483,10 @@ static int init_cstream(
zret = ZSTD_initCStream_usingCDict(zcs, *cdict);
} else if (config->use_dictionary) {
zret = ZSTD_initCStream_usingDict(
- zcs, state->dictionary.data, state->dictionary.size, level);
+ zcs,
+ state->dictionary.data,
+ state->dictionary.size,
+ level);
} else {
zret = ZSTD_initCStream(zcs, level);
}
@@ -506,9 +515,17 @@ static result_t old_streaming_compress_internal(
result = result_error(result_error_compression_error);
goto out;
}
+ if (!advanced && config_get_level(config) == CONFIG_NO_LEVEL) {
+ result = result_error(result_error_skip);
+ goto out;
+ }
+ if (cdict && !config->use_dictionary) {
+ result = result_error(result_error_skip);
+ goto out;
+ }
if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) {
- result = result_error(result_error_compression_error);
- goto out;
+ result = result_error(result_error_compression_error);
+ goto out;
}
result_data_t data = {.total_size = 0};
@@ -629,21 +646,21 @@ method_t const old_streaming = {
method_t const old_streaming_advanced = {
.name = "old streaming advanced",
.create = buffer_state_create,
- .compress = old_streaming_compress,
+ .compress = old_streaming_compress_advanced,
.destroy = buffer_state_destroy,
};
method_t const old_streaming_cdict = {
.name = "old streaming cdcit",
.create = buffer_state_create,
- .compress = old_streaming_compress,
+ .compress = old_streaming_compress_cdict,
.destroy = buffer_state_destroy,
};
method_t const old_streaming_advanced_cdict = {
.name = "old streaming advanced cdict",
.create = buffer_state_create,
- .compress = old_streaming_compress,
+ .compress = old_streaming_compress_cdict_advanced,
.destroy = buffer_state_destroy,
};
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index e66787fc02f5..a0e1566a5d50 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -33,7 +33,7 @@ silesia, level 19, compress
silesia, long distance mode, compress cctx, 4849491
silesia, multithreaded, compress cctx, 4849491
silesia, multithreaded long distance mode, compress cctx, 4849491
-silesia, small window log, compress cctx, 7112784
+silesia, small window log, compress cctx, 7112472
silesia, small hash log, compress cctx, 6554898
silesia, small chain log, compress cctx, 4931093
silesia, explicit params, compress cctx, 4794609
@@ -97,7 +97,7 @@ silesia, level 19, zstdcli,
silesia, long distance mode, zstdcli, 4839698
silesia, multithreaded, zstdcli, 4849539
silesia, multithreaded long distance mode, zstdcli, 4839698
-silesia, small window log, zstdcli, 7123892
+silesia, small window log, zstdcli, 7123580
silesia, small hash log, zstdcli, 6554946
silesia, small chain log, zstdcli, 4931141
silesia, explicit params, zstdcli, 4797048
@@ -123,7 +123,7 @@ silesia.tar, no source size, zstdcli,
silesia.tar, long distance mode, zstdcli, 4853140
silesia.tar, multithreaded, zstdcli, 4861462
silesia.tar, multithreaded long distance mode, zstdcli, 4853140
-silesia.tar, small window log, zstdcli, 7127964
+silesia.tar, small window log, zstdcli, 7127589
silesia.tar, small hash log, zstdcli, 6587841
silesia.tar, small chain log, zstdcli, 4943269
silesia.tar, explicit params, zstdcli, 4822318
@@ -188,7 +188,7 @@ silesia, no source size, advanced
silesia, long distance mode, advanced one pass, 4839650
silesia, multithreaded, advanced one pass, 4849491
silesia, multithreaded long distance mode, advanced one pass, 4839650
-silesia, small window log, advanced one pass, 7123844
+silesia, small window log, advanced one pass, 7123532
silesia, small hash log, advanced one pass, 6554898
silesia, small chain log, advanced one pass, 4931093
silesia, explicit params, advanced one pass, 4797035
@@ -214,7 +214,7 @@ silesia.tar, no source size, advanced
silesia.tar, long distance mode, advanced one pass, 4848046
silesia.tar, multithreaded, advanced one pass, 4860726
silesia.tar, multithreaded long distance mode, advanced one pass, 4847343
-silesia.tar, small window log, advanced one pass, 7127924
+silesia.tar, small window log, advanced one pass, 7127549
silesia.tar, small hash log, advanced one pass, 6587833
silesia.tar, small chain log, advanced one pass, 4943266
silesia.tar, explicit params, advanced one pass, 4808543
@@ -280,7 +280,7 @@ silesia, no source size, advanced
silesia, long distance mode, advanced one pass small out, 4839650
silesia, multithreaded, advanced one pass small out, 4849491
silesia, multithreaded long distance mode, advanced one pass small out, 4839650
-silesia, small window log, advanced one pass small out, 7123844
+silesia, small window log, advanced one pass small out, 7123532
silesia, small hash log, advanced one pass small out, 6554898
silesia, small chain log, advanced one pass small out, 4931093
silesia, explicit params, advanced one pass small out, 4797035
@@ -306,7 +306,7 @@ silesia.tar, no source size, advanced
silesia.tar, long distance mode, advanced one pass small out, 4848046
silesia.tar, multithreaded, advanced one pass small out, 4860726
silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847343
-silesia.tar, small window log, advanced one pass small out, 7127924
+silesia.tar, small window log, advanced one pass small out, 7127549
silesia.tar, small hash log, advanced one pass small out, 6587833
silesia.tar, small chain log, advanced one pass small out, 4943266
silesia.tar, explicit params, advanced one pass small out, 4808543
@@ -372,7 +372,7 @@ silesia, no source size, advanced
silesia, long distance mode, advanced streaming, 4839650
silesia, multithreaded, advanced streaming, 4849491
silesia, multithreaded long distance mode, advanced streaming, 4839650
-silesia, small window log, advanced streaming, 7123846
+silesia, small window log, advanced streaming, 7123534
silesia, small hash log, advanced streaming, 6554898
silesia, small chain log, advanced streaming, 4931093
silesia, explicit params, advanced streaming, 4797048
@@ -398,7 +398,7 @@ silesia.tar, no source size, advanced
silesia.tar, long distance mode, advanced streaming, 4848046
silesia.tar, multithreaded, advanced streaming, 4861458
silesia.tar, multithreaded long distance mode, advanced streaming, 4853136
-silesia.tar, small window log, advanced streaming, 7127924
+silesia.tar, small window log, advanced streaming, 7127549
silesia.tar, small hash log, advanced streaming, 6587834
silesia.tar, small chain log, advanced streaming, 4943271
silesia.tar, explicit params, advanced streaming, 4808570
@@ -461,17 +461,9 @@ silesia, level 13, old stre
silesia, level 16, old streaming, 4377389
silesia, level 19, old streaming, 4293262
silesia, no source size, old streaming, 4849455
-silesia, long distance mode, old streaming, 12000408
-silesia, multithreaded, old streaming, 12000408
-silesia, multithreaded long distance mode, old streaming, 12000408
-silesia, small window log, old streaming, 12000408
-silesia, small hash log, old streaming, 12000408
-silesia, small chain log, old streaming, 12000408
-silesia, explicit params, old streaming, 12000408
silesia, uncompressed literals, old streaming, 4849491
silesia, uncompressed literals optimal, old streaming, 4293262
silesia, huffman literals, old streaming, 6183385
-silesia, multithreaded with advanced params, old streaming, 12000408
silesia.tar, level -5, old streaming, 6982738
silesia.tar, level -3, old streaming, 6641264
silesia.tar, level -1, old streaming, 6190789
@@ -487,17 +479,9 @@ silesia.tar, level 13, old stre
silesia.tar, level 16, old streaming, 4381284
silesia.tar, level 19, old streaming, 4281511
silesia.tar, no source size, old streaming, 4861372
-silesia.tar, long distance mode, old streaming, 12022046
-silesia.tar, multithreaded, old streaming, 12022046
-silesia.tar, multithreaded long distance mode, old streaming, 12022046
-silesia.tar, small window log, old streaming, 12022046
-silesia.tar, small hash log, old streaming, 12022046
-silesia.tar, small chain log, old streaming, 12022046
-silesia.tar, explicit params, old streaming, 12022046
silesia.tar, uncompressed literals, old streaming, 4861376
silesia.tar, uncompressed literals optimal, old streaming, 4281511
silesia.tar, huffman literals, old streaming, 6190789
-silesia.tar, multithreaded with advanced params, old streaming, 12022046
github, level -5, old streaming, 205285
github, level -5 with dict, old streaming, 46718
github, level -3, old streaming, 190643
@@ -527,17 +511,9 @@ github, level 16 with dict, old stre
github, level 19, old streaming, 133717
github, level 19 with dict, old streaming, 37576
github, no source size, old streaming, 140631
-github, long distance mode, old streaming, 412933
-github, multithreaded, old streaming, 412933
-github, multithreaded long distance mode, old streaming, 412933
-github, small window log, old streaming, 412933
-github, small hash log, old streaming, 412933
-github, small chain log, old streaming, 412933
-github, explicit params, old streaming, 412933
github, uncompressed literals, old streaming, 136311
github, uncompressed literals optimal, old streaming, 133717
github, huffman literals, old streaming, 175568
-github, multithreaded with advanced params, old streaming, 412933
silesia, level -5, old streaming advanced, 6882466
silesia, level -3, old streaming advanced, 6568358
silesia, level -1, old streaming advanced, 6183385
@@ -553,17 +529,17 @@ silesia, level 13, old stre
silesia, level 16, old streaming advanced, 4377389
silesia, level 19, old streaming advanced, 4293262
silesia, no source size, old streaming advanced, 4849455
-silesia, long distance mode, old streaming advanced, 12000408
-silesia, multithreaded, old streaming advanced, 12000408
-silesia, multithreaded long distance mode, old streaming advanced, 12000408
-silesia, small window log, old streaming advanced, 12000408
-silesia, small hash log, old streaming advanced, 12000408
-silesia, small chain log, old streaming advanced, 12000408
-silesia, explicit params, old streaming advanced, 12000408
+silesia, long distance mode, old streaming advanced, 4849491
+silesia, multithreaded, old streaming advanced, 4849491
+silesia, multithreaded long distance mode, old streaming advanced, 4849491
+silesia, small window log, old streaming advanced, 7123534
+silesia, small hash log, old streaming advanced, 6554898
+silesia, small chain log, old streaming advanced, 4931093
+silesia, explicit params, old streaming advanced, 4797048
silesia, uncompressed literals, old streaming advanced, 4849491
silesia, uncompressed literals optimal, old streaming advanced, 4293262
silesia, huffman literals, old streaming advanced, 6183385
-silesia, multithreaded with advanced params, old streaming advanced, 12000408
+silesia, multithreaded with advanced params, old streaming advanced, 4849491
silesia.tar, level -5, old streaming advanced, 6982738
silesia.tar, level -3, old streaming advanced, 6641264
silesia.tar, level -1, old streaming advanced, 6190789
@@ -579,238 +555,82 @@ silesia.tar, level 13, old stre
silesia.tar, level 16, old streaming advanced, 4381284
silesia.tar, level 19, old streaming advanced, 4281511
silesia.tar, no source size, old streaming advanced, 4861372
-silesia.tar, long distance mode, old streaming advanced, 12022046
-silesia.tar, multithreaded, old streaming advanced, 12022046
-silesia.tar, multithreaded long distance mode, old streaming advanced, 12022046
-silesia.tar, small window log, old streaming advanced, 12022046
-silesia.tar, small hash log, old streaming advanced, 12022046
-silesia.tar, small chain log, old streaming advanced, 12022046
-silesia.tar, explicit params, old streaming advanced, 12022046
+silesia.tar, long distance mode, old streaming advanced, 4861376
+silesia.tar, multithreaded, old streaming advanced, 4861376
+silesia.tar, multithreaded long distance mode, old streaming advanced, 4861376
+silesia.tar, small window log, old streaming advanced, 7127552
+silesia.tar, small hash log, old streaming advanced, 6587834
+silesia.tar, small chain log, old streaming advanced, 4943271
+silesia.tar, explicit params, old streaming advanced, 4808570
silesia.tar, uncompressed literals, old streaming advanced, 4861376
silesia.tar, uncompressed literals optimal, old streaming advanced, 4281511
silesia.tar, huffman literals, old streaming advanced, 6190789
-silesia.tar, multithreaded with advanced params, old streaming advanced, 12022046
-github, level -5, old streaming advanced, 205285
-github, level -5 with dict, old streaming advanced, 46718
-github, level -3, old streaming advanced, 190643
-github, level -3 with dict, old streaming advanced, 45395
-github, level -1, old streaming advanced, 175568
-github, level -1 with dict, old streaming advanced, 43170
-github, level 0, old streaming advanced, 136311
-github, level 0 with dict, old streaming advanced, 41148
-github, level 1, old streaming advanced, 142450
-github, level 1 with dict, old streaming advanced, 41682
-github, level 3, old streaming advanced, 136311
-github, level 3 with dict, old streaming advanced, 41148
-github, level 4, old streaming advanced, 136144
-github, level 4 with dict, old streaming advanced, 41251
-github, level 5, old streaming advanced, 135106
-github, level 5 with dict, old streaming advanced, 38938
-github, level 6, old streaming advanced, 135108
-github, level 6 with dict, old streaming advanced, 38632
-github, level 7, old streaming advanced, 135108
-github, level 7 with dict, old streaming advanced, 38766
-github, level 9, old streaming advanced, 135108
-github, level 9 with dict, old streaming advanced, 39326
-github, level 13, old streaming advanced, 133717
-github, level 13 with dict, old streaming advanced, 39716
-github, level 16, old streaming advanced, 133717
-github, level 16 with dict, old streaming advanced, 37577
+silesia.tar, multithreaded with advanced params, old streaming advanced, 4861376
+github, level -5, old streaming advanced, 216734
+github, level -5 with dict, old streaming advanced, 49562
+github, level -3, old streaming advanced, 192160
+github, level -3 with dict, old streaming advanced, 44956
+github, level -1, old streaming advanced, 181108
+github, level -1 with dict, old streaming advanced, 42383
+github, level 0, old streaming advanced, 141090
+github, level 0 with dict, old streaming advanced, 41113
+github, level 1, old streaming advanced, 143682
+github, level 1 with dict, old streaming advanced, 42430
+github, level 3, old streaming advanced, 141090
+github, level 3 with dict, old streaming advanced, 41113
+github, level 4, old streaming advanced, 141090
+github, level 4 with dict, old streaming advanced, 41084
+github, level 5, old streaming advanced, 139391
+github, level 5 with dict, old streaming advanced, 39159
+github, level 6, old streaming advanced, 139394
+github, level 6 with dict, old streaming advanced, 38749
+github, level 7, old streaming advanced, 138675
+github, level 7 with dict, old streaming advanced, 38746
+github, level 9, old streaming advanced, 138675
+github, level 9 with dict, old streaming advanced, 38987
+github, level 13, old streaming advanced, 138675
+github, level 13 with dict, old streaming advanced, 39724
+github, level 16, old streaming advanced, 138675
+github, level 16 with dict, old streaming advanced, 40771
github, level 19, old streaming advanced, 133717
github, level 19 with dict, old streaming advanced, 37576
github, no source size, old streaming advanced, 140631
-github, long distance mode, old streaming advanced, 412933
-github, multithreaded, old streaming advanced, 412933
-github, multithreaded long distance mode, old streaming advanced, 412933
-github, small window log, old streaming advanced, 412933
-github, small hash log, old streaming advanced, 412933
-github, small chain log, old streaming advanced, 412933
-github, explicit params, old streaming advanced, 412933
-github, uncompressed literals, old streaming advanced, 136311
+github, long distance mode, old streaming advanced, 141090
+github, multithreaded, old streaming advanced, 141090
+github, multithreaded long distance mode, old streaming advanced, 141090
+github, small window log, old streaming advanced, 141090
+github, small hash log, old streaming advanced, 141578
+github, small chain log, old streaming advanced, 139258
+github, explicit params, old streaming advanced, 140930
+github, uncompressed literals, old streaming advanced, 141090
github, uncompressed literals optimal, old streaming advanced, 133717
-github, huffman literals, old streaming advanced, 175568
-github, multithreaded with advanced params, old streaming advanced, 412933
-silesia, level -5, old streaming cdcit, 6882466
-silesia, level -3, old streaming cdcit, 6568358
-silesia, level -1, old streaming cdcit, 6183385
-silesia, level 0, old streaming cdcit, 4849491
-silesia, level 1, old streaming cdcit, 5314109
-silesia, level 3, old streaming cdcit, 4849491
-silesia, level 4, old streaming cdcit, 4786913
-silesia, level 5, old streaming cdcit, 4710178
-silesia, level 6, old streaming cdcit, 4659996
-silesia, level 7, old streaming cdcit, 4596234
-silesia, level 9, old streaming cdcit, 4543862
-silesia, level 13, old streaming cdcit, 4482073
-silesia, level 16, old streaming cdcit, 4377389
-silesia, level 19, old streaming cdcit, 4293262
-silesia, no source size, old streaming cdcit, 4849455
-silesia, long distance mode, old streaming cdcit, 12000408
-silesia, multithreaded, old streaming cdcit, 12000408
-silesia, multithreaded long distance mode, old streaming cdcit, 12000408
-silesia, small window log, old streaming cdcit, 12000408
-silesia, small hash log, old streaming cdcit, 12000408
-silesia, small chain log, old streaming cdcit, 12000408
-silesia, explicit params, old streaming cdcit, 12000408
-silesia, uncompressed literals, old streaming cdcit, 4849491
-silesia, uncompressed literals optimal, old streaming cdcit, 4293262
-silesia, huffman literals, old streaming cdcit, 6183385
-silesia, multithreaded with advanced params, old streaming cdcit, 12000408
-silesia.tar, level -5, old streaming cdcit, 6982738
-silesia.tar, level -3, old streaming cdcit, 6641264
-silesia.tar, level -1, old streaming cdcit, 6190789
-silesia.tar, level 0, old streaming cdcit, 4861376
-silesia.tar, level 1, old streaming cdcit, 5336879
-silesia.tar, level 3, old streaming cdcit, 4861376
-silesia.tar, level 4, old streaming cdcit, 4799583
-silesia.tar, level 5, old streaming cdcit, 4722276
-silesia.tar, level 6, old streaming cdcit, 4672240
-silesia.tar, level 7, old streaming cdcit, 4606657
-silesia.tar, level 9, old streaming cdcit, 4554106
-silesia.tar, level 13, old streaming cdcit, 4491707
-silesia.tar, level 16, old streaming cdcit, 4381284
-silesia.tar, level 19, old streaming cdcit, 4281511
-silesia.tar, no source size, old streaming cdcit, 4861372
-silesia.tar, long distance mode, old streaming cdcit, 12022046
-silesia.tar, multithreaded, old streaming cdcit, 12022046
-silesia.tar, multithreaded long distance mode, old streaming cdcit, 12022046
-silesia.tar, small window log, old streaming cdcit, 12022046
-silesia.tar, small hash log, old streaming cdcit, 12022046
-silesia.tar, small chain log, old streaming cdcit, 12022046
-silesia.tar, explicit params, old streaming cdcit, 12022046
-silesia.tar, uncompressed literals, old streaming cdcit, 4861376
-silesia.tar, uncompressed literals optimal, old streaming cdcit, 4281511
-silesia.tar, huffman literals, old streaming cdcit, 6190789
-silesia.tar, multithreaded with advanced params, old streaming cdcit, 12022046
-github, level -5, old streaming cdcit, 205285
+github, huffman literals, old streaming advanced, 181108
+github, multithreaded with advanced params, old streaming advanced, 141090
github, level -5 with dict, old streaming cdict, 46718
-github, level -3, old streaming cdict, 190643
github, level -3 with dict, old streaming cdict, 45395
-github, level -1, old streaming cdict, 175568
github, level -1 with dict, old streaming cdict, 43170
-github, level 0, old streaming cdict, 136311
github, level 0 with dict, old streaming cdict, 41148
-github, level 1, old streaming cdict, 142450
github, level 1 with dict, old streaming cdict, 41682
-github, level 3, old streaming cdict, 136311
github, level 3 with dict, old streaming cdict, 41148
-github, level 4, old streaming cdict, 136144
github, level 4 with dict, old streaming cdict, 41251
-github, level 5, old streaming cdict, 135106
github, level 5 with dict, old streaming cdict, 38938
-github, level 6, old streaming cdict, 135108
github, level 6 with dict, old streaming cdict, 38632
-github, level 7, old streaming cdict, 135108
github, level 7 with dict, old streaming cdict, 38766
-github, level 9, old streaming cdict, 135108
github, level 9 with dict, old streaming cdict, 39326
-github, level 13, old streaming cdict, 133717
github, level 13 with dict, old streaming cdict, 39716
-github, level 16, old streaming cdict, 133717
github, level 16 with dict, old streaming cdict, 37577
-github, level 19, old streaming cdict, 133717
github, level 19 with dict, old streaming cdict, 37576
-github, no source size, old streaming cdict, 140631
-github, long distance mode, old streaming cdict, 412933
-github, multithreaded, old streaming cdict, 412933
-github, multithreaded long distance mode, old streaming cdict, 412933
-github, small window log, old streaming cdict, 412933
-github, small hash log, old streaming cdict, 412933
-github, small chain log, old streaming cdict, 412933
-github, explicit params, old streaming cdict, 412933
-github, uncompressed literals, old streaming cdict, 136311
-github, uncompressed literals optimal, old streaming cdict, 133717
-github, huffman literals, old streaming cdict, 175568
-github, multithreaded with advanced params, old streaming cdict, 412933
-silesia, level -5, old streaming advanced cdict, 6882466
-silesia, level -3, old streaming advanced cdict, 6568358
-silesia, level -1, old streaming advanced cdict, 6183385
-silesia, level 0, old streaming advanced cdict, 4849491
-silesia, level 1, old streaming advanced cdict, 5314109
-silesia, level 3, old streaming advanced cdict, 4849491
-silesia, level 4, old streaming advanced cdict, 4786913
-silesia, level 5, old streaming advanced cdict, 4710178
-silesia, level 6, old streaming advanced cdict, 4659996
-silesia, level 7, old streaming advanced cdict, 4596234
-silesia, level 9, old streaming advanced cdict, 4543862
-silesia, level 13, old streaming advanced cdict, 4482073
-silesia, level 16, old streaming advanced cdict, 4377389
-silesia, level 19, old streaming advanced cdict, 4293262
-silesia, no source size, old streaming advanced cdict, 4849455
-silesia, long distance mode, old streaming advanced cdict, 12000408
-silesia, multithreaded, old streaming advanced cdict, 12000408
-silesia, multithreaded long distance mode, old streaming advanced cdict, 12000408
-silesia, small window log, old streaming advanced cdict, 12000408
-silesia, small hash log, old streaming advanced cdict, 12000408
-silesia, small chain log, old streaming advanced cdict, 12000408
-silesia, explicit params, old streaming advanced cdict, 12000408
-silesia, uncompressed literals, old streaming advanced cdict, 4849491
-silesia, uncompressed literals optimal, old streaming advanced cdict, 4293262
-silesia, huffman literals, old streaming advanced cdict, 6183385
-silesia, multithreaded with advanced params, old streaming advanced cdict, 12000408
-silesia.tar, level -5, old streaming advanced cdict, 6982738
-silesia.tar, level -3, old streaming advanced cdict, 6641264
-silesia.tar, level -1, old streaming advanced cdict, 6190789
-silesia.tar, level 0, old streaming advanced cdict, 4861376
-silesia.tar, level 1, old streaming advanced cdict, 5336879
-silesia.tar, level 3, old streaming advanced cdict, 4861376
-silesia.tar, level 4, old streaming advanced cdict, 4799583
-silesia.tar, level 5, old streaming advanced cdict, 4722276
-silesia.tar, level 6, old streaming advanced cdict, 4672240
-silesia.tar, level 7, old streaming advanced cdict, 4606657
-silesia.tar, level 9, old streaming advanced cdict, 4554106
-silesia.tar, level 13, old streaming advanced cdict, 4491707
-silesia.tar, level 16, old streaming advanced cdict, 4381284
-silesia.tar, level 19, old streaming advanced cdict, 4281511
-silesia.tar, no source size, old streaming advanced cdict, 4861372
-silesia.tar, long distance mode, old streaming advanced cdict, 12022046
-silesia.tar, multithreaded, old streaming advanced cdict, 12022046
-silesia.tar, multithreaded long distance mode, old streaming advanced cdict, 12022046
-silesia.tar, small window log, old streaming advanced cdict, 12022046
-silesia.tar, small hash log, old streaming advanced cdict, 12022046
-silesia.tar, small chain log, old streaming advanced cdict, 12022046
-silesia.tar, explicit params, old streaming advanced cdict, 12022046
-silesia.tar, uncompressed literals, old streaming advanced cdict, 4861376
-silesia.tar, uncompressed literals optimal, old streaming advanced cdict, 4281511
-silesia.tar, huffman literals, old streaming advanced cdict, 6190789
-silesia.tar, multithreaded with advanced params, old streaming advanced cdict, 12022046
-github, level -5, old streaming advanced cdict, 205285
-github, level -5 with dict, old streaming advanced cdict, 46718
-github, level -3, old streaming advanced cdict, 190643
-github, level -3 with dict, old streaming advanced cdict, 45395
-github, level -1, old streaming advanced cdict, 175568
-github, level -1 with dict, old streaming advanced cdict, 43170
-github, level 0, old streaming advanced cdict, 136311
-github, level 0 with dict, old streaming advanced cdict, 41148
-github, level 1, old streaming advanced cdict, 142450
-github, level 1 with dict, old streaming advanced cdict, 41682
-github, level 3, old streaming advanced cdict, 136311
-github, level 3 with dict, old streaming advanced cdict, 41148
-github, level 4, old streaming advanced cdict, 136144
-github, level 4 with dict, old streaming advanced cdict, 41251
-github, level 5, old streaming advanced cdict, 135106
-github, level 5 with dict, old streaming advanced cdict, 38938
-github, level 6, old streaming advanced cdict, 135108
-github, level 6 with dict, old streaming advanced cdict, 38632
-github, level 7, old streaming advanced cdict, 135108
-github, level 7 with dict, old streaming advanced cdict, 38766
-github, level 9, old streaming advanced cdict, 135108
-github, level 9 with dict, old streaming advanced cdict, 39326
-github, level 13, old streaming advanced cdict, 133717
-github, level 13 with dict, old streaming advanced cdict, 39716
-github, level 16, old streaming advanced cdict, 133717
-github, level 16 with dict, old streaming advanced cdict, 37577
-github, level 19, old streaming advanced cdict, 133717
+github, level -5 with dict, old streaming advanced cdict, 49562
+github, level -3 with dict, old streaming advanced cdict, 44956
+github, level -1 with dict, old streaming advanced cdict, 42383
+github, level 0 with dict, old streaming advanced cdict, 41113
+github, level 1 with dict, old streaming advanced cdict, 42430
+github, level 3 with dict, old streaming advanced cdict, 41113
+github, level 4 with dict, old streaming advanced cdict, 41084
+github, level 5 with dict, old streaming advanced cdict, 39158
+github, level 6 with dict, old streaming advanced cdict, 38748
+github, level 7 with dict, old streaming advanced cdict, 38744
+github, level 9 with dict, old streaming advanced cdict, 38986
+github, level 13 with dict, old streaming advanced cdict, 39724
+github, level 16 with dict, old streaming advanced cdict, 40771
github, level 19 with dict, old streaming advanced cdict, 37576
-github, no source size, old streaming advanced cdict, 140631
-github, long distance mode, old streaming advanced cdict, 412933
-github, multithreaded, old streaming advanced cdict, 412933
-github, multithreaded long distance mode, old streaming advanced cdict, 412933
-github, small window log, old streaming advanced cdict, 412933
-github, small hash log, old streaming advanced cdict, 412933
-github, small chain log, old streaming advanced cdict, 412933
-github, explicit params, old streaming advanced cdict, 412933
-github, uncompressed literals, old streaming advanced cdict, 136311
-github, uncompressed literals optimal, old streaming advanced cdict, 133717
-github, huffman literals, old streaming advanced cdict, 175568
-github, multithreaded with advanced params, old streaming advanced cdict, 412933
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
index 8cbde3f4f38e..8a4a27907b0e 100644
--- a/tests/zbufftest.c
+++ b/tests/zbufftest.c
@@ -81,7 +81,7 @@ static U64 g_clockTime = 0;
@return : a 27 bits random value, from a 32-bits `seed`.
`seed` is also modified */
# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-unsigned int FUZ_rand(unsigned int* seedPtr)
+static unsigned int FUZ_rand(unsigned int* seedPtr)
{
U32 rand32 = *seedPtr;
rand32 *= prime1;
@@ -184,7 +184,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
{ size_t i;
for (i=0; i<CNBufferSize; i++) {
- if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+ if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} }
DISPLAYLEVEL(4, "OK \n");
@@ -213,7 +213,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
{ size_t i;
for (i=0; i<CNBufferSize; i++) {
- if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+ if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} }
DISPLAYLEVEL(4, "OK \n");
@@ -467,7 +467,7 @@ _output_error:
/*-*******************************************************
* Command line
*********************************************************/
-int FUZ_usage(const char* programName)
+static int FUZ_usage(const char* programName)
{
DISPLAY( "Usage :\n");
DISPLAY( " %s [args]\n", programName);
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 97d4e33e150c..2047d4bd85cc 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -481,7 +481,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t i;
for (i=0; i<CNBufferSize; i++) {
- if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+ if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
@@ -1151,6 +1151,16 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : ZSTD_c_srcSizeHint bounds : ", testNb++);
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, INT_MAX));
+ { int srcSizeHint;
+ CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_srcSizeHint, &srcSizeHint));
+ CHECK(!(srcSizeHint == INT_MAX), "srcSizeHint doesn't match");
+ }
+ CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
+ DISPLAYLEVEL(3, "OK \n");
+
/* Overlen overwriting window data bug */
DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
{ /* This test has a window size of 1024 bytes and consists of 3 blocks:
@@ -2106,6 +2116,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
+ if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, ZSTD_SRCSIZEHINT_MIN, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
}
/* mess with frame parameters */
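The tests above exercise the ZSTD_c_srcSizeHint parameter added in 1.4.4: a caller who streams data of unknown exact size can still tell the compressor roughly how much input to expect, which guides parameter selection. The following is a minimal sketch of how application code might pass that hint through the advanced API; it is illustrative only and not part of this patch. It assumes zstd >= 1.4.4 with ZSTD_STATIC_LINKING_ONLY defined (the parameter is still experimental in this release), and the helper name compress_with_hint is hypothetical.

#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_srcSizeHint is experimental in 1.4.4 */
#include <zstd.h>
#include <stddef.h>

/* Compress src into dst while advertising an approximate source size.
 * Returns the compressed size, or an error code checkable with ZSTD_isError(). */
static size_t compress_with_hint(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 int expectedSrcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize;
    if (cctx == NULL) return (size_t)(-1);   /* generic error; ZSTD_isError() flags it */
    /* The hint only steers parameter adaptation; it does not have to be exact. */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, expectedSrcSize);
    cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return cSize;   /* in real code, also check the setParameter results */
}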
diff --git a/zlibWrapper/.gitignore b/zlibWrapper/.gitignore
deleted file mode 100644
index b037ae6f2934..000000000000
--- a/zlibWrapper/.gitignore
+++ /dev/null
@@ -1,28 +0,0 @@
-# object artifacts
-*.o
-
-# Default result files
-_*
-example
-example_zstd.*
-example_gz.*
-fitblk
-fitblk_zstd.*
-zwrapbench
-foo.gz
-
-minigzip
-minigzip_zstd
-example
-example_zstd
-fitblk
-fitblk_zstd
-zwrapbench
-
-# Misc files
-*.bat
-*.zip
-*.txt
-
-# Directories
-minizip/
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index d4fc33b51e7b..6addb743cbe6 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -18,13 +18,15 @@ EXAMPLE_PATH = examples
PROGRAMS_PATH = ../programs
TEST_FILE = ../doc/zstd_compression_format.md
-CPPFLAGS = -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \
- -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
-CFLAGS ?= $(MOREFLAGS) -O3 -std=gnu99
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
- -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
- -Wstrict-aliasing=1
-
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \
+ -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
+STDFLAGS = -std=c90 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
+ -DNO_snprintf -DNO_vsnprintf # strict ISO C90 is missing these prototypes
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
+ -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
+ -Wstrict-aliasing=1
+CFLAGS ?= -O3
+CFLAGS += $(STDFLAGS) $(DEBUGFLAGS) $(MOREFLAGS)
# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
@@ -33,6 +35,11 @@ else
EXT =
endif
+default : release
+
+release : STDFLAGS =
+release : DEBUGFLAGS =
+release : all
all: fitblk example zwrapbench minigzip
@@ -68,35 +75,35 @@ valgrindTest: clean example fitblk example_zstd fitblk_zstd zwrapbench
$(VALGRIND) ./zwrapbench -rqi1b1e5 ../lib ../programs ../tests
#.c.o:
-# $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+# $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
-minigzip: $(EXAMPLE_PATH)/minigzip.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+minigzip: $(EXAMPLE_PATH)/minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@
-minigzip_zstd: $(EXAMPLE_PATH)/minigzip.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+minigzip_zstd: $(EXAMPLE_PATH)/minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@
-example: $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+example: $(EXAMPLE_PATH)/example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
-example_zstd: $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+example_zstd: $(EXAMPLE_PATH)/example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
-fitblk: $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(ZSTDLIBRARY)
+fitblk: $(EXAMPLE_PATH)/fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
-fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
+fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
-zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(PROGRAMS_PATH)/util.o $(PROGRAMS_PATH)/timefn.o $(PROGRAMS_PATH)/datagen.o $(ZSTDLIBRARY)
+zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o zstd_zlibwrapper.o $(PROGRAMS_PATH)/util.o $(PROGRAMS_PATH)/timefn.o $(PROGRAMS_PATH)/datagen.o $(ZSTDLIBRARY)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
-$(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
- $(CC) $(CFLAGS) $(CPPFLAGS) -I. -c -o $@ $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c
+zstd_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
-$(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
- $(CC) $(CFLAGS) $(CPPFLAGS) -DZWRAP_USE_ZSTD=1 -I. -c -o $@ $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c
+zstdTurnedOn_zlibwrapper.o: CPPFLAGS += -DZWRAP_USE_ZSTD=1
+zstdTurnedOn_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
+ $(CC) $(CPPFLAGS) $(CFLAGS) $< -c -o $@
$(ZSTDLIBDIR)/libzstd.a:
$(MAKE) -C $(ZSTDLIBDIR) libzstd.a
diff --git a/zlibWrapper/examples/fitblk.c b/zlibWrapper/examples/fitblk.c
index 6418ca38763c..669b176eb8cb 100644
--- a/zlibWrapper/examples/fitblk.c
+++ b/zlibWrapper/examples/fitblk.c
@@ -159,7 +159,7 @@ int main(int argc, char **argv)
if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion());
/* allocate memory for buffers and compression engine */
- blk = malloc(size + EXCESS);
+ blk = (unsigned char*)malloc(size + EXCESS);
def.zalloc = Z_NULL;
def.zfree = Z_NULL;
def.opaque = Z_NULL;
@@ -180,8 +180,8 @@ int main(int argc, char **argv)
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
/* write block to stdout */
have = size + EXCESS - def.avail_out;
- // if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
- // quit("error writing output");
+ /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
+ * quit("error writing output"); */
/* clean up and print results to stderr */
ret = deflateEnd(&def);
@@ -200,7 +200,7 @@ int main(int argc, char **argv)
inf.avail_in = 0;
inf.next_in = Z_NULL;
ret = inflateInit(&inf);
- tmp = malloc(size + EXCESS);
+ tmp = (unsigned char*)malloc(size + EXCESS);
if (ret != Z_OK || tmp == NULL)
quit("out of memory");
ret = deflateReset(&def);
@@ -237,8 +237,8 @@ int main(int argc, char **argv)
/* done -- write block to stdout */
have = size - def.avail_out;
-// if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
-// quit("error writing output");
+ /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
+ * quit("error writing output"); */
/* clean up and print results to stderr */
free(tmp);
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 61031b9de798..35893a4b4384 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -311,14 +311,14 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
ret = deflateReset(&def);
if (ret != Z_OK) EXM_THROW(1, "deflateReset failure");
if (useSetDict) {
- ret = deflateSetDictionary(&def, dictBuffer, dictBufferSize);
+ ret = deflateSetDictionary(&def, (const z_Bytef*)dictBuffer, dictBufferSize);
if (ret != Z_OK) EXM_THROW(1, "deflateSetDictionary failure");
if (ZWRAP_isUsingZSTDcompression()) useSetDict = 0; /* zstd doesn't require deflateSetDictionary after ZWRAP_deflateReset_keepDict */
}
- def.next_in = (z_const void*) blockTable[blockNb].srcPtr;
+ def.next_in = (z_const z_Bytef*) blockTable[blockNb].srcPtr;
def.avail_in = (uInt)blockTable[blockNb].srcSize;
def.total_in = 0;
- def.next_out = (void*) blockTable[blockNb].cPtr;
+ def.next_out = (z_Bytef*) blockTable[blockNb].cPtr;
def.avail_out = (uInt)blockTable[blockNb].cRoom;
def.total_out = 0;
ret = deflate(&def, Z_FINISH);
@@ -343,13 +343,13 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
ret = deflateInit(&def, cLevel);
if (ret != Z_OK) EXM_THROW(1, "deflateInit failure");
if (dictBuffer) {
- ret = deflateSetDictionary(&def, dictBuffer, dictBufferSize);
+ ret = deflateSetDictionary(&def, (const z_Bytef*)dictBuffer, dictBufferSize);
if (ret != Z_OK) EXM_THROW(1, "deflateSetDictionary failure");
}
- def.next_in = (z_const void*) blockTable[blockNb].srcPtr;
+ def.next_in = (z_const z_Bytef*) blockTable[blockNb].srcPtr;
def.avail_in = (uInt)blockTable[blockNb].srcSize;
def.total_in = 0;
- def.next_out = (void*) blockTable[blockNb].cPtr;
+ def.next_out = (z_Bytef*) blockTable[blockNb].cPtr;
def.avail_out = (uInt)blockTable[blockNb].cRoom;
def.total_out = 0;
ret = deflate(&def, Z_FINISH);
@@ -451,15 +451,15 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
else
ret = inflateReset(&inf);
if (ret != Z_OK) EXM_THROW(1, "inflateReset failure");
- inf.next_in = (z_const void*) blockTable[blockNb].cPtr;
+ inf.next_in = (z_const z_Bytef*) blockTable[blockNb].cPtr;
inf.avail_in = (uInt)blockTable[blockNb].cSize;
inf.total_in = 0;
- inf.next_out = (void*) blockTable[blockNb].resPtr;
+ inf.next_out = (z_Bytef*) blockTable[blockNb].resPtr;
inf.avail_out = (uInt)blockTable[blockNb].srcSize;
inf.total_out = 0;
ret = inflate(&inf, Z_FINISH);
if (ret == Z_NEED_DICT) {
- ret = inflateSetDictionary(&inf, dictBuffer, dictBufferSize);
+ ret = inflateSetDictionary(&inf, (const z_Bytef*)dictBuffer, dictBufferSize);
if (ret != Z_OK) EXM_THROW(1, "inflateSetDictionary failure");
ret = inflate(&inf, Z_FINISH);
}
@@ -483,15 +483,15 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
inf.opaque = Z_NULL;
ret = inflateInit(&inf);
if (ret != Z_OK) EXM_THROW(1, "inflateInit failure");
- inf.next_in = (z_const void*) blockTable[blockNb].cPtr;
+ inf.next_in = (z_const z_Bytef*) blockTable[blockNb].cPtr;
inf.avail_in = (uInt)blockTable[blockNb].cSize;
inf.total_in = 0;
- inf.next_out = (void*) blockTable[blockNb].resPtr;
+ inf.next_out = (z_Bytef*) blockTable[blockNb].resPtr;
inf.avail_out = (uInt)blockTable[blockNb].srcSize;
inf.total_out = 0;
ret = inflate(&inf, Z_FINISH);
if (ret == Z_NEED_DICT) {
- ret = inflateSetDictionary(&inf, dictBuffer, dictBufferSize);
+ ret = inflateSetDictionary(&inf, (const z_Bytef*) dictBuffer, dictBufferSize);
if (ret != Z_OK) EXM_THROW(1, "inflateSetDictionary failure");
ret = inflate(&inf, Z_FINISH);
}
diff --git a/zlibWrapper/gzclose.c b/zlibWrapper/gzclose.c
index d4493d010d74..25d3789b128a 100644
--- a/zlibWrapper/gzclose.c
+++ b/zlibWrapper/gzclose.c
@@ -19,7 +19,7 @@ int ZEXPORT gzclose(file)
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
return state.state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
#else
diff --git a/zlibWrapper/gzlib.c b/zlibWrapper/gzlib.c
index 3070dd8b4975..b1fb98517e4d 100644
--- a/zlibWrapper/gzlib.c
+++ b/zlibWrapper/gzlib.c
@@ -216,7 +216,7 @@ local gzFile gz_open(path, fd, mode)
#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
(void)snprintf(state.state->path, len + 1, "%s", (const char *)path);
#else
- strcpy(state.state->path, path);
+ strcpy(state.state->path, (const char*)path);
#endif
/* compute the flags for open() */
@@ -325,7 +325,7 @@ int ZEXPORT gzbuffer(file, size)
/* get internal structure and check integrity */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return -1;
@@ -351,7 +351,7 @@ int ZEXPORT gzrewind(file)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no error */
if (state.state->mode != GZ_READ ||
@@ -378,7 +378,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence)
/* get internal structure and check integrity */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return -1;
@@ -463,7 +463,7 @@ z_off64_t ZEXPORT gztell64(file)
/* get internal structure and check integrity */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return -1;
@@ -491,7 +491,7 @@ z_off64_t ZEXPORT gzoffset64(file)
/* get internal structure and check integrity */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return -1;
@@ -523,7 +523,7 @@ int ZEXPORT gzeof(file)
/* get internal structure and check integrity */
if (file == NULL)
return 0;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return 0;
@@ -541,7 +541,7 @@ const char * ZEXPORT gzerror(file, errnum)
/* get internal structure and check integrity */
if (file == NULL)
return NULL;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return NULL;
@@ -561,7 +561,7 @@ void ZEXPORT gzclearerr(file)
/* get internal structure and check integrity */
if (file == NULL)
return;
- state = (gz_statep)file;
+ state.file = file;
if (state.state->mode != GZ_READ && state.state->mode != GZ_WRITE)
return;
diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c
index 88fc06c77f42..359d17889130 100644
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@@ -8,6 +8,14 @@
#include "gzguts.h"
+/* fix for Visual Studio, which doesn't support ssize_t type.
+ * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
+#if defined(_MSC_VER) && !defined(ssize_t)
+# include <BaseTsd.h>
+ typedef SSIZE_T ssize_t;
+#endif
+
+
/* Local functions */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
local int gz_avail OF((gz_statep));
@@ -386,7 +394,7 @@ int ZEXPORT gzread(file, buf, len)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no (serious) error */
if (state.state->mode != GZ_READ ||
@@ -424,7 +432,7 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file)
/* get internal structure */
if (file == NULL)
return 0;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no (serious) error */
if (state.state->mode != GZ_READ ||
@@ -470,7 +478,7 @@ int ZEXPORT gzgetc(file)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no (serious) error */
if (state.state->mode != GZ_READ ||
@@ -485,7 +493,7 @@ int ZEXPORT gzgetc(file)
}
/* nothing there -- try gz_read() */
- ret = (unsigned)gz_read(state, buf, 1);
+ ret = (int)gz_read(state, buf, 1);
return ret < 1 ? -1 : buf[0];
}
@@ -505,7 +513,7 @@ int ZEXPORT gzungetc(c, file)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no (serious) error */
if (state.state->mode != GZ_READ ||
@@ -569,7 +577,7 @@ char * ZEXPORT gzgets(file, buf, len)
/* check parameters and get internal structure */
if (file == NULL || buf == NULL || len < 1)
return NULL;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading and that there's no (serious) error */
if (state.state->mode != GZ_READ ||
@@ -628,7 +636,7 @@ int ZEXPORT gzdirect(file)
/* get internal structure */
if (file == NULL)
return 0;
- state = (gz_statep)file;
+ state.file = file;
/* if the state is not known, but we can find out, then do so (this is
mainly for right after a gzopen() or gzdopen()) */
@@ -649,7 +657,7 @@ int ZEXPORT gzclose_r(file)
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're reading */
if (state.state->mode != GZ_READ)
diff --git a/zlibWrapper/gzwrite.c b/zlibWrapper/gzwrite.c
index 21d5f84727aa..422ff17db978 100644
--- a/zlibWrapper/gzwrite.c
+++ b/zlibWrapper/gzwrite.c
@@ -258,7 +258,7 @@ int ZEXPORT gzwrite(file, buf, len)
/* get internal structure */
if (file == NULL)
return 0;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're writing and that there's no error */
if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
@@ -289,7 +289,7 @@ z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
assert(size != 0);
if (file == NULL)
return 0;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're writing and that there's no error */
if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
@@ -319,7 +319,7 @@ int ZEXPORT gzputc(file, c)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
strm = &(state.state->strm);
/* check that we're writing and that there's no error */
@@ -366,7 +366,7 @@ int ZEXPORT gzputs(file, str)
/* get internal structure */
if (file == NULL)
return -1;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're writing and that there's no error */
if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
@@ -393,7 +393,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
strm = &(state.state->strm);
/* check that we're writing and that there's no error */
@@ -565,7 +565,7 @@ int ZEXPORT gzflush(file, flush)
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're writing and that there's no error */
if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
@@ -599,7 +599,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
strm = &(state.state->strm);
/* check that we're writing and that there's no error */
@@ -639,7 +639,7 @@ int ZEXPORT gzclose_w(file)
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
- state = (gz_statep)file;
+ state.file = file;
/* check that we're writing */
if (state.state->mode != GZ_WRITE)
diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c
index 0ee5a310873b..3fa442106f14 100644
--- a/zlibWrapper/zstd_zlibwrapper.c
+++ b/zlibWrapper/zstd_zlibwrapper.c
@@ -31,7 +31,7 @@
/* === Constants === */
#define Z_INFLATE_SYNC 8
#define ZLIB_HEADERSIZE 4
-#define ZSTD_HEADERSIZE ZSTD_FRAMEHEADERSIZE_MIN
+#define ZSTD_HEADERSIZE ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)
#define ZWRAP_DEFAULT_CLEVEL 3 /* Z_DEFAULT_COMPRESSION is translated to ZWRAP_DEFAULT_CLEVEL for zstd */
@@ -54,7 +54,7 @@ int ZWRAP_isUsingZSTDcompression(void) { return g_ZWRAP_useZSTDcompression; }
static ZWRAP_decompress_type g_ZWRAPdecompressionType = ZWRAP_AUTO;
-void ZWRAP_setDecompressionType(ZWRAP_decompress_type type) { g_ZWRAPdecompressionType = type; };
+void ZWRAP_setDecompressionType(ZWRAP_decompress_type type) { g_ZWRAPdecompressionType = type; }
ZWRAP_decompress_type ZWRAP_getDecompressionType(void) { return g_ZWRAPdecompressionType; }
@@ -99,7 +99,7 @@ typedef struct {
unsigned long long pledgedSrcSize;
} ZWRAP_CCtx;
-typedef ZWRAP_CCtx internal_state;
+/* typedef ZWRAP_CCtx internal_state; */
@@ -121,8 +121,10 @@ static ZWRAP_CCtx* ZWRAP_createCCtx(z_streamp strm)
if (zwc==NULL) return NULL;
memset(zwc, 0, sizeof(ZWRAP_CCtx));
memcpy(&zwc->allocFunc, strm, sizeof(z_stream));
- { ZSTD_customMem const ZWRAP_customMem = { ZWRAP_allocFunction, ZWRAP_freeFunction, &zwc->allocFunc };
- zwc->customMem = ZWRAP_customMem; }
+ { ZSTD_customMem ZWRAP_customMem = { ZWRAP_allocFunction, ZWRAP_freeFunction, NULL };
+ ZWRAP_customMem.opaque = &zwc->allocFunc;
+ zwc->customMem = ZWRAP_customMem;
+ }
} else {
zwc = (ZWRAP_CCtx*)calloc(1, sizeof(*zwc));
if (zwc==NULL) return NULL;
@@ -455,15 +457,17 @@ static void ZWRAP_initDCtx(ZWRAP_DCtx* zwd)
static ZWRAP_DCtx* ZWRAP_createDCtx(z_streamp strm)
{
ZWRAP_DCtx* zwd;
- MEM_STATIC_ASSERT(sizeof(zwd->headerBuf) >= ZSTD_FRAMEHEADERSIZE_MIN); /* check static buffer size condition */
+ MEM_STATIC_ASSERT(sizeof(zwd->headerBuf) >= ZSTD_HEADERSIZE); /* check static buffer size condition */
if (strm->zalloc && strm->zfree) {
zwd = (ZWRAP_DCtx*)strm->zalloc(strm->opaque, 1, sizeof(ZWRAP_DCtx));
if (zwd==NULL) return NULL;
memset(zwd, 0, sizeof(ZWRAP_DCtx));
zwd->allocFunc = *strm; /* just to copy zalloc, zfree & opaque */
- { ZSTD_customMem const ZWRAP_customMem = { ZWRAP_allocFunction, ZWRAP_freeFunction, &zwd->allocFunc };
- zwd->customMem = ZWRAP_customMem; }
+ { ZSTD_customMem ZWRAP_customMem = { ZWRAP_allocFunction, ZWRAP_freeFunction, NULL };
+ ZWRAP_customMem.opaque = &zwd->allocFunc;
+ zwd->customMem = ZWRAP_customMem;
+ }
} else {
zwd = (ZWRAP_DCtx*)calloc(1, sizeof(*zwd));
if (zwd==NULL) return NULL;
@@ -509,7 +513,7 @@ static int ZWRAPD_finishWithErrorMsg(z_streamp strm, char* message)
ZEXTERN int ZEXPORT z_inflateInit_ OF((z_streamp strm,
- const char *version, int stream_size))
+ const char* version, int stream_size))
{
if (g_ZWRAPdecompressionType == ZWRAP_FORCE_ZLIB) {
strm->reserved = ZWRAP_ZLIB_STREAM;
@@ -520,7 +524,7 @@ ZEXTERN int ZEXPORT z_inflateInit_ OF((z_streamp strm,
LOG_WRAPPERD("- inflateInit\n");
if (zwd == NULL) return ZWRAPD_finishWithError(zwd, strm, 0);
- zwd->version = ZSTD_malloc(strlen(version)+1, zwd->customMem);
+ zwd->version = (char*)ZSTD_malloc(strlen(version)+1, zwd->customMem);
if (zwd->version == NULL) return ZWRAPD_finishWithError(zwd, strm, 0);
strcpy(zwd->version, version);
@@ -1003,7 +1007,7 @@ ZEXTERN int ZEXPORT z_inflateBackEnd OF((z_streamp strm))
}
-ZEXTERN uLong ZEXPORT z_zlibCompileFlags OF((void)) { return zlibCompileFlags(); };
+ZEXTERN uLong ZEXPORT z_zlibCompileFlags OF((void)) { return zlibCompileFlags(); }