15 files changed, 848 insertions, 259 deletions
diff --git a/programs/Makefile b/programs/Makefile
index c5469cfc4def..cbb67e9fe349 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -1,12 +1,10 @@
-# ##########################################################################
+# ################################################################
 # Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
-# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree. An additional grant
-# of patent rights can be found in the PATENTS file in the same directory.
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
 # ##########################################################################
 # zstd : Command Line Utility, supporting gzip-like arguments
 # zstd32 : Same as zstd, but forced to compile in 32-bits mode
@@ -42,7 +40,7 @@ CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
            -DZSTD_NEWAPI \
            -DXXH_NAMESPACE=ZSTD_   # because xxhash.o already compiled with this macro from library
 CFLAGS  ?= -O3
-DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
@@ -68,8 +66,7 @@ endif
 else
 endif
 
-ZSTDLIB_FILES := $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES))
-ZSTDLIB_OBJ   := $(patsubst %.c,%.o,$(ZSTDLIB_FILES))
+ZSTDLIB_FILES := $(sort $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES)))
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
@@ -143,13 +140,10 @@ allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy
 
 $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 
-zstd zstd4 : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP)
-zstd zstd4 : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD)
-zstd4 : CPPFLAGS += $(LZ4CPP)
-zstd4 : LDFLAGS += $(LZ4LD)
-zstd : LZ4_MSG := - lz4 support is disabled
-zstd zstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd zstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
+zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
+zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD)
+zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
+zstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
 	@echo "$(THREAD_MSG)"
 	@echo "$(ZLIB_MSG)"
 	@echo "$(LZMA_MSG)"
@@ -212,8 +206,8 @@ zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c
 zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
 
-# zstd is now built with multithreading enabled y default
 zstdmt: zstd
+	ln -sf zstd zstdmt
 
 .PHONY: generate_res
 generate_res:
@@ -255,25 +249,35 @@ ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD Ne
 list:
 	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
-ifneq (,$(filter $(shell uname),SunOS))
-INSTALL ?= ginstall
-else
-INSTALL ?= install
-endif
-
-PREFIX  ?= /usr/local
-DESTDIR ?=
-BINDIR  ?= $(PREFIX)/bin
+DESTDIR     ?=
+# directory variables : GNU conventions prefer lowercase
+# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+# support both lower and uppercase (BSD), use uppercase in script
+prefix      ?= /usr/local
+PREFIX      ?= $(prefix)
+exec_prefix ?= $(PREFIX)
+bindir      ?= $(exec_prefix)/bin
+BINDIR      ?= $(bindir)
+datarootdir ?= $(PREFIX)/share
+mandir      ?= $(datarootdir)/man
+man1dir     ?= $(mandir)/man1
 
 ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS))
 MANDIR  ?= $(PREFIX)/man/man1
 else
-MANDIR  ?= $(PREFIX)/share/man/man1
+MANDIR  ?= $(man1dir)
+endif
+
+ifneq (,$(filter $(shell uname),SunOS))
+INSTALL ?= ginstall
+else
+INSTALL ?= install
 endif
 
-INSTALL_PROGRAM ?= $(INSTALL) -m 755
-INSTALL_SCRIPT  ?= $(INSTALL) -m 755
-INSTALL_MAN     ?= $(INSTALL) -m 644
+INSTALL_PROGRAM ?= $(INSTALL)
+INSTALL_SCRIPT  ?= $(INSTALL_PROGRAM)
+INSTALL_DATA    ?= $(INSTALL) -m 644
+INSTALL_MAN     ?= $(INSTALL_DATA)
 
 .PHONY: install
 install: zstd
diff --git a/programs/README.md b/programs/README.md
index 8b65dfdb3f24..2da9a6db1261 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -40,6 +40,16 @@ There are however other Makefile targets that create different variations of CLI
   In which case, linking stage will fail if `lzma` library cannot be found.
   This might be useful to prevent silent feature disabling.
 
+- __HAVE_LZ4__ : `zstd` can compress and decompress files in `.lz4` formats.
+  This is ordered through commands `--format=lz4`.
+  Alternatively, symlinks named `lz4`, or `unlz4` will mimic intended behavior.
+  `.lz4` support is automatically enabled when `lz4` library is detected at build time.
+  It's possible to disable `.lz4` support, by setting HAVE_LZ4=0 .
+  Example : make zstd HAVE_LZ4=0
+  It's also possible to force compilation with lz4 support, using HAVE_LZ4=1.
+  In which case, linking stage will fail if `lz4` library cannot be found.
+  This might be useful to prevent silent feature disabling.
+
 - __ZSTD_LEGACY_SUPPORT__ : `zstd` can decompress files compressed by older versions of `zstd`.
   Starting v0.8.0, all versions of `zstd` produce frames compliant with the [specification](../doc/zstd_compression_format.md), and are therefore compatible.
   But older versions (< v0.8.0) produced different, incompatible, frames.
@@ -113,6 +123,7 @@ Advanced arguments :
  -c     : force write to standard output, even if it is the console
  -l     : print information about zstd compressed files
 --ultra : enable levels beyond 19, up to 22 (requires more memory)
+--long  : enable long distance matching (requires more memory)
 --no-dictID : don't write dictID into header (dictionary compression)
 --[no-]check : integrity check (default:enabled)
  -r     : operate recursively on directories
@@ -139,3 +150,60 @@ Benchmark arguments :
  -B#    : cut file into independent blocks of size # (default: no block)
 --priority=rt : set process priority to real-time
 ```
+
+
+#### Long distance matching mode
+The long distance matching mode, enabled with `--long`, is designed to improve
+the compression ratio for files with long matches at a large distance (up to the
+maximum window size, `128 MiB`) while still maintaining compression speed. 
+
+Enabling this mode sets the window size to `128 MiB` and thus increases the memory
+usage for both the compressor and decompressor. Performance in terms of speed is
+dependent on long matches being found. Compression speed may degrade if few long
+matches are found. Decompression speed usually improves when there are many long
+distance matches.
+
+Below are graphs comparing the compression speed, compression ratio, and
+decompression speed with and without long distance matching on an ideal use
+case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`,
+`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there
+are many long distance matches within the maximum window size of `128 MiB` (each
+version is less than `128 MiB`). 
+
+Compression Speed vs Ratio | Decompression Speed
+---------------------------|---------------------
+![Compression Speed vs Ratio](../doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](../doc/images/ldmDspeed.png "Decompression Speed")
+
+| Method | Compression ratio | Compression speed | Decompression speed  |
+|:-------|------------------:|-------------------------:|---------------------------:|
+| `zstd -1`   | `5.065`   | `284.8 MB/s`  | `759.3 MB/s`  |
+| `zstd -5`  | `5.826`    | `124.9 MB/s`  | `674.0 MB/s`  |
+| `zstd -10` | `6.504`    | `29.5 MB/s`   | `771.3 MB/s`  |
+| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` |
+| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s`|
+| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s`|
+
+On this file, the compression ratio improves significantly with minimal impact
+on compression speed, and the decompression speed doubles.
+
+On the other extreme, compressing a file with few long distance matches (such as
+the [Silesia compression corpus]) will likely lead to a deterioration in
+compression speed (for lower levels) with minimal change in compression ratio.
+
+The below table illustrates this on the [Silesia compression corpus].
+
+[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
+
+| Method | Compression ratio | Compression speed | Decompression speed  |
+|:-------|------------------:|-------------------------:|---------------------------:|
+| `zstd -1`   | `2.878`   | `231.7 MB/s`  | `594.4 MB/s`  |
+| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` |
+| `zstd -5`  | `3.274`    | `77.1 MB/s`  | `464.2 MB/s`  |
+| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` |
+| `zstd -10` | `3.523`    | `16.4 MB/s`   | `489.2 MB/s`  |
+| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s`  |
+
+
+
+
+
diff --git a/programs/bench.c b/programs/bench.c
index 2b48a4663a86..ec99c61cebf6 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -129,6 +130,31 @@ void BMK_setNbThreads(unsigned nbThreads) {
 #endif
     g_nbThreads = nbThreads;
 }
+static U32 g_ldmFlag = 0;
+void BMK_setLdmFlag(unsigned ldmFlag) {
+    g_ldmFlag = ldmFlag;
+}
+
+static U32 g_ldmMinMatch = 0;
+void BMK_setLdmMinMatch(unsigned ldmMinMatch) {
+    g_ldmMinMatch = ldmMinMatch;
+}
+
+static U32 g_ldmHashLog = 0;
+void BMK_setLdmHashLog(unsigned ldmHashLog) {
+    g_ldmHashLog = ldmHashLog;
+}
+
+#define BMK_LDM_PARAM_NOTSET 9999
+static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET;
+void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) {
+    g_ldmBucketSizeLog = ldmBucketSizeLog;
+}
+
+static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET;
+void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
+    g_ldmHashEveryLog = ldmHashEveryLog;
+}
 
 
 /* ********************************************************
@@ -170,7 +196,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     size_t cSize = 0;
     double ratio = 0.;
     U32 nbBlocks;
-    UTIL_freq_t ticksPerSecond;
 
     /* checks */
     if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
@@ -178,7 +203,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 
     /* init */
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* display last 17 characters */
-    UTIL_initTimer(&ticksPerSecond);
 
     if (g_decodeOnly) {  /* benchmark only decompression : source must be already compressed */
         const char* srcPtr = (const char*)srcBuffer;
@@ -238,15 +262,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
         const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
         U32 markNb = 0;
 
-        UTIL_getTime(&coolTime);
+        coolTime = UTIL_getTime();
         DISPLAYLEVEL(2, "\r%79s\r", "");
         while (!cCompleted || !dCompleted) {
 
             /* overheat protection */
-            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
+            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                 DISPLAYLEVEL(2, "\rcooling down ...    \r");
                 UTIL_sleep(COOLPERIOD_SEC);
-                UTIL_getTime(&coolTime);
+                coolTime = UTIL_getTime();
             }
 
             if (!g_decodeOnly) {
@@ -256,8 +280,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                 if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
                 UTIL_sleepMilli(1);  /* give processor time to other processes */
-                UTIL_waitForNextTick(ticksPerSecond);
-                UTIL_getTime(&clockStart);
+                UTIL_waitForNextTick();
+                clockStart = UTIL_getTime();
 
                 if (!cCompleted) {   /* still some time to do compression tests */
                     U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
@@ -266,6 +290,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 #ifdef ZSTD_NEWAPI
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
+                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
+                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
+                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
+                    if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) {
+                      ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog);
+                    }
+                    if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) {
+                      ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
+                    }
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
@@ -284,7 +317,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                     if (comprParams->searchLength) zparams.cParams.searchLength = comprParams->searchLength;
                     if (comprParams->targetLength) zparams.cParams.targetLength = comprParams->targetLength;
                     if (comprParams->strategy) zparams.cParams.strategy = comprParams->strategy;
-                    cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1 /*byRef*/, ZSTD_dm_auto, zparams.cParams, cmem);
+                    cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dm_auto, zparams.cParams, cmem);
                     if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
 #endif
                     do {
@@ -329,9 +362,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                             blockTable[blockNb].cSize = rSize;
                         }
                         nbLoops++;
-                    } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
+                    } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
                     ZSTD_freeCDict(cdict);
-                    {   U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                    {   U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart);
                         if (clockSpanMicro < fastestC*nbLoops) fastestC = clockSpanMicro / nbLoops;
                         totalCTime += clockSpanMicro;
                         cCompleted = (totalCTime >= maxTime);
@@ -356,15 +389,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
             UTIL_sleepMilli(1); /* give processor time to other processes */
-            UTIL_waitForNextTick(ticksPerSecond);
+            UTIL_waitForNextTick();
 
             if (!dCompleted) {
                 U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
                 U32 nbLoops = 0;
-                UTIL_time_t clockStart;
                 ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
+                UTIL_time_t const clockStart = UTIL_getTime();
                 if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
-                UTIL_getTime(&clockStart);
                 do {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
@@ -373,17 +405,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                             blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
                             ddict);
                         if (ZSTD_isError(regenSize)) {
-                            DISPLAY("ZSTD_decompress_usingDDict() failed on block %u of size %u : %s  \n",
+                            EXM_THROW(2, "ZSTD_decompress_usingDDict() failed on block %u of size %u : %s  \n",
                                       blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
-                            clockLoop = 0;   /* force immediate test end */
-                            break;
                         }
                         blockTable[blockNb].resSize = regenSize;
                     }
                     nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
+                } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
                 ZSTD_freeDDict(ddict);
-                {   U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                {   U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart);
                     if (clockSpanMicro < fastestD*nbLoops) fastestD = clockSpanMicro / nbLoops;
                     totalDTime += clockSpanMicro;
                     dCompleted = (totalDTime >= maxTime);
diff --git a/programs/bench.h b/programs/bench.h
index 5f8d61a25b30..82bb345696a7 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -25,5 +26,10 @@ void BMK_setNbThreads(unsigned nbThreads);
 void BMK_setNotificationLevel(unsigned level);
 void BMK_setAdditionalParam(int additionalParam);
 void BMK_setDecodeOnlyMode(unsigned decodeFlag);
+void BMK_setLdmFlag(unsigned ldmFlag);
+void BMK_setLdmMinMatch(unsigned ldmMinMatch);
+void BMK_setLdmHashLog(unsigned ldmHashLog);
+void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
+void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 
 #endif   /* BENCH_H_121279284357 */
diff --git a/programs/datagen.c b/programs/datagen.c
index b1da8e78b31e..a489d6af08d7 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
diff --git a/programs/datagen.h b/programs/datagen.h
index 5b1b7c47cc5d..2fcc980e5e76 100644
--- a/programs/datagen.h
+++ b/programs/datagen.h
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
diff --git a/programs/dibio.c b/programs/dibio.c
index ab2dc285a273..2cb2a42671a0 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -13,7 +14,7 @@
 *  Compiler Warnings
 ****************************************/
 #ifdef _MSC_VER
-#  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
 #endif
 
 
@@ -43,7 +44,7 @@
 #define SAMPLESIZE_MAX (128 KB)
 #define MEMMULT 11    /* rough estimation : memory cost to analyze 1 byte of sample */
 #define COVER_MEMMULT 9    /* rough estimation : memory cost to analyze 1 byte of sample */
-static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
+static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
 
 #define NOISELENGTH 32
 
@@ -52,13 +53,12 @@ static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_
 *  Console display
 ***************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static int g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information */
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((DIB_clockSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+#define DISPLAYUPDATE(l, ...) if (displayLevel>=l) { \
+            if ((DIB_clockSpan(g_time) > refreshRate) || (displayLevel>=4)) \
             { g_time = clock(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+            if (displayLevel>=4) fflush(stderr); } }
 static const clock_t refreshRate = CLOCKS_PER_SEC * 2 / 10;
 static clock_t g_time = 0;
 
@@ -75,9 +75,9 @@ static clock_t DIB_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
 #define EXM_THROW(error, ...)                                             \
 {                                                                         \
     DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
-    DISPLAYLEVEL(1, "Error %i : ", error);                                \
-    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
-    DISPLAYLEVEL(1, "\n");                                                \
+    DISPLAY("Error %i : ", error);                                        \
+    DISPLAY(__VA_ARGS__);                                                 \
+    DISPLAY("\n");                                                        \
     exit(error);                                                          \
 }
 
@@ -97,32 +97,55 @@ const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCo
 *  File related operations
 **********************************************************/
 /** DiB_loadFiles() :
-*   @return : nb of files effectively loaded into `buffer` */
+ *  load samples from files listed in fileNamesTable into buffer.
+ *  works even if buffer is too small to load all samples.
+ *  Also provides the size of each sample into sampleSizes table
+ *  which must be sized correctly, using DiB_fileStats().
+ * @return : nb of samples effectively loaded into `buffer`
+ * *bufferSizePtr is modified, it provides the amount data loaded within buffer.
+ *  sampleSizes is filled with the size of each sample.
+ */
 static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
-                              size_t* fileSizes,
-                              const char** fileNamesTable, unsigned nbFiles)
+                              size_t* sampleSizes, unsigned sstSize,
+                              const char** fileNamesTable, unsigned nbFiles, size_t targetChunkSize,
+                              unsigned displayLevel)
 {
     char* const buff = (char*)buffer;
     size_t pos = 0;
-    unsigned n;
+    unsigned nbLoadedChunks = 0, fileIndex;
 
-    for (n=0; n<nbFiles; n++) {
-        const char* const fileName = fileNamesTable[n];
+    for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
+        const char* const fileName = fileNamesTable[fileIndex];
         unsigned long long const fs64 = UTIL_getFileSize(fileName);
-        size_t const fileSize = (size_t) MIN(fs64, SAMPLESIZE_MAX);
-        if (fileSize > *bufferSizePtr-pos) break;
-        {   FILE* const f = fopen(fileName, "rb");
-            if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
-            DISPLAYUPDATE(2, "Loading %s...       \r", fileName);
-            { size_t const readSize = fread(buff+pos, 1, fileSize, f);
-              if (readSize != fileSize) EXM_THROW(11, "Pb reading %s", fileName);
-              pos += readSize; }
-            fileSizes[n] = fileSize;
-            fclose(f);
-    }   }
+        unsigned long long remainingToLoad = fs64;
+        U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
+        U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
+        size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
+        U32 cnb;
+        FILE* const f = fopen(fileName, "rb");
+        if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
+        DISPLAYUPDATE(2, "Loading %s...       \r", fileName);
+        for (cnb=0; cnb<nbChunks; cnb++) {
+            size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
+            if (toLoad > *bufferSizePtr-pos) break;
+            {   size_t const readSize = fread(buff+pos, 1, toLoad, f);
+                if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
+                pos += readSize;
+                sampleSizes[nbLoadedChunks++] = toLoad;
+                remainingToLoad -= targetChunkSize;
+                if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
+                    fileIndex = nbFiles;  /* stop there */
+                    break;
+                }
+                if (toLoad < targetChunkSize) {
+                    fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
+        }   }   }
+        fclose(f);
+    }
     DISPLAYLEVEL(2, "\r%79s\r", "");
     *bufferSizePtr = pos;
-    return n;
+    DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
+    return nbLoadedChunks;
 }
 
 #define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r)))
@@ -138,16 +161,19 @@ static U32 DiB_rand(U32* src)
     return rand32 >> 5;
 }
 
+/* DiB_shuffle() :
+ * shuffle a table of file names in a semi-random way
+ * It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
+ * it will load random elements from it, instead of just the first ones. */
 static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
-  /* Initialize the pseudorandom number generator */
-  U32 seed = 0xFD2FB528;
-  unsigned i;
-  for (i = nbFiles - 1; i > 0; --i) {
-    unsigned const j = DiB_rand(&seed) % (i + 1);
-    const char* tmp = fileNamesTable[j];
-    fileNamesTable[j] = fileNamesTable[i];
-    fileNamesTable[i] = tmp;
-  }
+    U32 seed = 0xFD2FB528;
+    unsigned i;
+    for (i = nbFiles - 1; i > 0; --i) {
+        unsigned const j = DiB_rand(&seed) % (i + 1);
+        const char* const tmp = fileNamesTable[j];
+        fileNamesTable[j] = fileNamesTable[i];
+        fileNamesTable[i] = tmp;
+    }
 }
 
 
@@ -161,7 +187,7 @@ static size_t DiB_findMaxMem(unsigned long long requiredMem)
 
     requiredMem = (((requiredMem >> 23) + 1) << 23);
     requiredMem += step;
-    if (requiredMem > maxMemory) requiredMem = maxMemory;
+    if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
 
     while (!testmem) {
         testmem = malloc((size_t)requiredMem);
@@ -201,18 +227,33 @@ static void DiB_saveDict(const char* dictFileName,
 }
 
 
-static int g_tooLargeSamples = 0;
-static U64 DiB_getTotalCappedFileSize(const char** fileNamesTable, unsigned nbFiles)
+typedef struct {
+    U64 totalSizeToLoad;
+    unsigned oneSampleTooLarge;
+    unsigned nbSamples;
+} fileStats;
+
+/*! DiB_fileStats() :
+ *  Given a list of files, and a chunkSize (0 == no chunk, whole files)
+ *  provides the amount of data to be loaded and the resulting nb of samples.
+ *  This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
+ */
+static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, unsigned displayLevel)
 {
-    U64 total = 0;
+    fileStats fs;
     unsigned n;
+    memset(&fs, 0, sizeof(fs));
     for (n=0; n<nbFiles; n++) {
         U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
-        U64 const cappedFileSize = MIN(fileSize, SAMPLESIZE_MAX);
-        total += cappedFileSize;
-        g_tooLargeSamples |= (fileSize > 2*SAMPLESIZE_MAX);
+        U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1);
+        U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize;
+        size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
+        fs.totalSizeToLoad += cappedChunkSize * nbSamples;
+        fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
+        fs.nbSamples += nbSamples;
     }
-    return total;
+    DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
+    return fs;
 }
 
 
@@ -229,64 +270,66 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCa
 
 
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
-                       const char** fileNamesTable, unsigned nbFiles,
+                       const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
                        ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
                        int optimizeCover)
 {
+    unsigned const displayLevel = params ? params->zParams.notificationLevel :
+                        coverParams ? coverParams->zParams.notificationLevel :
+                        0;   /* should never happen */
     void* const dictBuffer = malloc(maxDictSize);
-    size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
-    unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
+    fileStats const fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
+    size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
     size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
-    size_t const maxMem =  DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
-    size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
-    void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
+    size_t const maxMem =  DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
+    size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
+    void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
     int result = 0;
 
     /* Checks */
-    if (params) g_displayLevel = params->zParams.notificationLevel;
-    else if (coverParams) g_displayLevel = coverParams->zParams.notificationLevel;
-    else EXM_THROW(13, "Neither dictionary algorith selected");   /* should not happen */
-    if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");   /* should not happen */
-    if (g_tooLargeSamples) {
-        DISPLAYLEVEL(2, "!  Warning : some samples are very large \n");
-        DISPLAYLEVEL(2, "!  Note that dictionary is only useful for small files or beginning of large files. \n");
-        DISPLAYLEVEL(2, "!  As a consequence, only the first %u bytes of each file are loaded \n", SAMPLESIZE_MAX);
+    if ((!sampleSizes) || (!srcBuffer) || (!dictBuffer))
+        EXM_THROW(12, "not enough memory for DiB_trainFiles");   /* should not happen */
+    if (fs.oneSampleTooLarge) {
+        DISPLAYLEVEL(2, "!  Warning : some sample(s) are very large \n");
+        DISPLAYLEVEL(2, "!  Note that dictionary is only useful for small samples. \n");
+        DISPLAYLEVEL(2, "!  As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
     }
-    if ((nbFiles < 5) || (totalSizeToLoad < 9 * (unsigned long long)maxDictSize)) {
+    if (fs.nbSamples < 5) {
         DISPLAYLEVEL(2, "!  Warning : nb of samples too low for proper processing ! \n");
         DISPLAYLEVEL(2, "!  Please provide _one file per sample_. \n");
-        DISPLAYLEVEL(2, "!  Do not concatenate samples together into a single file, \n");
-        DISPLAYLEVEL(2, "!  as dictBuilder will be unable to find the beginning of each sample, \n");
-        DISPLAYLEVEL(2, "!  resulting in poor dictionary quality. \n");
+        DISPLAYLEVEL(2, "!  Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
+        EXM_THROW(14, "nb of samples too low");   /* we now clearly forbid this case */
+    }
+    if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
+        DISPLAYLEVEL(2, "!  Warning : data size of samples too small for target dictionary size \n");
+        DISPLAYLEVEL(2, "!  Samples should be about 100x larger than target dictionary size \n");
     }
 
     /* init */
-    if (benchedSize < totalSizeToLoad)
-        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
+    if (loadedSize < fs.totalSizeToLoad)
+        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
 
     /* Load input buffer */
     DISPLAYLEVEL(3, "Shuffling input files\n");
     DiB_shuffle(fileNamesTable, nbFiles);
-    nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
+    nbFiles = DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);
 
-    {
-        size_t dictSize;
+    {   size_t dictSize;
         if (params) {
-            DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+            DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
             dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,
-                                                           srcBuffer, fileSizes, nbFiles,
+                                                           srcBuffer, sampleSizes, fs.nbSamples,
                                                            *params);
         } else if (optimizeCover) {
             dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
-                                                           srcBuffer, fileSizes, nbFiles,
+                                                           srcBuffer, sampleSizes, fs.nbSamples,
                                                            coverParams);
             if (!ZDICT_isError(dictSize)) {
                 DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
             }
         } else {
-            dictSize =
-                ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
-                                            fileSizes, nbFiles, *coverParams);
+            dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
+                                                   sampleSizes, fs.nbSamples, *coverParams);
         }
         if (ZDICT_isError(dictSize)) {
             DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
@@ -301,7 +344,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
     /* clean up */
 _cleanup:
     free(srcBuffer);
+    free(sampleSizes);
     free(dictBuffer);
-    free(fileSizes);
     return result;
 }
diff --git a/programs/dibio.h b/programs/dibio.h
index 0227239b26db..499e3036520c 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 /* This library is designed for a single-threaded console application.
@@ -31,7 +32,7 @@
     @return : 0 == ok. Any other : error.
 */
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
-                       const char** fileNamesTable, unsigned nbFiles,
+                       const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
                        ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
                        int optimizeCover);
 
diff --git a/programs/fileio.c b/programs/fileio.c
index 65b4c7579194..70158f930952 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -36,6 +37,7 @@
 #  include <io.h>
 #endif
 
+#include "bitstream.h"
 #include "mem.h"
 #include "fileio.h"
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
@@ -55,6 +57,7 @@
 
 #define LZ4_MAGICNUMBER 0x184D2204
 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
+#  define LZ4F_ENABLE_OBSOLETE_ENUMS
 #  include <lz4frame.h>
 #  include <lz4.h>
 #endif
@@ -105,9 +108,6 @@ static clock_t g_time = 0;
 
 
 /*-*************************************
-*  Errors
-***************************************/
-/*-*************************************
 *  Debug
 ***************************************/
 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
@@ -140,6 +140,24 @@ static clock_t g_time = 0;
 }   }
 
 
+/*-************************************
+*  Signal (Ctrl-C trapping)
+**************************************/
+#include  <signal.h>
+
+static const char* g_artefact = NULL;
+static void INThandler(int sig)
+{
+    assert(sig==SIGINT); (void)sig;
+#if !defined(_MSC_VER)
+    signal(sig, SIG_IGN);  /* this invocation generates a buggy warning in Visual Studio */
+#endif
+    if (g_artefact) remove(g_artefact);
+    DISPLAY("\n");
+    exit(2);
+}
+
+
 /* ************************************************************
 * Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
 ***************************************************************/
@@ -213,6 +231,30 @@ void FIO_setOverlapLog(unsigned overlapLog){
         DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
     g_overlapLog = overlapLog;
 }
+static U32 g_ldmFlag = 0;
+void FIO_setLdmFlag(unsigned ldmFlag) {
+    g_ldmFlag = (ldmFlag>0);
+}
+static U32 g_ldmHashLog = 0;
+void FIO_setLdmHashLog(unsigned ldmHashLog) {
+    g_ldmHashLog = ldmHashLog;
+}
+static U32 g_ldmMinMatch = 0;
+void FIO_setLdmMinMatch(unsigned ldmMinMatch) {
+    g_ldmMinMatch = ldmMinMatch;
+}
+
+#define FIO_LDM_PARAM_NOTSET 9999
+static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
+void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) {
+    g_ldmBucketSizeLog = ldmBucketSizeLog;
+}
+
+static U32 g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET;
+void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
+    g_ldmHashEveryLog = ldmHashEveryLog;
+}
+
 
 
 /*-*************************************
@@ -325,13 +367,16 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
     fileHandle = fopen(fileName, "rb");
     if (fileHandle==0) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
     fileSize = UTIL_getFileSize(fileName);
-    if (fileSize > DICTSIZE_MAX)
+    if (fileSize > DICTSIZE_MAX) {
         EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
                         fileName, DICTSIZE_MAX >> 20);   /* avoid extreme cases */
+    }
     *bufferPtr = malloc((size_t)fileSize);
     if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
-    { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
-      if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); }
+    {   size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
+        if (readSize!=fileSize)
+            EXM_THROW(35, "Error reading dictionary file %s", fileName);
+    }
     fclose(fileHandle);
     return (size_t)fileSize;
 }
@@ -356,7 +401,7 @@ typedef struct {
 } cRess_t;
 
 static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
-                                    U64 srcSize, int srcIsRegularFile,
+                                    U64 srcSize,
                                     ZSTD_compressionParameters* comprParams) {
     cRess_t ress;
     memset(&ress, 0, sizeof(ress));
@@ -394,12 +439,23 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
 
 #ifdef ZSTD_NEWAPI
         {   /* frame parameters */
-            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, srcIsRegularFile) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
-            CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
-            /* compression parameters */
+            (void)srcSize;
+            /* compression level */
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
+            /* long distance matching */
+            CHECK( ZSTD_CCtx_setParameter(
+                          ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) );
+            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
+            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) );
+            if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
+                CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) );
+            }
+            if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) {
+                CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
+            }
+            /* compression parameters */
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );
@@ -408,13 +464,13 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
             /* multi-threading */
+            DISPLAYLEVEL(5,"set nb threads = %u \n", g_nbThreads);
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) );
             /* dictionary */
             CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
         }
 #elif defined(ZSTD_MULTITHREAD)
         {   ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
-            params.fParams.contentSizeFlag = srcIsRegularFile;
             params.fParams.checksumFlag = g_checksumFlag;
             params.fParams.noDictIDFlag = !g_dictIDFlag;
             if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
@@ -429,7 +485,6 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
         }
 #else
         {   ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
-            params.fParams.contentSizeFlag = srcIsRegularFile;
             params.fParams.checksumFlag = g_checksumFlag;
             params.fParams.noDictIDFlag = !g_dictIDFlag;
             if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
@@ -719,6 +774,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
     U64 readsize = 0;
     U64 compressedfilesize = 0;
     U64 const fileSize = UTIL_getFileSize(srcFileName);
+    DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (U32)fileSize);
 
     switch (g_compressionType) {
         case FIO_zstdCompression:
@@ -758,11 +814,12 @@ static int FIO_compressFilename_internal(cRess_t ress,
 
     /* init */
 #ifdef ZSTD_NEWAPI
-    /* nothing, reset is implied */
+    if (fileSize!=0)  /* when src is stdin, fileSize==0, but is effectively unknown */
+        ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize);  /* note : fileSize==0 means "empty" */
 #elif defined(ZSTD_MULTITHREAD)
-    CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) );
+    CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) );   /* note : fileSize==0 means "unknown" */
 #else
-    CHECK( ZSTD_resetCStream(ress.cctx, fileSize) );
+    CHECK( ZSTD_resetCStream(ress.cctx, fileSize) );   /* note : fileSize==0 means "unknown" */
 #endif
 
     /* Main compression loop */
@@ -811,10 +868,10 @@ static int FIO_compressFilename_internal(cRess_t ress,
 
     /* End of Frame */
     {   size_t result = 1;
-        while (result!=0) {   /* note : is there any possibility of endless loop ? */
+        while (result != 0) {
             ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
 #ifdef ZSTD_NEWAPI
-            ZSTD_inBuffer inBuff = { NULL, 0, 0};
+            ZSTD_inBuffer inBuff = { NULL, 0, 0 };
             result = ZSTD_compress_generic(ress.cctx,
                         &outBuff, &inBuff, ZSTD_e_end);
 #elif defined(ZSTD_MULTITHREAD)
@@ -822,11 +879,14 @@ static int FIO_compressFilename_internal(cRess_t ress,
 #else
             result = ZSTD_endStream(ress.cctx, &outBuff);
 #endif
-            if (ZSTD_isError(result))
+            if (ZSTD_isError(result)) {
                 EXM_THROW(26, "Compression error during frame end : %s",
                             ZSTD_getErrorName(result));
-            { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
-              if (sizeCheck!=outBuff.pos) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); }
+            }
+            {   size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
+                if (sizeCheck!=outBuff.pos)
+                    EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName);
+            }
             compressedfilesize += outBuff.pos;
         }
     }
@@ -890,6 +950,14 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
     ress.dstFile = FIO_openDstFile(dstFileName);
     if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
 
+    if (UTIL_isRegularFile(dstFileName)) {
+        g_artefact = dstFileName;
+        signal(SIGINT, INThandler);
+    } else {
+        g_artefact = NULL;
+    }
+
+
     if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf))
         stat_result = 1;
     result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel);
@@ -904,6 +972,9 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
     }
     else if (strcmp (dstFileName, stdoutmark) && stat_result)
         UTIL_setFileStat(dstFileName, &statbuf);
+
+    signal(SIGINT, SIG_DFL);
+
     return result;
 }
 
@@ -913,9 +984,8 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
 {
     clock_t const start = clock();
     U64 const srcSize = UTIL_getFileSize(srcFileName);
-    int const isRegularFile = UTIL_isRegularFile(srcFileName);
 
-    cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
+    cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
     int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
 
     double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
@@ -936,8 +1006,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
     char*  dstFileName = (char*)malloc(FNSPACE);
     size_t const suffixSize = suffix ? strlen(suffix) : 0;
     U64 const srcSize = (nbFiles != 1) ? 0 : UTIL_getFileSize(inFileNamesTable[0]) ;
-    int const isRegularFile = (nbFiles != 1) ? 0 : UTIL_isRegularFile(inFileNamesTable[0]);
-    cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
+    cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
 
     /* init */
     if (dstFileName==NULL)
@@ -986,8 +1055,8 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
  ***************************************************************************/
 typedef struct {
     void*  srcBuffer;
-    size_t srcBufferLoaded;
     size_t srcBufferSize;
+    size_t srcBufferLoaded;
     void*  dstBuffer;
     size_t dstBufferSize;
     ZSTD_DStream* dctx;
@@ -1002,7 +1071,7 @@ static dRess_t FIO_createDResources(const char* dictFileName)
     /* Allocation */
     ress.dctx = ZSTD_createDStream();
     if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream");
-    ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit);
+    CHECK( ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit) );
     ress.srcBufferSize = ZSTD_DStreamInSize();
     ress.srcBuffer = malloc(ress.srcBufferSize);
     ress.dstBufferSize = ZSTD_DStreamOutSize();
@@ -1133,6 +1202,35 @@ static unsigned FIO_passThrough(FILE* foutput, FILE* finput, void* buffer, size_
     return 0;
 }
 
+static void FIO_zstdErrorHelp(dRess_t* ress, size_t ret, char const* srcFileName)
+{
+    ZSTD_frameHeader header;
+    /* No special help for these errors */
+    if (ZSTD_getErrorCode(ret) != ZSTD_error_frameParameter_windowTooLarge)
+        return;
+    /* Try to decode the frame header */
+    ret = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
+    if (ret == 0) {
+        U32 const windowSize = (U32)header.windowSize;
+        U32 const windowLog = BIT_highbit32(windowSize) + ((windowSize & (windowSize - 1)) != 0);
+        U32 const windowMB = (windowSize >> 20) + (windowSize & ((1 MB) - 1));
+        assert(header.windowSize <= (U64)((U32)-1));
+        assert(g_memLimit > 0);
+        DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n",
+                        srcFileName, header.windowSize, g_memLimit);
+        if (windowLog <= ZSTD_WINDOWLOG_MAX) {
+            DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n",
+                            srcFileName, windowLog, windowMB);
+            return;
+        }
+    } else if (ZSTD_getErrorCode(ret) != ZSTD_error_frameParameter_windowTooLarge) {
+        DISPLAYLEVEL(1, "%s : Error decoding frame header to read window size : %s\n",
+                        srcFileName, ZSTD_getErrorName(ret));
+        return;
+    }
+    DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u not supported\n",
+                    srcFileName, ZSTD_WINDOWLOG_MAX);
+}
 
 /** FIO_decompressFrame() :
  *  @return : size of decoded zstd frame, or an error code
@@ -1146,14 +1244,18 @@ unsigned long long FIO_decompressZstdFrame(dRess_t* ress,
     U64 frameSize = 0;
     U32 storedSkips = 0;
 
+    size_t const srcFileLength = strlen(srcFileName);
+    if (srcFileLength>20) srcFileName += srcFileLength-20;  /* display last 20 characters only */
+
     ZSTD_resetDStream(ress->dctx);
-    if (strlen(srcFileName)>20) srcFileName += strlen(srcFileName)-20;   /* display last 20 characters */
 
-    /* Header loading (optional, saves one loop) */
-    {   size_t const toRead = 9;
-        if (ress->srcBufferLoaded < toRead)
-            ress->srcBufferLoaded += fread(((char*)ress->srcBuffer) + ress->srcBufferLoaded, 1, toRead - ress->srcBufferLoaded, finput);
-    }
+    /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
+    {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
+        if (ress->srcBufferLoaded < toDecode) {
+            size_t const toRead = toDecode - ress->srcBufferLoaded;
+            void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
+            ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
+    }   }
 
     /* Main decompression Loop */
     while (1) {
@@ -1163,6 +1265,7 @@ unsigned long long FIO_decompressZstdFrame(dRess_t* ress,
         if (ZSTD_isError(readSizeHint)) {
             DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
                             srcFileName, ZSTD_getErrorName(readSizeHint));
+            FIO_zstdErrorHelp(ress, readSizeHint, srcFileName);
             return FIO_ERROR_FRAME_DECODING;
         }
 
@@ -1185,14 +1288,17 @@ unsigned long long FIO_decompressZstdFrame(dRess_t* ress,
         }
 
         /* Fill input buffer */
-        {   size_t const toRead = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
-            if (ress->srcBufferLoaded < toRead)
-                ress->srcBufferLoaded += fread((char*)ress->srcBuffer + ress->srcBufferLoaded,
-                                               1, toRead - ress->srcBufferLoaded, finput);
-            if (ress->srcBufferLoaded < toRead) {
-                DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
-                                srcFileName);
-                return FIO_ERROR_FRAME_DECODING;
+        {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
+            if (ress->srcBufferLoaded < toDecode) {
+                size_t const toRead = toDecode - ress->srcBufferLoaded;   /* > 0 */
+                void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
+                size_t const readSize = fread(startPosition, 1, toRead, finput);
+                if (readSize==0) {
+                    DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
+                                    srcFileName);
+                    return FIO_ERROR_FRAME_DECODING;
+                }
+                ress->srcBufferLoaded += readSize;
     }   }   }
 
     FIO_fwriteSparseEnd(ress->dstFile, storedSkips);
@@ -1523,11 +1629,11 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
 
     /* Close file */
     if (fclose(srcFile)) {
-        DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));  /* error should never happen */
+        DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));  /* error should not happen */
         return 1;
     }
     if ( g_removeSrcFile /* --rm */
-      && (result==0)   /* decompression successful */
+      && (result==0)     /* decompression successful */
       && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
         if (remove(srcFileName)) {
             /* failed to remove src file */
@@ -1553,7 +1659,15 @@ static int FIO_decompressDstFile(dRess_t ress,
     ress.dstFile = FIO_openDstFile(dstFileName);
     if (ress.dstFile==0) return 1;
 
-    if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf))
+    if (UTIL_isRegularFile(dstFileName)) {
+        g_artefact = dstFileName;
+        signal(SIGINT, INThandler);
+    } else {
+        g_artefact = NULL;
+    }
+
+    if ( strcmp(srcFileName, stdinmark)
+      && UTIL_getFileStat(srcFileName, &statbuf) )
         stat_result = 1;
     result = FIO_decompressSrcFile(ress, dstFileName, srcFileName);
 
@@ -1563,11 +1677,18 @@ static int FIO_decompressDstFile(dRess_t ress,
     }
 
     if ( (result != 0)  /* operation failure */
-       && strcmp(dstFileName, nulmark)  /* special case : don't remove() /dev/null (#316) */
-       && remove(dstFileName) /* remove artefact */ )
-        result=1;   /* don't do anything special if remove() fails */
-    else if (strcmp (dstFileName, stdoutmark) && stat_result)
-        UTIL_setFileStat(dstFileName, &statbuf);
+      && strcmp(dstFileName, nulmark)      /* special case : don't remove() /dev/null (#316) */
+      && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */
+        remove(dstFileName);  /* remove decompression artefact; note don't do anything special if remove() fails */
+    else {  /* operation success */
+        if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */
+          && strcmp(dstFileName, nulmark)    /* special case : don't chmod /dev/null */
+          && stat_result )                   /* file permissions correctly extracted from src */
+            UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
+    }
+
+    signal(SIGINT, SIG_DFL);
+
     return result;
 }
 
@@ -1659,12 +1780,14 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
  ***************************************************************************/
 
 typedef struct {
+    U64 decompressedSize;
+    U64 compressedSize;
+    U64 windowSize;
     int numActualFrames;
     int numSkippableFrames;
-    unsigned long long decompressedSize;
     int decompUnavailable;
-    unsigned long long compressedSize;
     int usesCheck;
+    U32 nbFiles;
 } fileInfo_t;
 
 /** getFileInfo() :
@@ -1681,14 +1804,16 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
         DISPLAY("Error: could not open source file %s\n", inFileName);
         return 3;
     }
-    info->compressedSize = (unsigned long long)UTIL_getFileSize(inFileName);
+    info->compressedSize = UTIL_getFileSize(inFileName);
 
     /* begin analyzing frame */
     for ( ; ; ) {
         BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
         size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
         if (numBytesRead < ZSTD_frameHeaderSize_min) {
-            if (feof(srcFile) && numBytesRead == 0 && info->compressedSize > 0) {
+            if ( feof(srcFile)
+              && (numBytesRead == 0)
+              && (info->compressedSize > 0) ) {
                 break;
             }
             else if (feof(srcFile)) {
@@ -1705,12 +1830,19 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
         {   U32 const magicNumber = MEM_readLE32(headerBuffer);
             /* Zstandard frame */
             if (magicNumber == ZSTD_MAGICNUMBER) {
+                ZSTD_frameHeader header;
                 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
                 if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
                     info->decompUnavailable = 1;
                 } else {
                     info->decompressedSize += frameContentSize;
                 }
+                if (ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0) {
+                    DISPLAY("Error: could not decode frame header\n");
+                    detectError = 1;
+                    break;
+                }
+                info->windowSize = header.windowSize;
                 /* move to the end of the frame header */
                 {   size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
                     if (ZSTD_isError(headerSize)) {
@@ -1790,37 +1922,45 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
         }
     }  /* end analyzing frame */
     fclose(srcFile);
+    info->nbFiles = 1;
     return detectError;
 }
 
 static void displayInfo(const char* inFileName, fileInfo_t* info, int displayLevel){
     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
+    double const windowSizeUnit = (double)info->windowSize / unit;
     double const compressedSizeUnit = (double)info->compressedSize / unit;
     double const decompressedSizeUnit = (double)info->decompressedSize / unit;
     double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;
     const char* const checkString = (info->usesCheck ? "XXH64" : "None");
     if (displayLevel <= 2) {
         if (!info->decompUnavailable) {
-            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Uncompressed  Ratio  Check  Filename\n");
-            DISPLAYOUT("%9d  %13d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %s\n",
-                    info->numSkippableFrames, info->numActualFrames,
+            DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %s\n",
+                    info->numSkippableFrames + info->numActualFrames,
+                    info->numSkippableFrames,
                     compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
                     ratio, checkString, inFileName);
         } else {
-            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Check  Filename\n");
-            DISPLAYOUT("%9d  %13d  %7.2f MB  %5s  %s\n",
-                    info->numSkippableFrames, info->numActualFrames,
-                    compressedSizeUnit, checkString, inFileName);
+            DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %s\n",
+                    info->numSkippableFrames + info->numActualFrames,
+                    info->numSkippableFrames,
+                    compressedSizeUnit, unitStr,
+                    checkString, inFileName);
         }
     } else {
         DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
         DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
+        DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n",
+                   windowSizeUnit, unitStr,
+                   (unsigned long long)info->windowSize);
         DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n",
-                    compressedSizeUnit, unitStr, info->compressedSize);
+                    compressedSizeUnit, unitStr,
+                    (unsigned long long)info->compressedSize);
         if (!info->decompUnavailable) {
             DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n",
-                    decompressedSizeUnit, unitStr, info->decompressedSize);
+                    decompressedSizeUnit, unitStr,
+                    (unsigned long long)info->decompressedSize);
             DISPLAYOUT("Ratio: %.4f\n", ratio);
         }
         DISPLAYOUT("Check: %s\n", checkString);
@@ -1828,33 +1968,40 @@ static void displayInfo(const char* inFileName, fileInfo_t* info, int displayLev
     }
 }
 
+static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
+{
+    fileInfo_t total;
+    total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
+    total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
+    total.compressedSize = fi1.compressedSize + fi2.compressedSize;
+    total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
+    total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
+    total.usesCheck = fi1.usesCheck & fi2.usesCheck;
+    total.nbFiles = fi1.nbFiles + fi2.nbFiles;
+    return total;
+}
 
-static int FIO_listFile(const char* inFileName, int displayLevel, unsigned fileNo, unsigned numFiles){
+static int FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel){
     /* initialize info to avoid warnings */
     fileInfo_t info;
     memset(&info, 0, sizeof(info));
-    DISPLAYOUT("%s (%u/%u):\n", inFileName, fileNo, numFiles);
-    {
-        int const error = getFileInfo(&info, inFileName);
+    {   int const error = getFileInfo(&info, inFileName);
         if (error == 1) {
             /* display error, but provide output */
-            DISPLAY("An error occurred with getting file info\n");
+            DISPLAY("An error occurred while getting file info \n");
         }
         else if (error == 2) {
-            DISPLAYOUT("File %s not compressed with zstd\n", inFileName);
-            if (displayLevel > 2) {
-                DISPLAYOUT("\n");
-            }
+            DISPLAYOUT("File %s not compressed by zstd \n", inFileName);
+            if (displayLevel > 2) DISPLAYOUT("\n");
             return 1;
         }
         else if (error == 3) {
-            /* error occurred with opening the file */
-            if (displayLevel > 2) {
-                DISPLAYOUT("\n");
-            }
+            /* error occurred while opening the file */
+            if (displayLevel > 2) DISPLAYOUT("\n");
             return 1;
         }
         displayInfo(inFileName, &info, displayLevel);
+        *total = FIO_addFInfo(*total, info);
         return error;
     }
 }
@@ -1864,15 +2011,38 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis
         DISPLAYOUT("No files given\n");
         return 0;
     }
-    DISPLAYOUT("===========================================\n");
-    DISPLAYOUT("Printing information about compressed files\n");
-    DISPLAYOUT("===========================================\n");
-    DISPLAYOUT("Number of files listed: %u\n", numFiles);
-    {
-        int error = 0;
+    if (displayLevel <= 2) {
+        DISPLAYOUT("Frames  Skips  Compressed  Uncompressed  Ratio  Check  Filename\n");
+    }
+    {   int error = 0;
         unsigned u;
+        fileInfo_t total;
+        memset(&total, 0, sizeof(total));
+        total.usesCheck = 1;
         for (u=0; u<numFiles;u++) {
-            error |= FIO_listFile(filenameTable[u], displayLevel, u+1, numFiles);
+            error |= FIO_listFile(&total, filenameTable[u], displayLevel);
+        }
+        if (numFiles > 1 && displayLevel <= 2) {
+            unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
+            const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
+            double const compressedSizeUnit = (double)total.compressedSize / unit;
+            double const decompressedSizeUnit = (double)total.decompressedSize / unit;
+            double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize;
+            const char* const checkString = (total.usesCheck ? "XXH64" : "");
+            DISPLAYOUT("----------------------------------------------------------------- \n");
+            if (total.decompUnavailable) {
+                DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %u files\n",
+                        total.numSkippableFrames + total.numActualFrames,
+                        total.numSkippableFrames,
+                        compressedSizeUnit, unitStr,
+                        checkString, total.nbFiles);
+            } else {
+                DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %u files\n",
+                        total.numSkippableFrames + total.numActualFrames,
+                        total.numSkippableFrames,
+                        compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
+                        ratio, checkString, total.nbFiles);
+            }
         }
         return error;
     }
diff --git a/programs/fileio.h b/programs/fileio.h
index 8008e97dd5f3..aa4484fdce0c 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -56,6 +57,11 @@ void FIO_setMemLimit(unsigned memLimit);
 void FIO_setNbThreads(unsigned nbThreads);
 void FIO_setBlockSize(unsigned blockSize);
 void FIO_setOverlapLog(unsigned overlapLog);
+void FIO_setLdmFlag(unsigned ldmFlag);
+void FIO_setLdmHashLog(unsigned ldmHashLog);
+void FIO_setLdmMinMatch(unsigned ldmMinMatch);
+void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
+void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 
 
 /*-*************************************
diff --git a/programs/platform.h b/programs/platform.h
index fb2e9b173d2a..a4d7850fd714 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 #ifndef PLATFORM_H_MODULE
diff --git a/programs/util.h b/programs/util.h
index 7b553661cde3..c8be5f5fb574 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 #ifndef UTIL_H_MODULE
@@ -106,57 +107,127 @@ extern "C" {
 
 
 /*-****************************************
+*  Console log
+******************************************/
+static int g_utilDisplayLevel;
+#define UTIL_DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
+
+
+/*-****************************************
 *  Time functions
 ******************************************/
 #if defined(_WIN32)   /* Windows */
-   typedef LARGE_INTEGER UTIL_freq_t;
-   typedef LARGE_INTEGER UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_freq_t* ticksPerSecond) { if (!QueryPerformanceFrequency(ticksPerSecond)) fprintf(stderr, "ERROR: QueryPerformance not present\n"); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { QueryPerformanceCounter(x); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
+    typedef LARGE_INTEGER UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;
+        static int init = 0;
+        if (!init) {
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;
+        static int init = 0;
+        if (!init) {
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+    }
 #elif defined(__APPLE__) && defined(__MACH__)
-   #include <mach/mach_time.h>
-   typedef mach_timebase_info_data_t UTIL_freq_t;
-   typedef U64 UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_freq_t* rate) { mach_timebase_info(rate); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = mach_absolute_time(); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_freq_t rate, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_freq_t rate, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom); }
+    #include <mach/mach_time.h>
+    typedef U64 UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
+    }
 #elif (PLATFORM_POSIX_VERSION >= 200112L)
-    #include <sys/times.h>   /* times */
-   typedef U64 UTIL_freq_t;
-   typedef U64 UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_freq_t* ticksPerSecond) { *ticksPerSecond=sysconf(_SC_CLK_TCK); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; *x = (UTIL_time_t)newTicks; }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
+    #include <time.h>
+    typedef struct timespec UTIL_freq_t;
+    typedef struct timespec UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void)
+    {
+        UTIL_time_t time;
+        if (clock_gettime(CLOCK_MONOTONIC, &time))
+            UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n");   /* we could also exit() */
+        return time;
+    }
+    UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t diff;
+        if (end.tv_nsec < begin.tv_nsec) {
+            diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
+            diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
+        } else {
+            diff.tv_sec = end.tv_sec - begin.tv_sec;
+            diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
+        }
+        return diff;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 micro = 0;
+        micro += 1000000ULL * diff.tv_sec;
+        micro += diff.tv_nsec / 1000ULL;
+        return micro;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 nano = 0;
+        nano += 1000000000ULL * diff.tv_sec;
+        nano += diff.tv_nsec;
+        return nano;
+    }
 #else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
-   typedef clock_t UTIL_freq_t;
-   typedef clock_t UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_freq_t* ticksPerSecond) { *ticksPerSecond=0; }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_freq_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+    typedef clock_t UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
 #endif
 
 
 /* returns time span in microseconds */
-UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart, UTIL_freq_t ticksPerSecond )
+UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart )
 {
-    UTIL_time_t clockEnd;
-    UTIL_getTime(&clockEnd);
-    return UTIL_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd);
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
 
-UTIL_STATIC void UTIL_waitForNextTick(UTIL_freq_t ticksPerSecond)
+UTIL_STATIC void UTIL_waitForNextTick(void)
 {
-    UTIL_time_t clockStart, clockEnd;
-    UTIL_getTime(&clockStart);
+    UTIL_time_t const clockStart = UTIL_getTime();
+    UTIL_time_t clockEnd;
     do {
-        UTIL_getTime(&clockEnd);
-    } while (UTIL_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0);
+        clockEnd = UTIL_getTime();
+    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
 }
 
 
@@ -284,10 +355,6 @@ UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
     return NULL;
 }
 
-static int g_utilDisplayLevel;
-#define UTIL_DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
-#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
-
 #ifdef _WIN32
 #  define UTIL_HAS_CREATEFILELIST
 
@@ -309,7 +376,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 
     hFile=FindFirstFileA(path, &cFile);
     if (hFile == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Cannot open directory '%s'\n", dirName);
+        UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
         return 0;
     }
     free(path);
@@ -363,7 +430,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
     int dirLength, fnameLength, pathLength, nbFiles = 0;
 
     if (!(dir = opendir(dirName))) {
-        fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
+        UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
         return 0;
     }
 
@@ -408,7 +475,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
     }
 
     if (errno != 0) {
-        fprintf(stderr, "readdir(%s) error: %s\n", dirName, strerror(errno));
+        UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
         free(*bufStart);
         *bufStart = NULL;
     }
@@ -421,7 +488,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
 {
     (void)bufStart; (void)bufEnd; (void)pos;
-    fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
+    UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
     return 0;
 }
 
diff --git a/programs/zstd.1 b/programs/zstd.1
index 5a91eea281f4..8187c7403509 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "August 2017" "zstd 1.3.1" "User Commands"
+.TH "ZSTD" "1" "September 2017" "zstd 1.3.1" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -60,11 +60,11 @@ In most places where an integer argument is expected, an optional suffix is supp
 .
 .TP
 \fBKiB\fR
-Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+Multiply the integer by 1,024 (2\e \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
 .
 .TP
 \fBMiB\fR
-Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+Multiply the integer by 1,048,576 (2\e \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
 .
 .SS "Operation mode"
 If multiple operation mode options are given, the last one takes effect\.
@@ -104,6 +104,13 @@ Display information related to a zstd compressed file, such as size, ratio, and
 unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
 .
 .TP
+\fB\-\-long[=#]\fR
+enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
+.
+.IP
+Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
+.
+.TP
 \fB\-T#\fR, \fB\-\-threads=#\fR
 Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
 .
@@ -144,6 +151,10 @@ keep source file(s) after successful compression or decompression\. This is the
 operate recursively on dictionaries
 .
 .TP
+\fB\-\-format=FORMAT\fR
+compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\.
+.
+.TP
 \fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR
 display help/long help and exit
 .
@@ -186,6 +197,10 @@ Dictionary saved into \fBfile\fR (default name: dictionary)\.
 Limit dictionary to specified size (default: 112640)\.
 .
 .TP
+\fB\-B#\fR
+Split input files in blocks of size # (default: no split)
+.
+.TP
 \fB\-\-dictID=#\fR
 A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
 .
@@ -263,7 +278,10 @@ There are 8 strategies numbered from 1 to 8, from faster to stronger: 1=ZSTD_fas
 Specify the maximum number of bits for a match distance\.
 .
 .IP
-The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 27 (128 MiB)\.
+The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\.
+.
+.IP
+Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
 .
 .TP
 \fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
@@ -322,6 +340,58 @@ Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This pa
 .IP
 The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\.
 .
+.TP
+\fBldmHashLog\fR=\fIldmhlog\fR, \fBldmhlog\fR=\fIldmhlog\fR
+Specify the maximum size for a hash table used for long distance matching\.
+.
+.IP
+This option is ignored unless long distance matching is enabled\.
+.
+.IP
+Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
+.
+.IP
+The minimum \fIldmhlog\fR is 6 and the maximum is 26 (default: 20)\.
+.
+.TP
+\fBldmSearchLength\fR=\fIldmslen\fR, \fBldmslen\fR=\fIldmslen\fR
+Specify the minimum searched length of a match for long distance matching\.
+.
+.IP
+This option is ignored unless long distance matching is enabled\.
+.
+.IP
+Larger/very small values usually decrease compression ratio\.
+.
+.IP
+The minumum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\.
+.
+.TP
+\fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR
+Specify the size of each bucket for the hash table used for long distance matching\.
+.
+.IP
+This option is ignored unless long distance matching is enabled\.
+.
+.IP
+Larger bucket sizes improve collision resolution but decrease compression speed\.
+.
+.IP
+The minimum \fIldmblog\fR is 0 and the maximum is 8 (default: 3)\.
+.
+.TP
+\fBldmHashEveryLog\fR=\fIldmhevery\fR, \fBldmhevery\fR=\fIldmhevery\fR
+Specify the frequency of inserting entries into the long distance matching hash table\.
+.
+.IP
+This option is ignored unless long distance matching is enabled\.
+.
+.IP
+Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
+.
+.IP
+The default value is \fBwlog \- ldmhlog\fR\.
+.
 .SS "\-B#:"
 Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
 .
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 4310afa1aaf8..eea68548982e 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -105,6 +105,16 @@ the last one takes effect.
 * `--ultra`:
     unlocks high compression levels 20+ (maximum 22), using a lot more memory.
     Note that decompression will also require more memory when using these levels.
+* `--long[=#]`:
+    enables long distance matching with `#` `windowLog`, if not `#` is not
+    present it defaults to `27`.
+    This increases the window size (`windowLog`) and memory usage for both the
+    compressor and decompressor.
+    This setting is designed to improve the compression ratio for files with
+    long matches at a large distance.
+
+    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
+    `--memory=windowSize` needs to be passed to the decompressor.
 * `-T#`, `--threads=#`:
     Compress using `#` threads (default: 1).
     If `#` is 0, attempt to detect and use the number of physical CPU cores.
@@ -137,6 +147,10 @@ the last one takes effect.
     This is the default behavior.
 * `-r`:
     operate recursively on dictionaries
+* `--format=FORMAT`:
+    compress and decompress in other formats. If compiled with
+    support, zstd can compress to or decompress from other compression algorithm
+    formats. Possibly available options are `gzip`, `xz`, `lzma`, and `lz4`.
 * `-h`/`-H`, `--help`:
     display help/long help and exit
 * `-V`, `--version`:
@@ -178,6 +192,8 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB).
     Dictionary saved into `file` (default name: dictionary).
 * `--maxdict=#`:
     Limit dictionary to specified size (default: 112640).
+* `-B#`:
+    Split input files in blocks of size # (default: no split)
 * `--dictID=#`:
     A dictionary ID is a locally unique ID that a decoder can use to verify it is
     using the right dictionary.
@@ -267,7 +283,11 @@ The list of available _options_:
     The higher number of increases the chance to find a match which usually
     improves compression ratio.
     It also increases memory requirements for the compressor and decompressor.
-    The minimum _wlog_ is 10 (1 KiB) and the maximum is 27 (128 MiB).
+    The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
+    platforms and 31 (2 GiB) on 64-bit platforms.
+
+    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
+    `--memory=windowSize` needs to be passed to the decompressor.
 
 - `hashLog`=_hlog_, `hlog`=_hlog_:
     Specify the maximum number of bits for a hash table.
@@ -327,6 +347,47 @@ The list of available _options_:
     Default _ovlog_ is 6, which means "reload `windowSize / 8`".
     Exception : the maximum compression level (22) has a default _ovlog_ of 9.
 
+- `ldmHashLog`=_ldmhlog_, `ldmhlog`=_ldmhlog_:
+    Specify the maximum size for a hash table used for long distance matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Bigger hash tables usually improve compression ratio at the expense of more
+    memory during compression and a decrease in compression speed.
+
+    The minimum _ldmhlog_ is 6 and the maximum is 26 (default: 20).
+
+- `ldmSearchLength`=_ldmslen_, `ldmslen`=_ldmslen_:
+    Specify the minimum searched length of a match for long distance matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger/very small values usually decrease compression ratio.
+
+    The minumum _ldmslen_ is 4 and the maximum is 4096 (default: 64).
+
+- `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_:
+    Specify the size of each bucket for the hash table used for long distance
+    matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger bucket sizes improve collision resolution but decrease compression
+    speed.
+
+    The minimum _ldmblog_ is 0 and the maximum is 8 (default: 3).
+
+- `ldmHashEveryLog`=_ldmhevery_, `ldmhevery`=_ldmhevery_:
+    Specify the frequency of inserting entries into the long distance matching
+    hash table.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger values will improve compression speed. Deviating far from the
+    default value will likely result in a decrease in compression ratio.
+
+    The default value is `wlog - ldmhlog`.
+
 ### -B#:
 Select the size of each compression job.
 This parameter is available only when multi-threading is enabled.
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index e7eb71db6e37..3f8367341885 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -5,6 +5,7 @@
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
@@ -60,6 +61,8 @@
 #define ZSTD_UNLZMA "unlzma"
 #define ZSTD_XZ "xz"
 #define ZSTD_UNXZ "unxz"
+#define ZSTD_LZ4 "lz4"
+#define ZSTD_UNLZ4 "unlz4"
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
@@ -71,8 +74,14 @@ static const char*    g_defaultDictName = "dictionary";
 static const unsigned g_defaultMaxDictSize = 110 KB;
 static const int      g_defaultDictCLevel = 3;
 static const unsigned g_defaultSelectivityLevel = 9;
+static const unsigned g_defaultMaxWindowLog = 27;
 #define OVERLAP_LOG_DEFAULT 9999
+#define LDM_PARAM_DEFAULT 9999  /* Default for parameters where 0 is valid */
 static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
+static U32 g_ldmHashLog = 0;
+static U32 g_ldmMinMatch = 0;
+static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
+static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
 
 
 /*-************************************
@@ -123,6 +132,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( " -l     : print information about zstd compressed files \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
+    DISPLAY( "--long[=#]  : enable long distance matching with given window log (default : %u)\n", g_defaultMaxWindowLog);
 #ifdef ZSTD_MULTITHREAD
     DISPLAY( " -T#    : use # threads for compression (default:1) \n");
     DISPLAY( " -B#    : select size of each job (default:0==automatic) \n");
@@ -304,6 +314,10 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi
         if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmslen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "ldmblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmHashEveryLog=") || longCommandWArg(&stringPtr, "ldmhevery=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         return 0;
     }
 
@@ -362,7 +376,8 @@ int main(int argCount, const char* argv[])
         ultra=0,
         lastCommand = 0,
         nbThreads = 1,
-        setRealTimePrio = 0;
+        setRealTimePrio = 0,
+        ldmFlag = 0;
     unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
     size_t blockSize = 0;
     zstd_operation_mode operation = zom_compress;
@@ -391,10 +406,11 @@ int main(int argCount, const char* argv[])
     int cover = 1;
 #endif
 
+
     /* init */
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
     (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
-    (void)ultra; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */
+    (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */
     (void)memLimit;   /* not used when ZSTD_NODECOMPRESS set */
     if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
     filenameTable[0] = stdinmark;
@@ -413,6 +429,8 @@ int main(int argCount, const char* argv[])
     if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); }    /* behave like unlzma */
     if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); }    /* behave like xz */
     if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); }    /* behave like unxz */
+    if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); FIO_setRemoveSrcFile(1); }    /* behave like xz */
+    if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(FIO_lz4Compression); FIO_setRemoveSrcFile(1); }    /* behave like unxz */
     memset(&compressionParams, 0, sizeof(compressionParams));
 
     /* command switches */
@@ -501,6 +519,22 @@ int main(int argCount, const char* argv[])
                     if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+                    if (longCommandWArg(&argument, "--long")) {
+                        unsigned ldmWindowLog = 0;
+                        ldmFlag = 1;
+                        /* Parse optional window log */
+                        if (*argument == '=') {
+                            ++argument;
+                            ldmWindowLog = readU32FromChar(&argument);
+                        } else if (*argument != 0) {
+                            /* Invalid character following --long */
+                            CLEAN_RETURN(badusage(programName));
+                        }
+                        /* Only set windowLog if not already set by --zstd */
+                        if (compressionParams.windowLog == 0)
+                            compressionParams.windowLog = ldmWindowLog;
+                        continue;
+                    }
                     /* fall-through, will trigger bad_usage() later on */
                 }
 
@@ -720,6 +754,15 @@ int main(int argCount, const char* argv[])
         BMK_setBlockSize(blockSize);
         BMK_setNbThreads(nbThreads);
         BMK_setNbSeconds(bench_nbSeconds);
+        BMK_setLdmFlag(ldmFlag);
+        BMK_setLdmMinMatch(g_ldmMinMatch);
+        BMK_setLdmHashLog(g_ldmHashLog);
+        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
+            BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+        }
+        if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
+            BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
+        }
         BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
 #endif
         (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
@@ -737,13 +780,13 @@ int main(int argCount, const char* argv[])
             int const optimize = !coverParams.k || !coverParams.d;
             coverParams.nbThreads = nbThreads;
             coverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, optimize);
         } else {
             ZDICT_legacy_params_t dictParams;
             memset(&dictParams, 0, sizeof(dictParams));
             dictParams.selectivityLevel = dictSelect;
             dictParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, 0);
         }
 #endif
         goto _end;
@@ -787,6 +830,16 @@ int main(int argCount, const char* argv[])
 #ifndef ZSTD_NOCOMPRESS
         FIO_setNbThreads(nbThreads);
         FIO_setBlockSize((U32)blockSize);
+        FIO_setLdmFlag(ldmFlag);
+        FIO_setLdmHashLog(g_ldmHashLog);
+        FIO_setLdmMinMatch(g_ldmMinMatch);
+        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
+            FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+        }
+        if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
+            FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
+        }
+
         if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
         if ((filenameIdx==1) && outFileName)
           operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);
@@ -798,6 +851,13 @@ int main(int argCount, const char* argv[])
 #endif
     } else {  /* decompression or test */
 #ifndef ZSTD_NODECOMPRESS
+        if (memLimit == 0) {
+            if (compressionParams.windowLog == 0)
+                memLimit = (U32)1 << g_defaultMaxWindowLog;
+            else {
+                memLimit = (U32)1 << (compressionParams.windowLog & 31);
+            }
+        }
         FIO_setMemLimit(memLimit);
         if (filenameIdx==1 && outFileName)
             operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);