diff options
author | Rui Paulo <rpaulo@FreeBSD.org> | 2015-02-06 05:11:18 +0000 |
---|---|---|
committer | Rui Paulo <rpaulo@FreeBSD.org> | 2015-02-06 05:11:18 +0000 |
commit | d6a9376028870754d7d3de3f626c1deb6df81092 (patch) | |
tree | d09ea80ca5b38338fbfb381b7d0818f486343403 /src/liblzma/common | |
parent | 9db922924cf9ffc4f7f66c21c93ceca8eb2b5259 (diff) | |
download | src-d6a9376028870754d7d3de3f626c1deb6df81092.tar.gz src-d6a9376028870754d7d3de3f626c1deb6df81092.zip |
Vendor import of xz-5.2.0 (stripped)vendor/xz/5.2.0
Notes
Notes:
svn path=/vendor/xz/dist/; revision=278307
svn path=/vendor/xz/5.2.0/; revision=278308; tag=vendor/xz/5.2.0
Diffstat (limited to 'src/liblzma/common')
42 files changed, 2005 insertions, 186 deletions
diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index c25112e6875f..c1360ca1eb7a 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -51,7 +51,7 @@ struct lzma_coder_s { static lzma_ret alone_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -166,7 +166,7 @@ alone_decode(lzma_coder *coder, static void -alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +alone_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); @@ -193,7 +193,7 @@ alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, extern lzma_ret -lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, bool picky) { lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h index f666fc3823e1..dfa031aa77dd 100644 --- a/src/liblzma/common/alone_decoder.h +++ b/src/liblzma/common/alone_decoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_alone_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, bool picky); #endif diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c index eb1697e99753..a2bc9eee1fa9 100644 --- a/src/liblzma/common/alone_encoder.c +++ b/src/liblzma/common/alone_encoder.c @@ -32,7 +32,7 @@ struct lzma_coder_s { static lzma_ret alone_encode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -65,7 +65,7 @@ alone_encode(lzma_coder *coder, static void -alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +alone_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); @@ -75,7 +75,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret -alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_options_lzma *options) { lzma_next_coder_init(&alone_encoder_init, next, allocator); @@ -137,7 +137,7 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, /* extern lzma_ret -lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_options_alone *options) { lzma_next_coder_init(&alone_encoder_init, next, allocator, options); diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index 35c895fd14c2..bf3550701fe6 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -30,7 +30,7 @@ struct lzma_coder_s { static lzma_ret -auto_decode(lzma_coder *coder, lzma_allocator *allocator, +auto_decode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) @@ -100,7 +100,7 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator, static void -auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +auto_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); @@ -143,7 +143,7 @@ auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&auto_decoder_init, next, allocator); diff --git a/src/liblzma/common/block_buffer_decoder.c b/src/liblzma/common/block_buffer_decoder.c index ff27a11ccfe6..b0ded90ddc3e 100644 --- a/src/liblzma/common/block_buffer_decoder.c +++ b/src/liblzma/common/block_buffer_decoder.c @@ -14,7 +14,7 @@ extern LZMA_API(lzma_ret) -lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator, +lzma_block_buffer_decode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c index 519c6a684d76..39e263aa4765 100644 --- a/src/liblzma/common/block_buffer_encoder.c +++ b/src/liblzma/common/block_buffer_encoder.c @@ -10,6 +10,7 @@ // /////////////////////////////////////////////////////////////////////////////// +#include "block_buffer_encoder.h" #include "block_encoder.h" #include "filter_encoder.h" #include "lzma2_encoder.h" @@ -28,8 +29,8 @@ + LZMA_CHECK_SIZE_MAX + 3) & ~3) -static lzma_vli -lzma2_bound(lzma_vli uncompressed_size) +static uint64_t +lzma2_bound(uint64_t uncompressed_size) { // Prevent integer overflow in overhead calculation. if (uncompressed_size > COMPRESSED_SIZE_MAX) @@ -39,7 +40,7 @@ lzma2_bound(lzma_vli uncompressed_size) // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, // multiply by the size of per-chunk header, and add one byte for // the end marker. - const lzma_vli overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) + const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) / LZMA2_CHUNK_MAX) * LZMA2_HEADER_UNCOMPRESSED + 1; @@ -51,30 +52,36 @@ lzma2_bound(lzma_vli uncompressed_size) } -extern LZMA_API(size_t) -lzma_block_buffer_bound(size_t uncompressed_size) +extern uint64_t +lzma_block_buffer_bound64(uint64_t uncompressed_size) { - // For now, if the data doesn't compress, we always use uncompressed - // chunks of LZMA2. In future we may use Subblock filter too, but - // but for simplicity we probably will still use the same bound - // calculation even though Subblock filter would have slightly less - // overhead. - lzma_vli lzma2_size = lzma2_bound(uncompressed_size); + // If the data doesn't compress, we always use uncompressed + // LZMA2 chunks. + uint64_t lzma2_size = lzma2_bound(uncompressed_size); if (lzma2_size == 0) return 0; // Take Block Padding into account. - lzma2_size = (lzma2_size + 3) & ~LZMA_VLI_C(3); + lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); -#if SIZE_MAX < LZMA_VLI_MAX - // Catch the possible integer overflow on 32-bit systems. There's no - // overflow on 64-bit systems, because lzma2_bound() already takes + // No risk of integer overflow because lzma2_bound() already takes // into account the size of the headers in the Block. - if (SIZE_MAX - HEADERS_BOUND < lzma2_size) + return HEADERS_BOUND + lzma2_size; +} + + +extern LZMA_API(size_t) +lzma_block_buffer_bound(size_t uncompressed_size) +{ + uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); + +#if SIZE_MAX < UINT64_MAX + // Catch the possible integer overflow on 32-bit systems. + if (ret > SIZE_MAX) return 0; #endif - return HEADERS_BOUND + lzma2_size; + return ret; } @@ -82,9 +89,6 @@ static lzma_ret block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { - // TODO: Figure out if the last filter is LZMA2 or Subblock and use - // that filter to encode the uncompressed chunks. - // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at // all, but LZMA2 always requires a dictionary, so use the minimum // value to minimize memory usage of the decoder. @@ -160,16 +164,11 @@ block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, static lzma_ret -block_encode_normal(lzma_block *block, lzma_allocator *allocator, +block_encode_normal(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Find out the size of the Block Header. - block->compressed_size = lzma2_bound(in_size); - if (block->compressed_size == 0) - return LZMA_DATA_ERROR; - - block->uncompressed_size = in_size; return_if_error(lzma_block_header_size(block)); // Reserve space for the Block Header and skip it for now. @@ -221,10 +220,11 @@ block_encode_normal(lzma_block *block, lzma_allocator *allocator, } -extern LZMA_API(lzma_ret) -lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, +static lzma_ret +block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, - uint8_t *out, size_t *out_pos, size_t out_size) + uint8_t *out, size_t *out_pos, size_t out_size, + bool try_to_compress) { // Validate the arguments. if (block == NULL || (in == NULL && in_size != 0) || out == NULL @@ -233,11 +233,11 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, // The contents of the structure may depend on the version so // check the version before validating the contents of *block. - if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX - || block->filters == NULL) + || (try_to_compress && block->filters == NULL)) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(block->check)) @@ -258,9 +258,19 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, out_size -= check_size; + // Initialize block->uncompressed_size and calculate the worst-case + // value for block->compressed_size. + block->uncompressed_size = in_size; + block->compressed_size = lzma2_bound(in_size); + if (block->compressed_size == 0) + return LZMA_DATA_ERROR; + // Do the actual compression. - const lzma_ret ret = block_encode_normal(block, allocator, - in, in_size, out, out_pos, out_size); + lzma_ret ret = LZMA_BUF_ERROR; + if (try_to_compress) + ret = block_encode_normal(block, allocator, + in, in_size, out, out_pos, out_size); + if (ret != LZMA_OK) { // If the error was something else than output buffer // becoming full, return the error now. @@ -303,3 +313,25 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, return LZMA_OK; } + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + return block_buffer_encode(block, allocator, + in, in_size, out, out_pos, out_size, true); +} + + +extern LZMA_API(lzma_ret) +lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // It won't allocate any memory from heap so no need + // for lzma_allocator. + return block_buffer_encode(block, NULL, + in, in_size, out, out_pos, out_size, false); +} diff --git a/src/liblzma/common/block_buffer_encoder.h b/src/liblzma/common/block_buffer_encoder.h new file mode 100644 index 000000000000..653207f73498 --- /dev/null +++ b/src/liblzma/common/block_buffer_encoder.h @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.h +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_BUFFER_ENCODER_H +#define LZMA_BLOCK_BUFFER_ENCODER_H + +#include "common.h" + + +/// uint64_t version of lzma_block_buffer_bound(). It is used by +/// stream_encoder_mt.c. Probably the original lzma_block_buffer_bound() +/// should have been 64-bit, but fixing it would break the ABI. +extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size); + +#endif diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index a3ce6f49500c..685c3b038fc3 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -45,6 +45,9 @@ struct lzma_coder_s { /// Check of the uncompressed data lzma_check_state check; + + /// True if the integrity check won't be calculated and verified. + bool ignore_check; }; @@ -71,7 +74,7 @@ is_size_valid(lzma_vli size, lzma_vli reference) static lzma_ret -block_decode(lzma_coder *coder, lzma_allocator *allocator, +block_decode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) @@ -97,8 +100,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, coder->block->uncompressed_size)) return LZMA_DATA_ERROR; - lzma_check_update(&coder->check, coder->block->check, - out + out_start, out_used); + if (!coder->ignore_check) + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); if (ret != LZMA_STREAM_END) return ret; @@ -140,7 +144,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (coder->block->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; - lzma_check_finish(&coder->check, coder->block->check); + if (!coder->ignore_check) + lzma_check_finish(&coder->check, coder->block->check); + coder->sequence = SEQ_CHECK; // Fall through @@ -155,7 +161,8 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, // Validate the Check only if we support it. // coder->check.buffer may be uninitialized // when the Check ID is not supported. - if (lzma_check_is_supported(coder->block->check) + if (!coder->ignore_check + && lzma_check_is_supported(coder->block->check) && memcmp(coder->block->raw_check, coder->check.buffer.u8, check_size) != 0) @@ -170,7 +177,7 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, static void -block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +block_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); @@ -179,7 +186,7 @@ block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) extern lzma_ret -lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_block *block) { lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); @@ -224,6 +231,9 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->check_pos = 0; lzma_check_init(&next->coder->check, block->check); + next->coder->ignore_check = block->version >= 1 + ? block->ignore_check : false; + // Initialize the filter chain. return lzma_raw_decoder_init(&next->coder->next, allocator, block->filters); diff --git a/src/liblzma/common/block_decoder.h b/src/liblzma/common/block_decoder.h index 7da9df63f767..718c5ced886c 100644 --- a/src/liblzma/common/block_decoder.h +++ b/src/liblzma/common/block_decoder.h @@ -17,6 +17,6 @@ extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_block *block); + const lzma_allocator *allocator, lzma_block *block); #endif diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 1eeb502b7f83..def586410d28 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -45,7 +45,7 @@ struct lzma_coder_s { static lzma_ret -block_encode(lzma_coder *coder, lzma_allocator *allocator, +block_encode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) @@ -134,7 +134,7 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, static void -block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +block_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); @@ -143,7 +143,7 @@ block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, +block_encoder_update(lzma_coder *coder, const lzma_allocator *allocator, const lzma_filter *filters lzma_attribute((__unused__)), const lzma_filter *reversed_filters) { @@ -156,7 +156,7 @@ block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, extern lzma_ret -lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_block_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_block *block) { lzma_next_coder_init(&lzma_block_encoder_init, next, allocator); @@ -166,7 +166,7 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, // The contents of the structure may depend on the version so // check the version first. - if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; // If the Check ID is not supported, we cannot calculate the check and diff --git a/src/liblzma/common/block_encoder.h b/src/liblzma/common/block_encoder.h index b9eff0be2736..bd97c186e503 100644 --- a/src/liblzma/common/block_encoder.h +++ b/src/liblzma/common/block_encoder.h @@ -42,6 +42,6 @@ extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_block *block); + const lzma_allocator *allocator, lzma_block *block); #endif diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 2c9573ee204c..1dd982f6bd68 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -15,7 +15,7 @@ static void -free_properties(lzma_block *block, lzma_allocator *allocator) +free_properties(lzma_block *block, const lzma_allocator *allocator) { // Free allocated filter options. The last array member is not // touched after the initialization in the beginning of @@ -32,7 +32,7 @@ free_properties(lzma_block *block, lzma_allocator *allocator) extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, - lzma_allocator *allocator, const uint8_t *in) + const lzma_allocator *allocator, const uint8_t *in) { // NOTE: We consider the header to be corrupt not only when the // CRC32 doesn't match, but also when variable-length integers @@ -46,8 +46,16 @@ lzma_block_header_decode(lzma_block *block, block->filters[i].options = NULL; } - // Always zero for now. - block->version = 0; + // Versions 0 and 1 are supported. If a newer version was specified, + // we need to downgrade it. + if (block->version > 1) + block->version = 1; + + // This isn't a Block Header option, but since the decompressor will + // read it if version >= 1, it's better to initialize it here than + // to expect the caller to do it since in almost all cases this + // should be false. + block->ignore_check = false; // Validate Block Header Size and Check type. The caller must have // already set these, so it is a programming error if this test fails. diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 707dd0cb14a2..5c5f5424ae85 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -17,7 +17,7 @@ extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) { - if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; // Block Header Size + Block Flags + CRC32. diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c index 62c934547c9e..00c7fe8d5196 100644 --- a/src/liblzma/common/block_util.c +++ b/src/liblzma/common/block_util.c @@ -51,7 +51,7 @@ lzma_block_unpadded_size(const lzma_block *block) // NOTE: This function is used for validation too, so it is // essential that these checks are always done even if // Compressed Size is unknown. - if (block == NULL || block->version != 0 + if (block == NULL || block->version > 1 || block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN || block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX || (block->header_size & 3) diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c index b9e386027368..28aa2b7142f4 100644 --- a/src/liblzma/common/common.c +++ b/src/liblzma/common/common.c @@ -36,7 +36,7 @@ lzma_version_string(void) /////////////////////// extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) -lzma_alloc(size_t size, lzma_allocator *allocator) +lzma_alloc(size_t size, const lzma_allocator *allocator) { // Some malloc() variants return NULL if called with size == 0. if (size == 0) @@ -53,8 +53,29 @@ lzma_alloc(size_t size, lzma_allocator *allocator) } +extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) +lzma_alloc_zero(size_t size, const lzma_allocator *allocator) +{ + // Some calloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) { + ptr = allocator->alloc(allocator->opaque, 1, size); + if (ptr != NULL) + memzero(ptr, size); + } else { + ptr = calloc(1, size); + } + + return ptr; +} + + extern void -lzma_free(void *ptr, lzma_allocator *allocator) +lzma_free(void *ptr, const lzma_allocator *allocator) { if (allocator != NULL && allocator->free != NULL) allocator->free(allocator->opaque, ptr); @@ -88,7 +109,7 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, extern lzma_ret -lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { lzma_next_coder_init(filters[0].init, next, allocator); @@ -99,7 +120,7 @@ lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, extern lzma_ret -lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, +lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters) { // Check that the application isn't trying to change the Filter ID. @@ -117,7 +138,7 @@ lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, extern void -lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator) +lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator) { if (next->init != (uintptr_t)(NULL)) { // To avoid tiny end functions that simply call @@ -156,10 +177,8 @@ lzma_strm_init(lzma_stream *strm) strm->internal->next = LZMA_NEXT_CODER_INIT; } - strm->internal->supported_actions[LZMA_RUN] = false; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = false; - strm->internal->supported_actions[LZMA_FINISH] = false; + memzero(strm->internal->supported_actions, + sizeof(strm->internal->supported_actions)); strm->internal->sequence = ISEQ_RUN; strm->internal->allow_buf_error = false; @@ -178,7 +197,7 @@ lzma_code(lzma_stream *strm, lzma_action action) || (strm->next_out == NULL && strm->avail_out != 0) || strm->internal == NULL || strm->internal->next.code == NULL - || (unsigned int)(action) > LZMA_FINISH + || (unsigned int)(action) > LZMA_ACTION_MAX || !strm->internal->supported_actions[action]) return LZMA_PROG_ERROR; @@ -213,6 +232,10 @@ lzma_code(lzma_stream *strm, lzma_action action) case LZMA_FINISH: strm->internal->sequence = ISEQ_FINISH; break; + + case LZMA_FULL_BARRIER: + strm->internal->sequence = ISEQ_FULL_BARRIER; + break; } break; @@ -240,6 +263,13 @@ lzma_code(lzma_stream *strm, lzma_action action) break; + case ISEQ_FULL_BARRIER: + if (action != LZMA_FULL_BARRIER + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + case ISEQ_END: return LZMA_STREAM_END; @@ -265,7 +295,9 @@ lzma_code(lzma_stream *strm, lzma_action action) strm->internal->avail_in = strm->avail_in; - switch (ret) { + // Cast is needed to silence a warning about LZMA_TIMED_OUT, which + // isn't part of lzma_ret enumeration. + switch ((unsigned int)(ret)) { case LZMA_OK: // Don't return LZMA_BUF_ERROR when it happens the first time. // This is to avoid returning LZMA_BUF_ERROR when avail_out @@ -281,9 +313,16 @@ lzma_code(lzma_stream *strm, lzma_action action) } break; + case LZMA_TIMED_OUT: + strm->internal->allow_buf_error = false; + ret = LZMA_OK; + break; + case LZMA_STREAM_END: if (strm->internal->sequence == ISEQ_SYNC_FLUSH - || strm->internal->sequence == ISEQ_FULL_FLUSH) + || strm->internal->sequence == ISEQ_FULL_FLUSH + || strm->internal->sequence + == ISEQ_FULL_BARRIER) strm->internal->sequence = ISEQ_RUN; else strm->internal->sequence = ISEQ_END; @@ -323,6 +362,22 @@ lzma_end(lzma_stream *strm) } +extern LZMA_API(void) +lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) +{ + if (strm->internal->next.get_progress != NULL) { + strm->internal->next.get_progress(strm->internal->next.coder, + progress_in, progress_out); + } else { + *progress_in = strm->total_in; + *progress_out = strm->total_out; + } + + return; +} + + extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) { diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 45aba4f06b27..955d784a5b6a 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -49,6 +49,13 @@ #define LZMA_BUFFER_SIZE 4096 +/// Maximum number of worker threads within one multithreaded component. +/// The limit exists solely to make it simpler to prevent integer overflows +/// when allocating structures etc. This should be big enough for now... +/// the code won't scale anywhere close to this number anyway. +#define LZMA_THREADS_MAX 16384 + + /// Starting value for memory usage estimates. Instead of calculating size /// of _every_ structure and taking into account malloc() overhead etc., we /// add a base size to all memory usage estimates. It's not very accurate @@ -66,9 +73,21 @@ ( LZMA_TELL_NO_CHECK \ | LZMA_TELL_UNSUPPORTED_CHECK \ | LZMA_TELL_ANY_CHECK \ + | LZMA_IGNORE_CHECK \ | LZMA_CONCATENATED ) +/// Largest valid lzma_action value as unsigned integer. +#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) + + +/// Special return value (lzma_ret) to indicate that a timeout was reached +/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to +/// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because +/// there's no need to have it in the public API. +#define LZMA_TIMED_OUT 32 + + /// Type of encoder/decoder specific data; the actual structure is defined /// differently in different coders. typedef struct lzma_coder_s lzma_coder; @@ -80,7 +99,7 @@ typedef struct lzma_filter_info_s lzma_filter_info; /// Type of a function used to initialize a filter encoder or decoder typedef lzma_ret (*lzma_init_function)( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Type of a function to do some kind of coding work (filters, Stream, @@ -88,7 +107,7 @@ typedef lzma_ret (*lzma_init_function)( /// input and output buffers, but for simplicity they still use this same /// function prototype. typedef lzma_ret (*lzma_code_function)( - lzma_coder *coder, lzma_allocator *allocator, + lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -96,7 +115,7 @@ typedef lzma_ret (*lzma_code_function)( /// Type of a function to free the memory allocated for the coder typedef void (*lzma_end_function)( - lzma_coder *coder, lzma_allocator *allocator); + lzma_coder *coder, const lzma_allocator *allocator); /// Raw coder validates and converts an array of lzma_filter structures to @@ -139,6 +158,11 @@ struct lzma_next_coder_s { /// lzma_next_coder.coder. lzma_end_function end; + /// Pointer to a function to get progress information. If this is NULL, + /// lzma_stream.total_in and .total_out are used instead. + void (*get_progress)(lzma_coder *coder, + uint64_t *progress_in, uint64_t *progress_out); + /// Pointer to function to return the type of the integrity check. /// Most coders won't support this. lzma_check (*get_check)(const lzma_coder *coder); @@ -150,7 +174,7 @@ struct lzma_next_coder_s { /// Update the filter-specific options or the whole filter chain /// in the encoder. - lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator, + lzma_ret (*update)(lzma_coder *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters); }; @@ -164,6 +188,7 @@ struct lzma_next_coder_s { .id = LZMA_VLI_UNKNOWN, \ .code = NULL, \ .end = NULL, \ + .get_progress = NULL, \ .get_check = NULL, \ .memconfig = NULL, \ .update = NULL, \ @@ -185,6 +210,7 @@ struct lzma_internal_s { ISEQ_SYNC_FLUSH, ISEQ_FULL_FLUSH, ISEQ_FINISH, + ISEQ_FULL_BARRIER, ISEQ_END, ISEQ_ERROR, } sequence; @@ -195,7 +221,7 @@ struct lzma_internal_s { size_t avail_in; /// Indicates which lzma_action values are allowed by next.code. - bool supported_actions[4]; + bool supported_actions[LZMA_ACTION_MAX + 1]; /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was /// made (no input consumed and no output produced by next.code). @@ -204,11 +230,17 @@ struct lzma_internal_s { /// Allocates memory -extern void *lzma_alloc(size_t size, lzma_allocator *allocator) +extern void *lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1); +/// Allocates memory and zeroes it (like calloc()). This can be faster +/// than lzma_alloc() + memzero() while being backward compatible with +/// custom allocators. +extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) + lzma_alloc_zero(size_t size, const lzma_allocator *allocator); + /// Frees memory -extern void lzma_free(void *ptr, lzma_allocator *allocator); +extern void lzma_free(void *ptr, const lzma_allocator *allocator); /// Allocates strm->internal if it is NULL, and initializes *strm and @@ -220,17 +252,19 @@ extern lzma_ret lzma_strm_init(lzma_stream *strm); /// than the filter being initialized now. This way the actual filter /// initialization functions don't need to use lzma_next_coder_init macro. extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); + const lzma_allocator *allocator, + const lzma_filter_info *filters); /// Update the next filter in the chain, if any. This checks that /// the application is not trying to change the Filter IDs. extern lzma_ret lzma_next_filter_update( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters); /// Frees the memory allocated for next->coder either using next->end or, /// if next->end is NULL, using lzma_free. -extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator); +extern void lzma_next_end(lzma_next_coder *next, + const lzma_allocator *allocator); /// Copy as much data as possible from in[] to out[] and update *in_pos diff --git a/src/liblzma/common/easy_buffer_encoder.c b/src/liblzma/common/easy_buffer_encoder.c index c4be34ccfa27..48eb56f5cc91 100644 --- a/src/liblzma/common/easy_buffer_encoder.c +++ b/src/liblzma/common/easy_buffer_encoder.c @@ -15,8 +15,8 @@ extern LZMA_API(lzma_ret) lzma_easy_buffer_encode(uint32_t preset, lzma_check check, - lzma_allocator *allocator, const uint8_t *in, size_t in_size, - uint8_t *out, size_t *out_pos, size_t out_size) + const lzma_allocator *allocator, const uint8_t *in, + size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { lzma_options_easy opt_easy; if (lzma_easy_preset(&opt_easy, preset)) diff --git a/src/liblzma/common/easy_encoder.c b/src/liblzma/common/easy_encoder.c index d13ccd7351f1..5cb492dd0681 100644 --- a/src/liblzma/common/easy_encoder.c +++ b/src/liblzma/common/easy_encoder.c @@ -11,7 +11,6 @@ /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" -#include "stream_encoder.h" extern LZMA_API(lzma_ret) diff --git a/src/liblzma/common/filter_buffer_decoder.c b/src/liblzma/common/filter_buffer_decoder.c index 2d35ef8e0af4..6620986eea8a 100644 --- a/src/liblzma/common/filter_buffer_decoder.c +++ b/src/liblzma/common/filter_buffer_decoder.c @@ -14,7 +14,8 @@ extern LZMA_API(lzma_ret) -lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator, +lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/src/liblzma/common/filter_buffer_encoder.c b/src/liblzma/common/filter_buffer_encoder.c index 646e1b30374e..dda18e3d8e5e 100644 --- a/src/liblzma/common/filter_buffer_encoder.c +++ b/src/liblzma/common/filter_buffer_encoder.c @@ -14,9 +14,10 @@ extern LZMA_API(lzma_ret) -lzma_raw_buffer_encode(const lzma_filter *filters, lzma_allocator *allocator, - const uint8_t *in, size_t in_size, uint8_t *out, - size_t *out_pos, size_t out_size) +lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) { // Validate what isn't validated later in filter_common.c. if ((in == NULL && in_size != 0) || out == NULL diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c index 7c95b05f2a3f..9ad5d5d8e2af 100644 --- a/src/liblzma/common/filter_common.c +++ b/src/liblzma/common/filter_common.c @@ -123,7 +123,7 @@ static const struct { extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src, lzma_filter *dest, - lzma_allocator *allocator) + const lzma_allocator *allocator) { if (src == NULL || dest == NULL) return LZMA_PROG_ERROR; @@ -239,7 +239,7 @@ validate_chain(const lzma_filter *filters, size_t *count) extern lzma_ret -lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options, lzma_filter_find coder_find, bool is_encoder) { diff --git a/src/liblzma/common/filter_common.h b/src/liblzma/common/filter_common.h index cd61fc0724f3..42a26a24a47f 100644 --- a/src/liblzma/common/filter_common.h +++ b/src/liblzma/common/filter_common.h @@ -36,7 +36,7 @@ typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id); extern lzma_ret lzma_raw_coder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters, lzma_filter_find coder_find, bool is_encoder); diff --git a/src/liblzma/common/filter_decoder.c b/src/liblzma/common/filter_decoder.c index 1ebbe2afef0c..c75b0a89c30f 100644 --- a/src/liblzma/common/filter_decoder.c +++ b/src/liblzma/common/filter_decoder.c @@ -35,7 +35,8 @@ typedef struct { /// \return - LZMA_OK: Properties decoded successfully. /// - LZMA_OPTIONS_ERROR: Unsupported properties /// - LZMA_MEM_ERROR: Memory allocation failed. - lzma_ret (*props_decode)(void **options, lzma_allocator *allocator, + lzma_ret (*props_decode)( + void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size); } lzma_filter_decoder; @@ -136,7 +137,7 @@ lzma_filter_decoder_is_supported(lzma_vli id) extern lzma_ret -lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, @@ -165,7 +166,7 @@ lzma_raw_decoder_memusage(const lzma_filter *filters) extern LZMA_API(lzma_ret) -lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator, +lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) { // Make it always NULL so that the caller can always safely free() it. diff --git a/src/liblzma/common/filter_decoder.h b/src/liblzma/common/filter_decoder.h index d5c68bdd4a68..a2e255fe5f06 100644 --- a/src/liblzma/common/filter_decoder.h +++ b/src/liblzma/common/filter_decoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_raw_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options); #endif diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c index 635d81223472..c5d8f39721fe 100644 --- a/src/liblzma/common/filter_encoder.c +++ b/src/liblzma/common/filter_encoder.c @@ -30,11 +30,11 @@ typedef struct { /// invalid, UINT64_MAX is returned. uint64_t (*memusage)(const void *options); - /// Calculates the minimum sane size for Blocks (or other types of - /// chunks) to which the input data can be split to make - /// multithreaded encoding possible. If this is NULL, it is assumed - /// that the encoder is fast enough with single thread. - lzma_vli (*chunk_size)(const void *options); + /// Calculates the recommended Uncompressed Size for .xz Blocks to + /// which the input data can be split to make multithreaded + /// encoding possible. If this is NULL, it is assumed that + /// the encoder is fast enough with single thread. + uint64_t (*block_size)(const void *options); /// Tells the size of the Filter Properties field. If options are /// invalid, UINT32_MAX is returned. If this is NULL, props_size_fixed @@ -59,7 +59,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_encoder_init, .memusage = &lzma_lzma_encoder_memusage, - .chunk_size = NULL, // FIXME + .block_size = NULL, // FIXME .props_size_get = NULL, .props_size_fixed = 5, .props_encode = &lzma_lzma_props_encode, @@ -70,7 +70,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA2, .init = &lzma_lzma2_encoder_init, .memusage = &lzma_lzma2_encoder_memusage, - .chunk_size = NULL, // FIXME + .block_size = &lzma_lzma2_block_size, // FIXME .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_lzma2_props_encode, @@ -81,7 +81,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_X86, .init = &lzma_simple_x86_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -91,7 +91,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_POWERPC, .init = &lzma_simple_powerpc_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -101,7 +101,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_IA64, .init = &lzma_simple_ia64_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -111,7 +111,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_ARM, .init = &lzma_simple_arm_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -121,7 +121,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_ARMTHUMB, .init = &lzma_simple_armthumb_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -131,7 +131,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_SPARC, .init = &lzma_simple_sparc_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -141,7 +141,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_DELTA, .init = &lzma_delta_encoder_init, .memusage = &lzma_delta_coder_memusage, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_delta_props_encode, @@ -196,7 +196,7 @@ lzma_filters_update(lzma_stream *strm, const lzma_filter *filters) extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, @@ -226,20 +226,19 @@ lzma_raw_encoder_memusage(const lzma_filter *filters) } -/* -extern LZMA_API(lzma_vli) -lzma_chunk_size(const lzma_filter *filters) +extern uint64_t +lzma_mt_block_size(const lzma_filter *filters) { - lzma_vli max = 0; + uint64_t max = 0; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { const lzma_filter_encoder *const fe = encoder_find(filters[i].id); - if (fe->chunk_size != NULL) { - const lzma_vli size - = fe->chunk_size(filters[i].options); - if (size == LZMA_VLI_UNKNOWN) - return LZMA_VLI_UNKNOWN; + if (fe->block_size != NULL) { + const uint64_t size + = fe->block_size(filters[i].options); + if (size == 0) + return 0; if (size > max) max = size; @@ -248,7 +247,6 @@ lzma_chunk_size(const lzma_filter *filters) return max; } -*/ extern LZMA_API(lzma_ret) diff --git a/src/liblzma/common/filter_encoder.h b/src/liblzma/common/filter_encoder.h index 5bc137f64584..f1d5683fe793 100644 --- a/src/liblzma/common/filter_encoder.h +++ b/src/liblzma/common/filter_encoder.h @@ -16,12 +16,12 @@ #include "common.h" -// FIXME: Might become a part of the public API once finished. -// extern lzma_vli lzma_chunk_size(const lzma_filter *filters); +// FIXME: Might become a part of the public API. +extern uint64_t lzma_mt_block_size(const lzma_filter *filters); extern lzma_ret lzma_raw_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters); #endif diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c index caae10ce79a0..ddfb085943d0 100644 --- a/src/liblzma/common/filter_flags_decoder.c +++ b/src/liblzma/common/filter_flags_decoder.c @@ -15,7 +15,7 @@ extern LZMA_API(lzma_ret) lzma_filter_flags_decode( - lzma_filter *filter, lzma_allocator *allocator, + lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { // Set the pointer to NULL so the caller can always safely free it. diff --git a/src/liblzma/common/stream_encoder.h b/src/liblzma/common/hardware_cputhreads.c index 46a7aed72eaa..f468366a6045 100644 --- a/src/liblzma/common/stream_encoder.h +++ b/src/liblzma/common/hardware_cputhreads.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file stream_encoder.h -/// \brief Encodes .xz Streams +/// \file hardware_cputhreads.c +/// \brief Get the number of CPU threads or cores // // Author: Lasse Collin // @@ -10,14 +10,13 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_STREAM_ENCODER_H -#define LZMA_STREAM_ENCODER_H - #include "common.h" +#include "tuklib_cpucores.h" -extern lzma_ret lzma_stream_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter *filters, lzma_check check); -#endif +extern LZMA_API(uint32_t) +lzma_cputhreads(void) +{ + return tuklib_cpucores(); +} diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 9af4bc19d1b5..11f45f4009ea 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -191,8 +191,8 @@ index_tree_init(index_tree *tree) /// Helper for index_tree_end() static void -index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, - void (*free_func)(void *node, lzma_allocator *allocator)) +index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) { // The tree won't ever be very huge, so recursion should be fine. // 20 levels in the tree is likely quite a lot already in practice. @@ -215,8 +215,8 @@ index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, /// to free the Record groups from each index_stream before freeing /// the index_stream itself. static void -index_tree_end(index_tree *tree, lzma_allocator *allocator, - void (*free_func)(void *node, lzma_allocator *allocator)) +index_tree_end(index_tree *tree, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) { if (tree->root != NULL) index_tree_node_end(tree->root, allocator, free_func); @@ -340,7 +340,7 @@ index_tree_locate(const index_tree *tree, lzma_vli target) static index_stream * index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, lzma_vli stream_number, lzma_vli block_number_base, - lzma_allocator *allocator) + const lzma_allocator *allocator) { index_stream *s = lzma_alloc(sizeof(index_stream), allocator); if (s == NULL) @@ -368,7 +368,7 @@ index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, /// Free the memory allocated for a Stream and its Record groups. static void -index_stream_end(void *node, lzma_allocator *allocator) +index_stream_end(void *node, const lzma_allocator *allocator) { index_stream *s = node; index_tree_end(&s->groups, allocator, NULL); @@ -377,7 +377,7 @@ index_stream_end(void *node, lzma_allocator *allocator) static lzma_index * -index_init_plain(lzma_allocator *allocator) +index_init_plain(const lzma_allocator *allocator) { lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); if (i != NULL) { @@ -395,7 +395,7 @@ index_init_plain(lzma_allocator *allocator) extern LZMA_API(lzma_index *) -lzma_index_init(lzma_allocator *allocator) +lzma_index_init(const lzma_allocator *allocator) { lzma_index *i = index_init_plain(allocator); if (i == NULL) @@ -414,7 +414,7 @@ lzma_index_init(lzma_allocator *allocator) extern LZMA_API(void) -lzma_index_end(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, const lzma_allocator *allocator) { // NOTE: If you modify this function, check also the bottom // of lzma_index_cat(). @@ -637,7 +637,7 @@ lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) extern LZMA_API(lzma_ret) -lzma_index_append(lzma_index *i, lzma_allocator *allocator, +lzma_index_append(lzma_index *i, const lzma_allocator *allocator, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate. @@ -765,7 +765,7 @@ index_cat_helper(const index_cat_info *info, index_stream *this) extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, - lzma_allocator *allocator) + const lzma_allocator *allocator) { const lzma_vli dest_file_size = lzma_index_file_size(dest); @@ -859,7 +859,7 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, /// Duplicate an index_stream. static index_stream * -index_dup_stream(const index_stream *src, lzma_allocator *allocator) +index_dup_stream(const index_stream *src, const lzma_allocator *allocator) { // Catch a somewhat theoretical integer overflow. if (src->record_count > PREALLOC_MAX) @@ -919,7 +919,7 @@ index_dup_stream(const index_stream *src, lzma_allocator *allocator) extern LZMA_API(lzma_index *) -lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator) { // Allocate the base structure (no initial Stream). lzma_index *dest = index_init_plain(allocator); diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c index 83c8a3af1db8..795d1834cc58 100644 --- a/src/liblzma/common/index_decoder.c +++ b/src/liblzma/common/index_decoder.c @@ -54,7 +54,7 @@ struct lzma_coder_s { static lzma_ret -index_decode(lzma_coder *coder, lzma_allocator *allocator, +index_decode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out lzma_attribute((__unused__)), @@ -207,7 +207,7 @@ out: static void -index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +index_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_index_end(coder->index, allocator); lzma_free(coder, allocator); @@ -234,7 +234,7 @@ index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, static lzma_ret -index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, +index_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { // Remember the pointer given by the application. We will set it @@ -261,7 +261,7 @@ index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, static lzma_ret -index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { lzma_next_coder_init(&index_decoder_init, next, allocator); @@ -299,8 +299,8 @@ lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) extern LZMA_API(lzma_ret) -lzma_index_buffer_decode( - lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, +lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, + const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { // Sanity checks diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c index 45919f094c42..d25ac7d3372b 100644 --- a/src/liblzma/common/index_encoder.c +++ b/src/liblzma/common/index_encoder.c @@ -42,7 +42,7 @@ struct lzma_coder_s { static lzma_ret index_encode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in lzma_attribute((__unused__)), size_t *restrict in_pos lzma_attribute((__unused__)), size_t in_size lzma_attribute((__unused__)), @@ -159,7 +159,7 @@ out: static void -index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +index_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_free(coder, allocator); return; @@ -181,7 +181,7 @@ index_encoder_reset(lzma_coder *coder, const lzma_index *i) extern lzma_ret -lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_index *i) { lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); diff --git a/src/liblzma/common/index_encoder.h b/src/liblzma/common/index_encoder.h index a13c94dcd077..4d55cd104785 100644 --- a/src/liblzma/common/index_encoder.h +++ b/src/liblzma/common/index_encoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_index *i); + const lzma_allocator *allocator, const lzma_index *i); #endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c index e3e9386ae8e3..d7a0344b76c3 100644 --- a/src/liblzma/common/index_hash.c +++ b/src/liblzma/common/index_hash.c @@ -70,7 +70,8 @@ struct lzma_index_hash_s { extern LZMA_API(lzma_index_hash *) -lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +lzma_index_hash_init(lzma_index_hash *index_hash, + const lzma_allocator *allocator) { if (index_hash == NULL) { index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); @@ -101,7 +102,8 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) extern LZMA_API(void) -lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +lzma_index_hash_end(lzma_index_hash *index_hash, + const lzma_allocator *allocator) { lzma_free(index_hash, allocator); return; diff --git a/src/liblzma/common/memcmplen.h b/src/liblzma/common/memcmplen.h new file mode 100644 index 000000000000..f66e7cdb66c6 --- /dev/null +++ b/src/liblzma/common/memcmplen.h @@ -0,0 +1,170 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file memcmplen.h +/// \brief Optimized comparison of two buffers +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_MEMCMPLEN_H +#define LZMA_MEMCMPLEN_H + +#include "common.h" + +#ifdef HAVE_IMMINTRIN_H +# include <immintrin.h> +#endif + +/// How many extra bytes lzma_memcmplen() may read. This depends on +/// the method but since it is just a few bytes the biggest possible +/// value is used here. +#define LZMA_MEMCMPLEN_EXTRA 16 + + +/// Find out how many equal bytes the two buffers have. +/// +/// \param buf1 First buffer +/// \param buf2 Second buffer +/// \param len How many bytes have already been compared and will +/// be assumed to match +/// \param limit How many bytes to compare at most, including the +/// already-compared bytes. This must be significantly +/// smaller than UINT32_MAX to avoid integer overflows. +/// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past +/// the specified limit from both buf1 and buf2. +/// +/// \return Number of equal bytes in the buffers is returned. +/// This is always at least len and at most limit. +static inline uint32_t lzma_attribute((__always_inline__)) +lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, + uint32_t len, uint32_t limit) +{ + assert(len <= limit); + assert(limit <= UINT32_MAX / 2); + +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \ + || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ + || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ + || (defined(_MSC_VER) && defined(_M_X64))) + // NOTE: This will use 64-bit unaligned access which + // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but + // it's convenient here at least as long as it's x86-64 only. + // + // I keep this x86-64 only for now since that's where I know this + // to be a good method. This may be fine on other 64-bit CPUs too. + // On big endian one should use xor instead of subtraction and switch + // to __builtin_clzll(). + while (len < limit) { + const uint64_t x = *(const uint64_t *)(buf1 + len) + - *(const uint64_t *)(buf2 + len); + if (x != 0) { +# if defined(_M_X64) // MSVC or Intel C compiler on Windows + unsigned long tmp; + _BitScanForward64(&tmp, x); + len += (uint32_t)tmp >> 3; +# else // GCC, clang, or Intel C compiler + len += (uint32_t)__builtin_ctzll(x) >> 3; +# endif + return my_min(len, limit); + } + + len += 8; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(HAVE__MM_MOVEMASK_EPI8) \ + && ((defined(__GNUC__) && defined(__SSE2_MATH__)) \ + || (defined(__INTEL_COMPILER) && defined(__SSE2__)) \ + || (defined(_MSC_VER) && defined(_M_IX86_FP) \ + && _M_IX86_FP >= 2)) + // NOTE: Like above, this will use 128-bit unaligned access which + // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit. + // + // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above + // version is sometimes significantly faster and sometimes + // slightly slower than this SSE2 version, so this SSE2 + // version isn't used on x86-64. + while (len < limit) { + const uint32_t x = 0xFFFF ^ _mm_movemask_epi8(_mm_cmpeq_epi8( + _mm_loadu_si128((const __m128i *)(buf1 + len)), + _mm_loadu_si128((const __m128i *)(buf2 + len)))); + + if (x != 0) { +# if defined(__INTEL_COMPILER) + len += _bit_scan_forward(x); +# elif defined(_MSC_VER) + unsigned long tmp; + _BitScanForward(&tmp, x); + len += tmp; +# else + len += __builtin_ctz(x); +# endif + return my_min(len, limit); + } + + len += 16; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN) + // Generic 32-bit little endian method + while (len < limit) { + uint32_t x = *(const uint32_t *)(buf1 + len) + - *(const uint32_t *)(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF) == 0) { + len += 2; + x >>= 16; + } + + if ((x & 0xFF) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN) + // Generic 32-bit big endian method + while (len < limit) { + uint32_t x = *(const uint32_t *)(buf1 + len) + ^ *(const uint32_t *)(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF0000) == 0) { + len += 2; + x <<= 16; + } + + if ((x & 0xFF000000) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#else + // Simple portable version that doesn't use unaligned access. + while (len < limit && buf1[len] == buf2[len]) + ++len; + + return len; +#endif +} + +#endif diff --git a/src/liblzma/common/outqueue.c b/src/liblzma/common/outqueue.c new file mode 100644 index 000000000000..2dc8a38d1be3 --- /dev/null +++ b/src/liblzma/common/outqueue.c @@ -0,0 +1,184 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.c +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "outqueue.h" + + +/// This is to ease integer overflow checking: We may allocate up to +/// 2 * LZMA_THREADS_MAX buffers and we need some extra memory for other +/// data structures (that's the second /2). +#define BUF_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX / 2 / 2) + + +static lzma_ret +get_options(uint64_t *bufs_alloc_size, uint32_t *bufs_count, + uint64_t buf_size_max, uint32_t threads) +{ + if (threads > LZMA_THREADS_MAX || buf_size_max > BUF_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + // The number of buffers is twice the number of threads. + // This wastes RAM but keeps the threads busy when buffers + // finish out of order. + // + // NOTE: If this is changed, update BUF_SIZE_MAX too. + *bufs_count = threads * 2; + *bufs_alloc_size = *bufs_count * buf_size_max; + + return LZMA_OK; +} + + +extern uint64_t +lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads) +{ + uint64_t bufs_alloc_size; + uint32_t bufs_count; + + if (get_options(&bufs_alloc_size, &bufs_count, buf_size_max, threads) + != LZMA_OK) + return UINT64_MAX; + + return sizeof(lzma_outq) + bufs_count * sizeof(lzma_outbuf) + + bufs_alloc_size; +} + + +extern lzma_ret +lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator, + uint64_t buf_size_max, uint32_t threads) +{ + uint64_t bufs_alloc_size; + uint32_t bufs_count; + + // Set bufs_count and bufs_alloc_size. + return_if_error(get_options(&bufs_alloc_size, &bufs_count, + buf_size_max, threads)); + + // Allocate memory if needed. + if (outq->buf_size_max != buf_size_max + || outq->bufs_allocated != bufs_count) { + lzma_outq_end(outq, allocator); + +#if SIZE_MAX < UINT64_MAX + if (bufs_alloc_size > SIZE_MAX) + return LZMA_MEM_ERROR; +#endif + + outq->bufs = lzma_alloc(bufs_count * sizeof(lzma_outbuf), + allocator); + outq->bufs_mem = lzma_alloc((size_t)(bufs_alloc_size), + allocator); + + if (outq->bufs == NULL || outq->bufs_mem == NULL) { + lzma_outq_end(outq, allocator); + return LZMA_MEM_ERROR; + } + } + + // Initialize the rest of the main structure. Initialization of + // outq->bufs[] is done when they are actually needed. + outq->buf_size_max = (size_t)(buf_size_max); + outq->bufs_allocated = bufs_count; + outq->bufs_pos = 0; + outq->bufs_used = 0; + outq->read_pos = 0; + + return LZMA_OK; +} + + +extern void +lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator) +{ + lzma_free(outq->bufs, allocator); + outq->bufs = NULL; + + lzma_free(outq->bufs_mem, allocator); + outq->bufs_mem = NULL; + + return; +} + + +extern lzma_outbuf * +lzma_outq_get_buf(lzma_outq *outq) +{ + // Caller must have checked it with lzma_outq_has_buf(). + assert(outq->bufs_used < outq->bufs_allocated); + + // Initialize the new buffer. + lzma_outbuf *buf = &outq->bufs[outq->bufs_pos]; + buf->buf = outq->bufs_mem + outq->bufs_pos * outq->buf_size_max; + buf->size = 0; + buf->finished = false; + + // Update the queue state. + if (++outq->bufs_pos == outq->bufs_allocated) + outq->bufs_pos = 0; + + ++outq->bufs_used; + + return buf; +} + + +extern bool +lzma_outq_is_readable(const lzma_outq *outq) +{ + uint32_t i = outq->bufs_pos - outq->bufs_used; + if (outq->bufs_pos < outq->bufs_used) + i += outq->bufs_allocated; + + return outq->bufs[i].finished; +} + + +extern lzma_ret +lzma_outq_read(lzma_outq *restrict outq, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size) +{ + // There must be at least one buffer from which to read. + if (outq->bufs_used == 0) + return LZMA_OK; + + // Get the buffer. + uint32_t i = outq->bufs_pos - outq->bufs_used; + if (outq->bufs_pos < outq->bufs_used) + i += outq->bufs_allocated; + + lzma_outbuf *buf = &outq->bufs[i]; + + // If it isn't finished yet, we cannot read from it. + if (!buf->finished) + return LZMA_OK; + + // Copy from the buffer to output. + lzma_bufcpy(buf->buf, &outq->read_pos, buf->size, + out, out_pos, out_size); + + // Return if we didn't get all the data from the buffer. + if (outq->read_pos < buf->size) + return LZMA_OK; + + // The buffer was finished. Tell the caller its size information. + *unpadded_size = buf->unpadded_size; + *uncompressed_size = buf->uncompressed_size; + + // Free this buffer for further use. + --outq->bufs_used; + outq->read_pos = 0; + + return LZMA_STREAM_END; +} diff --git a/src/liblzma/common/outqueue.h b/src/liblzma/common/outqueue.h new file mode 100644 index 000000000000..079634de4587 --- /dev/null +++ b/src/liblzma/common/outqueue.h @@ -0,0 +1,156 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.h +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Output buffer for a single thread +typedef struct { + /// Pointer to the output buffer of lzma_outq.buf_size_max bytes + uint8_t *buf; + + /// Amount of data written to buf + size_t size; + + /// Additional size information + lzma_vli unpadded_size; + lzma_vli uncompressed_size; + + /// True when no more data will be written into this buffer. + /// + /// \note This is read by another thread and thus access + /// to this variable needs a mutex. + bool finished; + +} lzma_outbuf; + + +typedef struct { + /// Array of buffers that are used cyclically. + lzma_outbuf *bufs; + + /// Memory allocated for all the buffers + uint8_t *bufs_mem; + + /// Amount of buffer space available in each buffer + size_t buf_size_max; + + /// Number of buffers allocated + uint32_t bufs_allocated; + + /// Position in the bufs array. The next buffer to be taken + /// into use is bufs[bufs_pos]. + uint32_t bufs_pos; + + /// Number of buffers in use + uint32_t bufs_used; + + /// Position in the buffer in lzma_outq_read() + size_t read_pos; + +} lzma_outq; + + +/** + * \brief Calculate the memory usage of an output queue + * + * \return Approximate memory usage in bytes or UINT64_MAX on error. + */ +extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads); + + +/// \brief Initialize an output queue +/// +/// \param outq Pointer to an output queue. Before calling +/// this function the first time, *outq should +/// have been zeroed with memzero() so that this +/// function knows that there are no previous +/// allocations to free. +/// \param allocator Pointer to allocator or NULL +/// \param buf_size_max Maximum amount of data that a single buffer +/// in the queue may need to store. +/// \param threads Number of buffers that may be in use +/// concurrently. Note that more than this number +/// of buffers will actually get allocated to +/// improve performance when buffers finish +/// out of order. +/// +/// \return - LZMA_OK +/// - LZMA_MEM_ERROR +/// +extern lzma_ret lzma_outq_init( + lzma_outq *outq, const lzma_allocator *allocator, + uint64_t buf_size_max, uint32_t threads); + + +/// \brief Free the memory associated with the output queue +extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator); + + +/// \brief Get a new buffer +/// +/// lzma_outq_has_buf() must be used to check that there is a buffer +/// available before calling lzma_outq_get_buf(). +/// +extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq); + + +/// \brief Test if there is data ready to be read +/// +/// Call to this function must be protected with the same mutex that +/// is used to protect lzma_outbuf.finished. +/// +extern bool lzma_outq_is_readable(const lzma_outq *outq); + + +/// \brief Read finished data +/// +/// \param outq Pointer to an output queue +/// \param out Beginning of the output buffer +/// \param out_pos The next byte will be written to +/// out[*out_pos]. +/// \param out_size Size of the out buffer; the first byte into +/// which no data is written to is out[out_size]. +/// \param unpadded_size Unpadded Size from the Block encoder +/// \param uncompressed_size Uncompressed Size from the Block encoder +/// +/// \return - LZMA: All OK. Either no data was available or the buffer +/// being read didn't become empty yet. +/// - LZMA_STREAM_END: The buffer being read was finished. +/// *unpadded_size and *uncompressed_size were set. +/// +/// \note This reads lzma_outbuf.finished variables and thus call +/// to this function needs to be protected with a mutex. +/// +extern lzma_ret lzma_outq_read(lzma_outq *restrict outq, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size); + + +/// \brief Test if there is at least one buffer free +/// +/// This must be used before getting a new buffer with lzma_outq_get_buf(). +/// +static inline bool +lzma_outq_has_buf(const lzma_outq *outq) +{ + return outq->bufs_used < outq->bufs_allocated; +} + + +/// \brief Test if the queue is completely empty +static inline bool +lzma_outq_is_empty(const lzma_outq *outq) +{ + return outq->bufs_used == 0; +} diff --git a/src/liblzma/common/stream_buffer_decoder.c b/src/liblzma/common/stream_buffer_decoder.c index ae753155180d..b9745b5dbe18 100644 --- a/src/liblzma/common/stream_buffer_decoder.c +++ b/src/liblzma/common/stream_buffer_decoder.c @@ -15,7 +15,7 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags, - lzma_allocator *allocator, + const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c index 2450ee2e1fa0..af49554a6b0c 100644 --- a/src/liblzma/common/stream_buffer_encoder.c +++ b/src/liblzma/common/stream_buffer_encoder.c @@ -42,7 +42,8 @@ lzma_stream_buffer_bound(size_t uncompressed_size) extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check, - lzma_allocator *allocator, const uint8_t *in, size_t in_size, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos_ptr, size_t out_size) { // Sanity checks diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 37ea71edbd71..3ab938c9f142 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -57,6 +57,10 @@ struct lzma_coder_s { /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; + /// If true, we will tell the Block decoder to skip calculating + /// and verifying the integrity check. + bool ignore_check; + /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input, and we aren't @@ -80,7 +84,7 @@ struct lzma_coder_s { static lzma_ret -stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) +stream_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); @@ -96,7 +100,7 @@ stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_decode(lzma_coder *coder, lzma_allocator *allocator, +stream_decode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) @@ -182,8 +186,8 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, coder->pos = 0; - // Version 0 is currently the only possible version. - coder->block_options.version = 0; + // Version 1 is needed to support the .ignore_check option. + coder->block_options.version = 1; // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so @@ -195,6 +199,11 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); + // If LZMA_IGNORE_CHECK was used, this flag needs to be set. + // It has to be set after lzma_block_header_decode() because + // it always resets this to false. + coder->block_options.ignore_check = coder->ignore_check; + // Check the memory usage limit. const uint64_t memusage = lzma_raw_decoder_memusage(filters); lzma_ret ret; @@ -366,7 +375,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void -stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +stream_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->block_decoder, allocator); lzma_index_hash_end(coder->index_hash, allocator); @@ -401,7 +410,8 @@ stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_stream_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); @@ -432,6 +442,7 @@ lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->tell_unsupported_check = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + next->coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0; next->coder->first_stream = true; diff --git a/src/liblzma/common/stream_decoder.h b/src/liblzma/common/stream_decoder.h index e54ac28f44a5..c13c6ba12706 100644 --- a/src/liblzma/common/stream_decoder.h +++ b/src/liblzma/common/stream_decoder.h @@ -15,7 +15,8 @@ #include "common.h" -extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, uint64_t memlimit, uint32_t flags); +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags); #endif diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c index 97a7a23a8135..a7663bc48db3 100644 --- a/src/liblzma/common/stream_encoder.c +++ b/src/liblzma/common/stream_encoder.c @@ -10,7 +10,6 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_encoder.h" #include "block_encoder.h" #include "index_encoder.h" @@ -26,7 +25,7 @@ struct lzma_coder_s { } sequence; /// True if Block encoder has been initialized by - /// lzma_stream_encoder_init() or stream_encoder_update() + /// stream_encoder_init() or stream_encoder_update() /// and thus doesn't need to be initialized in stream_encode(). bool block_encoder_is_initialized; @@ -60,7 +59,7 @@ struct lzma_coder_s { static lzma_ret -block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +block_encoder_init(lzma_coder *coder, const lzma_allocator *allocator) { // Prepare the Block options. Even though Block encoder doesn't need // compressed_size, uncompressed_size, and header_size to be @@ -79,7 +78,7 @@ block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, +stream_encode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) @@ -126,7 +125,7 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, } // Initialize the Block encoder unless it was already - // initialized by lzma_stream_encoder_init() or + // initialized by stream_encoder_init() or // stream_encoder_update(). if (!coder->block_encoder_is_initialized) return_if_error(block_encoder_init(coder, allocator)); @@ -147,11 +146,12 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, } case SEQ_BLOCK_ENCODE: { - static const lzma_action convert[4] = { + static const lzma_action convert[LZMA_ACTION_MAX + 1] = { LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FINISH, LZMA_FINISH, + LZMA_FINISH, }; const lzma_ret ret = coder->block_encoder.code( @@ -209,7 +209,7 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +stream_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->block_encoder, allocator); lzma_next_end(&coder->index_encoder, allocator); @@ -224,7 +224,7 @@ stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator, +stream_encoder_update(lzma_coder *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters) { @@ -262,11 +262,11 @@ stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator, } -extern lzma_ret -lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +static lzma_ret +stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters, lzma_check check) { - lzma_next_coder_init(&lzma_stream_encoder_init, next, allocator); + lzma_next_coder_init(&stream_encoder_init, next, allocator); if (filters == NULL) return LZMA_PROG_ERROR; @@ -320,11 +320,12 @@ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, const lzma_filter *filters, lzma_check check) { - lzma_next_strm_init(lzma_stream_encoder_init, strm, filters, check); + lzma_next_strm_init(stream_encoder_init, strm, filters, check); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; diff --git a/src/liblzma/common/stream_encoder_mt.c b/src/liblzma/common/stream_encoder_mt.c new file mode 100644 index 000000000000..9780ed04cac4 --- /dev/null +++ b/src/liblzma/common/stream_encoder_mt.c @@ -0,0 +1,1131 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder_mt.c +/// \brief Multithreaded .xz Stream encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" +#include "easy_preset.h" +#include "block_encoder.h" +#include "block_buffer_encoder.h" +#include "index_encoder.h" +#include "outqueue.h" + + +/// Maximum supported block size. This makes it simpler to prevent integer +/// overflows if we are given unusually large block size. +#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX) + + +typedef enum { + /// Waiting for work. + THR_IDLE, + + /// Encoding is in progress. + THR_RUN, + + /// Encoding is in progress but no more input data will + /// be read. + THR_FINISH, + + /// The main thread wants the thread to stop whatever it was doing + /// but not exit. + THR_STOP, + + /// The main thread wants the thread to exit. We could use + /// cancellation but since there's stopped anyway, this is lazier. + THR_EXIT, + +} worker_state; + + +typedef struct worker_thread_s worker_thread; +struct worker_thread_s { + worker_state state; + + /// Input buffer of coder->block_size bytes. The main thread will + /// put new input into this and update in_size accordingly. Once + /// no more input is coming, state will be set to THR_FINISH. + uint8_t *in; + + /// Amount of data available in the input buffer. This is modified + /// only by the main thread. + size_t in_size; + + /// Output buffer for this thread. This is set by the main + /// thread every time a new Block is started with this thread + /// structure. + lzma_outbuf *outbuf; + + /// Pointer to the main structure is needed when putting this + /// thread back to the stack of free threads. + lzma_coder *coder; + + /// The allocator is set by the main thread. Since a copy of the + /// pointer is kept here, the application must not change the + /// allocator before calling lzma_end(). + const lzma_allocator *allocator; + + /// Amount of uncompressed data that has already been compressed. + uint64_t progress_in; + + /// Amount of compressed data that is ready. + uint64_t progress_out; + + /// Block encoder + lzma_next_coder block_encoder; + + /// Compression options for this Block + lzma_block block_options; + + /// Next structure in the stack of free worker threads. + worker_thread *next; + + mythread_mutex mutex; + mythread_cond cond; + + /// The ID of this thread is used to join the thread + /// when it's not needed anymore. + mythread thread_id; +}; + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + } sequence; + + /// Start a new Block every block_size bytes of input unless + /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier. + size_t block_size; + + /// The filter chain currently in use + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Index encoder + lzma_next_coder index_encoder; + + + /// Stream Flags for encoding the Stream Header and Stream Footer. + lzma_stream_flags stream_flags; + + /// Buffer to hold Stream Header and Stream Footer. + uint8_t header[LZMA_STREAM_HEADER_SIZE]; + + /// Read position in header[] + size_t header_pos; + + + /// Output buffer queue for compressed data + lzma_outq outq; + + + /// Maximum wait time if cannot use all the input and cannot + /// fill the output buffer. This is in milliseconds. + uint32_t timeout; + + + /// Error code from a worker thread + lzma_ret thread_error; + + /// Array of allocated thread-specific structures + worker_thread *threads; + + /// Number of structures in "threads" above. This is also the + /// number of threads that will be created at maximum. + uint32_t threads_max; + + /// Number of thread structures that have been initialized, and + /// thus the number of worker threads actually created so far. + uint32_t threads_initialized; + + /// Stack of free threads. When a thread finishes, it puts itself + /// back into this stack. This starts as empty because threads + /// are created only when actually needed. + worker_thread *threads_free; + + /// The most recent worker thread to which the main thread writes + /// the new input from the application. + worker_thread *thr; + + + /// Amount of uncompressed data in Blocks that have already + /// been finished. + uint64_t progress_in; + + /// Amount of compressed data in Stream Header + Blocks that + /// have already been finished. + uint64_t progress_out; + + + mythread_mutex mutex; + mythread_cond cond; +}; + + +/// Tell the main thread that something has gone wrong. +static void +worker_error(worker_thread *thr, lzma_ret ret) +{ + assert(ret != LZMA_OK); + assert(ret != LZMA_STREAM_END); + + mythread_sync(thr->coder->mutex) { + if (thr->coder->thread_error == LZMA_OK) + thr->coder->thread_error = ret; + + mythread_cond_signal(&thr->coder->cond); + } + + return; +} + + +static worker_state +worker_encode(worker_thread *thr, worker_state state) +{ + assert(thr->progress_in == 0); + assert(thr->progress_out == 0); + + // Set the Block options. + thr->block_options = (lzma_block){ + .version = 0, + .check = thr->coder->stream_flags.check, + .compressed_size = thr->coder->outq.buf_size_max, + .uncompressed_size = thr->coder->block_size, + + // TODO: To allow changing the filter chain, the filters + // array must be copied to each worker_thread. + .filters = thr->coder->filters, + }; + + // Calculate maximum size of the Block Header. This amount is + // reserved in the beginning of the buffer so that Block Header + // along with Compressed Size and Uncompressed Size can be + // written there. + lzma_ret ret = lzma_block_header_size(&thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + // Initialize the Block encoder. + ret = lzma_block_encoder_init(&thr->block_encoder, + thr->allocator, &thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + size_t in_pos = 0; + size_t in_size = 0; + + thr->outbuf->size = thr->block_options.header_size; + const size_t out_size = thr->coder->outq.buf_size_max; + + do { + mythread_sync(thr->mutex) { + // Store in_pos and out_pos into *thr so that + // an application may read them via + // lzma_get_progress() to get progress information. + // + // NOTE: These aren't updated when the encoding + // finishes. Instead, the final values are taken + // later from thr->outbuf. + thr->progress_in = in_pos; + thr->progress_out = thr->outbuf->size; + + while (in_size == thr->in_size + && thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + // Return if we were asked to stop or exit. + if (state >= THR_STOP) + return state; + + lzma_action action = state == THR_FINISH + ? LZMA_FINISH : LZMA_RUN; + + // Limit the amount of input given to the Block encoder + // at once. This way this thread can react fairly quickly + // if the main thread wants us to stop or exit. + static const size_t in_chunk_max = 16384; + size_t in_limit = in_size; + if (in_size - in_pos > in_chunk_max) { + in_limit = in_pos + in_chunk_max; + action = LZMA_RUN; + } + + ret = thr->block_encoder.code( + thr->block_encoder.coder, thr->allocator, + thr->in, &in_pos, in_limit, thr->outbuf->buf, + &thr->outbuf->size, out_size, action); + } while (ret == LZMA_OK && thr->outbuf->size < out_size); + + switch (ret) { + case LZMA_STREAM_END: + assert(state == THR_FINISH); + + // Encode the Block Header. By doing it after + // the compression, we can store the Compressed Size + // and Uncompressed Size fields. + ret = lzma_block_header_encode(&thr->block_options, + thr->outbuf->buf); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + break; + + case LZMA_OK: + // The data was incompressible. Encode it using uncompressed + // LZMA2 chunks. + // + // First wait that we have gotten all the input. + mythread_sync(thr->mutex) { + while (thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + if (state >= THR_STOP) + return state; + + // Do the encoding. This takes care of the Block Header too. + thr->outbuf->size = 0; + ret = lzma_block_uncomp_encode(&thr->block_options, + thr->in, in_size, thr->outbuf->buf, + &thr->outbuf->size, out_size); + + // It shouldn't fail. + if (ret != LZMA_OK) { + worker_error(thr, LZMA_PROG_ERROR); + return THR_STOP; + } + + break; + + default: + worker_error(thr, ret); + return THR_STOP; + } + + // Set the size information that will be read by the main thread + // to write the Index field. + thr->outbuf->unpadded_size + = lzma_block_unpadded_size(&thr->block_options); + assert(thr->outbuf->unpadded_size != 0); + thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size; + + return THR_FINISH; +} + + +static MYTHREAD_RET_TYPE +worker_start(void *thr_ptr) +{ + worker_thread *thr = thr_ptr; + worker_state state = THR_IDLE; // Init to silence a warning + + while (true) { + // Wait for work. + mythread_sync(thr->mutex) { + while (true) { + // The thread is already idle so if we are + // requested to stop, just set the state. + if (thr->state == THR_STOP) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + + state = thr->state; + if (state != THR_IDLE) + break; + + mythread_cond_wait(&thr->cond, &thr->mutex); + } + } + + assert(state != THR_IDLE); + assert(state != THR_STOP); + + if (state <= THR_FINISH) + state = worker_encode(thr, state); + + if (state == THR_EXIT) + break; + + // Mark the thread as idle unless the main thread has + // told us to exit. Signal is needed for the case + // where the main thread is waiting for the threads to stop. + mythread_sync(thr->mutex) { + if (thr->state != THR_EXIT) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + } + + mythread_sync(thr->coder->mutex) { + // Mark the output buffer as finished if + // no errors occurred. + thr->outbuf->finished = state == THR_FINISH; + + // Update the main progress info. + thr->coder->progress_in + += thr->outbuf->uncompressed_size; + thr->coder->progress_out += thr->outbuf->size; + thr->progress_in = 0; + thr->progress_out = 0; + + // Return this thread to the stack of free threads. + thr->next = thr->coder->threads_free; + thr->coder->threads_free = thr; + + mythread_cond_signal(&thr->coder->cond); + } + } + + // Exiting, free the resources. + mythread_mutex_destroy(&thr->mutex); + mythread_cond_destroy(&thr->cond); + + lzma_next_end(&thr->block_encoder, thr->allocator); + lzma_free(thr->in, thr->allocator); + return MYTHREAD_RET_VALUE; +} + + +/// Make the threads stop but not exit. Optionally wait for them to stop. +static void +threads_stop(lzma_coder *coder, bool wait_for_threads) +{ + // Tell the threads to stop. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_STOP; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + if (!wait_for_threads) + return; + + // Wait for the threads to settle in the idle state. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + while (coder->threads[i].state != THR_IDLE) + mythread_cond_wait(&coder->threads[i].cond, + &coder->threads[i].mutex); + } + } + + return; +} + + +/// Stop the threads and free the resources associated with them. +/// Wait until the threads have exited. +static void +threads_end(lzma_coder *coder, const lzma_allocator *allocator) +{ + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_EXIT; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + int ret = mythread_join(coder->threads[i].thread_id); + assert(ret == 0); + (void)ret; + } + + lzma_free(coder->threads, allocator); + return; +} + + +/// Initialize a new worker_thread structure and create a new thread. +static lzma_ret +initialize_new_thread(lzma_coder *coder, const lzma_allocator *allocator) +{ + worker_thread *thr = &coder->threads[coder->threads_initialized]; + + thr->in = lzma_alloc(coder->block_size, allocator); + if (thr->in == NULL) + return LZMA_MEM_ERROR; + + if (mythread_mutex_init(&thr->mutex)) + goto error_mutex; + + if (mythread_cond_init(&thr->cond)) + goto error_cond; + + thr->state = THR_IDLE; + thr->allocator = allocator; + thr->coder = coder; + thr->progress_in = 0; + thr->progress_out = 0; + thr->block_encoder = LZMA_NEXT_CODER_INIT; + + if (mythread_create(&thr->thread_id, &worker_start, thr)) + goto error_thread; + + ++coder->threads_initialized; + coder->thr = thr; + + return LZMA_OK; + +error_thread: + mythread_cond_destroy(&thr->cond); + +error_cond: + mythread_mutex_destroy(&thr->mutex); + +error_mutex: + lzma_free(thr->in, allocator); + return LZMA_MEM_ERROR; +} + + +static lzma_ret +get_thread(lzma_coder *coder, const lzma_allocator *allocator) +{ + // If there are no free output subqueues, there is no + // point to try getting a thread. + if (!lzma_outq_has_buf(&coder->outq)) + return LZMA_OK; + + // If there is a free structure on the stack, use it. + mythread_sync(coder->mutex) { + if (coder->threads_free != NULL) { + coder->thr = coder->threads_free; + coder->threads_free = coder->threads_free->next; + } + } + + if (coder->thr == NULL) { + // If there are no uninitialized structures left, return. + if (coder->threads_initialized == coder->threads_max) + return LZMA_OK; + + // Initialize a new thread. + return_if_error(initialize_new_thread(coder, allocator)); + } + + // Reset the parts of the thread state that have to be done + // in the main thread. + mythread_sync(coder->thr->mutex) { + coder->thr->state = THR_RUN; + coder->thr->in_size = 0; + coder->thr->outbuf = lzma_outq_get_buf(&coder->outq); + mythread_cond_signal(&coder->thr->cond); + } + + return LZMA_OK; +} + + +static lzma_ret +stream_encode_in(lzma_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, lzma_action action) +{ + while (*in_pos < in_size + || (coder->thr != NULL && action != LZMA_RUN)) { + if (coder->thr == NULL) { + // Get a new thread. + const lzma_ret ret = get_thread(coder, allocator); + if (coder->thr == NULL) + return ret; + } + + // Copy the input data to thread's buffer. + size_t thr_in_size = coder->thr->in_size; + lzma_bufcpy(in, in_pos, in_size, coder->thr->in, + &thr_in_size, coder->block_size); + + // Tell the Block encoder to finish if + // - it has got block_size bytes of input; or + // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH, + // or LZMA_FULL_BARRIER was used. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + const bool finish = thr_in_size == coder->block_size + || (*in_pos == in_size && action != LZMA_RUN); + + bool block_error = false; + + mythread_sync(coder->thr->mutex) { + if (coder->thr->state == THR_IDLE) { + // Something has gone wrong with the Block + // encoder. It has set coder->thread_error + // which we will read a few lines later. + block_error = true; + } else { + // Tell the Block encoder its new amount + // of input and update the state if needed. + coder->thr->in_size = thr_in_size; + + if (finish) + coder->thr->state = THR_FINISH; + + mythread_cond_signal(&coder->thr->cond); + } + } + + if (block_error) { + lzma_ret ret; + + mythread_sync(coder->mutex) { + ret = coder->thread_error; + } + + return ret; + } + + if (finish) + coder->thr = NULL; + } + + return LZMA_OK; +} + + +/// Wait until more input can be consumed, more output can be read, or +/// an optional timeout is reached. +static bool +wait_for_work(lzma_coder *coder, mythread_condtime *wait_abs, + bool *has_blocked, bool has_input) +{ + if (coder->timeout != 0 && !*has_blocked) { + // Every time when stream_encode_mt() is called via + // lzma_code(), *has_blocked starts as false. We set it + // to true here and calculate the absolute time when + // we must return if there's nothing to do. + // + // The idea of *has_blocked is to avoid unneeded calls + // to mythread_condtime_set(), which may do a syscall + // depending on the operating system. + *has_blocked = true; + mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); + } + + bool timed_out = false; + + mythread_sync(coder->mutex) { + // There are four things that we wait. If one of them + // becomes possible, we return. + // - If there is input left, we need to get a free + // worker thread and an output buffer for it. + // - Data ready to be read from the output queue. + // - A worker thread indicates an error. + // - Time out occurs. + while ((!has_input || coder->threads_free == NULL + || !lzma_outq_has_buf(&coder->outq)) + && !lzma_outq_is_readable(&coder->outq) + && coder->thread_error == LZMA_OK + && !timed_out) { + if (coder->timeout != 0) + timed_out = mythread_cond_timedwait( + &coder->cond, &coder->mutex, + wait_abs) != 0; + else + mythread_cond_wait(&coder->cond, + &coder->mutex); + } + } + + return timed_out; +} + + +static lzma_ret +stream_encode_mt(lzma_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + lzma_bufcpy(coder->header, &coder->header_pos, + sizeof(coder->header), + out, out_pos, out_size); + if (coder->header_pos < sizeof(coder->header)) + return LZMA_OK; + + coder->header_pos = 0; + coder->sequence = SEQ_BLOCK; + + // Fall through + + case SEQ_BLOCK: { + // Initialized to silence warnings. + lzma_vli unpadded_size = 0; + lzma_vli uncompressed_size = 0; + lzma_ret ret = LZMA_OK; + + // These are for wait_for_work(). + bool has_blocked = false; + mythread_condtime wait_abs; + + while (true) { + mythread_sync(coder->mutex) { + // Check for Block encoder errors. + ret = coder->thread_error; + if (ret != LZMA_OK) { + assert(ret != LZMA_STREAM_END); + break; + } + + // Try to read compressed data to out[]. + ret = lzma_outq_read(&coder->outq, + out, out_pos, out_size, + &unpadded_size, + &uncompressed_size); + } + + if (ret == LZMA_STREAM_END) { + // End of Block. Add it to the Index. + ret = lzma_index_append(coder->index, + allocator, unpadded_size, + uncompressed_size); + + // If we didn't fill the output buffer yet, + // try to read more data. Maybe the next + // outbuf has been finished already too. + if (*out_pos < out_size) + continue; + } + + if (ret != LZMA_OK) { + // coder->thread_error was set or + // lzma_index_append() failed. + threads_stop(coder, false); + return ret; + } + + // Try to give uncompressed data to a worker thread. + ret = stream_encode_in(coder, allocator, + in, in_pos, in_size, action); + if (ret != LZMA_OK) { + threads_stop(coder, false); + return ret; + } + + // See if we should wait or return. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + if (*in_pos == in_size) { + // LZMA_RUN: More data is probably coming + // so return to let the caller fill the + // input buffer. + if (action == LZMA_RUN) + return LZMA_OK; + + // LZMA_FULL_BARRIER: The same as with + // LZMA_RUN but tell the caller that the + // barrier was completed. + if (action == LZMA_FULL_BARRIER) + return LZMA_STREAM_END; + + // Finishing or flushing isn't completed until + // all input data has been encoded and copied + // to the output buffer. + if (lzma_outq_is_empty(&coder->outq)) { + // LZMA_FINISH: Continue to encode + // the Index field. + if (action == LZMA_FINISH) + break; + + // LZMA_FULL_FLUSH: Return to tell + // the caller that flushing was + // completed. + if (action == LZMA_FULL_FLUSH) + return LZMA_STREAM_END; + } + } + + // Return if there is no output space left. + // This check must be done after testing the input + // buffer, because we might want to use a different + // return code. + if (*out_pos == out_size) + return LZMA_OK; + + // Neither in nor out has been used completely. + // Wait until there's something we can do. + if (wait_for_work(coder, &wait_abs, &has_blocked, + *in_pos < in_size)) + return LZMA_TIMED_OUT; + } + + // All Blocks have been encoded and the threads have stopped. + // Prepare to encode the Index field. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX; + + // Update the progress info to take the Index and + // Stream Footer into account. Those are very fast to encode + // so in terms of progress information they can be thought + // to be ready to be copied out. + coder->progress_out += lzma_index_size(coder->index) + + LZMA_STREAM_HEADER_SIZE; + } + + // Fall through + + case SEQ_INDEX: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + coder->stream_flags.backward_size + = lzma_index_size(coder->index); + if (lzma_stream_footer_encode(&coder->stream_flags, + coder->header) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_STREAM_FOOTER; + } + + // Fall through + + case SEQ_STREAM_FOOTER: + lzma_bufcpy(coder->header, &coder->header_pos, + sizeof(coder->header), + out, out_pos, out_size); + return coder->header_pos < sizeof(coder->header) + ? LZMA_OK : LZMA_STREAM_END; + } + + assert(0); + return LZMA_PROG_ERROR; +} + + +static void +stream_encoder_mt_end(lzma_coder *coder, const lzma_allocator *allocator) +{ + // Threads must be killed before the output queue can be freed. + threads_end(coder, allocator); + lzma_outq_end(&coder->outq, allocator); + + for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(coder->filters[i].options, allocator); + + lzma_next_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + + mythread_cond_destroy(&coder->cond); + mythread_mutex_destroy(&coder->mutex); + + lzma_free(coder, allocator); + return; +} + + +/// Options handling for lzma_stream_encoder_mt_init() and +/// lzma_stream_encoder_mt_memusage() +static lzma_ret +get_options(const lzma_mt *options, lzma_options_easy *opt_easy, + const lzma_filter **filters, uint64_t *block_size, + uint64_t *outbuf_size_max) +{ + // Validate some of the options. + if (options == NULL) + return LZMA_PROG_ERROR; + + if (options->flags != 0 || options->threads == 0 + || options->threads > LZMA_THREADS_MAX) + return LZMA_OPTIONS_ERROR; + + if (options->filters != NULL) { + // Filter chain was given, use it as is. + *filters = options->filters; + } else { + // Use a preset. + if (lzma_easy_preset(opt_easy, options->preset)) + return LZMA_OPTIONS_ERROR; + + *filters = opt_easy->filters; + } + + // Block size + if (options->block_size > 0) { + if (options->block_size > BLOCK_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + *block_size = options->block_size; + } else { + // Determine the Block size from the filter chain. + *block_size = lzma_mt_block_size(*filters); + if (*block_size == 0) + return LZMA_OPTIONS_ERROR; + + assert(*block_size <= BLOCK_SIZE_MAX); + } + + // Calculate the maximum amount output that a single output buffer + // may need to hold. This is the same as the maximum total size of + // a Block. + *outbuf_size_max = lzma_block_buffer_bound64(*block_size); + if (*outbuf_size_max == 0) + return LZMA_MEM_ERROR; + + return LZMA_OK; +} + + +static void +get_progress(lzma_coder *coder, uint64_t *progress_in, uint64_t *progress_out) +{ + // Lock coder->mutex to prevent finishing threads from moving their + // progress info from the worker_thread structure to lzma_coder. + mythread_sync(coder->mutex) { + *progress_in = coder->progress_in; + *progress_out = coder->progress_out; + + for (size_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + *progress_in += coder->threads[i].progress_in; + *progress_out += coder->threads[i] + .progress_out; + } + } + } + + return; +} + + +static lzma_ret +stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_mt *options) +{ + lzma_next_coder_init(&stream_encoder_mt_init, next, allocator); + + // Get the filter chain. + lzma_options_easy easy; + const lzma_filter *filters; + uint64_t block_size; + uint64_t outbuf_size_max; + return_if_error(get_options(options, &easy, &filters, + &block_size, &outbuf_size_max)); + +#if SIZE_MAX < UINT64_MAX + if (block_size > SIZE_MAX) + return LZMA_MEM_ERROR; +#endif + + // Validate the filter chain so that we can give an error in this + // function instead of delaying it to the first call to lzma_code(). + // The memory usage calculation verifies the filter chain as + // a side effect so we take advatange of that. + if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Validate the Check ID. + if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(options->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize the base structure if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + // For the mutex and condition variable initializations + // the error handling has to be done here because + // stream_encoder_mt_end() doesn't know if they have + // already been initialized or not. + if (mythread_mutex_init(&next->coder->mutex)) { + lzma_free(next->coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + if (mythread_cond_init(&next->coder->cond)) { + mythread_mutex_destroy(&next->coder->mutex); + lzma_free(next->coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + next->code = &stream_encode_mt; + next->end = &stream_encoder_mt_end; + next->get_progress = &get_progress; +// next->update = &stream_encoder_mt_update; + + next->coder->filters[0].id = LZMA_VLI_UNKNOWN; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + memzero(&next->coder->outq, sizeof(next->coder->outq)); + next->coder->threads = NULL; + next->coder->threads_max = 0; + next->coder->threads_initialized = 0; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_size = (size_t)(block_size); + next->coder->thread_error = LZMA_OK; + next->coder->thr = NULL; + + // Allocate the thread-specific base structures. + assert(options->threads > 0); + if (next->coder->threads_max != options->threads) { + threads_end(next->coder, allocator); + + next->coder->threads = NULL; + next->coder->threads_max = 0; + + next->coder->threads_initialized = 0; + next->coder->threads_free = NULL; + + next->coder->threads = lzma_alloc( + options->threads * sizeof(worker_thread), + allocator); + if (next->coder->threads == NULL) + return LZMA_MEM_ERROR; + + next->coder->threads_max = options->threads; + } else { + // Reuse the old structures and threads. Tell the running + // threads to stop and wait until they have stopped. + threads_stop(next->coder, true); + } + + // Output queue + return_if_error(lzma_outq_init(&next->coder->outq, allocator, + outbuf_size_max, options->threads)); + + // Timeout + next->coder->timeout = options->timeout; + + // Free the old filter chain and copy the new one. + for (size_t i = 0; next->coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(next->coder->filters[i].options, allocator); + + return_if_error(lzma_filters_copy( + filters, next->coder->filters, allocator)); + + // Index + lzma_index_end(next->coder->index, allocator); + next->coder->index = lzma_index_init(allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Stream Header + next->coder->stream_flags.version = 0; + next->coder->stream_flags.check = options->check; + return_if_error(lzma_stream_header_encode( + &next->coder->stream_flags, next->coder->header)); + + next->coder->header_pos = 0; + + // Progress info + next->coder->progress_in = 0; + next->coder->progress_out = LZMA_STREAM_HEADER_SIZE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options) +{ + lzma_next_strm_init(stream_encoder_mt_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; +// strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +// This function name is a monster but it's consistent with the older +// monster names. :-( 31 chars is the max that C99 requires so in that +// sense it's not too long. ;-) +extern LZMA_API(uint64_t) +lzma_stream_encoder_mt_memusage(const lzma_mt *options) +{ + lzma_options_easy easy; + const lzma_filter *filters; + uint64_t block_size; + uint64_t outbuf_size_max; + + if (get_options(options, &easy, &filters, &block_size, + &outbuf_size_max) != LZMA_OK) + return UINT64_MAX; + + // Memory usage of the input buffers + const uint64_t inbuf_memusage = options->threads * block_size; + + // Memory usage of the filter encoders + uint64_t filters_memusage = lzma_raw_encoder_memusage(filters); + if (filters_memusage == UINT64_MAX) + return UINT64_MAX; + + filters_memusage *= options->threads; + + // Memory usage of the output queue + const uint64_t outq_memusage = lzma_outq_memusage( + outbuf_size_max, options->threads); + if (outq_memusage == UINT64_MAX) + return UINT64_MAX; + + // Sum them with overflow checking. + uint64_t total_memusage = LZMA_MEMUSAGE_BASE + sizeof(lzma_coder) + + options->threads * sizeof(worker_thread); + + if (UINT64_MAX - total_memusage < inbuf_memusage) + return UINT64_MAX; + + total_memusage += inbuf_memusage; + + if (UINT64_MAX - total_memusage < filters_memusage) + return UINT64_MAX; + + total_memusage += filters_memusage; + + if (UINT64_MAX - total_memusage < outq_memusage) + return UINT64_MAX; + + return total_memusage + outq_memusage; +} |