diff options
Diffstat (limited to 'contrib/xz/src/liblzma/lzma/lzma_encoder.c')
-rw-r--r-- | contrib/xz/src/liblzma/lzma/lzma_encoder.c | 201 |
1 files changed, 154 insertions, 47 deletions
diff --git a/contrib/xz/src/liblzma/lzma/lzma_encoder.c b/contrib/xz/src/liblzma/lzma/lzma_encoder.c index 07d2b87bc65b..559c63eda1d2 100644 --- a/contrib/xz/src/liblzma/lzma/lzma_encoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma_encoder.c @@ -268,6 +268,7 @@ static bool encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) { assert(mf_position(mf) == 0); + assert(coder->uncomp_size == 0); if (mf->read_pos == mf->read_limit) { if (mf->action == LZMA_RUN) @@ -283,6 +284,7 @@ encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) mf->read_ahead = 0; rc_bit(&coder->rc, &coder->is_match[0][0], 0); rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); + ++coder->uncomp_size; } // Initialization is done (except if empty file). @@ -317,21 +319,28 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, if (!coder->is_initialized && !encode_init(coder, mf)) return LZMA_OK; - // Get the lowest bits of the uncompressed offset from the LZ layer. - uint32_t position = mf_position(mf); + // Encode pending output bytes from the range encoder. + // At the start of the stream, encode_init() encodes one literal. + // Later there can be pending output only with LZMA1 because LZMA2 + // ensures that there is always enough output space. Thus when using + // LZMA2, rc_encode() calls in this function will always return false. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // We don't get here with LZMA2. + assert(limit == UINT32_MAX); + return LZMA_OK; + } - while (true) { - // Encode pending bits, if any. Calling this before encoding - // the next symbol is needed only with plain LZMA, since - // LZMA2 always provides big enough buffer to flush - // everything out from the range encoder. For the same reason, - // rc_encode() never returns true when this function is used - // as part of LZMA2 encoder. - if (rc_encode(&coder->rc, out, out_pos, out_size)) { - assert(limit == UINT32_MAX); - return LZMA_OK; - } + // If the range encoder was flushed in an earlier call to this + // function but there wasn't enough output buffer space, those + // bytes would have now been encoded by the above rc_encode() call + // and the stream has now been finished. This can only happen with + // LZMA1 as LZMA2 always provides enough output buffer space. + if (coder->is_flushed) { + assert(limit == UINT32_MAX); + return LZMA_STREAM_END; + } + while (true) { // With LZMA2 we need to take care that compressed size of // a chunk doesn't get too big. // FIXME? Check if this could be improved. @@ -365,37 +374,64 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, if (coder->fast_mode) lzma_lzma_optimum_fast(coder, mf, &back, &len); else - lzma_lzma_optimum_normal( - coder, mf, &back, &len, position); - - encode_symbol(coder, mf, back, len, position); + lzma_lzma_optimum_normal(coder, mf, &back, &len, + (uint32_t)(coder->uncomp_size)); + + encode_symbol(coder, mf, back, len, + (uint32_t)(coder->uncomp_size)); + + // If output size limiting is active (out_limit != 0), check + // if encoding this LZMA symbol would make the output size + // exceed the specified limit. + if (coder->out_limit != 0 && rc_encode_dummy( + &coder->rc, coder->out_limit)) { + // The most recent LZMA symbol would make the output + // too big. Throw it away. + rc_forget(&coder->rc); + + // FIXME: Tell the LZ layer to not read more input as + // it would be waste of time. This doesn't matter if + // output-size-limited encoding is done with a single + // call though. - position += len; - } - - if (!coder->is_flushed) { - coder->is_flushed = true; - - // We don't support encoding plain LZMA streams without EOPM, - // and LZMA2 doesn't use EOPM at LZMA level. - if (limit == UINT32_MAX) - encode_eopm(coder, position); + break; + } - // Flush the remaining bytes from the range encoder. - rc_flush(&coder->rc); + // This symbol will be encoded so update the uncompressed size. + coder->uncomp_size += len; - // Copy the remaining bytes to the output buffer. If there - // isn't enough output space, we will copy out the remaining - // bytes on the next call to this function by using - // the rc_encode() call in the encoding loop above. + // Encode the LZMA symbol. if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // Once again, this can only happen with LZMA1. assert(limit == UINT32_MAX); return LZMA_OK; } } - // Make it ready for the next LZMA2 chunk. - coder->is_flushed = false; + // Make the uncompressed size available to the application. + if (coder->uncomp_size_ptr != NULL) + *coder->uncomp_size_ptr = coder->uncomp_size; + + // LZMA2 doesn't use EOPM at LZMA level. + // + // Plain LZMA streams without EOPM aren't supported except when + // output size limiting is enabled. + if (coder->use_eopm) + encode_eopm(coder, (uint32_t)(coder->uncomp_size)); + + // Flush the remaining bytes from the range encoder. + rc_flush(&coder->rc); + + // Copy the remaining bytes to the output buffer. If there + // isn't enough output space, we will copy out the remaining + // bytes on the next call to this function. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // This cannot happen with LZMA2. + assert(limit == UINT32_MAX); + + coder->is_flushed = true; + return LZMA_OK; + } return LZMA_STREAM_END; } @@ -414,6 +450,23 @@ lzma_encode(void *coder, lzma_mf *restrict mf, } +static lzma_ret +lzma_lzma_set_out_limit( + void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit) +{ + // Minimum output size is 5 bytes but that cannot hold any output + // so we use 6 bytes. + if (out_limit < 6) + return LZMA_BUF_ERROR; + + lzma_lzma1_encoder *coder = coder_ptr; + coder->out_limit = out_limit; + coder->uncomp_size_ptr = uncomp_size; + coder->use_eopm = false; + return LZMA_OK; +} + + //////////////////// // Initialization // //////////////////// @@ -440,7 +493,8 @@ set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) lz_options->dict_size = options->dict_size; lz_options->after_size = LOOP_INPUT_MAX; lz_options->match_len_max = MATCH_LEN_MAX; - lz_options->nice_len = options->nice_len; + lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf), + options->nice_len); lz_options->match_finder = options->mf; lz_options->depth = options->depth; lz_options->preset_dict = options->preset_dict; @@ -546,10 +600,13 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder, extern lzma_ret -lzma_lzma_encoder_create(void **coder_ptr, - const lzma_allocator *allocator, - const lzma_options_lzma *options, lzma_lz_options *lz_options) +lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator, + lzma_vli id, const lzma_options_lzma *options, + lzma_lz_options *lz_options) { + assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT + || id == LZMA_FILTER_LZMA2); + // Allocate lzma_lzma1_encoder if it wasn't already allocated. if (*coder_ptr == NULL) { *coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator); @@ -559,10 +616,9 @@ lzma_lzma_encoder_create(void **coder_ptr, lzma_lzma1_encoder *coder = *coder_ptr; - // Set compression mode. We haven't validates the options yet, - // but it's OK here, since nothing bad happens with invalid - // options in the code below, and they will get rejected by - // lzma_lzma_encoder_reset() call at the end of this function. + // Set compression mode. Note that we haven't validated the options + // yet. Invalid options will get rejected by lzma_lzma_encoder_reset() + // call at the end of this function. switch (options->mode) { case LZMA_MODE_FAST: coder->fast_mode = true; @@ -573,6 +629,18 @@ lzma_lzma_encoder_create(void **coder_ptr, // Set dist_table_size. // Round the dictionary size up to next 2^n. + // + // Currently the maximum encoder dictionary size + // is 1.5 GiB due to lz_encoder.c and here we need + // to be below 2 GiB to make the rounded up value + // fit in an uint32_t and avoid an infinite while-loop + // (and undefined behavior due to a too large shift). + // So do the same check as in LZ encoder, + // limiting to 1.5 GiB. + if (options->dict_size > (UINT32_C(1) << 30) + + (UINT32_C(1) << 29)) + return LZMA_OPTIONS_ERROR; + uint32_t log_size = 0; while ((UINT32_C(1) << log_size) < options->dict_size) ++log_size; @@ -580,10 +648,14 @@ lzma_lzma_encoder_create(void **coder_ptr, coder->dist_table_size = log_size * 2; // Length encoders' price table size + const uint32_t nice_len = my_max( + mf_get_hash_bytes(options->mf), + options->nice_len); + coder->match_len_encoder.table_size - = options->nice_len + 1 - MATCH_LEN_MIN; + = nice_len + 1 - MATCH_LEN_MIN; coder->rep_len_encoder.table_size - = options->nice_len + 1 - MATCH_LEN_MIN; + = nice_len + 1 - MATCH_LEN_MIN; break; } @@ -598,6 +670,37 @@ lzma_lzma_encoder_create(void **coder_ptr, coder->is_initialized = options->preset_dict != NULL && options->preset_dict_size > 0; coder->is_flushed = false; + coder->uncomp_size = 0; + coder->uncomp_size_ptr = NULL; + + // Output size limiting is disabled by default. + coder->out_limit = 0; + + // Determine if end marker is wanted: + // - It is never used with LZMA2. + // - It is always used with LZMA_FILTER_LZMA1 (unless + // lzma_lzma_set_out_limit() is called later). + // - LZMA_FILTER_LZMA1EXT has a flag for it in the options. + coder->use_eopm = (id == LZMA_FILTER_LZMA1); + if (id == LZMA_FILTER_LZMA1EXT) { + // Check if unsupported flags are present. + if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) + return LZMA_OPTIONS_ERROR; + + coder->use_eopm = (options->ext_flags + & LZMA_LZMA1EXT_ALLOW_EOPM) != 0; + + // TODO? As long as there are no filters that change the size + // of the data, it is enough to look at lzma_stream.total_in + // after encoding has been finished to know the uncompressed + // size of the LZMA1 stream. But in the future there could be + // filters that change the size of the data and then total_in + // doesn't work as the LZMA1 stream size might be different + // due to another filter in the chain. The problem is simple + // to solve: Add another flag to ext_flags and then set + // coder->uncomp_size_ptr to the address stored in + // lzma_options_lzma.reserved_ptr2 (or _ptr1). + } set_lz_options(lz_options, options); @@ -607,11 +710,12 @@ lzma_lzma_encoder_create(void **coder_ptr, static lzma_ret lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, - const void *options, lzma_lz_options *lz_options) + lzma_vli id, const void *options, lzma_lz_options *lz_options) { lz->code = &lzma_encode; + lz->set_out_limit = &lzma_lzma_set_out_limit; return lzma_lzma_encoder_create( - &lz->coder, allocator, options, lz_options); + &lz->coder, allocator, id, options, lz_options); } @@ -658,6 +762,9 @@ lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out) { + if (options == NULL) + return LZMA_PROG_ERROR; + const lzma_options_lzma *const opt = options; if (lzma_lzma_lclppb_encode(opt, out)) |