src/liblzma/lz/lz_decoder.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_decoder.h
/// \brief      LZ out window
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_LZ_DECODER_H
#define LZMA_LZ_DECODER_H

#include "common.h"


typedef struct {
	/// Pointer to the dictionary buffer. It can be an allocated buffer
	/// internal to liblzma, or it can a be a buffer given by the
	/// application when in single-call mode (not implemented yet).
	uint8_t *buf;

	/// Write position in dictionary. The next byte will be written to
	/// buf[pos].
	size_t pos;

	/// Indicates how full the dictionary is. This is used by
	/// dict_is_distance_valid() to detect corrupt files that would
	/// read beyond the beginning of the dictionary.
	size_t full;

	/// Write limit
	size_t limit;

	/// Size of the dictionary
	size_t size;

	/// True when dictionary should be reset before decoding more data.
	bool need_reset;

} lzma_dict;


typedef struct {
	size_t dict_size;
	const uint8_t *preset_dict;
	size_t preset_dict_size;
} lzma_lz_options;


typedef struct {
	/// Data specific to the LZ-based decoder
	lzma_coder *coder;

	/// Function to decode from in[] to *dict
	lzma_ret (*code)(lzma_coder *restrict coder,
			lzma_dict *restrict dict, const uint8_t *restrict in,
			size_t *restrict in_pos, size_t in_size);

	void (*reset)(lzma_coder *coder, const void *options);

	/// Set the uncompressed size
	void (*set_uncompressed)(lzma_coder *coder,
			lzma_vli uncompressed_size);

	/// Free allocated resources
	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);

} lzma_lz_decoder;


#define LZMA_LZ_DECODER_INIT \
	(lzma_lz_decoder){ \
		.coder = NULL, \
		.code = NULL, \
		.reset = NULL, \
		.set_uncompressed = NULL, \
		.end = NULL, \
	}


extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters,
		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
			const lzma_allocator *allocator, const void *options,
			lzma_lz_options *lz_options));

extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);

extern void lzma_lz_decoder_uncompressed(
		lzma_coder *coder, lzma_vli uncompressed_size);


//////////////////////
// Inline functions //
//////////////////////

/// Get a byte from the history buffer.
static inline uint8_t
dict_get(const lzma_dict *const dict, const uint32_t distance)
{
	return dict->buf[dict->pos - distance - 1
			+ (distance < dict->pos ? 0 : dict->size)];
}


/// Test if dictionary is empty.
static inline bool
dict_is_empty(const lzma_dict *const dict)
{
	return dict->full == 0;
}


/// Validate the match distance
static inline bool
dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
{
	return dict->full > distance;
}


/// Repeat *len bytes at distance.
static inline bool
dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
{
	// Don't write past the end of the dictionary.
	const size_t dict_avail = dict->limit - dict->pos;
	uint32_t left = my_min(dict_avail, *len);
	*len -= left;

	// Repeat a block of data from the history. Because memcpy() is faster
	// than copying byte by byte in a loop, the copying process gets split
	// into three cases.
	if (distance < left) {
		// Source and target areas overlap, thus we can't use
		// memcpy() nor even memmove() safely.
		do {
			dict->buf[dict->pos] = dict_get(dict, distance);
			++dict->pos;
		} while (--left > 0);

	} else if (distance < dict->pos) {
		// The easiest and fastest case
		memcpy(dict->buf + dict->pos,
				dict->buf + dict->pos - distance - 1,
				left);
		dict->pos += left;

	} else {
		// The bigger the dictionary, the more rare this
		// case occurs. We need to "wrap" the dict, thus
		// we might need two memcpy() to copy all the data.
		assert(dict->full == dict->size);
		const uint32_t copy_pos
				= dict->pos - distance - 1 + dict->size;
		uint32_t copy_size = dict->size - copy_pos;

		if (copy_size < left) {
			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
					copy_size);
			dict->pos += copy_size;
			copy_size = left - copy_size;
			memcpy(dict->buf + dict->pos, dict->buf, copy_size);
			dict->pos += copy_size;
		} else {
			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
					left);
			dict->pos += left;
		}
	}

	// Update how full the dictionary is.
	if (dict->full < dict->pos)
		dict->full = dict->pos;

	return unlikely(*len != 0);
}


/// Puts one byte into the dictionary. Returns true if the dictionary was
/// already full and the byte couldn't be added.
static inline bool
dict_put(lzma_dict *dict, uint8_t byte)
{
	if (unlikely(dict->pos == dict->limit))
		return true;

	dict->buf[dict->pos++] = byte;

	if (dict->pos > dict->full)
		dict->full = dict->pos;

	return false;
}


/// Copies arbitrary amount of data into the dictionary.
static inline void
dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
		size_t *restrict in_pos, size_t in_size,
		size_t *restrict left)
{
	// NOTE: If we are being given more data than the size of the
	// dictionary, it could be possible to optimize the LZ decoder
	// so that not everything needs to go through the dictionary.
	// This shouldn't be very common thing in practice though, and
	// the slowdown of one extra memcpy() isn't bad compared to how
	// much time it would have taken if the data were compressed.

	if (in_size - *in_pos > *left)
		in_size = *in_pos + *left;

	*left -= lzma_bufcpy(in, in_pos, in_size,
			dict->buf, &dict->pos, dict->limit);

	if (dict->pos > dict->full)
		dict->full = dict->pos;

	return;
}


static inline void
dict_reset(lzma_dict *dict)
{
	dict->need_reset = true;
	return;
}

#endif