-
Notifications
You must be signed in to change notification settings - Fork 1
/
match_tpl.h
291 lines (261 loc) · 9.67 KB
/
match_tpl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/* match_tpl.h -- find longest match template for compare256 variants
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
* Portions copyright (C) 2014-2021 Konstantin Nosov
* Fast-zlib optimized longest_match
* https://github.com/gildor2/fast_zlib
*/
#include "zbuild.h"
#include "deflate.h"
#include "functable.h"
#ifndef MATCH_TPL_H
#define MATCH_TPL_H
#ifdef UNALIGNED_OK
# ifdef UNALIGNED64_OK
typedef uint64_t bestcmp_t;
# else
typedef uint32_t bestcmp_t;
# endif
#else
typedef uint8_t bestcmp_t;
#endif
#define EARLY_EXIT_TRIGGER_LEVEL 5
#endif
/* Set match_start to the longest match starting at the given string and
* return its length. Matches shorter or equal to prev_length are discarded,
* in which case the result is equal to prev_length and match_start is garbage.
*
* IN assertions: cur_match is the head of the hash chain for the current
* string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
* OUT assertion: the match length is not greater than s->lookahead
*/
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
unsigned int strstart = s->strstart;
const unsigned wmask = s->w_mask;
unsigned char *window = s->window;
unsigned char *scan = window + strstart;
Z_REGISTER unsigned char *mbase_start = window;
Z_REGISTER unsigned char *mbase_end;
const Pos *prev = s->prev;
Pos limit;
#ifdef LONGEST_MATCH_SLOW
Pos limit_base;
#else
int32_t early_exit;
#endif
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
bestcmp_t scan_end;
#ifndef UNALIGNED_OK
bestcmp_t scan_end0;
#else
bestcmp_t scan_start;
#endif
#define GOTO_NEXT_CHAIN \
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
continue; \
return best_len;
/* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
Assert(STD_MAX_MATCH == 258, "Code too clever");
best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
/* Calculate read offset which should only extend an extra byte
* to find the next best match length.
*/
offset = best_len-1;
#ifdef UNALIGNED_OK
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
#ifdef UNALIGNED64_OK
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif
scan_end = *(bestcmp_t *)(scan+offset);
#ifndef UNALIGNED_OK
scan_end0 = *(bestcmp_t *)(scan+offset+1);
#else
scan_start = *(bestcmp_t *)(scan);
#endif
mbase_end = (mbase_start+offset);
/* Do not waste too much time if we already have a good match */
chain_length = s->max_chain_length;
if (best_len >= s->good_match)
chain_length >>= 2;
nice_match = (uint32_t)s->nice_match;
/* Stop when cur_match becomes <= limit. To simplify the code,
* we prevent matches with the string of window index 0
*/
limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
#ifdef LONGEST_MATCH_SLOW
limit_base = limit;
if (best_len >= STD_MIN_MATCH) {
/* We're continuing search (lazy evaluation). */
uint32_t i, hash;
Pos pos;
/* Find a most distant chain starting from scan with index=1 (index=0 corresponds
* to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
* these strings are not yet inserted into the hash table.
*/
hash = s->update_hash(s, 0, scan[1]);
hash = s->update_hash(s, hash, scan[2]);
for (i = 3; i <= best_len; i++) {
hash = s->update_hash(s, hash, scan[i]);
/* If we're starting with best_len >= 3, we can use offset search. */
pos = s->head[hash];
if (pos < cur_match) {
match_offset = (Pos)(i - 2);
cur_match = pos;
}
}
/* Update offset-dependent variables */
limit = limit_base+match_offset;
if (cur_match <= limit)
goto break_matching;
mbase_start -= match_offset;
mbase_end -= match_offset;
}
#else
early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
#endif
Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
for (;;) {
if (cur_match >= strstart)
break;
/* Skip to next match if the match length cannot increase or if the match length is
* less than 2. Note that the checks below for insufficient lookahead only occur
* occasionally for performance reasons.
* Therefore uninitialized memory will be accessed and conditional jumps will be made
* that depend on those values. However the length of the match is limited to the
* lookahead, so the output of deflate is not affected by the uninitialized values.
*/
#ifdef UNALIGNED_OK
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end &&
*(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start)
break;
GOTO_NEXT_CHAIN;
}
# ifdef UNALIGNED64_OK
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end &&
*(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end &&
*(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start)
break;
GOTO_NEXT_CHAIN;
}
}
#else
for (;;) {
if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 &&
mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1])
break;
GOTO_NEXT_CHAIN;
}
#endif
uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
if (len > best_len) {
uint32_t match_start = cur_match - match_offset;
s->match_start = match_start;
/* Do not look for matches beyond the end of the input. */
if (len > lookahead)
return lookahead;
best_len = len;
if (best_len >= nice_match)
return best_len;
offset = best_len-1;
#ifdef UNALIGNED_OK
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
#ifdef UNALIGNED64_OK
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif
scan_end = *(bestcmp_t *)(scan+offset);
#ifndef UNALIGNED_OK
scan_end0 = *(bestcmp_t *)(scan+offset+1);
#endif
#ifdef LONGEST_MATCH_SLOW
/* Look for a better string offset */
if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
Pos pos, next_pos;
uint32_t i, hash;
unsigned char *scan_endstr;
/* Go back to offset 0 */
cur_match -= match_offset;
match_offset = 0;
next_pos = cur_match;
for (i = 0; i <= len - STD_MIN_MATCH; i++) {
pos = prev[(cur_match + i) & wmask];
if (pos < next_pos) {
/* Hash chain is more distant, use it */
if (pos <= limit_base + i)
goto break_matching;
next_pos = pos;
match_offset = (Pos)i;
}
}
/* Switch cur_match to next_pos chain */
cur_match = next_pos;
/* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
* a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
* us include one more byte into hash - the byte which will be checked
* in main loop now, and which allows to grow match by 1.
*/
scan_endstr = scan + len - (STD_MIN_MATCH+1);
hash = s->update_hash(s, 0, scan_endstr[0]);
hash = s->update_hash(s, hash, scan_endstr[1]);
hash = s->update_hash(s, hash, scan_endstr[2]);
pos = s->head[hash];
if (pos < cur_match) {
match_offset = (Pos)(len - (STD_MIN_MATCH+1));
if (pos <= limit_base + match_offset)
goto break_matching;
cur_match = pos;
}
/* Update offset-dependent variables */
limit = limit_base+match_offset;
mbase_start = window-match_offset;
mbase_end = (mbase_start+offset);
continue;
}
#endif
mbase_end = (mbase_start+offset);
}
#ifndef LONGEST_MATCH_SLOW
else if (UNLIKELY(early_exit)) {
/* The probability of finding a match later if we here is pretty low, so for
* performance it's best to outright stop here for the lower compression levels
*/
break;
}
#endif
GOTO_NEXT_CHAIN;
}
return best_len;
#ifdef LONGEST_MATCH_SLOW
break_matching:
if (best_len < s->lookahead)
return best_len;
return s->lookahead;
#endif
}
#undef LONGEST_MATCH_SLOW
#undef LONGEST_MATCH
#undef COMPARE256
#undef COMPARE258